Squashed 'l1/' content from commit 670aa58

git-subtree-dir: l1
git-subtree-split: 670aa582df99e87fca7c247b949baf452e8c234f
This commit is contained in:
giles
2026-02-24 23:07:19 +00:00
commit 80c94ebea7
225 changed files with 57298 additions and 0 deletions

44
streaming/__init__.py Normal file
View File

@@ -0,0 +1,44 @@
"""
Streaming video compositor for real-time effect processing.
This module provides a frame-by-frame streaming architecture that:
- Reads from multiple video sources with automatic looping
- Applies effects inline (no intermediate files)
- Composites layers with time-varying weights
- Outputs to display, file, or stream
Usage:
from streaming import StreamingCompositor, VideoSource, AudioAnalyzer
compositor = StreamingCompositor(
sources=["video1.mp4", "video2.mp4"],
effects_per_source=[...],
compositor_config={...},
)
# With live audio
audio = AudioAnalyzer(device=0)
compositor.run(output="output.mp4", duration=60, audio=audio)
# With preview window
compositor.run(output="preview", duration=60)
Backends:
- numpy: Works everywhere, ~3-5 fps (default)
- glsl: Requires GPU, 30+ fps real-time (future)
"""
from .sources import VideoSource, ImageSource
from .compositor import StreamingCompositor
from .backends import NumpyBackend, get_backend
from .output import DisplayOutput, FileOutput
__all__ = [
"StreamingCompositor",
"VideoSource",
"ImageSource",
"NumpyBackend",
"get_backend",
"DisplayOutput",
"FileOutput",
]

486
streaming/audio.py Normal file
View File

@@ -0,0 +1,486 @@
"""
Live audio analysis for reactive effects.
Provides real-time audio features:
- Energy (RMS amplitude)
- Beat detection
- Frequency bands (bass, mid, high)
"""
import numpy as np
from typing import Optional
import threading
import time
class AudioAnalyzer:
    """
    Real-time audio analyzer using sounddevice.

    Captures audio from microphone/line-in and computes
    features in real-time for effect parameter bindings.

    Example:
        analyzer = AudioAnalyzer(device=0)
        analyzer.start()
        # In compositor loop:
        energy = analyzer.get_energy()
        beat = analyzer.get_beat()
        analyzer.stop()
    """

    def __init__(
        self,
        device: Optional[int] = None,
        sample_rate: int = 44100,
        block_size: int = 1024,
        buffer_seconds: float = 0.5,
    ):
        """
        Initialize audio analyzer.

        Args:
            device: Audio input device index (None = default)
            sample_rate: Audio sample rate
            block_size: Samples per block
            buffer_seconds: Ring buffer duration
        """
        self.sample_rate = sample_rate
        self.block_size = block_size
        self.device = device

        # Ring buffer for recent audio samples (mono, float32).
        buffer_size = int(sample_rate * buffer_seconds)
        self._buffer = np.zeros(buffer_size, dtype=np.float32)
        self._buffer_pos = 0
        # Guards the buffer: written by the sounddevice callback thread,
        # read by the compositor thread.
        self._lock = threading.Lock()

        # Beat detection state
        self._last_energy = 0
        self._energy_history = []
        self._last_beat_time = 0
        self._beat_threshold = 1.5  # Energy ratio for beat detection
        self._min_beat_interval = 0.1  # Min seconds between beats

        # Stream state
        self._stream = None
        self._running = False

    def _audio_callback(self, indata, frames, time_info, status):
        """Called by sounddevice for each audio block; fills the ring buffer."""
        with self._lock:
            # Take the first channel if input is multi-channel.
            data = indata[:, 0] if len(indata.shape) > 1 else indata
            n = len(data)
            if self._buffer_pos + n <= len(self._buffer):
                self._buffer[self._buffer_pos:self._buffer_pos + n] = data
            else:
                # Wrap around the end of the ring buffer.
                first = len(self._buffer) - self._buffer_pos
                self._buffer[self._buffer_pos:] = data[:first]
                self._buffer[:n - first] = data[first:]
            self._buffer_pos = (self._buffer_pos + n) % len(self._buffer)

    def start(self):
        """Start audio capture. No-op (with a warning) if sounddevice is missing."""
        try:
            import sounddevice as sd
        except ImportError:
            print("Warning: sounddevice not installed. Audio analysis disabled.")
            print("Install with: pip install sounddevice")
            return
        self._stream = sd.InputStream(
            device=self.device,
            channels=1,
            samplerate=self.sample_rate,
            blocksize=self.block_size,
            callback=self._audio_callback,
        )
        self._stream.start()
        self._running = True

    def stop(self):
        """Stop audio capture and release the stream."""
        if self._stream:
            self._stream.stop()
            self._stream.close()
            self._stream = None
        self._running = False

    def get_energy(self) -> float:
        """
        Get current audio energy (RMS amplitude).

        Returns:
            Energy value normalized to 0-1 range (approximately)
        """
        with self._lock:
            # Use the most recent samples; clamp to buffer size so a very
            # small buffer_seconds cannot produce an oversized window.
            recent = min(2048, len(self._buffer))
            if self._buffer_pos >= recent:
                data = self._buffer[self._buffer_pos - recent:self._buffer_pos]
            else:
                # Window straddles the wrap point of the ring buffer.
                data = np.concatenate([
                    self._buffer[-(recent - self._buffer_pos):],
                    self._buffer[:self._buffer_pos]
                ])
        # RMS energy
        rms = np.sqrt(np.mean(data ** 2))
        # Normalize (typical mic input is quite low)
        normalized = min(1.0, rms * 10)
        return normalized

    def get_beat(self) -> bool:
        """
        Detect if current moment is a beat.

        Simple onset detection based on energy spikes: a beat is an energy
        rising edge significantly above the recent average, rate-limited
        by _min_beat_interval.

        Returns:
            True if beat detected, False otherwise
        """
        current_energy = self.get_energy()
        now = time.time()
        # Update energy history (bounded to the last 20 readings).
        self._energy_history.append(current_energy)
        if len(self._energy_history) > 20:
            self._energy_history.pop(0)
        # Need enough history before we can compare against an average.
        if len(self._energy_history) < 5:
            self._last_energy = current_energy
            return False
        # Average recent energy (excluding the current reading).
        avg_energy = np.mean(self._energy_history[:-1])
        is_beat = (
            current_energy > avg_energy * self._beat_threshold and
            now - self._last_beat_time > self._min_beat_interval and
            current_energy > self._last_energy  # Rising edge
        )
        if is_beat:
            self._last_beat_time = now
        self._last_energy = current_energy
        return is_beat

    def get_spectrum(self, bands: int = 3) -> np.ndarray:
        """
        Get frequency spectrum divided into bands.

        Args:
            bands: Number of frequency bands (default 3: bass, mid, high)

        Returns:
            Array of band energies, normalized to 0-1
        """
        with self._lock:
            # Use recent samples for FFT; clamp to buffer size (see get_energy).
            n = min(2048, len(self._buffer))
            if self._buffer_pos >= n:
                data = self._buffer[self._buffer_pos - n:self._buffer_pos]
            else:
                data = np.concatenate([
                    self._buffer[-(n - self._buffer_pos):],
                    self._buffer[:self._buffer_pos]
                ])
        # Magnitude spectrum of a Hann-windowed chunk.
        fft = np.abs(np.fft.rfft(data * np.hanning(len(data))))
        # Divide linearly into equal-width bands.
        band_size = len(fft) // bands
        result = np.zeros(bands)
        for i in range(bands):
            start = i * band_size
            end = start + band_size
            result[i] = np.mean(fft[start:end])
        # Normalize so the loudest band is 1.0.
        max_val = np.max(result)
        if max_val > 0:
            result = result / max_val
        return result

    @property
    def is_running(self) -> bool:
        # True between successful start() and stop().
        return self._running

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, *args):
        self.stop()
class FileAudioAnalyzer:
    """
    Audio analyzer that reads from a file (for testing/development).

    Pre-computes analysis and plays back in sync with video.
    """

    def __init__(self, path: str, analysis_data: dict = None):
        """
        Initialize from audio file.

        Args:
            path: Path to audio file
            analysis_data: Pre-computed analysis (times, values, etc.)
        """
        self.path = path
        self.analysis_data = analysis_data or {}
        self._current_time = 0

    def set_time(self, t: float):
        """Set current playback time."""
        self._current_time = t

    def get_energy(self) -> float:
        """Get energy at current time from pre-computed data."""
        energy_track = self.analysis_data.get("energy", {})
        return self._interpolate(energy_track, self._current_time)

    def get_beat(self) -> bool:
        """Check if current time is within 50ms of a pre-computed beat."""
        beat_times = self.analysis_data.get("beats", {}).get("times", [])
        return any(abs(bt - self._current_time) < 0.05 for bt in beat_times)

    def _interpolate(self, track: dict, t: float) -> float:
        """Linearly interpolate the track's value at time t (clamped at ends)."""
        times = track.get("times", [])
        values = track.get("values", [])
        if not times or not values:
            return 0.0
        # Clamp outside the covered time range.
        if t <= times[0]:
            return values[0]
        if t >= times[-1]:
            return values[-1]
        # Locate the bracketing segment and blend its endpoints.
        idx = 0
        while idx < len(times) - 1:
            lo, hi = times[idx], times[idx + 1]
            if lo <= t <= hi:
                frac = (t - lo) / (hi - lo)
                return values[idx] * (1 - frac) + values[idx + 1] * frac
            idx += 1
        return values[-1]

    @property
    def is_running(self) -> bool:
        # File-based analysis is always "running".
        return True
class StreamingAudioAnalyzer:
    """
    Real-time audio analyzer that streams from a file.

    Reads audio in sync with video time and computes features on-the-fly.
    No pre-computation needed - analysis happens as frames are processed.

    The caller drives the clock with set_time(); get_energy()/get_beat()/
    get_spectrum() then analyze the audio around that timestamp.
    """

    def __init__(self, path: str, sample_rate: int = 22050, hop_length: int = 512):
        """
        Initialize streaming audio analyzer.

        Args:
            path: Path to audio file
            sample_rate: Sample rate for analysis
            hop_length: Hop length for feature extraction

        Note: runs ffprobe to read the file's duration, so ffprobe must be
        on PATH; a missing/unreadable file surfaces as a JSON/KeyError here.
        """
        import subprocess
        import json
        self.path = path
        self.sample_rate = sample_rate
        self.hop_length = hop_length
        self._current_time = 0.0
        # Get audio duration via ffprobe's JSON output.
        cmd = ["ffprobe", "-v", "quiet", "-print_format", "json",
               "-show_format", str(path)]
        result = subprocess.run(cmd, capture_output=True, text=True)
        info = json.loads(result.stdout)
        self.duration = float(info["format"]["duration"])
        # Audio buffer and state
        self._audio_data = None
        self._energy_history = []  # list of (time, flux) pairs for get_beat
        self._last_energy = 0
        self._last_beat_time = -1
        # NOTE(review): the two attributes below appear unused - get_beat()
        # computes its own adaptive threshold and hardcodes a 0.1s minimum
        # interval. Confirm whether they were meant to be wired in.
        self._beat_threshold = 1.5
        self._min_beat_interval = 0.15
        # Load audio lazily (first feature call triggers the ffmpeg decode).
        self._loaded = False

    def _load_audio(self):
        """Load audio data on first use (decodes the whole file into memory)."""
        if self._loaded:
            return
        import subprocess
        # Use ffmpeg to decode audio to raw PCM
        cmd = [
            "ffmpeg", "-v", "quiet",
            "-i", str(self.path),
            "-f", "f32le",  # 32-bit float, little-endian
            "-ac", "1",  # mono
            "-ar", str(self.sample_rate),
            "-"
        ]
        result = subprocess.run(cmd, capture_output=True)
        self._audio_data = np.frombuffer(result.stdout, dtype=np.float32)
        self._loaded = True

    def set_time(self, t: float):
        """Set current playback time (seconds); feature getters read around it."""
        self._current_time = t

    def get_energy(self) -> float:
        """Compute RMS energy in a small window centered on the current time."""
        self._load_audio()
        if self._audio_data is None or len(self._audio_data) == 0:
            return 0.0
        # Get sample index for current time
        sample_idx = int(self._current_time * self.sample_rate)
        window_size = self.hop_length * 2
        start = max(0, sample_idx - window_size // 2)
        end = min(len(self._audio_data), sample_idx + window_size // 2)
        if start >= end:
            return 0.0
        # RMS energy
        chunk = self._audio_data[start:end]
        rms = np.sqrt(np.mean(chunk ** 2))
        # Normalize to 0-1 range (approximate; the 3.0 gain is empirical)
        energy = min(1.0, rms * 3.0)
        self._last_energy = energy
        return energy

    def get_beat(self) -> bool:
        """Detect beat using spectral flux (change in frequency content)."""
        self._load_audio()
        if self._audio_data is None or len(self._audio_data) == 0:
            return False
        # Get audio chunks for current and previous frame
        sample_idx = int(self._current_time * self.sample_rate)
        chunk_size = self.hop_length * 2
        # Current chunk, centered on the current time
        start = max(0, sample_idx - chunk_size // 2)
        end = min(len(self._audio_data), sample_idx + chunk_size // 2)
        if end - start < chunk_size // 2:
            return False
        current_chunk = self._audio_data[start:end]
        # Previous chunk (one hop back)
        prev_start = max(0, start - self.hop_length)
        prev_end = max(0, end - self.hop_length)
        if prev_end <= prev_start:
            return False
        prev_chunk = self._audio_data[prev_start:prev_end]
        # Compute magnitude spectra of Hann-windowed chunks
        current_spec = np.abs(np.fft.rfft(current_chunk * np.hanning(len(current_chunk))))
        prev_spec = np.abs(np.fft.rfft(prev_chunk * np.hanning(len(prev_chunk))))
        # Spectral flux: sum of positive differences (onset = new frequencies appearing)
        min_len = min(len(current_spec), len(prev_spec))
        diff = current_spec[:min_len] - prev_spec[:min_len]
        flux = np.sum(np.maximum(0, diff))  # Only count increases
        # Normalize by spectrum size
        flux = flux / (min_len + 1)
        # Update flux history, keeping only the last 1.5 seconds
        self._energy_history.append((self._current_time, flux))
        while self._energy_history and self._energy_history[0][0] < self._current_time - 1.5:
            self._energy_history.pop(0)
        if len(self._energy_history) < 3:
            return False
        # Adaptive threshold based on recent flux values
        flux_values = [f for t, f in self._energy_history]
        mean_flux = np.mean(flux_values)
        std_flux = np.std(flux_values) + 0.001  # Avoid division by zero
        # Beat if flux is above mean (more sensitive threshold)
        threshold = mean_flux + std_flux * 0.3  # Lower = more sensitive
        min_interval = 0.1  # Allow up to 600 BPM
        time_ok = self._current_time - self._last_beat_time > min_interval
        is_beat = flux > threshold and time_ok
        if is_beat:
            self._last_beat_time = self._current_time
        return is_beat

    def get_spectrum(self, bands: int = 3) -> np.ndarray:
        """Get the band-averaged frequency spectrum at the current time."""
        self._load_audio()
        if self._audio_data is None or len(self._audio_data) == 0:
            return np.zeros(bands)
        sample_idx = int(self._current_time * self.sample_rate)
        n = 2048
        start = max(0, sample_idx - n // 2)
        end = min(len(self._audio_data), sample_idx + n // 2)
        if end - start < n // 2:
            return np.zeros(bands)
        chunk = self._audio_data[start:end]
        # Magnitude spectrum of a Hann-windowed chunk
        fft = np.abs(np.fft.rfft(chunk * np.hanning(len(chunk))))
        # Divide linearly into equal-width bands
        band_size = len(fft) // bands
        result = np.zeros(bands)
        for i in range(bands):
            s, e = i * band_size, (i + 1) * band_size
            result[i] = np.mean(fft[s:e])
        # Normalize so the loudest band is 1.0
        max_val = np.max(result)
        if max_val > 0:
            result = result / max_val
        return result

    @property
    def is_running(self) -> bool:
        # File-backed analysis is always considered "running".
        return True

572
streaming/backends.py Normal file
View File

@@ -0,0 +1,572 @@
"""
Effect processing backends.
Provides abstraction over different rendering backends:
- numpy: CPU-based, works everywhere, ~3-5 fps
- glsl: GPU-based, requires OpenGL, 30+ fps (future)
"""
import numpy as np
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional
from pathlib import Path
class Backend(ABC):
    """Abstract base class for effect processing backends.

    A backend receives one decoded frame per source, applies that source's
    effect chain, and blends the results into a single output frame.
    """

    @abstractmethod
    def process_frame(
        self,
        frames: List[np.ndarray],
        effects_per_frame: List[List[Dict]],
        compositor_config: Dict,
        t: float,
        analysis_data: Dict,
    ) -> np.ndarray:
        """
        Process multiple input frames through effects and composite.

        Args:
            frames: List of input frames (one per source)
            effects_per_frame: List of effect chains (one per source)
            compositor_config: How to blend the layers
            t: Current time in seconds
            analysis_data: Analysis data for binding resolution

        Returns:
            Composited output frame
        """
        pass

    @abstractmethod
    def load_effect(self, effect_path: Path) -> Any:
        """Load an effect definition from the given path."""
        pass
class NumpyBackend(Backend):
    """
    CPU-based effect processing using NumPy.

    Uses existing sexp_effects interpreter for effect execution.
    Works on any system, but limited to ~3-5 fps for complex effects.
    """

    def __init__(self, recipe_dir: Optional[Path] = None, minimal_primitives: bool = True):
        """
        Args:
            recipe_dir: Directory for resolving effect files (default: cwd).
            minimal_primitives: Passed through to the sexp interpreter.
        """
        self.recipe_dir = recipe_dir or Path(".")
        self.minimal_primitives = minimal_primitives
        self._interpreter = None  # lazily created sexp interpreter
        self._loaded_effects = {}  # path string -> effect name (file stem)

    def _get_interpreter(self):
        """Lazy-load the sexp interpreter on first use."""
        if self._interpreter is None:
            from sexp_effects import get_interpreter
            self._interpreter = get_interpreter(minimal_primitives=self.minimal_primitives)
        return self._interpreter

    def load_effect(self, effect_path: Path) -> Any:
        """Load an effect from sexp file; returns the effect's name (stem)."""
        if isinstance(effect_path, str):
            effect_path = Path(effect_path)
        effect_key = str(effect_path)
        if effect_key not in self._loaded_effects:
            interp = self._get_interpreter()
            interp.load_effect(str(effect_path))
            self._loaded_effects[effect_key] = effect_path.stem
        return self._loaded_effects[effect_key]

    def _resolve_binding(self, value: Any, t: float, analysis_data: Dict) -> Any:
        """Resolve a parameter binding to its value at time t.

        Plain values pass through unchanged. A dict containing "_binding" or
        "_bind" names an analysis track; its value is linearly interpolated
        at t and optionally mapped through a [lo, hi] "range".
        """
        if not isinstance(value, dict):
            return value
        if "_binding" in value or "_bind" in value:
            source = value.get("source") or value.get("_bind")
            range_map = value.get("range")
            track = analysis_data.get(source, {})
            times = track.get("times", [])
            values = track.get("values", [])
            if not times or not values:
                # Unknown/empty track: neutral value.
                return 0.0
            # Find value at time t (linear interpolation, clamped at ends)
            if t <= times[0]:
                val = values[0]
            elif t >= times[-1]:
                val = values[-1]
            else:
                # Linear scan for the bracketing segment.
                for i in range(len(times) - 1):
                    if times[i] <= t <= times[i + 1]:
                        alpha = (t - times[i]) / (times[i + 1] - times[i])
                        val = values[i] * (1 - alpha) + values[i + 1] * alpha
                        break
                else:
                    val = values[-1]
            # Apply range mapping (assumes val is normalized 0-1).
            if range_map and len(range_map) == 2:
                val = range_map[0] + val * (range_map[1] - range_map[0])
            return val
        return value

    def _apply_effect(
        self,
        frame: np.ndarray,
        effect_name: str,
        params: Dict,
        t: float,
        analysis_data: Dict,
    ) -> np.ndarray:
        """Apply a single effect to a frame; unknown effects pass through."""
        # Resolve bindings in params (skipping bookkeeping keys).
        resolved_params = {"_time": t}
        for key, value in params.items():
            if key in ("effect", "effect_path", "cid", "analysis_refs"):
                continue
            resolved_params[key] = self._resolve_binding(value, t, analysis_data)
        # Try fast native effects first
        result = self._apply_native_effect(frame, effect_name, resolved_params)
        if result is not None:
            return result
        # Fall back to sexp interpreter for complex effects
        interp = self._get_interpreter()
        if effect_name in interp.effects:
            result, _ = interp.run_effect(effect_name, frame, resolved_params, {})
            return result
        # Unknown effect - pass through
        return frame

    def _apply_native_effect(
        self,
        frame: np.ndarray,
        effect_name: str,
        params: Dict,
    ) -> Optional[np.ndarray]:
        """Fast native numpy effects for real-time streaming.

        Returns None when effect_name is not handled natively, so the
        caller can fall back to the sexp interpreter.
        """
        import cv2

        if effect_name == "zoom":
            amount = float(params.get("amount", 1.0))
            if abs(amount - 1.0) < 0.01:
                return frame  # negligible zoom: skip the resample
            h, w = frame.shape[:2]
            # Crop center and resize back to the original size.
            new_w, new_h = int(w / amount), int(h / amount)
            x1, y1 = (w - new_w) // 2, (h - new_h) // 2
            cropped = frame[y1:y1+new_h, x1:x1+new_w]
            return cv2.resize(cropped, (w, h))

        elif effect_name == "rotate":
            angle = float(params.get("angle", 0))
            if abs(angle) < 0.5:
                return frame  # negligible rotation: skip the warp
            h, w = frame.shape[:2]
            center = (w // 2, h // 2)
            matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
            return cv2.warpAffine(frame, matrix, (w, h))

        elif effect_name == "brightness":
            amount = float(params.get("amount", 1.0))
            return np.clip(frame * amount, 0, 255).astype(np.uint8)

        elif effect_name == "invert":
            amount = float(params.get("amount", 1.0))
            if amount < 0.5:
                return frame
            return 255 - frame

        # Not a native effect
        return None

    def process_frame(
        self,
        frames: List[np.ndarray],
        effects_per_frame: List[List[Dict]],
        compositor_config: Dict,
        t: float,
        analysis_data: Dict,
    ) -> np.ndarray:
        """
        Process frames through effects and composite.
        """
        if not frames:
            # No sources: emit black 720p so the pipeline keeps running.
            return np.zeros((720, 1280, 3), dtype=np.uint8)
        processed = []
        # Apply each source's effect chain to its frame.
        for frame, effects in zip(frames, effects_per_frame):
            result = frame.copy()
            for effect_config in effects:
                effect_name = effect_config.get("effect", "")
                if effect_name:
                    result = self._apply_effect(
                        result, effect_name, effect_config, t, analysis_data
                    )
            processed.append(result)
        # Composite layers (single layer needs no blending).
        if len(processed) == 1:
            return processed[0]
        return self._composite(processed, compositor_config, t, analysis_data)

    def _composite(
        self,
        frames: List[np.ndarray],
        config: Dict,
        t: float,
        analysis_data: Dict,
    ) -> np.ndarray:
        """Composite multiple frames into one weighted blend."""
        weights = config.get("weights", [1.0 / len(frames)] * len(frames))
        # Resolve weight bindings (weights may be time-varying).
        resolved_weights = [self._resolve_binding(w, t, analysis_data) for w in weights]
        # Normalize weights so they sum to 1 (fall back to equal blend).
        total = sum(resolved_weights)
        if total > 0:
            resolved_weights = [w / total for w in resolved_weights]
        else:
            resolved_weights = [1.0 / len(frames)] * len(frames)
        # Resize frames to match first frame. Import cv2 once, only if any
        # layer actually needs resizing (not per-iteration inside the loop).
        target_h, target_w = frames[0].shape[:2]
        if any(f.shape[:2] != (target_h, target_w) for f in frames):
            import cv2
        resized = []
        for frame in frames:
            if frame.shape[:2] != (target_h, target_w):
                frame = cv2.resize(frame, (target_w, target_h))
            resized.append(frame.astype(np.float32))
        # Weighted blend in float, then clamp back to uint8.
        result = np.zeros_like(resized[0])
        for frame, weight in zip(resized, resolved_weights):
            result += frame * weight
        return np.clip(result, 0, 255).astype(np.uint8)
class WGPUBackend(Backend):
    """
    GPU-based effect processing using wgpu/WebGPU compute shaders.

    Compiles sexp effects to WGSL at load time, executes on GPU.
    Achieves 30+ fps real-time processing on supported hardware.

    Falls back to NumpyBackend per-effect whenever the GPU is unavailable
    or an effect fails to compile to WGSL.

    Requirements:
        - wgpu-py library
        - Vulkan-capable GPU (or software renderer)
    """

    def __init__(self, recipe_dir: Optional[Path] = None):
        self.recipe_dir = recipe_dir or Path(".")
        self._device = None  # wgpu device, created lazily by _ensure_device
        self._loaded_effects: Dict[str, Any] = {}  # name -> compiled shader info
        # CPU fallback for binding resolution, compositing, and failed effects.
        self._numpy_fallback = NumpyBackend(recipe_dir)
        # Buffer pool for reuse - keyed by (width, height)
        self._buffer_pool: Dict[tuple, Dict] = {}

    def _ensure_device(self):
        """Lazy-initialize wgpu device; leaves _device as None on failure."""
        if self._device is not None:
            return
        try:
            import wgpu
            adapter = wgpu.gpu.request_adapter_sync(power_preference="high-performance")
            self._device = adapter.request_device_sync()
            print(f"[WGPUBackend] Using GPU: {adapter.info.get('device', 'unknown')}")
        except Exception as e:
            # NOTE(review): failure is retried on every call since _device
            # stays None - confirm that repeated init attempts are intended.
            print(f"[WGPUBackend] GPU init failed: {e}, falling back to CPU")
            self._device = None

    def load_effect(self, effect_path: Path) -> Any:
        """Load and compile an effect from sexp file to WGSL.

        Falls back to the numpy backend when the GPU is unavailable or
        compilation fails; returns the effect's name either way.
        """
        effect_key = str(effect_path)
        if effect_key in self._loaded_effects:
            return self._loaded_effects[effect_key]
        try:
            from sexp_effects.wgsl_compiler import compile_effect_file
            compiled = compile_effect_file(str(effect_path))
            self._ensure_device()
            if self._device is None:
                # Fall back to numpy
                return self._numpy_fallback.load_effect(effect_path)
            # Create shader module
            import wgpu
            shader_module = self._device.create_shader_module(code=compiled.wgsl_code)
            # Create compute pipeline
            pipeline = self._device.create_compute_pipeline(
                layout="auto",
                compute={"module": shader_module, "entry_point": "main"}
            )
            self._loaded_effects[effect_key] = {
                'compiled': compiled,
                'pipeline': pipeline,
                'name': compiled.name,
            }
            return compiled.name
        except Exception as e:
            print(f"[WGPUBackend] Failed to compile {effect_path}: {e}")
            # Fall back to numpy for this effect
            return self._numpy_fallback.load_effect(effect_path)

    def _resolve_binding(self, value: Any, t: float, analysis_data: Dict) -> Any:
        """Resolve a parameter binding to its value at time t."""
        # Delegate to numpy backend's implementation
        return self._numpy_fallback._resolve_binding(value, t, analysis_data)

    def _get_or_create_buffers(self, w: int, h: int):
        """Get or create reusable GPU buffers for given frame dimensions."""
        import wgpu
        key = (w, h)
        if key in self._buffer_pool:
            return self._buffer_pool[key]
        size = w * h * 4  # u32 per pixel (RGB packed into one u32)
        # Create staging buffer for uploads (MAP_WRITE)
        staging_buffer = self._device.create_buffer(
            size=size,
            usage=wgpu.BufferUsage.MAP_WRITE | wgpu.BufferUsage.COPY_SRC,
            mapped_at_creation=False,
        )
        # Create input buffer (STORAGE, receives data from staging)
        input_buffer = self._device.create_buffer(
            size=size,
            usage=wgpu.BufferUsage.STORAGE | wgpu.BufferUsage.COPY_DST,
        )
        # Create output buffer (STORAGE + COPY_SRC for readback)
        output_buffer = self._device.create_buffer(
            size=size,
            usage=wgpu.BufferUsage.STORAGE | wgpu.BufferUsage.COPY_SRC,
        )
        # Params buffer (uniform, 256 bytes should be enough)
        params_buffer = self._device.create_buffer(
            size=256,
            usage=wgpu.BufferUsage.UNIFORM | wgpu.BufferUsage.COPY_DST,
        )
        self._buffer_pool[key] = {
            'staging': staging_buffer,
            'input': input_buffer,
            'output': output_buffer,
            'params': params_buffer,
            'size': size,
        }
        return self._buffer_pool[key]

    def _apply_effect_gpu(
        self,
        frame: np.ndarray,
        effect_name: str,
        params: Dict,
        t: float,
    ) -> Optional[np.ndarray]:
        """Apply effect using GPU. Returns None if GPU not available
        or the effect has not been compiled for the GPU."""
        import wgpu
        # Find the loaded effect by its compiled name.
        effect_info = None
        for key, info in self._loaded_effects.items():
            if info.get('name') == effect_name:
                effect_info = info
                break
        if effect_info is None or self._device is None:
            return None
        compiled = effect_info['compiled']
        pipeline = effect_info['pipeline']
        h, w = frame.shape[:2]
        # Get reusable buffers
        buffers = self._get_or_create_buffers(w, h)
        # Pack frame as u32 array (RGB -> packed u32, R in the high byte)
        r = frame[:, :, 0].astype(np.uint32)
        g = frame[:, :, 1].astype(np.uint32)
        b = frame[:, :, 2].astype(np.uint32)
        packed = (r << 16) | (g << 8) | b
        input_data = packed.flatten().astype(np.uint32)
        # Upload input data via queue.write_buffer (more efficient than recreation)
        self._device.queue.write_buffer(buffers['input'], 0, input_data.tobytes())
        # Build params struct: width/height (u32), time (f32), then each
        # effect-specific param in the order the compiler declared them.
        import struct
        param_values = [w, h]  # width, height as u32
        param_format = "II"  # two u32
        # Add time as f32
        param_values.append(t)
        param_format += "f"
        # Add effect-specific params
        for param in compiled.params:
            val = params.get(param.name, param.default)
            if val is None:
                val = 0
            if param.wgsl_type == 'f32':
                param_values.append(float(val))
                param_format += "f"
            elif param.wgsl_type == 'i32':
                param_values.append(int(val))
                param_format += "i"
            elif param.wgsl_type == 'u32':
                param_values.append(int(val))
                param_format += "I"
        # Pad to 16-byte alignment (WGSL uniform buffer requirement)
        param_bytes = struct.pack(param_format, *param_values)
        while len(param_bytes) % 16 != 0:
            param_bytes += b'\x00'
        self._device.queue.write_buffer(buffers['params'], 0, param_bytes)
        # Create bind group (unfortunately this can't be easily reused with different effects)
        bind_group = self._device.create_bind_group(
            layout=pipeline.get_bind_group_layout(0),
            entries=[
                {"binding": 0, "resource": {"buffer": buffers['input']}},
                {"binding": 1, "resource": {"buffer": buffers['output']}},
                {"binding": 2, "resource": {"buffer": buffers['params']}},
            ]
        )
        # Dispatch compute
        encoder = self._device.create_command_encoder()
        compute_pass = encoder.begin_compute_pass()
        compute_pass.set_pipeline(pipeline)
        compute_pass.set_bind_group(0, bind_group)
        # Workgroups: ceil(w/16) x ceil(h/16) to cover every pixel
        wg_x = (w + 15) // 16
        wg_y = (h + 15) // 16
        compute_pass.dispatch_workgroups(wg_x, wg_y, 1)
        compute_pass.end()
        self._device.queue.submit([encoder.finish()])
        # Read back result (synchronous)
        result_data = self._device.queue.read_buffer(buffers['output'])
        result_packed = np.frombuffer(result_data, dtype=np.uint32).reshape(h, w)
        # Unpack u32 -> RGB
        result = np.zeros((h, w, 3), dtype=np.uint8)
        result[:, :, 0] = ((result_packed >> 16) & 0xFF).astype(np.uint8)
        result[:, :, 1] = ((result_packed >> 8) & 0xFF).astype(np.uint8)
        result[:, :, 2] = (result_packed & 0xFF).astype(np.uint8)
        return result

    def _apply_effect(
        self,
        frame: np.ndarray,
        effect_name: str,
        params: Dict,
        t: float,
        analysis_data: Dict,
    ) -> np.ndarray:
        """Apply a single effect to a frame (GPU first, numpy fallback)."""
        # Resolve bindings in params
        resolved_params = {"_time": t}
        for key, value in params.items():
            if key in ("effect", "effect_path", "cid", "analysis_refs"):
                continue
            resolved_params[key] = self._resolve_binding(value, t, analysis_data)
        # Try GPU first
        self._ensure_device()
        if self._device is not None:
            result = self._apply_effect_gpu(frame, effect_name, resolved_params, t)
            if result is not None:
                return result
        # Fall back to numpy (which re-resolves bindings from raw params)
        return self._numpy_fallback._apply_effect(
            frame, effect_name, params, t, analysis_data
        )

    def process_frame(
        self,
        frames: List[np.ndarray],
        effects_per_frame: List[List[Dict]],
        compositor_config: Dict,
        t: float,
        analysis_data: Dict,
    ) -> np.ndarray:
        """Process frames through effects and composite."""
        if not frames:
            # No sources: emit black 720p so the pipeline keeps running.
            return np.zeros((720, 1280, 3), dtype=np.uint8)
        processed = []
        # Apply effects to each input frame
        for i, (frame, effects) in enumerate(zip(frames, effects_per_frame)):
            result = frame.copy()
            for effect_config in effects:
                effect_name = effect_config.get("effect", "")
                if effect_name:
                    result = self._apply_effect(
                        result, effect_name, effect_config, t, analysis_data
                    )
            processed.append(result)
        # Composite layers (use numpy backend for now)
        if len(processed) == 1:
            return processed[0]
        return self._numpy_fallback._composite(
            processed, compositor_config, t, analysis_data
        )
# Keep GLSLBackend as alias for backwards compatibility
# (older callers referred to the GPU backend by its planned GLSL name).
GLSLBackend = WGPUBackend
def get_backend(name: str = "numpy", **kwargs) -> Backend:
    """
    Get a backend by name.

    Args:
        name: "numpy", "wgpu", or "glsl" (alias for wgpu)
        **kwargs: Backend-specific options

    Returns:
        Backend instance

    Raises:
        ValueError: If name does not match any known backend.
    """
    if name == "numpy":
        backend_cls = NumpyBackend
    elif name in ("wgpu", "glsl", "gpu"):
        backend_cls = WGPUBackend
    else:
        raise ValueError(f"Unknown backend: {name}")
    return backend_cls(**kwargs)

595
streaming/compositor.py Normal file
View File

@@ -0,0 +1,595 @@
"""
Streaming video compositor.
Main entry point for the streaming pipeline. Combines:
- Multiple video sources (with looping)
- Per-source effect chains
- Layer compositing
- Optional live audio analysis
- Output to display/file/stream
"""
import time
import sys
import numpy as np
from typing import List, Dict, Any, Optional, Union
from pathlib import Path
from .sources import Source, VideoSource
from .backends import Backend, NumpyBackend, get_backend
from .output import Output, DisplayOutput, FileOutput, MultiOutput
class StreamingCompositor:
    """
    Real-time streaming video compositor.

    Reads frames from multiple sources, applies effects, composites layers,
    and outputs the result - all frame-by-frame without intermediate files.

    Example:
        compositor = StreamingCompositor(
            sources=["video1.mp4", "video2.mp4"],
            effects_per_source=[
                [{"effect": "rotate", "angle": 45}],
                [{"effect": "zoom", "amount": 1.5}],
            ],
            compositor_config={"mode": "alpha", "weights": [0.5, 0.5]},
        )
        compositor.run(output="preview", duration=60)
    """
    def __init__(
        self,
        sources: List[Union[str, Source]],
        effects_per_source: Optional[List[List[Dict]]] = None,
        compositor_config: Optional[Dict] = None,
        analysis_data: Optional[Dict] = None,
        backend: str = "numpy",
        recipe_dir: Optional[Path] = None,
        fps: float = 30,
        audio_source: Optional[str] = None,
    ):
        """
        Initialize the streaming compositor.

        Args:
            sources: List of video paths or Source objects
            effects_per_source: List of effect chains, one per source
            compositor_config: How to blend layers (mode, weights)
            analysis_data: Pre-computed analysis data for bindings
            backend: "numpy" or "glsl"
            recipe_dir: Directory for resolving relative effect paths
            fps: Output frame rate
            audio_source: Path to audio file for streaming analysis

        Raises:
            ValueError: If a source has an unsupported type, or if
                effects_per_source length differs from sources length.
        """
        self.fps = fps
        self.recipe_dir = recipe_dir or Path(".")
        self.analysis_data = analysis_data or {}
        # Initialize streaming audio analyzer if audio source provided
        self._audio_analyzer = None
        self._audio_source = audio_source
        if audio_source:
            from .audio import StreamingAudioAnalyzer
            self._audio_analyzer = StreamingAudioAnalyzer(audio_source)
            print(f"Streaming audio: {audio_source}", file=sys.stderr)
        # Initialize sources: paths become looping VideoSources at output fps
        self.sources: List[Source] = []
        for src in sources:
            if isinstance(src, Source):
                self.sources.append(src)
            elif isinstance(src, (str, Path)):
                self.sources.append(VideoSource(str(src), target_fps=fps))
            else:
                raise ValueError(f"Unknown source type: {type(src)}")
        # Effect chains (default: no effects)
        self.effects_per_source = effects_per_source or [[] for _ in self.sources]
        if len(self.effects_per_source) != len(self.sources):
            raise ValueError(
                f"effects_per_source length ({len(self.effects_per_source)}) "
                f"must match sources length ({len(self.sources)})"
            )
        # Compositor config (default: equal blend)
        # NOTE(review): an empty sources list would raise ZeroDivisionError
        # here - confirm whether sources is guaranteed non-empty by callers.
        self.compositor_config = compositor_config or {
            "mode": "alpha",
            "weights": [1.0 / len(self.sources)] * len(self.sources),
        }
        # Initialize backend
        self.backend: Backend = get_backend(
            backend,
            recipe_dir=self.recipe_dir,
        )
        # Pre-load all effect definitions so the frame loop never compiles
        self._load_effects()
def _load_effects(self):
"""Pre-load all effect definitions."""
for effects in self.effects_per_source:
for effect_config in effects:
effect_path = effect_config.get("effect_path")
if effect_path:
full_path = self.recipe_dir / effect_path
if full_path.exists():
self.backend.load_effect(full_path)
def _create_output(
self,
output: Union[str, Output],
size: tuple,
) -> Output:
"""Create output target from string or Output object."""
if isinstance(output, Output):
return output
if output == "preview":
return DisplayOutput("Streaming Preview", size,
audio_source=self._audio_source, fps=self.fps)
elif output == "null":
from .output import NullOutput
return NullOutput()
elif isinstance(output, str):
return FileOutput(output, size, fps=self.fps, audio_source=self._audio_source)
else:
raise ValueError(f"Unknown output type: {output}")
def run(
    self,
    output: Union[str, Output] = "preview",
    duration: float = None,
    audio_analyzer=None,
    show_fps: bool = True,
    recipe_executor=None,
):
    """
    Run the streaming compositor's main frame loop.

    Reads one frame per source per tick, optionally updates live audio
    features, processes frames (via the recipe executor or the simple
    backend), and writes the result to the chosen output. Progress and
    final stats are printed to stderr.

    Args:
        output: Output target - "preview", "null", filename, or Output object
        duration: Duration in seconds (None = min source duration; endless
            live sources are capped at 1 hour)
        audio_analyzer: Optional AudioAnalyzer for live audio reactivity.
            Ignored when the compositor owns its own file-based analyzer.
        show_fps: Show FPS counter in console (stderr)
        recipe_executor: Optional StreamingRecipeExecutor for full recipe logic
    """
    # Determine output size from first source; other sources are letterboxed
    # to this size later by _process_with_executor.
    output_size = self.sources[0].size
    # Create output
    out = self._create_output(output, output_size)
    # Determine duration
    if duration is None:
        # Run until stopped (or min source duration if not looping)
        duration = min(s.duration for s in self.sources)
        if duration == float('inf'):
            duration = 3600  # 1 hour max for live sources
    total_frames = int(duration * self.fps)
    frame_time = 1.0 / self.fps
    print(f"Streaming: {len(self.sources)} sources -> {output}", file=sys.stderr)
    print(f"Duration: {duration:.1f}s, {total_frames} frames @ {self.fps}fps", file=sys.stderr)
    print(f"Output size: {output_size[0]}x{output_size[1]}", file=sys.stderr)
    print(f"Press 'q' to quit (if preview)", file=sys.stderr)
    # Frame loop.
    # NOTE(review): there is no explicit sleep/pacing here - frames are
    # produced as fast as possible; real-time pacing, if any, must come
    # from the output. Confirm this is intended for file outputs.
    start_time = time.time()
    frame_count = 0
    fps_update_interval = 30  # Update FPS display every N frames
    last_fps_time = start_time
    last_fps_count = 0
    try:
        for frame_num in range(total_frames):
            # The output can close itself (e.g. user hit 'q' in preview).
            if not out.is_open:
                print(f"\nOutput closed at frame {frame_num}", file=sys.stderr)
                break
            t = frame_num * frame_time
            try:
                # Update analysis data from streaming audio (file-based).
                # The compositor-owned analyzer takes precedence over a
                # caller-supplied live analyzer.
                energy = 0.0
                is_beat = False
                if self._audio_analyzer:
                    self._update_from_audio(self._audio_analyzer, t)
                    energy = self.analysis_data.get("live_energy", {}).get("values", [0])[0]
                    is_beat = self.analysis_data.get("live_beat", {}).get("values", [0])[0] > 0.5
                elif audio_analyzer:
                    self._update_from_audio(audio_analyzer, t)
                    energy = self.analysis_data.get("live_energy", {}).get("values", [0])[0]
                    is_beat = self.analysis_data.get("live_beat", {}).get("values", [0])[0] > 0.5
                # Read frames from all sources
                frames = [src.read_frame(t) for src in self.sources]
                # Process through recipe executor if provided
                if recipe_executor:
                    result = self._process_with_executor(
                        frames, recipe_executor, energy, is_beat, t
                    )
                else:
                    # Simple backend processing
                    result = self.backend.process_frame(
                        frames,
                        self.effects_per_source,
                        self.compositor_config,
                        t,
                        self.analysis_data,
                    )
                # Output
                out.write(result, t)
                frame_count += 1
                # FPS display (instantaneous rate over the last interval)
                if show_fps and frame_count % fps_update_interval == 0:
                    now = time.time()
                    elapsed = now - last_fps_time
                    if elapsed > 0:
                        current_fps = (frame_count - last_fps_count) / elapsed
                        progress = frame_num / total_frames * 100
                        print(
                            f"\r  {progress:5.1f}% | {current_fps:5.1f} fps | "
                            f"frame {frame_num}/{total_frames}",
                            end="", file=sys.stderr
                        )
                        last_fps_time = now
                        last_fps_count = frame_count
            except Exception as e:
                # Any per-frame failure aborts the run with a traceback
                # rather than skipping the bad frame.
                print(f"\nError at frame {frame_num}, t={t:.1f}s: {e}", file=sys.stderr)
                import traceback
                traceback.print_exc()
                break
    except KeyboardInterrupt:
        print("\nInterrupted", file=sys.stderr)
    finally:
        # Always release the output and any closeable sources.
        out.close()
        for src in self.sources:
            if hasattr(src, 'close'):
                src.close()
    # Final stats
    elapsed = time.time() - start_time
    avg_fps = frame_count / elapsed if elapsed > 0 else 0
    print(f"\nCompleted: {frame_count} frames in {elapsed:.1f}s ({avg_fps:.1f} fps avg)", file=sys.stderr)
def _process_with_executor(
    self,
    frames: List[np.ndarray],
    executor,
    energy: float,
    is_beat: bool,
    t: float,
) -> np.ndarray:
    """
    Process frames using the recipe executor for full pipeline.

    Implements:
    1. process-pair: two clips per source with effects, blended
    2. cycle-crossfade: dynamic composition with zoom and weights
    3. Final effects: whole-spin, ripple

    Args:
        frames: one RGB uint8 frame per source (assumed HxWx3 -
            TODO confirm against VideoSource.read_frame).
        executor: recipe executor providing per-frame params and weights.
        energy: current audio energy feature.
        is_beat: whether the current frame coincides with a beat.
        t: stream time in seconds.

    Returns:
        The composited uint8 frame at the first source's resolution.
    """
    import cv2
    # Target size from first source
    target_h, target_w = frames[0].shape[:2]
    # Resize all frames to target size (letterbox to preserve aspect ratio)
    resized_frames = []
    for frame in frames:
        fh, fw = frame.shape[:2]
        if (fh, fw) != (target_h, target_w):
            # Calculate scale to fit while preserving aspect ratio
            scale = min(target_w / fw, target_h / fh)
            new_w, new_h = int(fw * scale), int(fh * scale)
            resized = cv2.resize(frame, (new_w, new_h))
            # Center on black canvas
            canvas = np.zeros((target_h, target_w, 3), dtype=np.uint8)
            x_off = (target_w - new_w) // 2
            y_off = (target_h - new_h) // 2
            canvas[y_off:y_off+new_h, x_off:x_off+new_w] = resized
            resized_frames.append(canvas)
        else:
            resized_frames.append(frame)
    frames = resized_frames
    # Update executor state
    executor.on_frame(energy, is_beat, t)
    # Get weights to know which sources are active
    weights = executor.get_cycle_weights()
    # Process each source as a "pair" (clip A and B with different effects)
    processed_pairs = []
    for i, frame in enumerate(frames):
        # Skip sources with zero weight (but still need placeholder so the
        # pair list stays index-aligned with weights/zooms)
        if i < len(weights) and weights[i] < 0.001:
            processed_pairs.append(None)
            continue
        # Get effect params for clip A and B
        params_a = executor.get_effect_params(i, "a", energy)
        params_b = executor.get_effect_params(i, "b", energy)
        pair_params = executor.get_pair_params(i)
        # Process clip A
        clip_a = self._apply_clip_effects(frame.copy(), params_a, t)
        # Process clip B
        clip_b = self._apply_clip_effects(frame.copy(), params_b, t)
        # Blend A and B using pair_mix opacity
        opacity = pair_params["blend_opacity"]
        blended = cv2.addWeighted(
            clip_a, 1 - opacity,
            clip_b, opacity,
            0
        )
        # Apply pair rotation
        h, w = blended.shape[:2]
        center = (w // 2, h // 2)
        angle = pair_params["pair_rotation"]
        if abs(angle) > 0.5:
            matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
            blended = cv2.warpAffine(blended, matrix, (w, h))
        processed_pairs.append(blended)
    # Cycle-crossfade composition (weights re-queried; executor state may
    # have been advanced by on_frame above)
    weights = executor.get_cycle_weights()
    zooms = executor.get_cycle_zooms()
    # Apply zoom per pair and composite as a weighted float sum
    h, w = target_h, target_w
    result = np.zeros((h, w, 3), dtype=np.float32)
    for idx, (pair, weight, zoom) in enumerate(zip(processed_pairs, weights, zooms)):
        # Skip zero-weight sources
        if pair is None or weight < 0.001:
            continue
        orig_shape = pair.shape  # NOTE(review): unused - candidate for removal
        # Apply zoom
        if zoom > 1.01:
            # Zoom in: crop center and resize up
            new_w, new_h = int(w / zoom), int(h / zoom)
            if new_w > 0 and new_h > 0:
                x1, y1 = (w - new_w) // 2, (h - new_h) // 2
                cropped = pair[y1:y1+new_h, x1:x1+new_w]
                pair = cv2.resize(cropped, (w, h))
        elif zoom < 0.99:
            # Zoom out: shrink video and center on black
            scaled_w, scaled_h = int(w * zoom), int(h * zoom)
            if scaled_w > 0 and scaled_h > 0:
                shrunk = cv2.resize(pair, (scaled_w, scaled_h))
                canvas = np.zeros((h, w, 3), dtype=np.uint8)
                x_off, y_off = (w - scaled_w) // 2, (h - scaled_h) // 2
                canvas[y_off:y_off+scaled_h, x_off:x_off+scaled_w] = shrunk
                pair = canvas.copy()
        # Draw colored border - size indicates zoom level
        border_colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]
        color = border_colors[idx % 4]
        thickness = max(3, int(10 * weight))  # Thicker border = higher weight
        pair = np.ascontiguousarray(pair)
        pair[:thickness, :] = color
        pair[-thickness:, :] = color
        pair[:, :thickness] = color
        pair[:, -thickness:] = color
        result += pair.astype(np.float32) * weight
    result = np.clip(result, 0, 255).astype(np.uint8)
    # Apply final effects (whole-spin, ripple)
    final_params = executor.get_final_effects(energy)
    # Whole spin
    spin_angle = final_params["whole_spin_angle"]
    if abs(spin_angle) > 0.5:
        center = (w // 2, h // 2)
        matrix = cv2.getRotationMatrix2D(center, spin_angle, 1.0)
        result = cv2.warpAffine(result, matrix, (w, h))
    # Ripple effect
    amp = final_params["ripple_amplitude"]
    if amp > 1:
        result = self._apply_ripple(result, amp,
                                    final_params["ripple_cx"],
                                    final_params["ripple_cy"],
                                    t)
    return result
def _apply_clip_effects(self, frame: np.ndarray, params: dict, t: float) -> np.ndarray:
    """Apply per-clip effects in a fixed order: rotate, zoom, invert, hue shift, ASCII.

    Args:
        frame: RGB uint8 frame, modified and returned.
        params: effect parameter dict from the recipe executor.
        t: stream time in seconds (currently unused here).
    """
    import cv2
    height, width = frame.shape[:2]
    # Rotation about the frame centre (skipped for sub-degree angles).
    rot = params["rotate_angle"]
    if abs(rot) > 0.5:
        m = cv2.getRotationMatrix2D((width // 2, height // 2), rot, 1.0)
        frame = cv2.warpAffine(frame, m, (width, height))
    # Centre-crop zoom back up to full resolution.
    zoom = params["zoom_amount"]
    if abs(zoom - 1.0) > 0.01:
        crop_w, crop_h = int(width / zoom), int(height / zoom)
        if crop_w > 0 and crop_h > 0:
            left = max(0, (width - crop_w) // 2)
            top = max(0, (height - crop_h) // 2)
            right = min(width, left + crop_w)
            bottom = min(height, top + crop_h)
            if right > left and bottom > top:
                frame = cv2.resize(frame[top:bottom, left:right], (width, height))
    # Colour inversion.
    if params["invert_amount"] > 0.5:
        frame = 255 - frame
    # Hue rotation (OpenCV hue runs 0-179, i.e. degrees / 2).
    degrees = params["hue_degrees"]
    if abs(degrees) > 1:
        hsv = cv2.cvtColor(frame, cv2.COLOR_RGB2HSV)
        hsv[:, :, 0] = (hsv[:, :, 0].astype(np.int32) + int(degrees / 2)) % 180
        frame = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
    # ASCII-art pass, with a minimum glyph cell of 4px.
    if params["ascii_mix"] > 0.5:
        frame = self._apply_ascii(frame, max(4, int(params["ascii_char_size"])))
    return frame
def _apply_ascii(self, frame: np.ndarray, char_size: int) -> np.ndarray:
    """Apply ASCII art effect.

    Divides the frame into char_size x char_size cells, maps each cell's
    average luminance onto a density ramp, and redraws the frame as
    colored monospace glyphs on a black background.

    NOTE(review): luminance weights assume RGB channel order - confirm
    against the frames produced upstream.
    """
    import cv2
    from PIL import Image, ImageDraw, ImageFont
    h, w = frame.shape[:2]
    # Density ramp, darkest to brightest.
    chars = " .:-=+*#%@"
    # Get font; fall back to PIL's builtin when DejaVu is not installed.
    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf", char_size)
    except:
        font = ImageFont.load_default()
    # Sample cells using area interpolation (fast block average)
    rows = h // char_size
    cols = w // char_size
    if rows < 1 or cols < 1:
        # Frame smaller than one glyph cell: nothing sensible to draw.
        return frame
    # Crop to exact grid and downsample; each output pixel is one cell's mean.
    cropped = frame[:rows * char_size, :cols * char_size]
    cell_colors = cv2.resize(cropped, (cols, rows), interpolation=cv2.INTER_AREA)
    # Compute luminance (ITU-R BT.601 weights, normalized to 0..1)
    luminances = (0.299 * cell_colors[:, :, 0] +
                  0.587 * cell_colors[:, :, 1] +
                  0.114 * cell_colors[:, :, 2]) / 255.0
    # Create output image
    out_h = rows * char_size
    out_w = cols * char_size
    output = Image.new('RGB', (out_w, out_h), (0, 0, 0))
    draw = ImageDraw.Draw(output)
    # Draw characters. This is a Python-level rows*cols loop - the dominant
    # cost of the effect for small char_size.
    for r in range(rows):
        for c in range(cols):
            lum = luminances[r, c]
            color = tuple(cell_colors[r, c])
            # Map luminance to character
            idx = int(lum * (len(chars) - 1))
            char = chars[idx]
            # Draw character
            x = c * char_size
            y = r * char_size
            draw.text((x, y), char, fill=color, font=font)
    # Convert back to numpy and resize to original (grid crop may have
    # dropped a few edge pixels).
    result = np.array(output)
    if result.shape[:2] != (h, w):
        result = cv2.resize(result, (w, h), interpolation=cv2.INTER_LINEAR)
    return result
def _apply_ripple(self, frame: np.ndarray, amplitude: float,
                  cx: float, cy: float, t: float = 0) -> np.ndarray:
    """Radial ripple distortion centred at normalised coordinates (cx, cy).

    A sine wave of the given amplitude displaces pixels along the radial
    direction, decaying away from the centre and animating over time.
    """
    import cv2
    h, w = frame.shape[:2]
    origin_x, origin_y = cx * w, cy * h
    longest = max(w, h)
    # Per-pixel coordinate grids and offsets from the ripple origin.
    ys, xs = np.mgrid[0:h, 0:w].astype(np.float32)
    off_x = xs - origin_x
    off_y = ys - origin_y
    radius = np.sqrt(off_x * off_x + off_y * off_y)
    # Fixed wave parameters (matching recipe: frequency=8, decay=2, speed=5).
    freq = 8
    decay = 2
    speed = 5
    phase = t * speed * 2 * np.pi
    # Sinusoidal displacement magnitude per pixel.
    wave = np.sin(2 * np.pi * freq * radius / longest + phase) * amplitude
    if decay > 0:
        wave = wave * np.exp(-radius * decay / longest)
    # Unit radial direction; the centre pixel gets zero displacement.
    with np.errstate(divide='ignore', invalid='ignore'):
        unit_x = np.where(radius > 0, off_x / radius, 0)
        unit_y = np.where(radius > 0, off_y / radius, 0)
    sample_x = (xs + wave * unit_x).astype(np.float32)
    sample_y = (ys + wave * unit_y).astype(np.float32)
    return cv2.remap(frame, sample_x, sample_y, cv2.INTER_LINEAR,
                     borderMode=cv2.BORDER_REFLECT)
def _update_from_audio(self, analyzer, t: float):
"""Update analysis data from audio analyzer (streaming or live)."""
# Set time for file-based streaming analyzers
if hasattr(analyzer, 'set_time'):
analyzer.set_time(t)
# Get current audio features
energy = analyzer.get_energy() if hasattr(analyzer, 'get_energy') else 0
beat = analyzer.get_beat() if hasattr(analyzer, 'get_beat') else False
# Update analysis tracks - these can be referenced by effect bindings
self.analysis_data["live_energy"] = {
"times": [t],
"values": [energy],
"duration": float('inf'),
}
self.analysis_data["live_beat"] = {
"times": [t],
"values": [1.0 if beat else 0.0],
"duration": float('inf'),
}
def quick_preview(
    sources: List[str],
    effects: List[List[Dict]] = None,
    duration: float = 10,
    fps: float = 30,
):
    """
    Convenience helper: blend *sources* in a preview window.

    Args:
        sources: video file paths to composite.
        effects: optional per-source effect chains.
        duration: seconds to run before stopping.
        fps: target frame rate.

    Example:
        quick_preview(["video1.mp4", "video2.mp4"], duration=30)
    """
    StreamingCompositor(
        sources=sources,
        effects_per_source=effects,
        fps=fps,
    ).run(output="preview", duration=duration)

125
streaming/demo.py Normal file
View File

@@ -0,0 +1,125 @@
#!/usr/bin/env python3
"""
Demo script for streaming compositor.
Usage:
# Preview two videos blended
python -m streaming.demo preview video1.mp4 video2.mp4
# Record output to file
python -m streaming.demo record video1.mp4 video2.mp4 -o output.mp4
# Benchmark (no output)
python -m streaming.demo benchmark video1.mp4 --duration 10
"""
import argparse
import sys
from pathlib import Path
# Add parent to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from streaming import StreamingCompositor, VideoSource
from streaming.output import NullOutput
def demo_preview(sources: list, duration: float, effects: bool = False):
    """Show *sources* blended in a preview window, optionally with toy effects."""
    effects_config = None
    if effects:
        # One hard-coded demo effect per source, truncated to the source count.
        demo_chains = [
            [{"effect": "rotate", "angle": 15}],
            [{"effect": "zoom", "amount": 1.2}],
        ]
        effects_config = demo_chains[:len(sources)]
    comp = StreamingCompositor(
        sources=sources,
        effects_per_source=effects_config,
        recipe_dir=Path(__file__).parent.parent,
    )
    comp.run(output="preview", duration=duration)
def demo_record(sources: list, output_path: str, duration: float):
    """Render the blended sources to the file at *output_path*."""
    comp = StreamingCompositor(
        sources=sources,
        recipe_dir=Path(__file__).parent.parent,
    )
    comp.run(output=output_path, duration=duration)
def demo_benchmark(sources: list, duration: float):
    """Measure processing throughput by discarding every composited frame."""
    comp = StreamingCompositor(
        sources=sources,
        recipe_dir=Path(__file__).parent.parent,
    )
    comp.run(output="null", duration=duration)
def demo_audio_reactive(sources: list, duration: float):
    """Preview with live audio reactivity.

    Each source's zoom is bound to the live microphone energy; when no
    audio device is available the preview falls back to a silent run.
    """
    from streaming.audio import AudioAnalyzer
    # One energy-bound zoom effect per source.
    effects_config = [
        [{
            "effect": "zoom",
            "amount": {"_binding": True, "source": "live_energy", "feature": "values", "range": [1.0, 1.5]},
        }]
        for _ in sources
    ]
    comp = StreamingCompositor(
        sources=sources,
        effects_per_source=effects_config,
        recipe_dir=Path(__file__).parent.parent,
    )
    # Try to attach a live analyzer; degrade gracefully without one.
    try:
        with AudioAnalyzer() as audio:
            print("Audio analyzer started. Make some noise!", file=sys.stderr)
            comp.run(output="preview", duration=duration, audio_analyzer=audio)
    except Exception as e:
        print(f"Audio not available: {e}", file=sys.stderr)
        print("Running without audio...", file=sys.stderr)
        comp.run(output="preview", duration=duration)
def main():
    """Parse demo arguments and dispatch to the selected demo mode."""
    parser = argparse.ArgumentParser(description="Streaming compositor demo")
    parser.add_argument("mode", choices=["preview", "record", "benchmark", "audio"],
                        help="Demo mode")
    parser.add_argument("sources", nargs="+", help="Video source files")
    parser.add_argument("-o", "--output", help="Output file (for record mode)")
    parser.add_argument("-d", "--duration", type=float, default=30,
                        help="Duration in seconds")
    parser.add_argument("--effects", action="store_true",
                        help="Apply simple effects (for preview)")
    args = parser.parse_args()
    # Fail fast before opening any windows or encoders.
    for src_path in args.sources:
        if not Path(src_path).exists():
            print(f"Error: Source not found: {src_path}", file=sys.stderr)
            sys.exit(1)
    if args.mode == "record":
        if not args.output:
            print("Error: --output required for record mode", file=sys.stderr)
            sys.exit(1)
        demo_record(args.sources, args.output, args.duration)
    elif args.mode == "preview":
        demo_preview(args.sources, args.duration, args.effects)
    elif args.mode == "benchmark":
        demo_benchmark(args.sources, args.duration)
    else:  # "audio"
        demo_audio_reactive(args.sources, args.duration)


if __name__ == "__main__":
    main()

538
streaming/gpu_output.py Normal file
View File

@@ -0,0 +1,538 @@
"""
Zero-copy GPU video encoding output.
Uses PyNvVideoCodec for direct GPU-to-GPU encoding without CPU transfers.
Frames stay on GPU throughout: CuPy → NV12 conversion → NVENC encoding.
"""
import numpy as np
import subprocess
import sys
import threading
import queue
from pathlib import Path
from typing import Tuple, Optional, Union
import time
# Try to import GPU libraries
try:
import cupy as cp
CUPY_AVAILABLE = True
except ImportError:
cp = None
CUPY_AVAILABLE = False
try:
import PyNvVideoCodec as nvc
PYNVCODEC_AVAILABLE = True
except ImportError:
nvc = None
PYNVCODEC_AVAILABLE = False
def check_gpu_encode_available() -> bool:
    """Report whether zero-copy GPU encoding can be used.

    True only when both CuPy and PyNvVideoCodec imported successfully
    at module load time.
    """
    if not CUPY_AVAILABLE:
        return False
    return PYNVCODEC_AVAILABLE
# RGB to NV12 CUDA kernel, compiled lazily on first use and cached here.
_RGB_TO_NV12_KERNEL = None


def _get_rgb_to_nv12_kernel():
    """Get or create the RGB to NV12 conversion kernel.

    Returns the cached cp.RawKernel, compiling it on first call. Returns
    None when CuPy is unavailable. The kernel converts limited-range
    BT.601 RGB to a planar Y plane plus an interleaved, 2x2-subsampled
    UV plane (NV12 layout).
    """
    global _RGB_TO_NV12_KERNEL
    if _RGB_TO_NV12_KERNEL is None and CUPY_AVAILABLE:
        _RGB_TO_NV12_KERNEL = cp.RawKernel(r'''
        extern "C" __global__
        void rgb_to_nv12(
            const unsigned char* rgb,
            unsigned char* y_plane,
            unsigned char* uv_plane,
            int width, int height
        ) {
            int x = blockIdx.x * blockDim.x + threadIdx.x;
            int y = blockIdx.y * blockDim.y + threadIdx.y;
            if (x >= width || y >= height) return;
            int rgb_idx = (y * width + x) * 3;
            unsigned char r = rgb[rgb_idx];
            unsigned char g = rgb[rgb_idx + 1];
            unsigned char b = rgb[rgb_idx + 2];
            // RGB to Y (BT.601)
            int y_val = ((66 * r + 129 * g + 25 * b + 128) >> 8) + 16;
            y_plane[y * width + x] = (unsigned char)(y_val > 255 ? 255 : (y_val < 0 ? 0 : y_val));
            // UV (subsample 2x2) - only process even pixels
            if ((x & 1) == 0 && (y & 1) == 0) {
                int u_val = ((-38 * r - 74 * g + 112 * b + 128) >> 8) + 128;
                int v_val = ((112 * r - 94 * g - 18 * b + 128) >> 8) + 128;
                int uv_idx = (y / 2) * width + x;
                uv_plane[uv_idx] = (unsigned char)(u_val > 255 ? 255 : (u_val < 0 ? 0 : u_val));
                uv_plane[uv_idx + 1] = (unsigned char)(v_val > 255 ? 255 : (v_val < 0 ? 0 : v_val));
            }
        }
        ''', 'rgb_to_nv12')
    return _RGB_TO_NV12_KERNEL
class GPUEncoder:
    """
    Zero-copy GPU video encoder using PyNvVideoCodec.

    Frames are converted from RGB to NV12 on GPU and encoded directly
    without any CPU memory transfers. The NV12 buffer is obtained by
    decoding a tiny dummy clip once and reusing that decoded frame's
    GPU planes (wrapped as CuPy arrays) as the encoder input.
    """
    def __init__(self, width: int, height: int, fps: float = 30, crf: int = 23):
        """Create an H.264 NVENC encoder for width x height RGB frames.

        Args:
            width: frame width in pixels.
            height: frame height in pixels.
            fps: nominal frame rate (stored for reporting).
            crf: quality target (stored).

        Raises:
            RuntimeError: when CuPy or PyNvVideoCodec is unavailable.

        NOTE(review): fps and crf are stored but never forwarded to
        nvc.CreateEncoder - confirm whether rate control should use them.
        """
        if not check_gpu_encode_available():
            raise RuntimeError("GPU encoding not available (need CuPy and PyNvVideoCodec)")
        self.width = width
        self.height = height
        self.fps = fps
        self.crf = crf
        # Create dummy video to get frame buffer template
        self._init_frame_buffer()
        # Create encoder with low-latency settings (no B-frames for immediate output)
        # Use H264 codec explicitly, with SPS/PPS headers for browser compatibility
        self.encoder = nvc.CreateEncoder(
            width, height, "NV12", usecpuinputbuffer=False,
            codec="h264",  # Explicit H.264 (not HEVC)
            bf=0,  # No B-frames - immediate output
            repeatSPSPPS=1,  # Include SPS/PPS with each IDR frame
            idrPeriod=30,  # IDR frame every 30 frames (1 sec at 30fps)
        )
        # CUDA kernel grid/block config (16x16 threads per block)
        self._block = (16, 16)
        self._grid = ((width + 15) // 16, (height + 15) // 16)
        self._frame_count = 0
        # NOTE(review): _encoded_data appears unused in this class - verify.
        self._encoded_data = []
        print(f"[GPUEncoder] Initialized {width}x{height} @ {fps}fps, zero-copy GPU encoding", file=sys.stderr)

    def _init_frame_buffer(self):
        """Initialize frame buffer from dummy decode.

        Encodes a 0.1s black clip with ffmpeg, decodes one frame of it on
        GPU, and wraps the decoded frame's Y/UV planes as CuPy arrays so the
        RGB->NV12 kernel can write into them in place.

        NOTE(review): the fixed /tmp path means two concurrent encoders
        would race on the same dummy file - consider tempfile.
        """
        # Create minimal dummy video
        dummy_path = Path("/tmp/gpu_encoder_dummy.mp4")
        subprocess.run([
            "ffmpeg", "-y", "-f", "lavfi",
            "-i", f"color=black:size={self.width}x{self.height}:duration=0.1:rate=30",
            "-c:v", "h264", "-pix_fmt", "yuv420p",
            str(dummy_path)
        ], capture_output=True)
        # Decode to get frame buffer
        demuxer = nvc.CreateDemuxer(str(dummy_path))
        decoder = nvc.CreateDecoder(gpuid=0, usedevicememory=True)
        self._template_frame = None
        for _ in range(30):
            packet = demuxer.Demux()
            if not packet:
                break
            frames = decoder.Decode(packet)
            if frames:
                self._template_frame = frames[0]
                break
        if not self._template_frame:
            raise RuntimeError("Failed to initialize GPU frame buffer")
        # Wrap frame planes with CuPy for zero-copy access
        y_ptr = self._template_frame.GetPtrToPlane(0)
        uv_ptr = self._template_frame.GetPtrToPlane(1)
        y_mem = cp.cuda.UnownedMemory(y_ptr, self.height * self.width, None)
        self._y_plane = cp.ndarray(
            (self.height, self.width), dtype=cp.uint8,
            memptr=cp.cuda.MemoryPointer(y_mem, 0)
        )
        uv_mem = cp.cuda.UnownedMemory(uv_ptr, (self.height // 2) * self.width, None)
        self._uv_plane = cp.ndarray(
            (self.height // 2, self.width), dtype=cp.uint8,
            memptr=cp.cuda.MemoryPointer(uv_mem, 0)
        )
        # Keep references to prevent GC (planes point into decoder memory)
        self._decoder = decoder
        self._demuxer = demuxer
        # Cleanup dummy file
        dummy_path.unlink(missing_ok=True)

    def encode_frame(self, frame: Union[np.ndarray, 'cp.ndarray']) -> bytes:
        """
        Encode a frame (RGB format) to H.264.

        Args:
            frame: RGB frame as numpy or CuPy array, shape (H, W, 3)

        Returns:
            Encoded bytes (may be empty if frame is buffered)
        """
        # Ensure frame is on GPU
        if isinstance(frame, np.ndarray):
            frame_gpu = cp.asarray(frame)
        else:
            frame_gpu = frame
        # Ensure uint8
        if frame_gpu.dtype != cp.uint8:
            frame_gpu = cp.clip(frame_gpu, 0, 255).astype(cp.uint8)
        # Ensure contiguous (kernel indexes assume packed RGB rows)
        if not frame_gpu.flags['C_CONTIGUOUS']:
            frame_gpu = cp.ascontiguousarray(frame_gpu)
        # Debug: check input frame has actual data (first few frames only)
        if self._frame_count < 3:
            frame_sum = float(cp.sum(frame_gpu))
            print(f"[GPUEncoder] Frame {self._frame_count}: shape={frame_gpu.shape}, dtype={frame_gpu.dtype}, sum={frame_sum:.0f}", file=sys.stderr)
            if frame_sum < 1000:
                print(f"[GPUEncoder] WARNING: Frame appears to be mostly black!", file=sys.stderr)
        # Convert RGB to NV12 on GPU, writing into the template frame's planes
        kernel = _get_rgb_to_nv12_kernel()
        kernel(self._grid, self._block, (frame_gpu, self._y_plane, self._uv_plane, self.width, self.height))
        # CRITICAL: Synchronize CUDA to ensure kernel completes before encoding
        cp.cuda.Stream.null.synchronize()
        # Debug: check Y plane has data after conversion (first few frames only)
        if self._frame_count < 3:
            y_sum = float(cp.sum(self._y_plane))
            print(f"[GPUEncoder] Frame {self._frame_count}: Y plane sum={y_sum:.0f}", file=sys.stderr)
        # Encode (GPU to GPU) - the template frame now holds the new NV12 data
        result = self.encoder.Encode(self._template_frame)
        self._frame_count += 1
        return result if result else b''

    def flush(self) -> bytes:
        """Flush encoder and return remaining data."""
        return self.encoder.EndEncode()

    def close(self):
        """Close encoder and cleanup (currently nothing to release explicitly)."""
        pass
class GPUHLSOutput:
    """
    GPU-accelerated HLS output with IPFS upload.

    Uses zero-copy GPU encoding and writes HLS segments via an ffmpeg
    muxer subprocess. Completed segments are uploaded to IPFS from a
    background thread so the frame loop never blocks on the network.
    """
    def __init__(
        self,
        output_dir: str,
        size: Tuple[int, int],
        fps: float = 30,
        segment_duration: float = 4.0,
        crf: int = 23,
        audio_source: str = None,
        ipfs_gateway: str = "https://ipfs.io/ipfs",
        on_playlist_update: callable = None,
    ):
        """Create the output directory, encoder, upload worker and muxer.

        Args:
            output_dir: directory for local .ts segments and playlist.
            size: (width, height) of incoming frames.
            fps: frame rate used for segment sizing and encoding.
            segment_duration: target HLS segment length in seconds.
            crf: quality target forwarded to the GPU encoder.
            audio_source: optional audio file muxed alongside the video.
            ipfs_gateway: base gateway URL used in generated playlists.
            on_playlist_update: callback invoked with each new playlist CID.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.size = size
        self.fps = fps
        self.segment_duration = segment_duration
        self.ipfs_gateway = ipfs_gateway.rstrip("/")
        self._on_playlist_update = on_playlist_update
        self._is_open = True
        self.audio_source = audio_source
        # GPU encoder
        self._gpu_encoder = GPUEncoder(size[0], size[1], fps, crf)
        # Segment management
        # NOTE(review): _current_segment and _segment_data are initialized
        # but only _frames_in_segment drives segmentation here - verify.
        self._current_segment = 0
        self._frames_in_segment = 0
        self._frames_per_segment = int(fps * segment_duration)
        self._segment_data = []
        # Track segment CIDs for IPFS; guarded by _upload_lock
        self.segment_cids = {}
        self._playlist_cid = None
        self._upload_lock = threading.Lock()
        # Import IPFS client
        from ipfs_client import add_file, add_bytes
        self._ipfs_add_file = add_file
        self._ipfs_add_bytes = add_bytes
        # Background upload thread
        self._upload_queue = queue.Queue()
        self._upload_thread = threading.Thread(target=self._upload_worker, daemon=True)
        self._upload_thread.start()
        # Setup ffmpeg for muxing (takes raw H.264, outputs .ts segments)
        self._setup_muxer()
        print(f"[GPUHLSOutput] Initialized {size[0]}x{size[1]} @ {fps}fps, GPU encoding", file=sys.stderr)

    def _setup_muxer(self):
        """Setup ffmpeg for muxing H.264 to MPEG-TS segments with optional audio."""
        self.local_playlist_path = self.output_dir / "stream.m3u8"
        cmd = [
            "ffmpeg", "-y",
            "-f", "h264",  # Input is raw H.264
            "-i", "-",
        ]
        # Add audio input if provided
        if self.audio_source:
            cmd.extend(["-i", str(self.audio_source)])
            cmd.extend(["-map", "0:v", "-map", "1:a"])
        cmd.extend([
            "-c:v", "copy",  # Just copy video, no re-encoding
        ])
        # Add audio codec if we have audio
        if self.audio_source:
            cmd.extend(["-c:a", "aac", "-b:a", "128k", "-shortest"])
        cmd.extend([
            "-f", "hls",
            "-hls_time", str(self.segment_duration),
            "-hls_list_size", "0",
            "-hls_flags", "independent_segments+append_list+split_by_time",
            "-hls_segment_type", "mpegts",
            "-hls_segment_filename", str(self.output_dir / "segment_%05d.ts"),
            str(self.local_playlist_path),
        ])
        print(f"[GPUHLSOutput] FFmpeg cmd: {' '.join(cmd)}", file=sys.stderr)
        self._muxer = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE,  # Capture stderr for debugging
        )
        # Start thread to drain stderr (prevents pipe buffer from filling and blocking FFmpeg)
        self._stderr_thread = threading.Thread(target=self._drain_stderr, daemon=True)
        self._stderr_thread.start()

    def _drain_stderr(self):
        """Drain FFmpeg stderr to prevent blocking."""
        try:
            for line in self._muxer.stderr:
                line_str = line.decode('utf-8', errors='replace').strip()
                if line_str:
                    print(f"[FFmpeg] {line_str}", file=sys.stderr)
        except Exception as e:
            print(f"[FFmpeg stderr] Error reading: {e}", file=sys.stderr)

    def write(self, frame: Union[np.ndarray, 'cp.ndarray'], t: float = 0):
        """Write a frame using GPU encoding.

        Encodes the frame, pipes the H.264 bitstream to the ffmpeg muxer,
        and triggers segment upload checks once enough frames have been
        written to fill a segment. A broken muxer pipe closes the output.
        """
        if not self._is_open:
            return
        # Handle GPUFrame objects (from streaming_gpu primitives)
        if hasattr(frame, 'gpu') and hasattr(frame, 'is_on_gpu'):
            # It's a GPUFrame - extract the underlying array
            frame = frame.gpu if frame.is_on_gpu else frame.cpu
        # GPU encode
        encoded = self._gpu_encoder.encode_frame(frame)
        # Send to muxer
        if encoded:
            try:
                self._muxer.stdin.write(encoded)
            except BrokenPipeError as e:
                print(f"[GPUHLSOutput] FFmpeg pipe broken after {self._frames_in_segment} frames in segment, total segments: {self._current_segment}", file=sys.stderr)
                # Check if muxer is still running
                if self._muxer.poll() is not None:
                    print(f"[GPUHLSOutput] FFmpeg exited with code {self._muxer.returncode}", file=sys.stderr)
                self._is_open = False
                return
            except Exception as e:
                print(f"[GPUHLSOutput] Error writing to FFmpeg: {e}", file=sys.stderr)
                self._is_open = False
                return
        self._frames_in_segment += 1
        # Check for segment completion
        if self._frames_in_segment >= self._frames_per_segment:
            self._frames_in_segment = 0
            self._check_upload_segments()

    def _upload_worker(self):
        """Background worker thread for async IPFS uploads.

        Runs until a None sentinel is queued (see close()).
        """
        while True:
            try:
                item = self._upload_queue.get(timeout=1.0)
                if item is None:  # Shutdown signal
                    break
                seg_path, seg_num = item
                self._do_upload(seg_path, seg_num)
            except queue.Empty:
                continue
            except Exception as e:
                print(f"Upload worker error: {e}", file=sys.stderr)

    def _do_upload(self, seg_path: Path, seg_num: int):
        """Actually perform the upload (runs in background thread)."""
        try:
            cid = self._ipfs_add_file(seg_path, pin=True)
            if cid:
                with self._upload_lock:
                    self.segment_cids[seg_num] = cid
                print(f"Added to IPFS: {seg_path.name} -> {cid}", file=sys.stderr)
                # Must run outside the lock: _update_playlist re-acquires
                # the non-reentrant _upload_lock.
                self._update_playlist()
        except Exception as e:
            print(f"Failed to add to IPFS: {e}", file=sys.stderr)

    def _check_upload_segments(self):
        """Check for and queue new segments for async IPFS upload.

        A segment is considered complete when its size is non-zero and
        stable across a 10ms re-stat; completed, not-yet-uploaded segments
        are queued for the background worker.
        """
        segments = sorted(self.output_dir.glob("segment_*.ts"))
        for seg_path in segments:
            seg_num = int(seg_path.stem.split("_")[1])
            with self._upload_lock:
                if seg_num in self.segment_cids:
                    continue
            # Check if segment is complete (quick check, no blocking)
            try:
                size1 = seg_path.stat().st_size
                if size1 == 0:
                    continue
                # Quick non-blocking check
                time.sleep(0.01)
                size2 = seg_path.stat().st_size
                if size1 != size2:
                    continue
            except FileNotFoundError:
                continue
            # Queue for async upload (non-blocking!)
            self._upload_queue.put((seg_path, seg_num))

    def _update_playlist(self):
        """Generate and upload IPFS-aware playlist (live form, no ENDLIST)."""
        with self._upload_lock:
            if not self.segment_cids:
                return
            lines = [
                "#EXTM3U",
                "#EXT-X-VERSION:3",
                f"#EXT-X-TARGETDURATION:{int(self.segment_duration) + 1}",
                "#EXT-X-MEDIA-SEQUENCE:0",
            ]
            for seg_num in sorted(self.segment_cids.keys()):
                cid = self.segment_cids[seg_num]
                lines.append(f"#EXTINF:{self.segment_duration:.3f},")
                # Use /ipfs-ts/ path for segments to get correct MIME type (video/mp2t)
                segment_gateway = self.ipfs_gateway.replace("/ipfs", "/ipfs-ts")
                lines.append(f"{segment_gateway}/{cid}")
            playlist_content = "\n".join(lines) + "\n"
            # Upload playlist
            self._playlist_cid = self._ipfs_add_bytes(playlist_content.encode(), pin=True)
            if self._playlist_cid and self._on_playlist_update:
                self._on_playlist_update(self._playlist_cid)

    def close(self):
        """Close output and flush remaining data.

        Flushes the GPU encoder, shuts down the ffmpeg muxer, waits for
        pending uploads, and publishes a final VOD playlist.
        """
        if not self._is_open:
            return
        self._is_open = False
        # Flush GPU encoder
        final_data = self._gpu_encoder.flush()
        if final_data:
            try:
                self._muxer.stdin.write(final_data)
            except:
                pass
        # Close muxer
        try:
            self._muxer.stdin.close()
            self._muxer.wait(timeout=10)
        except:
            self._muxer.kill()
        # Final segment upload
        self._check_upload_segments()
        # Wait for pending uploads to complete
        self._upload_queue.put(None)  # Signal shutdown
        self._upload_thread.join(timeout=30)
        # Generate final playlist with #EXT-X-ENDLIST for VOD playback
        self._generate_final_playlist()
        self._gpu_encoder.close()

    def _generate_final_playlist(self):
        """Generate final IPFS playlist with #EXT-X-ENDLIST for completed streams."""
        with self._upload_lock:
            if not self.segment_cids:
                return
            lines = [
                "#EXTM3U",
                "#EXT-X-VERSION:3",
                f"#EXT-X-TARGETDURATION:{int(self.segment_duration) + 1}",
                "#EXT-X-MEDIA-SEQUENCE:0",
                "#EXT-X-PLAYLIST-TYPE:VOD",  # Mark as VOD for completed streams
            ]
            for seg_num in sorted(self.segment_cids.keys()):
                cid = self.segment_cids[seg_num]
                lines.append(f"#EXTINF:{self.segment_duration:.3f},")
                # Use /ipfs-ts/ path for segments to get correct MIME type (video/mp2t)
                segment_gateway = self.ipfs_gateway.replace("/ipfs", "/ipfs-ts")
                lines.append(f"{segment_gateway}/{cid}")
            # Mark stream as complete - critical for VOD playback
            lines.append("#EXT-X-ENDLIST")
            playlist_content = "\n".join(lines) + "\n"
            # Upload final playlist
            self._playlist_cid = self._ipfs_add_bytes(playlist_content.encode(), pin=True)
            if self._playlist_cid:
                print(f"[GPUHLSOutput] Final VOD playlist: {self._playlist_cid} ({len(self.segment_cids)} segments)", file=sys.stderr)
                if self._on_playlist_update:
                    self._on_playlist_update(self._playlist_cid)

    @property
    def is_open(self) -> bool:
        # False once close() runs or the muxer pipe breaks.
        return self._is_open

    @property
    def playlist_cid(self) -> Optional[str]:
        # CID of the most recently uploaded playlist, if any.
        return self._playlist_cid

    @property
    def playlist_url(self) -> Optional[str]:
        """Get the full IPFS URL for the playlist."""
        if self._playlist_cid:
            return f"{self.ipfs_gateway}/{self._playlist_cid}"
        return None

1642
streaming/jax_typography.py Normal file

File diff suppressed because it is too large Load Diff

531
streaming/jit_compiler.py Normal file
View File

@@ -0,0 +1,531 @@
"""
JIT Compiler for sexp frame pipelines.
Compiles sexp expressions to fused CUDA kernels for maximum performance.
"""
import cupy as cp
import numpy as np
from typing import Dict, List, Any, Optional, Tuple, Callable
import hashlib
import sys
# Cache for compiled kernels
_KERNEL_CACHE: Dict[str, Callable] = {}
def _generate_kernel_key(ops: List[Tuple]) -> str:
"""Generate cache key for operation sequence."""
return hashlib.md5(str(ops).encode()).hexdigest()
# =============================================================================
# CUDA Kernel Templates
# =============================================================================
AFFINE_WARP_KERNEL = cp.RawKernel(r'''
extern "C" __global__
void affine_warp(
const unsigned char* src,
unsigned char* dst,
int width, int height, int channels,
float m00, float m01, float m02,
float m10, float m11, float m12
) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= width || y >= height) return;
// Apply inverse affine transform
float src_x = m00 * x + m01 * y + m02;
float src_y = m10 * x + m11 * y + m12;
int dst_idx = (y * width + x) * channels;
// Bounds check
if (src_x < 0 || src_x >= width - 1 || src_y < 0 || src_y >= height - 1) {
for (int c = 0; c < channels; c++) {
dst[dst_idx + c] = 0;
}
return;
}
// Bilinear interpolation
int x0 = (int)src_x;
int y0 = (int)src_y;
int x1 = x0 + 1;
int y1 = y0 + 1;
float fx = src_x - x0;
float fy = src_y - y0;
for (int c = 0; c < channels; c++) {
float v00 = src[(y0 * width + x0) * channels + c];
float v10 = src[(y0 * width + x1) * channels + c];
float v01 = src[(y1 * width + x0) * channels + c];
float v11 = src[(y1 * width + x1) * channels + c];
float v0 = v00 * (1 - fx) + v10 * fx;
float v1 = v01 * (1 - fx) + v11 * fx;
float v = v0 * (1 - fy) + v1 * fy;
dst[dst_idx + c] = (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
}
}
''', 'affine_warp')
BLEND_KERNEL = cp.RawKernel(r'''
extern "C" __global__
void blend(
const unsigned char* src1,
const unsigned char* src2,
unsigned char* dst,
int size,
float alpha
) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx >= size) return;
float v = src1[idx] * (1.0f - alpha) + src2[idx] * alpha;
dst[idx] = (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
}
''', 'blend')
BRIGHTNESS_CONTRAST_KERNEL = cp.RawKernel(r'''
extern "C" __global__
void brightness_contrast(
const unsigned char* src,
unsigned char* dst,
int size,
float brightness,
float contrast
) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx >= size) return;
float v = src[idx];
v = (v - 128.0f) * contrast + 128.0f + brightness;
dst[idx] = (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
}
''', 'brightness_contrast')
HUE_SHIFT_KERNEL = cp.RawKernel(r'''
extern "C" __global__
void hue_shift(
const unsigned char* src,
unsigned char* dst,
int width, int height,
float hue_shift
) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= width || y >= height) return;
int idx = (y * width + x) * 3;
float r = src[idx] / 255.0f;
float g = src[idx + 1] / 255.0f;
float b = src[idx + 2] / 255.0f;
// RGB to HSV
float max_c = fmaxf(r, fmaxf(g, b));
float min_c = fminf(r, fminf(g, b));
float delta = max_c - min_c;
float h = 0, s = 0, v = max_c;
if (delta > 0.00001f) {
s = delta / max_c;
if (r >= max_c) h = (g - b) / delta;
else if (g >= max_c) h = 2.0f + (b - r) / delta;
else h = 4.0f + (r - g) / delta;
h *= 60.0f;
if (h < 0) h += 360.0f;
}
// Apply hue shift
h = fmodf(h + hue_shift + 360.0f, 360.0f);
// HSV to RGB
float c = v * s;
float x_val = c * (1 - fabsf(fmodf(h / 60.0f, 2.0f) - 1));
float m = v - c;
float r2, g2, b2;
if (h < 60) { r2 = c; g2 = x_val; b2 = 0; }
else if (h < 120) { r2 = x_val; g2 = c; b2 = 0; }
else if (h < 180) { r2 = 0; g2 = c; b2 = x_val; }
else if (h < 240) { r2 = 0; g2 = x_val; b2 = c; }
else if (h < 300) { r2 = x_val; g2 = 0; b2 = c; }
else { r2 = c; g2 = 0; b2 = x_val; }
dst[idx] = (unsigned char)((r2 + m) * 255);
dst[idx + 1] = (unsigned char)((g2 + m) * 255);
dst[idx + 2] = (unsigned char)((b2 + m) * 255);
}
''', 'hue_shift')
INVERT_KERNEL = cp.RawKernel(r'''
extern "C" __global__
void invert(
const unsigned char* src,
unsigned char* dst,
int size
) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx >= size) return;
dst[idx] = 255 - src[idx];
}
''', 'invert')
ZOOM_KERNEL = cp.RawKernel(r'''
extern "C" __global__
void zoom(
const unsigned char* src,
unsigned char* dst,
int width, int height, int channels,
float zoom_factor,
float cx, float cy
) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= width || y >= height) return;
// Map to source coordinates (zoom from center)
float src_x = (x - cx) / zoom_factor + cx;
float src_y = (y - cy) / zoom_factor + cy;
int dst_idx = (y * width + x) * channels;
if (src_x < 0 || src_x >= width - 1 || src_y < 0 || src_y >= height - 1) {
for (int c = 0; c < channels; c++) {
dst[dst_idx + c] = 0;
}
return;
}
// Bilinear interpolation
int x0 = (int)src_x;
int y0 = (int)src_y;
float fx = src_x - x0;
float fy = src_y - y0;
for (int c = 0; c < channels; c++) {
float v00 = src[(y0 * width + x0) * channels + c];
float v10 = src[(y0 * width + (x0+1)) * channels + c];
float v01 = src[((y0+1) * width + x0) * channels + c];
float v11 = src[((y0+1) * width + (x0+1)) * channels + c];
float v = v00*(1-fx)*(1-fy) + v10*fx*(1-fy) + v01*(1-fx)*fy + v11*fx*fy;
dst[dst_idx + c] = (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
}
}
''', 'zoom')
RIPPLE_KERNEL = cp.RawKernel(r'''
extern "C" __global__
void ripple(
const unsigned char* src,
unsigned char* dst,
int width, int height, int channels,
float cx, float cy,
float amplitude, float frequency, float decay, float phase
) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= width || y >= height) return;
float dx = x - cx;
float dy = y - cy;
float dist = sqrtf(dx * dx + dy * dy);
// Ripple displacement
float wave = sinf(dist * frequency * 0.1f + phase);
float amp = amplitude * expf(-dist * decay * 0.01f);
float src_x = x + dx / (dist + 0.001f) * wave * amp;
float src_y = y + dy / (dist + 0.001f) * wave * amp;
int dst_idx = (y * width + x) * channels;
if (src_x < 0 || src_x >= width - 1 || src_y < 0 || src_y >= height - 1) {
for (int c = 0; c < channels; c++) {
dst[dst_idx + c] = src[dst_idx + c]; // Keep original on boundary
}
return;
}
// Bilinear interpolation
int x0 = (int)src_x;
int y0 = (int)src_y;
float fx = src_x - x0;
float fy = src_y - y0;
for (int c = 0; c < channels; c++) {
float v00 = src[(y0 * width + x0) * channels + c];
float v10 = src[(y0 * width + (x0+1)) * channels + c];
float v01 = src[((y0+1) * width + x0) * channels + c];
float v11 = src[((y0+1) * width + (x0+1)) * channels + c];
float v = v00*(1-fx)*(1-fy) + v10*fx*(1-fy) + v01*(1-fx)*fy + v11*fx*fy;
dst[dst_idx + c] = (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
}
}
''', 'ripple')
# =============================================================================
# Fast GPU Operations
# =============================================================================
class FastGPUOps:
    """Optimized GPU operations using CUDA kernels.

    Holds two pre-allocated uint8 RGB frame buffers and ping-pongs between
    them: each effect reads the "current" buffer, writes the other, then
    the roles swap.  No per-call GPU allocation happens on the hot path.
    Instances are bound to a fixed (width, height) and 3 channels.
    """

    def __init__(self, width: int, height: int):
        self.width = width
        self.height = height
        self.channels = 3
        # Pre-allocate work buffers
        self._buf1 = cp.zeros((height, width, 3), dtype=cp.uint8)
        self._buf2 = cp.zeros((height, width, 3), dtype=cp.uint8)
        # Index of the buffer currently holding the latest frame (0 or 1).
        self._current_buf = 0
        # Grid/block config: 16x16 tiles for 2-D kernels, 256 threads for 1-D.
        self._block_2d = (16, 16)
        self._grid_2d = ((width + 15) // 16, (height + 15) // 16)
        self._block_1d = 256
        self._grid_1d = (width * height * 3 + 255) // 256

    def _get_buffers(self):
        """Get source and destination buffers (ping-pong)."""
        if self._current_buf == 0:
            return self._buf1, self._buf2
        return self._buf2, self._buf1

    def _swap_buffers(self):
        """Swap ping-pong buffers."""
        self._current_buf = 1 - self._current_buf

    def set_input(self, frame: cp.ndarray):
        """Copy *frame* into the current source buffer."""
        if self._current_buf == 0:
            cp.copyto(self._buf1, frame)
        else:
            cp.copyto(self._buf2, frame)

    def get_output(self) -> cp.ndarray:
        """Get current output buffer (a view, not a copy)."""
        if self._current_buf == 0:
            return self._buf1
        return self._buf2

    def rotate(self, angle: float, cx: float = None, cy: float = None):
        """Fast GPU rotation by *angle* degrees around (cx, cy); defaults to center."""
        if cx is None:
            cx = self.width / 2
        if cy is None:
            cy = self.height / 2
        src, dst = self._get_buffers()
        # Compute inverse rotation matrix
        import math
        rad = math.radians(-angle)  # Negative for inverse
        cos_a = math.cos(rad)
        sin_a = math.sin(rad)
        # Inverse affine matrix (rotate around center)
        m00 = cos_a
        m01 = -sin_a
        m02 = cx - cos_a * cx + sin_a * cy
        m10 = sin_a
        m11 = cos_a
        m12 = cy - sin_a * cx - cos_a * cy
        AFFINE_WARP_KERNEL(
            self._grid_2d, self._block_2d,
            (src, dst, self.width, self.height, self.channels,
             np.float32(m00), np.float32(m01), np.float32(m02),
             np.float32(m10), np.float32(m11), np.float32(m12))
        )
        self._swap_buffers()

    def zoom(self, factor: float, cx: float = None, cy: float = None):
        """Fast GPU zoom by *factor* around (cx, cy); defaults to center."""
        if cx is None:
            cx = self.width / 2
        if cy is None:
            cy = self.height / 2
        src, dst = self._get_buffers()
        ZOOM_KERNEL(
            self._grid_2d, self._block_2d,
            (src, dst, self.width, self.height, self.channels,
             np.float32(factor), np.float32(cx), np.float32(cy))
        )
        self._swap_buffers()

    def blend(self, other: cp.ndarray, alpha: float):
        """Fast GPU blend: current = current * (1 - alpha) + other * alpha."""
        src, dst = self._get_buffers()
        size = self.width * self.height * self.channels
        BLEND_KERNEL(
            (self._grid_1d,), (self._block_1d,),
            (src.ravel(), other.ravel(), dst.ravel(), size, np.float32(alpha))
        )
        self._swap_buffers()

    def brightness(self, factor: float):
        """Fast GPU brightness adjustment.

        NOTE(review): implemented as an additive offset of (factor - 1) * 128
        rather than multiplying pixel values by *factor* — confirm this
        matches the CPU brightness effect it replaces.
        """
        src, dst = self._get_buffers()
        size = self.width * self.height * self.channels
        BRIGHTNESS_CONTRAST_KERNEL(
            (self._grid_1d,), (self._block_1d,),
            (src.ravel(), dst.ravel(), size, np.float32((factor - 1) * 128), np.float32(1.0))
        )
        self._swap_buffers()

    def contrast(self, factor: float):
        """Fast GPU contrast adjustment (scales around mid-grey 128)."""
        src, dst = self._get_buffers()
        size = self.width * self.height * self.channels
        BRIGHTNESS_CONTRAST_KERNEL(
            (self._grid_1d,), (self._block_1d,),
            (src.ravel(), dst.ravel(), size, np.float32(0), np.float32(factor))
        )
        self._swap_buffers()

    def hue_shift(self, degrees: float):
        """Fast GPU hue shift by *degrees* (RGB -> HSV -> RGB per pixel)."""
        src, dst = self._get_buffers()
        HUE_SHIFT_KERNEL(
            self._grid_2d, self._block_2d,
            (src, dst, self.width, self.height, np.float32(degrees))
        )
        self._swap_buffers()

    def invert(self):
        """Fast GPU invert (255 - value per byte)."""
        src, dst = self._get_buffers()
        size = self.width * self.height * self.channels
        INVERT_KERNEL(
            (self._grid_1d,), (self._block_1d,),
            (src.ravel(), dst.ravel(), size)
        )
        self._swap_buffers()

    def ripple(self, amplitude: float, cx: float = None, cy: float = None,
               frequency: float = 8, decay: float = 2, phase: float = 0):
        """Fast GPU ripple effect radiating from (cx, cy); defaults to center."""
        if cx is None:
            cx = self.width / 2
        if cy is None:
            cy = self.height / 2
        src, dst = self._get_buffers()
        RIPPLE_KERNEL(
            self._grid_2d, self._block_2d,
            (src, dst, self.width, self.height, self.channels,
             np.float32(cx), np.float32(cy),
             np.float32(amplitude), np.float32(frequency),
             np.float32(decay), np.float32(phase))
        )
        self._swap_buffers()
# Global fast ops instance (created per resolution)
_FAST_OPS: Dict[Tuple[int, int], FastGPUOps] = {}


def get_fast_ops(width: int, height: int) -> FastGPUOps:
    """Return the cached FastGPUOps for (width, height), creating it on first use."""
    key = (width, height)
    ops = _FAST_OPS.get(key)
    if ops is None:
        ops = FastGPUOps(width, height)
        _FAST_OPS[key] = ops
    return ops
# =============================================================================
# Fast effect functions (drop-in replacements)
# =============================================================================
def fast_rotate(frame: cp.ndarray, angle: float, **kwargs) -> cp.ndarray:
    """Rotate *frame* by *angle* degrees on the GPU; optional 'cx'/'cy' kwargs."""
    height, width = frame.shape[:2]
    ops = get_fast_ops(width, height)
    ops.set_input(frame)
    ops.rotate(angle, kwargs.get('cx'), kwargs.get('cy'))
    return ops.get_output().copy()
def fast_zoom(frame: cp.ndarray, factor: float, **kwargs) -> cp.ndarray:
    """Zoom *frame* by *factor* on the GPU; optional 'cx'/'cy' kwargs."""
    height, width = frame.shape[:2]
    ops = get_fast_ops(width, height)
    ops.set_input(frame)
    ops.zoom(factor, kwargs.get('cx'), kwargs.get('cy'))
    return ops.get_output().copy()
def fast_blend(frame1: cp.ndarray, frame2: cp.ndarray, alpha: float) -> cp.ndarray:
    """Blend two frames on the GPU: frame1 * (1 - alpha) + frame2 * alpha."""
    height, width = frame1.shape[:2]
    ops = get_fast_ops(width, height)
    ops.set_input(frame1)
    ops.blend(frame2, alpha)
    return ops.get_output().copy()
def fast_hue_shift(frame: cp.ndarray, degrees: float) -> cp.ndarray:
    """Shift the hue of *frame* by *degrees* on the GPU."""
    height, width = frame.shape[:2]
    ops = get_fast_ops(width, height)
    ops.set_input(frame)
    ops.hue_shift(degrees)
    return ops.get_output().copy()
def fast_invert(frame: cp.ndarray) -> cp.ndarray:
    """Invert *frame* on the GPU (255 - value per byte)."""
    height, width = frame.shape[:2]
    ops = get_fast_ops(width, height)
    ops.set_input(frame)
    ops.invert()
    return ops.get_output().copy()
def fast_ripple(frame: cp.ndarray, amplitude: float, **kwargs) -> cp.ndarray:
    """Apply a radial ripple to *frame* on the GPU.

    Recognized kwargs: 'center_x', 'center_y', 'frequency', 'decay',
    'speed', 't' — the wave phase advances as speed * t.
    """
    height, width = frame.shape[:2]
    ops = get_fast_ops(width, height)
    ops.set_input(frame)
    center_x = kwargs.get('center_x', width / 2)
    center_y = kwargs.get('center_y', height / 2)
    phase = kwargs.get('speed', 0) * kwargs.get('t', 0)  # phase from speed*time
    ops.ripple(
        amplitude,
        center_x,
        center_y,
        kwargs.get('frequency', 8),
        kwargs.get('decay', 2),
        phase,
    )
    return ops.get_output().copy()
# Emitted at import time so callers can confirm the CUDA fast path is active.
print("[jit_compiler] CUDA kernels loaded", file=sys.stderr)

View File

@@ -0,0 +1,509 @@
"""
Multi-Resolution HLS Output with IPFS Storage.
Renders video at multiple quality levels simultaneously:
- Original resolution (from recipe)
- 720p (streaming quality)
- 360p (mobile/low bandwidth)
All segments stored on IPFS. Master playlist enables adaptive bitrate streaming.
"""
import os
import sys
import subprocess
import threading
import queue
import time
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union
from dataclasses import dataclass, field
import numpy as np
# Try GPU imports
try:
import cupy as cp
GPU_AVAILABLE = True
except ImportError:
cp = None
GPU_AVAILABLE = False
@dataclass
class QualityLevel:
    """Configuration and upload state for one HLS quality level."""
    name: str                # e.g. 'original', '720p', '360p'
    width: int               # output width in pixels (kept even for encoders)
    height: int              # output height in pixels
    bitrate: int  # kbps
    # Map of segment number -> IPFS CID, filled as segments are uploaded.
    segment_cids: Dict[int, str] = field(default_factory=dict)
    # CID of this level's most recently uploaded playlist, if any.
    playlist_cid: Optional[str] = None
class MultiResolutionHLSOutput:
    """
    GPU-accelerated multi-resolution HLS output with IPFS storage.
    Encodes video at multiple quality levels simultaneously using NVENC.
    Segments are uploaded to IPFS as they're created.
    Generates adaptive bitrate master playlist.

    One FFmpeg process per quality level reads raw RGB frames from stdin;
    a background thread uploads finished .ts segments to IPFS and refreshes
    the per-quality and master playlists after each upload.
    """

    def __init__(
        self,
        output_dir: str,
        source_size: Tuple[int, int],
        fps: float = 30,
        segment_duration: float = 4.0,
        ipfs_gateway: str = "https://ipfs.io/ipfs",
        on_playlist_update: callable = None,
        audio_source: str = None,
        resume_from: Optional[Dict] = None,
    ):
        """Initialize multi-resolution HLS output.

        Args:
            output_dir: Directory for HLS output files
            source_size: (width, height) of source frames
            fps: Frames per second
            segment_duration: Duration of each HLS segment in seconds
            ipfs_gateway: IPFS gateway URL for playlist URLs
            on_playlist_update: Callback when playlists are updated
            audio_source: Optional audio file to mux with video
            resume_from: Optional dict to resume from checkpoint with keys:
                - segment_cids: Dict of quality -> {seg_num: cid}
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.source_width, self.source_height = source_size
        self.fps = fps
        self.segment_duration = segment_duration
        self.ipfs_gateway = ipfs_gateway.rstrip("/")
        self._on_playlist_update = on_playlist_update
        self.audio_source = audio_source
        self._is_open = True
        self._frame_count = 0
        # Define quality levels
        self.qualities: Dict[str, QualityLevel] = {}
        self._setup_quality_levels()
        # Restore segment CIDs if resuming (don't re-upload existing segments)
        if resume_from and resume_from.get('segment_cids'):
            for name, cids in resume_from['segment_cids'].items():
                if name in self.qualities:
                    self.qualities[name].segment_cids = dict(cids)
                    print(f"[MultiResHLS] Restored {len(cids)} segment CIDs for {name}", file=sys.stderr)
        # IPFS client (project-local module; imported lazily so the module
        # can be loaded without it when this class is unused)
        from ipfs_client import add_file, add_bytes
        self._ipfs_add_file = add_file
        self._ipfs_add_bytes = add_bytes
        # Upload queue and thread
        self._upload_queue = queue.Queue()
        self._upload_thread = threading.Thread(target=self._upload_worker, daemon=True)
        self._upload_thread.start()
        # Track master playlist
        self._master_playlist_cid = None
        # Setup encoders
        self._setup_encoders()
        print(f"[MultiResHLS] Initialized {self.source_width}x{self.source_height} @ {fps}fps", file=sys.stderr)
        print(f"[MultiResHLS] Quality levels: {list(self.qualities.keys())}", file=sys.stderr)

    def _setup_quality_levels(self):
        """Configure quality levels based on source resolution."""
        # Always include original resolution
        self.qualities['original'] = QualityLevel(
            name='original',
            width=self.source_width,
            height=self.source_height,
            bitrate=self._estimate_bitrate(self.source_width, self.source_height),
        )
        # Add 720p if source is larger
        if self.source_height > 720:
            aspect = self.source_width / self.source_height
            w720 = int(720 * aspect)
            w720 = w720 - (w720 % 2)  # Ensure even width
            self.qualities['720p'] = QualityLevel(
                name='720p',
                width=w720,
                height=720,
                bitrate=2500,
            )
        # Add 360p if source is larger
        if self.source_height > 360:
            aspect = self.source_width / self.source_height
            w360 = int(360 * aspect)
            w360 = w360 - (w360 % 2)  # Ensure even width
            self.qualities['360p'] = QualityLevel(
                name='360p',
                width=w360,
                height=360,
                bitrate=800,
            )

    def _estimate_bitrate(self, width: int, height: int) -> int:
        """Estimate appropriate bitrate for resolution (in kbps)."""
        pixels = width * height
        if pixels >= 3840 * 2160:  # 4K
            return 15000
        elif pixels >= 1920 * 1080:  # 1080p
            return 5000
        elif pixels >= 1280 * 720:  # 720p
            return 2500
        elif pixels >= 854 * 480:  # 480p
            return 1500
        else:
            return 800

    def _setup_encoders(self):
        """Setup FFmpeg encoder processes for each quality level."""
        self._encoders: Dict[str, subprocess.Popen] = {}
        self._encoder_threads: Dict[str, threading.Thread] = {}
        for name, quality in self.qualities.items():
            # Create output directory for this quality
            quality_dir = self.output_dir / name
            quality_dir.mkdir(parents=True, exist_ok=True)
            # Build FFmpeg command
            cmd = self._build_encoder_cmd(quality, quality_dir)
            print(f"[MultiResHLS] Starting encoder for {name}: {quality.width}x{quality.height}", file=sys.stderr)
            # Start encoder process
            proc = subprocess.Popen(
                cmd,
                stdin=subprocess.PIPE,
                stderr=subprocess.PIPE,
                bufsize=10**7,  # Large buffer to prevent blocking
            )
            self._encoders[name] = proc
            # Start stderr drain thread (stderr must be consumed or FFmpeg
            # can block once the pipe buffer fills)
            stderr_thread = threading.Thread(
                target=self._drain_stderr,
                args=(name, proc),
                daemon=True
            )
            stderr_thread.start()
            self._encoder_threads[name] = stderr_thread

    def _build_encoder_cmd(self, quality: QualityLevel, output_dir: Path) -> List[str]:
        """Build FFmpeg command for a quality level."""
        playlist_path = output_dir / "playlist.m3u8"
        segment_pattern = output_dir / "segment_%05d.ts"
        cmd = [
            "ffmpeg", "-y",
            "-f", "rawvideo",
            "-pixel_format", "rgb24",
            "-video_size", f"{self.source_width}x{self.source_height}",
            "-framerate", str(self.fps),
            "-i", "-",
        ]
        # Add audio input if provided
        if self.audio_source:
            cmd.extend(["-i", str(self.audio_source)])
            # Map video from input 0, audio from input 1
            cmd.extend(["-map", "0:v", "-map", "1:a"])
        # Scale if not original resolution
        if quality.width != self.source_width or quality.height != self.source_height:
            cmd.extend([
                "-vf", f"scale={quality.width}:{quality.height}:flags=lanczos",
            ])
        # NVENC encoding with quality settings
        cmd.extend([
            "-c:v", "h264_nvenc",
            "-preset", "p4",  # Balanced speed/quality
            "-tune", "hq",
            "-b:v", f"{quality.bitrate}k",
            "-maxrate", f"{int(quality.bitrate * 1.5)}k",
            "-bufsize", f"{quality.bitrate * 2}k",
            "-g", str(int(self.fps * self.segment_duration)),  # Keyframe interval = segment duration
            "-keyint_min", str(int(self.fps * self.segment_duration)),
            "-sc_threshold", "0",  # Disable scene change detection for consistent segments
        ])
        # Add audio encoding if audio source provided
        if self.audio_source:
            cmd.extend([
                "-c:a", "aac",
                "-b:a", "128k",
                "-shortest",  # Stop when shortest stream ends
            ])
        # HLS output
        cmd.extend([
            "-f", "hls",
            "-hls_time", str(self.segment_duration),
            "-hls_list_size", "0",  # Keep all segments in playlist
            "-hls_flags", "independent_segments+append_list",
            "-hls_segment_type", "mpegts",
            "-hls_segment_filename", str(segment_pattern),
            str(playlist_path),
        ])
        return cmd

    def _drain_stderr(self, name: str, proc: subprocess.Popen):
        """Drain FFmpeg stderr to prevent blocking."""
        try:
            for line in proc.stderr:
                line_str = line.decode('utf-8', errors='replace').strip()
                # Only surface errors/warnings; everything else is dropped.
                if line_str and ('error' in line_str.lower() or 'warning' in line_str.lower()):
                    print(f"[FFmpeg/{name}] {line_str}", file=sys.stderr)
        except Exception as e:
            print(f"[FFmpeg/{name}] stderr drain error: {e}", file=sys.stderr)

    def write(self, frame: Union[np.ndarray, 'cp.ndarray'], t: float = 0):
        """Write a frame to all quality encoders.

        Accepts numpy arrays, CuPy arrays, or project frame wrappers.
        Closes the whole output if any encoder has died.
        """
        if not self._is_open:
            return
        # Convert GPU frame to CPU if needed
        if GPU_AVAILABLE and hasattr(frame, 'get'):
            frame = frame.get()  # CuPy to NumPy
        elif hasattr(frame, 'cpu'):
            # NOTE(review): accessed as an attribute, not a call — assumes a
            # project frame type exposing `.cpu` as a property; confirm.
            frame = frame.cpu  # GPUFrame to NumPy
        elif hasattr(frame, 'gpu') and hasattr(frame, 'is_on_gpu'):
            frame = frame.gpu.get() if frame.is_on_gpu else frame.cpu
        # Ensure correct format
        if frame.dtype != np.uint8:
            frame = np.clip(frame, 0, 255).astype(np.uint8)
        # Ensure contiguous
        if not frame.flags['C_CONTIGUOUS']:
            frame = np.ascontiguousarray(frame)
        frame_bytes = frame.tobytes()
        # Write to all encoders
        for name, proc in self._encoders.items():
            if proc.poll() is not None:
                print(f"[MultiResHLS] Encoder {name} died with code {proc.returncode}", file=sys.stderr)
                self._is_open = False
                return
            try:
                proc.stdin.write(frame_bytes)
            except BrokenPipeError:
                print(f"[MultiResHLS] Encoder {name} pipe broken", file=sys.stderr)
                self._is_open = False
                return
        self._frame_count += 1
        # Check for new segments periodically (once per segment duration)
        if self._frame_count % int(self.fps * self.segment_duration) == 0:
            self._check_and_upload_segments()

    def _check_and_upload_segments(self):
        """Check for new segments and queue them for upload."""
        for name, quality in self.qualities.items():
            quality_dir = self.output_dir / name
            segments = sorted(quality_dir.glob("segment_*.ts"))
            for seg_path in segments:
                seg_num = int(seg_path.stem.split("_")[1])
                if seg_num in quality.segment_cids:
                    continue  # Already uploaded
                # Check if segment is complete (not still being written):
                # size must be non-zero and stable across a short delay.
                try:
                    size1 = seg_path.stat().st_size
                    if size1 == 0:
                        continue
                    time.sleep(0.05)
                    size2 = seg_path.stat().st_size
                    if size1 != size2:
                        continue  # Still being written
                except FileNotFoundError:
                    continue
                # Queue for upload
                self._upload_queue.put((name, seg_path, seg_num))

    def _upload_worker(self):
        """Background worker for IPFS uploads.

        Runs as a daemon thread; exits when a None sentinel is queued.
        """
        while True:
            try:
                item = self._upload_queue.get(timeout=1.0)
                if item is None:  # Shutdown signal
                    break
                quality_name, seg_path, seg_num = item
                self._do_upload(quality_name, seg_path, seg_num)
            except queue.Empty:
                continue
            except Exception as e:
                print(f"[MultiResHLS] Upload worker error: {e}", file=sys.stderr)

    def _do_upload(self, quality_name: str, seg_path: Path, seg_num: int):
        """Upload a segment to IPFS and refresh the playlists."""
        try:
            cid = self._ipfs_add_file(seg_path, pin=True)
            if cid:
                self.qualities[quality_name].segment_cids[seg_num] = cid
                print(f"[MultiResHLS] Uploaded {quality_name}/segment_{seg_num:05d}.ts -> {cid[:16]}...", file=sys.stderr)
                # Update playlists after each upload
                self._update_playlists()
        except Exception as e:
            print(f"[MultiResHLS] Failed to upload {seg_path}: {e}", file=sys.stderr)

    def _update_playlists(self):
        """Generate and upload IPFS playlists."""
        # Generate quality-specific playlists
        for name, quality in self.qualities.items():
            if not quality.segment_cids:
                continue
            playlist = self._generate_quality_playlist(quality)
            cid = self._ipfs_add_bytes(playlist.encode(), pin=True)
            if cid:
                quality.playlist_cid = cid
        # Generate master playlist
        self._generate_master_playlist()

    def _generate_quality_playlist(self, quality: QualityLevel, finalize: bool = False) -> str:
        """Generate HLS playlist for a quality level.

        With finalize=True the playlist is marked VOD and closed with
        EXT-X-ENDLIST so players treat the stream as complete.
        """
        lines = [
            "#EXTM3U",
            "#EXT-X-VERSION:3",
            f"#EXT-X-TARGETDURATION:{int(self.segment_duration) + 1}",
            "#EXT-X-MEDIA-SEQUENCE:0",
        ]
        if finalize:
            lines.append("#EXT-X-PLAYLIST-TYPE:VOD")
        # Use /ipfs-ts/ for correct MIME type
        segment_gateway = self.ipfs_gateway.replace("/ipfs", "/ipfs-ts")
        for seg_num in sorted(quality.segment_cids.keys()):
            cid = quality.segment_cids[seg_num]
            lines.append(f"#EXTINF:{self.segment_duration:.3f},")
            lines.append(f"{segment_gateway}/{cid}")
        if finalize:
            lines.append("#EXT-X-ENDLIST")
        return "\n".join(lines) + "\n"

    def _generate_master_playlist(self, finalize: bool = False):
        """Generate and upload master playlist.

        NOTE(review): the `finalize` parameter is currently unused here
        (master playlists have no ENDLIST); confirm whether it was meant
        to propagate to the quality playlists.
        """
        lines = ["#EXTM3U", "#EXT-X-VERSION:3"]
        for name, quality in self.qualities.items():
            if not quality.playlist_cid:
                continue
            lines.append(
                f"#EXT-X-STREAM-INF:BANDWIDTH={quality.bitrate * 1000},"
                f"RESOLUTION={quality.width}x{quality.height},"
                f"NAME=\"{name}\""
            )
            lines.append(f"{self.ipfs_gateway}/{quality.playlist_cid}")
        if len(lines) <= 2:
            return  # No quality playlists yet
        master_content = "\n".join(lines) + "\n"
        cid = self._ipfs_add_bytes(master_content.encode(), pin=True)
        if cid:
            self._master_playlist_cid = cid
            print(f"[MultiResHLS] Master playlist: {cid}", file=sys.stderr)
            if self._on_playlist_update:
                # Pass both master CID and quality info for dynamic playlist generation
                quality_info = {
                    name: {
                        "cid": q.playlist_cid,
                        "width": q.width,
                        "height": q.height,
                        "bitrate": q.bitrate,
                    }
                    for name, q in self.qualities.items()
                    if q.playlist_cid
                }
                self._on_playlist_update(cid, quality_info)

    def close(self):
        """Close all encoders, flush remaining uploads, and finalize playlists."""
        if not self._is_open:
            return
        self._is_open = False
        print(f"[MultiResHLS] Closing after {self._frame_count} frames", file=sys.stderr)
        # Close encoder stdin pipes
        for name, proc in self._encoders.items():
            try:
                proc.stdin.close()
            except:
                pass
        # Wait for encoders to finish
        for name, proc in self._encoders.items():
            try:
                proc.wait(timeout=30)
                print(f"[MultiResHLS] Encoder {name} finished with code {proc.returncode}", file=sys.stderr)
            except subprocess.TimeoutExpired:
                proc.kill()
                print(f"[MultiResHLS] Encoder {name} killed (timeout)", file=sys.stderr)
        # Final segment check and upload
        self._check_and_upload_segments()
        # Wait for uploads to complete
        self._upload_queue.put(None)  # Shutdown signal
        self._upload_thread.join(timeout=60)
        # Generate final playlists with EXT-X-ENDLIST
        for name, quality in self.qualities.items():
            if quality.segment_cids:
                playlist = self._generate_quality_playlist(quality, finalize=True)
                cid = self._ipfs_add_bytes(playlist.encode(), pin=True)
                if cid:
                    quality.playlist_cid = cid
                    print(f"[MultiResHLS] Final {name} playlist: {cid} ({len(quality.segment_cids)} segments)", file=sys.stderr)
        # Final master playlist
        self._generate_master_playlist(finalize=True)
        print(f"[MultiResHLS] Complete. Master playlist: {self._master_playlist_cid}", file=sys.stderr)

    @property
    def is_open(self) -> bool:
        """True while the output is accepting frames."""
        return self._is_open

    @property
    def playlist_cid(self) -> Optional[str]:
        """CID of the current master playlist, or None before first upload."""
        return self._master_playlist_cid

    @property
    def playlist_url(self) -> Optional[str]:
        """Full gateway URL of the master playlist, or None before first upload."""
        if self._master_playlist_cid:
            return f"{self.ipfs_gateway}/{self._master_playlist_cid}"
        return None

    @property
    def segment_cids(self) -> Dict[str, Dict[int, str]]:
        """Get all segment CIDs organized by quality (shallow copies)."""
        return {name: dict(q.segment_cids) for name, q in self.qualities.items()}

963
streaming/output.py Normal file
View File

@@ -0,0 +1,963 @@
"""
Output targets for streaming compositor.
Supports:
- Display window (preview)
- File output (recording)
- Stream output (RTMP, etc.) - future
- NVENC hardware encoding (auto-detected)
- CuPy GPU arrays (auto-converted to numpy for output)
"""
import numpy as np
import subprocess
import threading
import queue
from abc import ABC, abstractmethod
from typing import Tuple, Optional, List, Union
from pathlib import Path
# Try to import CuPy for GPU array support
try:
    import cupy as cp
    CUPY_AVAILABLE = True
except ImportError:
    cp = None
    CUPY_AVAILABLE = False


def ensure_numpy(frame: Union[np.ndarray, 'cp.ndarray']) -> np.ndarray:
    """Return *frame* as a host (numpy) array.

    CuPy arrays are copied off the GPU; any other value is returned
    unchanged.
    """
    on_gpu = CUPY_AVAILABLE and isinstance(frame, cp.ndarray)
    return cp.asnumpy(frame) if on_gpu else frame
# Cache NVENC availability check
_nvenc_available: Optional[bool] = None


def check_nvenc_available() -> bool:
    """Check if NVENC hardware encoding is available and working.

    FFmpeg can list h264_nvenc even when the CUDA libraries cannot be
    loaded, so after the encoder shows up in `ffmpeg -encoders` a tiny
    real encode is attempted.  The result is cached for the process
    lifetime; any failure (including ffmpeg being absent) yields False.
    """
    global _nvenc_available
    if _nvenc_available is not None:
        return _nvenc_available
    try:
        # First check if encoder is listed at all
        listed = subprocess.run(
            ["ffmpeg", "-encoders"],
            capture_output=True,
            text=True,
            timeout=5,
        )
        if "h264_nvenc" not in listed.stdout:
            _nvenc_available = False
            return _nvenc_available
        # Actually try to encode a small test frame
        probe = subprocess.run(
            ["ffmpeg", "-y", "-f", "lavfi", "-i", "testsrc=duration=0.1:size=64x64:rate=1",
             "-c:v", "h264_nvenc", "-f", "null", "-"],
            capture_output=True,
            text=True,
            timeout=10,
        )
        _nvenc_available = probe.returncode == 0
        if not _nvenc_available:
            import sys
            print("NVENC listed but not working, falling back to libx264", file=sys.stderr)
    except Exception:
        _nvenc_available = False
    return _nvenc_available
def get_encoder_params(codec: str, preset: str, crf: int) -> List[str]:
    """
    Get encoder-specific FFmpeg parameters.

    For NVENC (h264_nvenc, hevc_nvenc):
        - Uses -cq for constant quality (similar to CRF)
        - Presets: p1 (fastest) to p7 (slowest/best quality)
        - Mapping: fast->p4, medium->p5, slow->p6
    For libx264:
        - Uses -crf for constant rate factor
        - Presets: ultrafast, superfast, veryfast, faster, fast, medium,
          slow, slower, veryslow
    """
    if codec not in ("h264_nvenc", "hevc_nvenc"):
        # Standard libx264 params
        return ["-c:v", codec, "-preset", preset, "-crf", str(crf)]
    # Map libx264 presets to NVENC presets; unknown names fall back to p4.
    nvenc_preset_map = {
        "ultrafast": "p1",
        "superfast": "p2",
        "veryfast": "p3",
        "faster": "p3",
        "fast": "p4",
        "medium": "p5",
        "slow": "p6",
        "slower": "p6",
        "veryslow": "p7",
    }
    # NVENC quality: 0 (best) to 51 (worst), similar to CRF.
    return [
        "-c:v", codec,
        "-preset", nvenc_preset_map.get(preset, "p4"),
        "-cq", str(crf),  # Constant quality mode
        "-rc", "vbr",     # Variable bitrate with quality target
    ]
class Output(ABC):
    """Abstract base class for output targets (display, file, stream)."""

    @abstractmethod
    def write(self, frame: np.ndarray, t: float):
        """Write a frame to the output.  *t* is the frame timestamp in seconds."""
        pass

    @abstractmethod
    def close(self):
        """Close the output and clean up resources."""
        pass

    @property
    @abstractmethod
    def is_open(self) -> bool:
        """Check if output is still open/valid."""
        pass
class DisplayOutput(Output):
"""
Display frames using mpv (handles Wayland properly).
Useful for live preview. Press 'q' to quit.
"""
def __init__(self, title: str = "Streaming Preview", size: Tuple[int, int] = None,
audio_source: str = None, fps: float = 30):
self.title = title
self.size = size
self.audio_source = audio_source
self.fps = fps
self._is_open = True
self._process = None
self._audio_process = None
def _start_mpv(self, frame_size: Tuple[int, int]):
"""Start mpv process for display."""
import sys
w, h = frame_size
cmd = [
"mpv",
"--no-cache",
"--demuxer=rawvideo",
f"--demuxer-rawvideo-w={w}",
f"--demuxer-rawvideo-h={h}",
"--demuxer-rawvideo-mp-format=rgb24",
f"--demuxer-rawvideo-fps={self.fps}",
f"--title={self.title}",
"-",
]
print(f"Starting mpv: {' '.join(cmd)}", file=sys.stderr)
self._process = subprocess.Popen(
cmd,
stdin=subprocess.PIPE,
stderr=subprocess.PIPE,
)
# Start audio playback if we have an audio source
if self.audio_source:
audio_cmd = [
"ffplay", "-nodisp", "-autoexit", "-loglevel", "quiet",
str(self.audio_source)
]
print(f"Starting audio: {self.audio_source}", file=sys.stderr)
self._audio_process = subprocess.Popen(
audio_cmd,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
def write(self, frame: np.ndarray, t: float):
"""Display frame."""
if not self._is_open:
return
# Convert GPU array to numpy if needed
frame = ensure_numpy(frame)
# Ensure frame is correct format
if frame.dtype != np.uint8:
frame = np.clip(frame, 0, 255).astype(np.uint8)
if not frame.flags['C_CONTIGUOUS']:
frame = np.ascontiguousarray(frame)
# Start mpv on first frame
if self._process is None:
self._start_mpv((frame.shape[1], frame.shape[0]))
# Check if mpv is still running
if self._process.poll() is not None:
self._is_open = False
return
try:
self._process.stdin.write(frame.tobytes())
self._process.stdin.flush() # Prevent buffering
except BrokenPipeError:
self._is_open = False
def close(self):
"""Close the display and audio."""
if self._process:
try:
self._process.stdin.close()
except:
pass
self._process.terminate()
self._process.wait()
if self._audio_process:
self._audio_process.terminate()
self._audio_process.wait()
self._is_open = False
    @property
    def is_open(self) -> bool:
        """True while frames can still be written; flips False once mpv exits."""
        if self._process and self._process.poll() is not None:
            self._is_open = False
        return self._is_open
class FileOutput(Output):
    """
    Write frames to a video file using ffmpeg.

    Automatically uses NVENC hardware encoding when available,
    falling back to libx264 CPU encoding otherwise. Frames are piped
    to ffmpeg's stdin as raw RGB24.
    """
    def __init__(
        self,
        path: str,
        size: Tuple[int, int],
        fps: float = 30,
        codec: str = "auto",  # "auto", "h264_nvenc", "libx264"
        crf: int = 18,
        preset: str = "fast",
        audio_source: Optional[str] = None,
    ):
        """
        Args:
            path: Output file path (fragmented-mp4 flags added for .mp4).
            size: (width, height) every frame is resized to before encoding.
            fps: Output frame rate.
            codec: Video encoder; "auto" picks NVENC when available.
            crf: Quality factor passed to get_encoder_params().
            preset: Encoder speed/quality preset.
            audio_source: Optional audio file muxed in as a second input.
        """
        self.path = Path(path)
        self.size = size
        self.fps = fps
        self._is_open = True
        # Auto-detect NVENC
        if codec == "auto":
            codec = "h264_nvenc" if check_nvenc_available() else "libx264"
        self.codec = codec
        # Build ffmpeg command: raw RGB24 frames of fixed size arrive on stdin
        cmd = [
            "ffmpeg", "-y",
            "-f", "rawvideo",
            "-vcodec", "rawvideo",
            "-pix_fmt", "rgb24",
            "-s", f"{size[0]}x{size[1]}",
            "-r", str(fps),
            "-i", "-",
        ]
        # Add audio input if provided
        if audio_source:
            cmd.extend(["-i", str(audio_source)])
            # Explicitly map: video from input 0 (rawvideo), audio from input 1
            cmd.extend(["-map", "0:v", "-map", "1:a"])
        # Get encoder-specific params
        cmd.extend(get_encoder_params(codec, preset, crf))
        cmd.extend(["-pix_fmt", "yuv420p"])
        # Add audio codec if we have audio; -shortest stops at the shorter stream
        if audio_source:
            cmd.extend(["-c:a", "aac", "-b:a", "192k", "-shortest"])
        # Use fragmented mp4 for streamable output while writing
        if str(self.path).endswith('.mp4'):
            cmd.extend(["-movflags", "frag_keyframe+empty_moov+default_base_moof"])
        cmd.append(str(self.path))
        import sys
        print(f"FileOutput cmd: {' '.join(cmd)}", file=sys.stderr)
        self._process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stderr=None,  # Show errors for debugging
        )

    def write(self, frame: np.ndarray, t: float):
        """Write one frame to the video file (t is unused; Output API)."""
        if not self._is_open or self._process.poll() is not None:
            self._is_open = False
            return
        # Convert GPU array to numpy if needed
        frame = ensure_numpy(frame)
        # Resize if needed so the byte count matches what ffmpeg expects
        if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
            import cv2
            frame = cv2.resize(frame, self.size)
        # Ensure correct format
        if frame.dtype != np.uint8:
            frame = np.clip(frame, 0, 255).astype(np.uint8)
        if not frame.flags['C_CONTIGUOUS']:
            frame = np.ascontiguousarray(frame)
        try:
            self._process.stdin.write(frame.tobytes())
        except BrokenPipeError:
            self._is_open = False

    def close(self):
        """Close the video file: end stdin so ffmpeg finalizes, then wait."""
        if self._process:
            self._process.stdin.close()
            self._process.wait()
        self._is_open = False

    @property
    def is_open(self) -> bool:
        """True while ffmpeg is alive and no write has failed."""
        return self._is_open and self._process.poll() is None
class MultiOutput(Output):
    """
    Fan one frame stream out to several sinks at once.

    Useful for recording to a file while also showing a live preview.
    """
    def __init__(self, outputs: list):
        self.outputs = outputs

    def write(self, frame: np.ndarray, t: float):
        """Forward the frame to every sink that is still accepting frames."""
        for sink in self.outputs:
            if sink.is_open:
                sink.write(frame, t)

    def close(self):
        """Close every wrapped sink."""
        for sink in self.outputs:
            sink.close()

    @property
    def is_open(self) -> bool:
        """Open as long as at least one wrapped sink remains open."""
        return any(sink.is_open for sink in self.outputs)
class NullOutput(Output):
    """Sink that discards every frame while counting them (for benchmarking)."""
    def __init__(self):
        self._is_open = True
        self.frame_count = 0  # number of frames "written" so far

    def write(self, frame: np.ndarray, t: float):
        """Drop the frame, keeping only the running count."""
        self.frame_count += 1

    def close(self):
        """Mark the sink closed."""
        self._is_open = False

    @property
    def is_open(self) -> bool:
        """True until close() is called."""
        return self._is_open
class PipeOutput(Output):
    """
    Pipe frames directly to mpv.

    Launches mpv with the rawvideo demuxer lazily on the first write and
    streams RGB24 frames to its stdin; optionally plays a separate audio
    file through ffplay alongside the video.
    """
    def __init__(self, size: Tuple[int, int], fps: float = 30, audio_source: Optional[str] = None):
        """
        Args:
            size: Output (width, height); frames are resized to this if needed.
            fps: Frame rate advertised to mpv's rawvideo demuxer.
            audio_source: Optional audio file to play via ffplay.
        """
        self.size = size
        self.fps = fps
        self.audio_source = audio_source
        self._is_open = True
        self._process = None        # mpv subprocess
        self._audio_process = None  # ffplay subprocess
        self._started = False

    def _start(self):
        """Start mpv and audio on first frame (idempotent)."""
        if self._started:
            return
        self._started = True
        import sys
        w, h = self.size
        # Start mpv: reads raw RGB24 frames of fixed size from stdin ("-")
        cmd = [
            "mpv", "--no-cache",
            "--demuxer=rawvideo",
            f"--demuxer-rawvideo-w={w}",
            f"--demuxer-rawvideo-h={h}",
            "--demuxer-rawvideo-mp-format=rgb24",
            f"--demuxer-rawvideo-fps={self.fps}",
            "--title=Streaming",
            "-"
        ]
        print(f"Starting mpv: {w}x{h} @ {self.fps}fps", file=sys.stderr)
        self._process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
        # Start audio
        if self.audio_source:
            audio_cmd = [
                "ffplay", "-nodisp", "-autoexit", "-loglevel", "quiet",
                str(self.audio_source)
            ]
            print(f"Starting audio: {self.audio_source}", file=sys.stderr)
            self._audio_process = subprocess.Popen(
                audio_cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )

    def write(self, frame: np.ndarray, t: float):
        """Write one frame to mpv; marks the output closed if mpv exited."""
        if not self._is_open:
            return
        self._start()
        # Check mpv still running
        if self._process.poll() is not None:
            self._is_open = False
            return
        # Convert GPU array to numpy if needed
        frame = ensure_numpy(frame)
        # Resize if needed so the byte count matches the demuxer geometry
        if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
            import cv2
            frame = cv2.resize(frame, self.size)
        # Ensure correct format: contiguous uint8 so tobytes() is raw RGB24
        if frame.dtype != np.uint8:
            frame = np.clip(frame, 0, 255).astype(np.uint8)
        if not frame.flags['C_CONTIGUOUS']:
            frame = np.ascontiguousarray(frame)
        try:
            self._process.stdin.write(frame.tobytes())
            self._process.stdin.flush()
        except BrokenPipeError:
            self._is_open = False

    def close(self):
        """Close mpv and audio; safe if nothing was ever started."""
        if self._process:
            try:
                self._process.stdin.close()
            # Narrowed from a bare `except:` -- only I/O errors (pipe already
            # broken because mpv exited) are expected and safe to ignore here;
            # KeyboardInterrupt/SystemExit must propagate.
            except OSError:
                pass
            self._process.terminate()
            self._process.wait()
        if self._audio_process:
            self._audio_process.terminate()
            self._audio_process.wait()
        self._is_open = False

    @property
    def is_open(self) -> bool:
        """False once mpv has exited or a write failed."""
        if self._process and self._process.poll() is not None:
            self._is_open = False
        return self._is_open
class HLSOutput(Output):
    """
    Write frames as HLS stream (m3u8 playlist + .ts segments).

    This enables true live streaming where the browser can poll
    for new segments as they become available.
    Automatically uses NVENC hardware encoding when available.
    """
    def __init__(
        self,
        output_dir: str,
        size: Tuple[int, int],
        fps: float = 30,
        segment_duration: float = 4.0,  # 4s segments for stability
        codec: str = "auto",  # "auto", "h264_nvenc", "libx264"
        crf: int = 23,
        preset: str = "fast",  # Better quality than ultrafast
        audio_source: Optional[str] = None,
    ):
        """
        Args:
            output_dir: Directory receiving stream.m3u8 and segment_*.ts files.
            size: (width, height) every frame is resized to before encoding.
            fps: Output frame rate.
            segment_duration: Target HLS segment length in seconds.
            codec: Video encoder; "auto" picks NVENC when available.
            crf: Quality factor passed to get_encoder_params().
            preset: Encoder speed/quality preset.
            audio_source: Optional audio file muxed in as a second input.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.size = size
        self.fps = fps
        self.segment_duration = segment_duration
        self._is_open = True
        # Auto-detect NVENC
        if codec == "auto":
            codec = "h264_nvenc" if check_nvenc_available() else "libx264"
        self.codec = codec
        # HLS playlist path
        self.playlist_path = self.output_dir / "stream.m3u8"
        # Build ffmpeg command for HLS output (raw RGB24 frames on stdin)
        cmd = [
            "ffmpeg", "-y",
            "-f", "rawvideo",
            "-vcodec", "rawvideo",
            "-pix_fmt", "rgb24",
            "-s", f"{size[0]}x{size[1]}",
            "-r", str(fps),
            "-i", "-",
        ]
        # Add audio input if provided
        if audio_source:
            cmd.extend(["-i", str(audio_source)])
            cmd.extend(["-map", "0:v", "-map", "1:a"])
        # Keyframe interval - must be exactly segment_duration for clean cuts
        gop_size = int(fps * segment_duration)
        # Get encoder-specific params
        cmd.extend(get_encoder_params(codec, preset, crf))
        cmd.extend([
            "-pix_fmt", "yuv420p",
            # Force keyframes at exact intervals for clean segment boundaries
            "-g", str(gop_size),
            "-keyint_min", str(gop_size),
            "-sc_threshold", "0",  # Disable scene change detection
            "-force_key_frames", f"expr:gte(t,n_forced*{segment_duration})",
            # Reduce buffering for faster segment availability
            "-flush_packets", "1",
        ])
        # Add audio codec if we have audio
        if audio_source:
            cmd.extend(["-c:a", "aac", "-b:a", "128k", "-shortest"])
        # HLS specific options for smooth live streaming
        cmd.extend([
            "-f", "hls",
            "-hls_time", str(segment_duration),
            "-hls_list_size", "0",  # Keep all segments in playlist
            "-hls_flags", "independent_segments+append_list+split_by_time",
            "-hls_segment_type", "mpegts",
            "-hls_segment_filename", str(self.output_dir / "segment_%05d.ts"),
            str(self.playlist_path),
        ])
        import sys
        print(f"HLSOutput cmd: {' '.join(cmd)}", file=sys.stderr)
        self._process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stderr=None,  # Show errors for debugging
        )
        # Track segments for status reporting
        self.segments_written = 0
        self._last_segment_check = 0

    def write(self, frame: np.ndarray, t: float):
        """Write one frame to the HLS encoder; refreshes the segment count ~1/s."""
        if not self._is_open or self._process.poll() is not None:
            self._is_open = False
            return
        # Convert GPU array to numpy if needed
        frame = ensure_numpy(frame)
        # Resize if needed
        if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
            import cv2
            frame = cv2.resize(frame, self.size)
        # Ensure correct format
        if frame.dtype != np.uint8:
            frame = np.clip(frame, 0, 255).astype(np.uint8)
        if not frame.flags['C_CONTIGUOUS']:
            frame = np.ascontiguousarray(frame)
        try:
            self._process.stdin.write(frame.tobytes())
        except BrokenPipeError:
            self._is_open = False
        # Periodically count segments (at most once per second of stream time)
        if t - self._last_segment_check > 1.0:
            self._last_segment_check = t
            self.segments_written = len(list(self.output_dir.glob("segment_*.ts")))

    def close(self):
        """Close the HLS stream and mark the playlist as finished (VOD)."""
        if self._process:
            self._process.stdin.close()
            self._process.wait()
        self._is_open = False
        # Final segment count
        self.segments_written = len(list(self.output_dir.glob("segment_*.ts")))
        # Mark playlist as ended (VOD mode)
        if self.playlist_path.exists():
            with open(self.playlist_path, "a") as f:
                f.write("#EXT-X-ENDLIST\n")

    @property
    def is_open(self) -> bool:
        """True while ffmpeg is alive and no write has failed."""
        return self._is_open and self._process.poll() is None
class IPFSHLSOutput(Output):
    """
    Write frames as HLS stream with segments uploaded to IPFS.

    Each segment is uploaded to IPFS as it's created, enabling distributed
    streaming where clients can fetch segments from any IPFS gateway.
    The m3u8 playlist is continuously updated with IPFS URLs and can be
    fetched via get_playlist() or the playlist_cid property.

    Uploads happen on a background daemon thread so ffmpeg's stdin pipe
    is never blocked by network I/O.
    """
    def __init__(
        self,
        output_dir: str,
        size: Tuple[int, int],
        fps: float = 30,
        segment_duration: float = 4.0,
        codec: str = "auto",
        crf: int = 23,
        preset: str = "fast",
        audio_source: Optional[str] = None,
        ipfs_gateway: str = "https://ipfs.io/ipfs",
        on_playlist_update: callable = None,
    ):
        """
        Args:
            output_dir: Local directory for stream.m3u8 and segment_*.ts files.
            size: (width, height) every frame is resized to before encoding.
            fps: Output frame rate.
            segment_duration: Target HLS segment length in seconds.
            codec: Video encoder; "auto" picks NVENC when available.
            crf: Quality factor passed to get_encoder_params().
            preset: Encoder speed/quality preset.
            audio_source: Optional audio file muxed in as a second input.
            ipfs_gateway: Gateway base URL used in generated playlists.
            on_playlist_update: Optional callback invoked with each new playlist CID.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.size = size
        self.fps = fps
        self.segment_duration = segment_duration
        self.ipfs_gateway = ipfs_gateway.rstrip("/")
        self._is_open = True
        self._on_playlist_update = on_playlist_update  # Callback when playlist CID changes
        # Auto-detect NVENC
        if codec == "auto":
            codec = "h264_nvenc" if check_nvenc_available() else "libx264"
        self.codec = codec
        # Track segment CIDs
        self.segment_cids: dict = {}  # segment_number -> cid
        self._last_segment_checked = -1
        self._playlist_cid: Optional[str] = None
        self._upload_lock = threading.Lock()  # guards segment_cids (non-reentrant!)
        # Import IPFS client
        from ipfs_client import add_file, add_bytes
        self._ipfs_add_file = add_file
        self._ipfs_add_bytes = add_bytes
        # Background upload thread for async IPFS uploads
        self._upload_queue = queue.Queue()
        self._upload_thread = threading.Thread(target=self._upload_worker, daemon=True)
        self._upload_thread.start()
        # Local HLS paths
        self.local_playlist_path = self.output_dir / "stream.m3u8"
        # Build ffmpeg command for HLS output (raw RGB24 frames on stdin)
        cmd = [
            "ffmpeg", "-y",
            "-f", "rawvideo",
            "-vcodec", "rawvideo",
            "-pix_fmt", "rgb24",
            "-s", f"{size[0]}x{size[1]}",
            "-r", str(fps),
            "-i", "-",
        ]
        # Add audio input if provided
        if audio_source:
            cmd.extend(["-i", str(audio_source)])
            cmd.extend(["-map", "0:v", "-map", "1:a"])
        # Keyframe interval
        gop_size = int(fps * segment_duration)
        # Get encoder-specific params
        cmd.extend(get_encoder_params(codec, preset, crf))
        cmd.extend([
            "-pix_fmt", "yuv420p",
            # Force keyframes at segment boundaries for clean cuts
            "-g", str(gop_size),
            "-keyint_min", str(gop_size),
            "-sc_threshold", "0",
            "-force_key_frames", f"expr:gte(t,n_forced*{segment_duration})",
            "-flush_packets", "1",
        ])
        # Add audio codec if we have audio
        if audio_source:
            cmd.extend(["-c:a", "aac", "-b:a", "128k", "-shortest"])
        # HLS options
        cmd.extend([
            "-f", "hls",
            "-hls_time", str(segment_duration),
            "-hls_list_size", "0",
            "-hls_flags", "independent_segments+append_list+split_by_time",
            "-hls_segment_type", "mpegts",
            "-hls_segment_filename", str(self.output_dir / "segment_%05d.ts"),
            str(self.local_playlist_path),
        ])
        import sys
        print(f"IPFSHLSOutput: starting ffmpeg", file=sys.stderr)
        self._process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stderr=None,
        )

    def _upload_worker(self):
        """Background worker thread for async IPFS uploads.

        Runs until a None sentinel is dequeued (see close()).
        """
        import sys
        while True:
            try:
                item = self._upload_queue.get(timeout=1.0)
                if item is None:  # Shutdown signal
                    break
                seg_path, seg_num = item
                self._do_upload(seg_path, seg_num)
            except queue.Empty:
                continue
            except Exception as e:
                print(f"Upload worker error: {e}", file=sys.stderr)

    def _do_upload(self, seg_path: Path, seg_num: int):
        """Actually perform the upload (runs in background thread)."""
        import sys
        try:
            cid = self._ipfs_add_file(seg_path, pin=True)
            if cid:
                # Hold the lock only for the dict write: _update_ipfs_playlist()
                # re-acquires _upload_lock, which is non-reentrant.
                with self._upload_lock:
                    self.segment_cids[seg_num] = cid
                print(f"IPFS: segment_{seg_num:05d}.ts -> {cid}", file=sys.stderr)
                self._update_ipfs_playlist()
        except Exception as e:
            print(f"Failed to upload segment {seg_num}: {e}", file=sys.stderr)

    def _upload_new_segments(self):
        """Check for new segments and queue them for async IPFS upload."""
        import sys
        import time
        # Find all segments
        segments = sorted(self.output_dir.glob("segment_*.ts"))
        for seg_path in segments:
            # Extract segment number from filename
            seg_name = seg_path.stem  # segment_00000
            seg_num = int(seg_name.split("_")[1])
            # Skip if already uploaded or queued
            # NOTE(review): only completed uploads are in segment_cids, so a
            # segment still sitting in the queue may be queued twice -- verify.
            with self._upload_lock:
                if seg_num in self.segment_cids:
                    continue
            # Skip if segment is still being written (quick non-blocking check:
            # compare file size across a 10ms window)
            try:
                size1 = seg_path.stat().st_size
                if size1 == 0:
                    continue  # Empty file, still being created
                time.sleep(0.01)  # Very short check
                size2 = seg_path.stat().st_size
                if size1 != size2:
                    continue  # File still being written
            except FileNotFoundError:
                continue
            # Queue for async upload (non-blocking!)
            self._upload_queue.put((seg_path, seg_num))

    def _update_ipfs_playlist(self):
        """Generate and upload IPFS-aware m3u8 playlist.

        Must NOT be called while holding _upload_lock (it acquires it).
        """
        import sys
        with self._upload_lock:
            if not self.segment_cids:
                return
            # Build m3u8 content with IPFS URLs
            lines = [
                "#EXTM3U",
                "#EXT-X-VERSION:3",
                f"#EXT-X-TARGETDURATION:{int(self.segment_duration) + 1}",
                "#EXT-X-MEDIA-SEQUENCE:0",
            ]
            # Add segments in order
            for seg_num in sorted(self.segment_cids.keys()):
                cid = self.segment_cids[seg_num]
                lines.append(f"#EXTINF:{self.segment_duration:.3f},")
                lines.append(f"{self.ipfs_gateway}/{cid}")
            playlist_content = "\n".join(lines) + "\n"
        # Upload playlist to IPFS (outside the lock: network I/O)
        cid = self._ipfs_add_bytes(playlist_content.encode("utf-8"), pin=True)
        if cid:
            self._playlist_cid = cid
            print(f"IPFS: playlist updated -> {cid} ({len(self.segment_cids)} segments)", file=sys.stderr)
            # Notify callback (e.g., to update database for live HLS redirect)
            if self._on_playlist_update:
                try:
                    self._on_playlist_update(cid)
                except Exception as e:
                    print(f"IPFS: playlist callback error: {e}", file=sys.stderr)

    def write(self, frame: np.ndarray, t: float):
        """Write frame to HLS stream and upload segments to IPFS."""
        if not self._is_open or self._process.poll() is not None:
            self._is_open = False
            return
        # Convert GPU array to numpy if needed
        frame = ensure_numpy(frame)
        # Resize if needed
        if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
            import cv2
            frame = cv2.resize(frame, self.size)
        # Ensure correct format
        if frame.dtype != np.uint8:
            frame = np.clip(frame, 0, 255).astype(np.uint8)
        if not frame.flags['C_CONTIGUOUS']:
            frame = np.ascontiguousarray(frame)
        try:
            self._process.stdin.write(frame.tobytes())
        except BrokenPipeError:
            self._is_open = False
            return
        # Check for new segments once per crossed segment boundary
        current_segment = int(t / self.segment_duration)
        if current_segment > self._last_segment_checked:
            self._last_segment_checked = current_segment
            self._upload_new_segments()

    def close(self):
        """Close the HLS stream and finalize IPFS uploads."""
        import sys
        if self._process:
            self._process.stdin.close()
            self._process.wait()
        self._is_open = False
        # Queue any remaining segments
        self._upload_new_segments()
        # Wait for pending uploads to complete (FIFO: sentinel drains the queue)
        self._upload_queue.put(None)  # Signal shutdown
        self._upload_thread.join(timeout=30)
        # Generate final playlist with #EXT-X-ENDLIST
        if self.segment_cids:
            lines = [
                "#EXTM3U",
                "#EXT-X-VERSION:3",
                f"#EXT-X-TARGETDURATION:{int(self.segment_duration) + 1}",
                "#EXT-X-MEDIA-SEQUENCE:0",
                "#EXT-X-PLAYLIST-TYPE:VOD",
            ]
            for seg_num in sorted(self.segment_cids.keys()):
                cid = self.segment_cids[seg_num]
                lines.append(f"#EXTINF:{self.segment_duration:.3f},")
                lines.append(f"{self.ipfs_gateway}/{cid}")
            lines.append("#EXT-X-ENDLIST")
            playlist_content = "\n".join(lines) + "\n"
            cid = self._ipfs_add_bytes(playlist_content.encode("utf-8"), pin=True)
            if cid:
                self._playlist_cid = cid
                print(f"IPFS: final playlist -> {cid} ({len(self.segment_cids)} segments)", file=sys.stderr)

    @property
    def playlist_cid(self) -> Optional[str]:
        """Get the current playlist CID."""
        return self._playlist_cid

    @property
    def playlist_url(self) -> Optional[str]:
        """Get the full IPFS URL for the playlist."""
        if self._playlist_cid:
            return f"{self.ipfs_gateway}/{self._playlist_cid}"
        return None

    def get_playlist(self) -> str:
        """Get the current m3u8 playlist content with IPFS URLs."""
        if not self.segment_cids:
            return "#EXTM3U\n"
        lines = [
            "#EXTM3U",
            "#EXT-X-VERSION:3",
            f"#EXT-X-TARGETDURATION:{int(self.segment_duration) + 1}",
            "#EXT-X-MEDIA-SEQUENCE:0",
        ]
        for seg_num in sorted(self.segment_cids.keys()):
            cid = self.segment_cids[seg_num]
            lines.append(f"#EXTINF:{self.segment_duration:.3f},")
            lines.append(f"{self.ipfs_gateway}/{cid}")
        # A closed stream gets the VOD end marker
        if not self._is_open:
            lines.append("#EXT-X-ENDLIST")
        return "\n".join(lines) + "\n"

    @property
    def is_open(self) -> bool:
        """True while ffmpeg is alive and no write has failed."""
        return self._is_open and self._process.poll() is None

846
streaming/pipeline.py Normal file
View File

@@ -0,0 +1,846 @@
"""
Streaming pipeline executor.
Directly executes compiled sexp recipes frame-by-frame.
No adapter layer - frames and analysis flow through the DAG.
"""
import sys
import time
import numpy as np
from pathlib import Path
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, field
from .sources import VideoSource
from .audio import StreamingAudioAnalyzer
from .output import DisplayOutput, FileOutput
from .sexp_interp import SexpInterpreter
@dataclass
class FrameContext:
    """Context passed through the pipeline for each frame."""
    t: float                # Current time in seconds
    energy: float = 0.0     # Live audio RMS energy for this frame
    is_beat: bool = False   # True while a beat is active (edge-detected by caller)
    beat_count: int = 0     # Cumulative number of detected beats
    analysis: Dict[str, Any] = field(default_factory=dict)  # analysis id -> feature dict
class StreamingPipeline:
"""
Executes a compiled sexp recipe as a streaming pipeline.
Frames flow through the DAG directly - no adapter needed.
Each node is evaluated lazily when its output is requested.
"""
    def __init__(self, compiled_recipe, recipe_dir: Path = None, fps: float = 30, seed: int = 42,
                 output_size: tuple = None):
        """
        Args:
            compiled_recipe: Compiled sexp recipe exposing a `nodes` list of dicts.
            recipe_dir: Base directory for resolving relative media paths.
            fps: Target frame rate for sources and rendering.
            seed: Base RNG seed; each SCAN node derives its own default seed from it.
            output_size: Optional (w, h); defaults to the first video source's size.
        """
        self.recipe = compiled_recipe
        self.recipe_dir = recipe_dir or Path(".")
        self.fps = fps
        self.seed = seed
        # Build node lookup
        self.nodes = {n['id']: n for n in compiled_recipe.nodes}
        # Runtime state
        self.sources: Dict[str, VideoSource] = {}
        self.audio_analyzer: Optional[StreamingAudioAnalyzer] = None
        self.audio_source_path: Optional[str] = None
        # Sexp interpreter for expressions
        self.interp = SexpInterpreter()
        # Scan state (node_id -> current value)
        self.scan_state: Dict[str, Any] = {}
        self.scan_emit: Dict[str, Any] = {}
        # SLICE_ON state
        self.slice_on_acc: Dict[str, Any] = {}
        self.slice_on_result: Dict[str, Any] = {}
        # Frame cache for current timestep (cleared each frame)
        self._frame_cache: Dict[str, np.ndarray] = {}
        # Context for current frame
        self.ctx = FrameContext(t=0.0)
        # Output size (w, h) - set after sources are initialized
        self._output_size = output_size
        # Initialize (order matters: scans/slice_on evaluation may reference sources)
        self._init_sources()
        self._init_scans()
        self._init_slice_on()
        # Set output size from first source if not specified
        if self._output_size is None and self.sources:
            first_source = next(iter(self.sources.values()))
            self._output_size = first_source._size
def _init_sources(self):
"""Initialize video and audio sources."""
for node in self.recipe.nodes:
if node.get('type') == 'SOURCE':
config = node.get('config', {})
path = config.get('path')
if path:
full_path = (self.recipe_dir / path).resolve()
suffix = full_path.suffix.lower()
if suffix in ('.mp4', '.webm', '.mov', '.avi', '.mkv'):
if not full_path.exists():
print(f"Warning: video not found: {full_path}", file=sys.stderr)
continue
self.sources[node['id']] = VideoSource(
str(full_path),
target_fps=self.fps
)
elif suffix in ('.mp3', '.wav', '.flac', '.ogg', '.m4a', '.aac'):
if not full_path.exists():
print(f"Warning: audio not found: {full_path}", file=sys.stderr)
continue
self.audio_source_path = str(full_path)
self.audio_analyzer = StreamingAudioAnalyzer(str(full_path))
    def _init_scans(self):
        """Initialize scan nodes with their initial state.

        Each SCAN gets its own random.Random; when a node does not specify a
        seed, a distinct default is derived as self.seed + seed_offset, where
        seed_offset increments once per SCAN node encountered.
        """
        import random
        seed_offset = 0
        for node in self.recipe.nodes:
            if node.get('type') == 'SCAN':
                config = node.get('config', {})
                # Create RNG for this scan
                scan_seed = config.get('seed', self.seed + seed_offset)
                rng = random.Random(scan_seed)
                seed_offset += 1
                # Evaluate initial value (empty environment)
                init_expr = config.get('init', 0)
                init_value = self.interp.eval(init_expr, {})
                self.scan_state[node['id']] = {
                    'value': init_value,
                    'rng': rng,
                    'config': config,
                }
                # Compute initial emit so bindings have a value before the first beat
                self._update_scan_emit(node['id'])
def _update_scan_emit(self, node_id: str):
"""Update the emit value for a scan."""
state = self.scan_state[node_id]
config = state['config']
emit_expr = config.get('emit_expr', config.get('emit', None))
if emit_expr is None:
# No emit expression - emit the value directly
self.scan_emit[node_id] = state['value']
return
# Build environment from state
env = {}
if isinstance(state['value'], dict):
env.update(state['value'])
else:
env['acc'] = state['value']
env['beat_count'] = self.ctx.beat_count
env['time'] = self.ctx.t
# Set RNG for interpreter
self.interp.rng = state['rng']
self.scan_emit[node_id] = self.interp.eval(emit_expr, env)
def _step_scan(self, node_id: str):
"""Step a scan forward on beat."""
state = self.scan_state[node_id]
config = state['config']
step_expr = config.get('step_expr', config.get('step', None))
if step_expr is None:
return
# Build environment
env = {}
if isinstance(state['value'], dict):
env.update(state['value'])
else:
env['acc'] = state['value']
env['beat_count'] = self.ctx.beat_count
env['time'] = self.ctx.t
# Set RNG
self.interp.rng = state['rng']
# Evaluate step
new_value = self.interp.eval(step_expr, env)
state['value'] = new_value
# Update emit
self._update_scan_emit(node_id)
def _init_slice_on(self):
"""Initialize SLICE_ON nodes."""
for node in self.recipe.nodes:
if node.get('type') == 'SLICE_ON':
config = node.get('config', {})
init = config.get('init', {})
self.slice_on_acc[node['id']] = dict(init)
# Evaluate initial state
self._eval_slice_on(node['id'])
    def _eval_slice_on(self, node_id: str):
        """Evaluate a SLICE_ON node's Lambda.

        Runs the node's `fn` against the current accumulator and beat count,
        stores the result (layers/compose weights) for _eval_slice_on_frame,
        and carries the Lambda's 'acc' output forward as the new accumulator.
        Errors are logged to stderr rather than raised (best-effort per beat).
        """
        node = self.nodes[node_id]
        config = node.get('config', {})
        fn = config.get('fn')
        videos = config.get('videos', [])
        if not fn:
            return
        acc = self.slice_on_acc[node_id]
        n_videos = len(videos)
        # Set up environment: the Lambda sees videos as indices 0..n-1
        self.interp.globals['videos'] = list(range(n_videos))
        try:
            from .sexp_interp import eval_slice_on_lambda
            result = eval_slice_on_lambda(
                fn, acc, self.ctx.beat_count, 0, 1,
                list(range(n_videos)), self.interp
            )
            self.slice_on_result[node_id] = result
            # Update accumulator
            if 'acc' in result:
                self.slice_on_acc[node_id] = result['acc']
        except Exception as e:
            print(f"SLICE_ON eval error: {e}", file=sys.stderr)
def _on_beat(self):
"""Called when a beat is detected."""
self.ctx.beat_count += 1
# Step all scans
for node_id in self.scan_state:
self._step_scan(node_id)
# Step all SLICE_ON nodes
for node_id in self.slice_on_acc:
self._eval_slice_on(node_id)
def _get_frame(self, node_id: str) -> Optional[np.ndarray]:
"""
Get the output frame for a node at current time.
Recursively evaluates inputs as needed.
Results are cached for the current timestep.
"""
if node_id in self._frame_cache:
return self._frame_cache[node_id]
node = self.nodes.get(node_id)
if not node:
return None
node_type = node.get('type')
if node_type == 'SOURCE':
frame = self._eval_source(node)
elif node_type == 'SEGMENT':
frame = self._eval_segment(node)
elif node_type == 'EFFECT':
frame = self._eval_effect(node)
elif node_type == 'SLICE_ON':
frame = self._eval_slice_on_frame(node)
else:
# Unknown node type - try to pass through input
inputs = node.get('inputs', [])
frame = self._get_frame(inputs[0]) if inputs else None
self._frame_cache[node_id] = frame
return frame
def _eval_source(self, node: dict) -> Optional[np.ndarray]:
"""Evaluate a SOURCE node."""
source = self.sources.get(node['id'])
if source:
return source.read_frame(self.ctx.t)
return None
    def _eval_segment(self, node: dict) -> Optional[np.ndarray]:
        """Evaluate a SEGMENT node (time segment of source).

        Shifts the current time by `start`, loops it modulo `duration`
        (when given), and reads the underlying source at that local time.
        `start`/`duration` may be live bindings, resolved per frame.
        """
        inputs = node.get('inputs', [])
        if not inputs:
            return None
        config = node.get('config', {})
        start = config.get('start', 0)
        duration = config.get('duration')
        # Resolve any bindings (dict-shaped values carrying '_binding')
        if isinstance(start, dict):
            start = self._resolve_binding(start) if start.get('_binding') else 0
        if isinstance(duration, dict):
            duration = self._resolve_binding(duration) if duration.get('_binding') else None
        # Adjust time for segment; a non-numeric start is ignored
        t_local = self.ctx.t + (start if isinstance(start, (int, float)) else 0)
        if duration and isinstance(duration, (int, float)):
            t_local = t_local % duration  # Loop within segment
        # Get source frame at adjusted time; fall back to generic node
        # evaluation if the input is not a raw video source
        source_id = inputs[0]
        source = self.sources.get(source_id)
        if source:
            return source.read_frame(t_local)
        return self._get_frame(source_id)
    def _eval_effect(self, node: dict) -> Optional[np.ndarray]:
        """Evaluate an EFFECT node.

        Pulls the input frame(s), resolves the effect's parameters (live
        bindings/expressions included), and applies the named effect.
        Near-identity parameter values skip the work entirely, so idle
        effects are free. Unknown effect names pass the frame through.
        """
        import cv2
        inputs = node.get('inputs', [])
        config = node.get('config', {})
        effect_name = config.get('effect')
        # Get input frame(s), dropping any that failed to render
        input_frames = [self._get_frame(inp) for inp in inputs]
        input_frames = [f for f in input_frames if f is not None]
        if not input_frames:
            return None
        frame = input_frames[0]
        # Resolve bindings in config
        params = self._resolve_config(config)
        # Apply effect based on name
        if effect_name == 'rotate':
            angle = params.get('angle', 0)
            if abs(angle) > 0.5:  # skip sub-degree rotations
                h, w = frame.shape[:2]
                center = (w // 2, h // 2)
                matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
                frame = cv2.warpAffine(frame, matrix, (w, h))
        elif effect_name == 'zoom':
            amount = params.get('amount', 1.0)
            if abs(amount - 1.0) > 0.01:
                frame = self._apply_zoom(frame, amount)
        elif effect_name == 'invert':
            amount = params.get('amount', 0)
            if amount > 0.01:
                inverted = 255 - frame
                frame = cv2.addWeighted(frame, 1 - amount, inverted, amount, 0)
        elif effect_name == 'hue_shift':
            degrees = params.get('degrees', 0)
            if abs(degrees) > 1:
                # OpenCV hue channel spans 0-180, hence degrees / 2
                hsv = cv2.cvtColor(frame, cv2.COLOR_RGB2HSV)
                hsv[:, :, 0] = (hsv[:, :, 0].astype(int) + int(degrees / 2)) % 180
                frame = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
        elif effect_name == 'blend':
            if len(input_frames) >= 2:
                opacity = params.get('opacity', 0.5)
                frame = cv2.addWeighted(input_frames[0], 1 - opacity,
                                        input_frames[1], opacity, 0)
        elif effect_name == 'blend_multi':
            # Weighted sum of N inputs, resized to the first frame's geometry
            weights = params.get('weights', [])
            if len(input_frames) > 1 and weights:
                h, w = input_frames[0].shape[:2]
                result = np.zeros((h, w, 3), dtype=np.float32)
                for f, wt in zip(input_frames, weights):
                    if f is not None and wt > 0.001:
                        if f.shape[:2] != (h, w):
                            f = cv2.resize(f, (w, h))
                        result += f.astype(np.float32) * wt
                frame = np.clip(result, 0, 255).astype(np.uint8)
        elif effect_name == 'ripple':
            amp = params.get('amplitude', 0)
            if amp > 1:
                frame = self._apply_ripple(frame, amp,
                                           params.get('center_x', 0.5),
                                           params.get('center_y', 0.5),
                                           params.get('frequency', 8),
                                           params.get('decay', 2),
                                           params.get('speed', 5))
        return frame
    def _eval_slice_on_frame(self, node: dict) -> Optional[np.ndarray]:
        """Evaluate a SLICE_ON node - returns composited frame.

        Uses the layer list and compose weights produced by the most recent
        _eval_slice_on() beat step. Before a result exists (or when it is
        empty) the first video is shown as-is. Layers are weighted-summed at
        the pipeline's output size; weights are renormalized when they do not
        sum to ~1 so brightness stays constant.
        """
        import cv2
        config = node.get('config', {})
        video_ids = config.get('videos', [])
        result = self.slice_on_result.get(node['id'], {})
        if not result:
            # No result yet - return first video
            if video_ids:
                return self._get_frame(video_ids[0])
            return None
        # Get layers and compose info
        layers = result.get('layers', [])
        compose = result.get('compose', {})
        weights = compose.get('weights', [])
        if not layers or not weights:
            if video_ids:
                return self._get_frame(video_ids[0])
            return None
        # Get frames for each layer
        frames = []
        for i, layer in enumerate(layers):
            video_idx = layer.get('video', i)
            if video_idx < len(video_ids):
                frame = self._get_frame(video_ids[video_idx])
                # Apply layer effects (zoom); effect name may be an enum-like
                # object with a .name attribute or a plain string
                effects = layer.get('effects', [])
                for eff in effects:
                    eff_name = eff.get('effect')
                    if hasattr(eff_name, 'name'):
                        eff_name = eff_name.name
                    if eff_name == 'zoom':
                        zoom_amt = eff.get('amount', 1.0)
                        if frame is not None:
                            frame = self._apply_zoom(frame, zoom_amt)
                frames.append(frame)
            else:
                frames.append(None)
        # Composite with weights - use consistent output size
        if self._output_size:
            w, h = self._output_size
        else:
            # Fallback to first non-None frame size
            for f in frames:
                if f is not None:
                    h, w = f.shape[:2]
                    break
            else:
                return None
        output = np.zeros((h, w, 3), dtype=np.float32)
        for frame, weight in zip(frames, weights):
            if frame is None or weight < 0.001:
                continue
            # Resize to output size
            if frame.shape[1] != w or frame.shape[0] != h:
                frame = cv2.resize(frame, (w, h))
            output += frame.astype(np.float32) * weight
        # Normalize weights so off-unit totals don't brighten/darken the mix
        total_weight = sum(wt for wt in weights if wt > 0.001)
        if total_weight > 0 and abs(total_weight - 1.0) > 0.01:
            output /= total_weight
        return np.clip(output, 0, 255).astype(np.uint8)
def _resolve_config(self, config: dict) -> dict:
"""Resolve bindings in effect config to actual values."""
resolved = {}
for key, value in config.items():
if key in ('effect', 'effect_path', 'effect_cid', 'effects_registry',
'analysis_refs', 'inputs', 'cid'):
continue
if isinstance(value, dict) and value.get('_binding'):
resolved[key] = self._resolve_binding(value)
elif isinstance(value, dict) and value.get('_expr'):
resolved[key] = self._resolve_expr(value)
else:
resolved[key] = value
return resolved
def _resolve_binding(self, binding: dict) -> Any:
"""Resolve a binding to its current value."""
source_id = binding.get('source')
feature = binding.get('feature', 'values')
range_map = binding.get('range')
# Get raw value from scan or analysis
if source_id in self.scan_emit:
value = self.scan_emit[source_id]
elif source_id in self.ctx.analysis:
data = self.ctx.analysis[source_id]
value = data.get(feature, data.get('values', [0]))[0] if isinstance(data, dict) else data
else:
# Fallback to energy
value = self.ctx.energy
# Extract feature from dict
if isinstance(value, dict) and feature in value:
value = value[feature]
# Apply range mapping
if range_map and isinstance(value, (int, float)):
lo, hi = range_map
value = lo + value * (hi - lo)
return value
def _resolve_expr(self, expr: dict) -> Any:
"""Resolve a compiled expression."""
env = {
'energy': self.ctx.energy,
'beat_count': self.ctx.beat_count,
't': self.ctx.t,
}
# Add scan values
for scan_id, value in self.scan_emit.items():
# Use short form if available
env[scan_id] = value
# Extract the actual expression from _expr wrapper
actual_expr = expr.get('_expr', expr)
return self.interp.eval(actual_expr, env)
def _apply_zoom(self, frame: np.ndarray, amount: float) -> np.ndarray:
"""Apply zoom to frame."""
import cv2
h, w = frame.shape[:2]
if amount > 1.01:
# Zoom in: crop center
new_w, new_h = int(w / amount), int(h / amount)
if new_w > 0 and new_h > 0:
x1, y1 = (w - new_w) // 2, (h - new_h) // 2
cropped = frame[y1:y1+new_h, x1:x1+new_w]
return cv2.resize(cropped, (w, h))
elif amount < 0.99:
# Zoom out: shrink and center
scaled_w, scaled_h = int(w * amount), int(h * amount)
if scaled_w > 0 and scaled_h > 0:
shrunk = cv2.resize(frame, (scaled_w, scaled_h))
canvas = np.zeros((h, w, 3), dtype=np.uint8)
x_off, y_off = (w - scaled_w) // 2, (h - scaled_h) // 2
canvas[y_off:y_off+scaled_h, x_off:x_off+scaled_w] = shrunk
return canvas
return frame
    def _apply_ripple(self, frame: np.ndarray, amplitude: float,
                      cx: float, cy: float, frequency: float,
                      decay: float, speed: float) -> np.ndarray:
        """Apply a radial ripple distortion centered at (cx, cy).

        Args:
            frame: Input image (H, W[, C]).
            amplitude: Peak pixel displacement of the ripple.
            cx, cy: Ripple center as fractions of width/height (0-1).
            frequency: Spatial wavelength divisor; smaller = tighter rings.
            decay: Radial falloff rate, scaled by the larger frame dimension.
            speed: Temporal phase speed; animates the rings over time.
        """
        import cv2
        h, w = frame.shape[:2]
        # Create coordinate grids
        y_coords, x_coords = np.mgrid[0:h, 0:w].astype(np.float32)
        # Normalize to center
        center_x, center_y = w * cx, h * cy
        dx = x_coords - center_x
        dy = y_coords - center_y
        dist = np.sqrt(dx**2 + dy**2)
        # Ripple displacement: sinusoid over distance whose phase advances
        # with time, with exponential falloff away from the center.
        phase = self.ctx.t * speed
        ripple = amplitude * np.sin(dist / frequency - phase) * np.exp(-dist * decay / max(w, h))
        # Displace each pixel radially along its angle from the center.
        angle = np.arctan2(dy, dx)
        map_x = (x_coords + ripple * np.cos(angle)).astype(np.float32)
        map_y = (y_coords + ripple * np.sin(angle)).astype(np.float32)
        # BORDER_REFLECT avoids black fringes where displacement exits the frame.
        return cv2.remap(frame, map_x, map_y, cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT)
def _find_output_node(self) -> Optional[str]:
"""Find the final output node (MUX or last EFFECT)."""
# Look for MUX node
for node in self.recipe.nodes:
if node.get('type') == 'MUX':
return node['id']
# Otherwise find last EFFECT after SLICE_ON
last_effect = None
found_slice_on = False
for node in self.recipe.nodes:
if node.get('type') == 'SLICE_ON':
found_slice_on = True
elif node.get('type') == 'EFFECT' and found_slice_on:
last_effect = node['id']
return last_effect
    def render_frame(self, t: float) -> Optional[np.ndarray]:
        """Render a single frame at time t.

        Updates audio-driven context state, renders the output node, and
        resizes the result to the configured output size.

        Returns:
            The rendered frame, or None if no output node was found.
        """
        # Clear frame cache
        self._frame_cache.clear()
        # Update context
        self.ctx.t = t
        # Update audio analysis
        if self.audio_analyzer:
            self.audio_analyzer.set_time(t)
            energy = self.audio_analyzer.get_energy()
            is_beat = self.audio_analyzer.get_beat()
            # Beat edge detection: fire _on_beat only on the rising edge,
            # so a sustained beat flag triggers scans exactly once.
            was_beat = self.ctx.is_beat
            self.ctx.energy = energy
            self.ctx.is_beat = is_beat
            if is_beat and not was_beat:
                self._on_beat()
            # Expose live features as analysis tracks so bindings can use them.
            self.ctx.analysis['live_energy'] = {'values': [energy]}
            self.ctx.analysis['live_beat'] = {'values': [1.0 if is_beat else 0.0]}
        # Find output node and render
        output_node = self._find_output_node()
        if output_node:
            frame = self._get_frame(output_node)
            # Normalize to output size
            if frame is not None and self._output_size:
                w, h = self._output_size
                if frame.shape[1] != w or frame.shape[0] != h:
                    import cv2
                    frame = cv2.resize(frame, (w, h))
            return frame
        return None
    def run(self, output: str = "preview", duration: float = None):
        """
        Run the pipeline.

        Args:
            output: "preview", filename, or Output object
            duration: Duration in seconds (default: audio duration or 60s)
        """
        # Determine duration
        if duration is None:
            if self.audio_analyzer:
                duration = self.audio_analyzer.duration
            else:
                duration = 60.0
        # Create output: display window, file, or caller-supplied sink.
        if output == "preview":
            # Get frame size from first source
            first_source = next(iter(self.sources.values()), None)
            if first_source:
                w, h = first_source._size
            else:
                w, h = 720, 720
            out = DisplayOutput(size=(w, h), fps=self.fps, audio_source=self.audio_source_path)
        elif isinstance(output, str):
            first_source = next(iter(self.sources.values()), None)
            if first_source:
                w, h = first_source._size
            else:
                w, h = 720, 720
            out = FileOutput(output, size=(w, h), fps=self.fps, audio_source=self.audio_source_path)
        else:
            out = output
        frame_time = 1.0 / self.fps
        n_frames = int(duration * self.fps)
        print(f"Streaming: {len(self.sources)} sources -> {output}", file=sys.stderr)
        print(f"Duration: {duration:.1f}s, {n_frames} frames @ {self.fps}fps", file=sys.stderr)
        start_time = time.time()
        frame_count = 0
        try:
            for frame_num in range(n_frames):
                t = frame_num * frame_time
                frame = self.render_frame(t)
                if frame is not None:
                    out.write(frame, t)
                    frame_count += 1
                # Progress: report achieved fps every 50 frames.
                if frame_num % 50 == 0:
                    elapsed = time.time() - start_time
                    fps = frame_count / elapsed if elapsed > 0 else 0
                    pct = 100 * frame_num / n_frames
                    print(f"\r{pct:5.1f}% | {fps:5.1f} fps | frame {frame_num}/{n_frames}",
                          end="", file=sys.stderr)
        except KeyboardInterrupt:
            print("\nInterrupted", file=sys.stderr)
        finally:
            # Always release the output sink and all video sources.
            out.close()
            for src in self.sources.values():
                src.close()
        elapsed = time.time() - start_time
        avg_fps = frame_count / elapsed if elapsed > 0 else 0
        print(f"\nCompleted: {frame_count} frames in {elapsed:.1f}s ({avg_fps:.1f} fps avg)",
              file=sys.stderr)
def run_pipeline(recipe_path: str, output: str = "preview",
                 duration: float = None, fps: float = None):
    """
    Compile a recipe file and run it through the streaming pipeline.

    No adapter layer - the compiled recipe is executed directly.
    """
    from pathlib import Path
    # Add artdag to path
    import sys
    sys.path.insert(0, str(Path(__file__).parent.parent.parent / "artdag"))
    from artdag.sexp.compiler import compile_string

    path = Path(recipe_path)
    compiled = compile_string(path.read_text(), {}, recipe_dir=path.parent)
    pipeline = StreamingPipeline(
        compiled,
        recipe_dir=path.parent,
        # Recipe encoding supplies the default frame rate.
        fps=fps or compiled.encoding.get('fps', 30),
    )
    pipeline.run(output=output, duration=duration)
def run_pipeline_piped(recipe_path: str, duration: float = None, fps: float = None):
    """
    Run pipeline and pipe directly to mpv with audio.

    Raw RGB24 frames are written to mpv's stdin; if the recipe has an
    audio source it is played in parallel via ffplay.
    """
    import subprocess
    from pathlib import Path
    import sys
    sys.path.insert(0, str(Path(__file__).parent.parent.parent / "artdag"))
    from artdag.sexp.compiler import compile_string
    recipe_path = Path(recipe_path)
    recipe_text = recipe_path.read_text()
    compiled = compile_string(recipe_text, {}, recipe_dir=recipe_path.parent)
    pipeline = StreamingPipeline(
        compiled,
        recipe_dir=recipe_path.parent,
        fps=fps or compiled.encoding.get('fps', 30),
    )
    # Get frame info from the first source (fallback: 720x720 square).
    first_source = next(iter(pipeline.sources.values()), None)
    if first_source:
        w, h = first_source._size
    else:
        w, h = 720, 720
    # Determine duration
    if duration is None:
        if pipeline.audio_analyzer:
            duration = pipeline.audio_analyzer.duration
        else:
            duration = 60.0
    actual_fps = fps or compiled.encoding.get('fps', 30)
    n_frames = int(duration * actual_fps)
    frame_time = 1.0 / actual_fps
    print(f"Streaming {n_frames} frames @ {actual_fps}fps to mpv", file=sys.stderr)
    # Start mpv reading raw frames from stdin ("-").
    mpv_cmd = [
        "mpv", "--no-cache",
        "--demuxer=rawvideo",
        f"--demuxer-rawvideo-w={w}",
        f"--demuxer-rawvideo-h={h}",
        "--demuxer-rawvideo-mp-format=rgb24",
        f"--demuxer-rawvideo-fps={actual_fps}",
        "--title=Streaming Pipeline",
        "-"
    ]
    mpv = subprocess.Popen(mpv_cmd, stdin=subprocess.PIPE, stderr=subprocess.DEVNULL)
    # Start audio if available
    audio_proc = None
    if pipeline.audio_source_path:
        audio_cmd = ["ffplay", "-nodisp", "-autoexit", "-loglevel", "quiet",
                     pipeline.audio_source_path]
        audio_proc = subprocess.Popen(audio_cmd, stdout=subprocess.DEVNULL,
                                      stderr=subprocess.DEVNULL)
    try:
        import cv2
        for frame_num in range(n_frames):
            if mpv.poll() is not None:
                break  # mpv closed
            t = frame_num * frame_time
            frame = pipeline.render_frame(t)
            if frame is not None:
                # Ensure consistent frame size
                if frame.shape[1] != w or frame.shape[0] != h:
                    frame = cv2.resize(frame, (w, h))
                # Raw demuxing requires contiguous bytes.
                if not frame.flags['C_CONTIGUOUS']:
                    frame = np.ascontiguousarray(frame)
                try:
                    mpv.stdin.write(frame.tobytes())
                    mpv.stdin.flush()
                except BrokenPipeError:
                    # mpv exited; stop streaming.
                    break
    except KeyboardInterrupt:
        pass
    finally:
        # Tear down child processes and release all sources.
        if mpv.stdin:
            mpv.stdin.close()
        mpv.terminate()
        if audio_proc:
            audio_proc.terminate()
        for src in pipeline.sources.values():
            src.close()
if __name__ == "__main__":
    import argparse

    # CLI entry point: run a recipe, defaulting to piping frames into mpv.
    cli = argparse.ArgumentParser(description="Run sexp recipe through streaming pipeline")
    cli.add_argument("recipe", help="Path to .sexp recipe file")
    cli.add_argument("-o", "--output", default="pipe",
                     help="Output: 'pipe' (mpv), 'preview', or filename (default: pipe)")
    cli.add_argument("-d", "--duration", type=float, default=None,
                     help="Duration in seconds (default: audio duration)")
    cli.add_argument("--fps", type=float, default=None,
                     help="Frame rate (default: from recipe)")
    opts = cli.parse_args()

    if opts.output == "pipe":
        run_pipeline_piped(opts.recipe, duration=opts.duration, fps=opts.fps)
    else:
        run_pipeline(opts.recipe, output=opts.output,
                     duration=opts.duration, fps=opts.fps)

470
streaming/recipe_adapter.py Normal file
View File

@@ -0,0 +1,470 @@
"""
Adapter to run sexp recipes through the streaming compositor.
Bridges the gap between:
- Existing recipe format (sexp files with stages, effects, analysis)
- Streaming compositor (sources, effect chains, compositor config)
"""
import sys
from pathlib import Path
from typing import Dict, List, Any, Optional
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "artdag"))
from .compositor import StreamingCompositor
from .sources import VideoSource
from .audio import FileAudioAnalyzer
class RecipeAdapter:
    """
    Adapts a compiled sexp recipe to run through the streaming compositor.

    Example:
        adapter = RecipeAdapter("effects/quick_test.sexp")
        adapter.run(output="preview", duration=60)
    """

    def __init__(
        self,
        recipe_path: str,
        params: Dict[str, Any] = None,
        backend: str = "numpy",
    ):
        """
        Load and prepare a recipe for streaming.

        Args:
            recipe_path: Path to .sexp recipe file
            params: Parameter overrides
            backend: "numpy" or "glsl"
        """
        self.recipe_path = Path(recipe_path)
        self.recipe_dir = self.recipe_path.parent
        self.params = params or {}
        self.backend = backend
        # Compile recipe
        self._compile()

    def _compile(self):
        """Compile the recipe and extract structure (sources, analyzers, effects)."""
        from artdag.sexp.compiler import compile_string
        recipe_text = self.recipe_path.read_text()
        self.compiled = compile_string(recipe_text, self.params, recipe_dir=self.recipe_dir)
        # Extract key info
        self.sources = {}  # name -> path
        self.effects_registry = {}  # effect_name -> path
        self.analyzers = {}  # name -> analyzer info
        # Walk nodes to find sources and structure
        # nodes is a list in CompiledRecipe
        for node in self.compiled.nodes:
            node_type = node.get("type", "")
            if node_type == "SOURCE":
                config = node.get("config", {})
                path = config.get("path")
                if path:
                    # Source paths are resolved relative to the recipe file.
                    self.sources[node["id"]] = self.recipe_dir / path
            elif node_type == "ANALYZE":
                config = node.get("config", {})
                self.analyzers[node["id"]] = {
                    "analyzer": config.get("analyzer"),
                    "path": config.get("analyzer_path"),
                }
        # Get effects registry from compiled recipe
        # registry has 'effects' sub-dict
        effects_dict = self.compiled.registry.get("effects", {})
        for name, info in effects_dict.items():
            if info.get("path"):
                self.effects_registry[name] = Path(info["path"])

    def run_analysis(self) -> Dict[str, Any]:
        """
        Run analysis phase (energy, beats, etc.).

        Returns:
            Dict of analysis track name -> {times, values, duration}
        """
        print(f"Running analysis...", file=sys.stderr)
        # Use existing planner's analysis execution
        from artdag.sexp.planner import create_plan
        analysis_data = {}

        def on_analysis(node_id: str, results: dict):
            # Collect per-node analysis results as they complete.
            analysis_data[node_id] = results
            print(f"  {node_id[:16]}...: {len(results.get('times', []))} samples", file=sys.stderr)

        # Create plan (runs analysis as side effect)
        plan = create_plan(
            self.compiled,
            inputs={},
            recipe_dir=self.recipe_dir,
            on_analysis=on_analysis,
        )
        # Also store named analysis tracks
        for name, data in plan.analysis.items():
            analysis_data[name] = data
        return analysis_data

    def build_compositor(
        self,
        analysis_data: Dict[str, Any] = None,
        fps: float = None,
    ) -> StreamingCompositor:
        """
        Build a streaming compositor from the recipe.

        This is a simplified version that handles common patterns.
        Complex recipes may need manual configuration.

        Args:
            analysis_data: Pre-computed analysis data

        Returns:
            Configured StreamingCompositor
        """
        # Extract video and audio sources in SLICE_ON input order
        video_sources = []
        audio_source = None
        # Find audio source first (first audio-suffixed source wins).
        for node_id, path in self.sources.items():
            suffix = path.suffix.lower()
            if suffix in ('.mp3', '.wav', '.flac', '.ogg', '.m4a', '.aac'):
                audio_source = str(path)
                break
        # Find SLICE_ON node to get correct video order
        slice_on_inputs = None
        for node in self.compiled.nodes:
            if node.get('type') == 'SLICE_ON':
                # Use 'videos' config key which has the correct order
                config = node.get('config', {})
                slice_on_inputs = config.get('videos', [])
                break
        if slice_on_inputs:
            # Trace each SLICE_ON input back to its SOURCE
            node_lookup = {n['id']: n for n in self.compiled.nodes}

            def trace_to_source(node_id, visited=None):
                """Trace a node back to its SOURCE, return source path."""
                if visited is None:
                    visited = set()
                if node_id in visited:
                    # Cycle guard: bail out rather than recurse forever.
                    return None
                visited.add(node_id)
                node = node_lookup.get(node_id)
                if not node:
                    return None
                if node.get('type') == 'SOURCE':
                    return self.sources.get(node_id)
                # Recurse through inputs
                for inp in node.get('inputs', []):
                    result = trace_to_source(inp, visited)
                    if result:
                        return result
                return None

            # Build video_sources in SLICE_ON input order
            for inp_id in slice_on_inputs:
                source_path = trace_to_source(inp_id)
                if source_path:
                    suffix = source_path.suffix.lower()
                    if suffix in ('.mp4', '.webm', '.mov', '.avi', '.mkv'):
                        video_sources.append(str(source_path))
        # Fallback to definition order if no SLICE_ON
        if not video_sources:
            for node_id, path in self.sources.items():
                suffix = path.suffix.lower()
                if suffix in ('.mp4', '.webm', '.mov', '.avi', '.mkv'):
                    video_sources.append(str(path))
        if not video_sources:
            raise ValueError("No video sources found in recipe")
        # Build effect chains - use live audio bindings (matching video_sources count)
        effects_per_source = self._build_streaming_effects(n_sources=len(video_sources))
        # Build compositor config from recipe
        compositor_config = self._extract_compositor_config(analysis_data)
        return StreamingCompositor(
            sources=video_sources,
            effects_per_source=effects_per_source,
            compositor_config=compositor_config,
            analysis_data=analysis_data or {},
            backend=self.backend,
            recipe_dir=self.recipe_dir,
            fps=fps or self.compiled.encoding.get("fps", 30),
            audio_source=audio_source,
        )

    def _build_streaming_effects(self, n_sources: int = None) -> List[List[Dict]]:
        """
        Build effect chains for streaming with live audio bindings.

        Replicates the recipe's effect pipeline:
        - Per source: rotate, zoom, invert, hue_shift, ascii_art
        - All driven by live_energy and live_beat
        """
        if n_sources is None:
            n_sources = len([p for p in self.sources.values()
                             if p.suffix.lower() in ('.mp4', '.webm', '.mov', '.avi', '.mkv')])
        effects_per_source = []
        for i in range(n_sources):
            # Alternate rotation direction per source
            rot_dir = 1 if i % 2 == 0 else -1
            effects = [
                # Rotate - energy drives angle
                {
                    "effect": "rotate",
                    "effect_path": str(self.effects_registry.get("rotate", "")),
                    "angle": {
                        "_binding": True,
                        "source": "live_energy",
                        "feature": "values",
                        "range": [0, 45 * rot_dir],
                    },
                },
                # Zoom - energy drives amount (in for even sources, out for odd)
                {
                    "effect": "zoom",
                    "effect_path": str(self.effects_registry.get("zoom", "")),
                    "amount": {
                        "_binding": True,
                        "source": "live_energy",
                        "feature": "values",
                        "range": [1.0, 1.5] if i % 2 == 0 else [1.0, 0.7],
                    },
                },
                # Invert - beat triggers
                {
                    "effect": "invert",
                    "effect_path": str(self.effects_registry.get("invert", "")),
                    "amount": {
                        "_binding": True,
                        "source": "live_beat",
                        "feature": "values",
                        "range": [0, 1],
                    },
                },
                # Hue shift - energy drives hue
                {
                    "effect": "hue_shift",
                    "effect_path": str(self.effects_registry.get("hue_shift", "")),
                    "degrees": {
                        "_binding": True,
                        "source": "live_energy",
                        "feature": "values",
                        "range": [0, 180],
                    },
                },
                # ASCII art - energy drives char size, beat triggers mix
                {
                    "effect": "ascii_art",
                    "effect_path": str(self.effects_registry.get("ascii_art", "")),
                    "char_size": {
                        "_binding": True,
                        "source": "live_energy",
                        "feature": "values",
                        "range": [4, 32],
                    },
                    "mix": {
                        "_binding": True,
                        "source": "live_beat",
                        "feature": "values",
                        "range": [0, 1],
                    },
                },
            ]
            effects_per_source.append(effects)
        return effects_per_source

    def _extract_effects(self) -> List[List[Dict]]:
        """Extract effect chains for each source (legacy, pre-computed analysis)."""
        # Simplified: find EFFECT nodes and their configs
        effects_per_source = []
        for node_id, path in self.sources.items():
            if path.suffix.lower() not in ('.mp4', '.webm', '.mov', '.avi', '.mkv'):
                continue
            # Find effects that depend on this source
            # This is simplified - real implementation would trace the DAG
            effects = []
            for node in self.compiled.nodes:
                if node.get("type") == "EFFECT":
                    config = node.get("config", {})
                    effect_name = config.get("effect")
                    if effect_name and effect_name in self.effects_registry:
                        effect_config = {
                            "effect": effect_name,
                            "effect_path": str(self.effects_registry[effect_name]),
                        }
                        # Copy only effect params (filter out internal fields)
                        internal_fields = (
                            "effect", "effect_path", "cid", "effect_cid",
                            "effects_registry", "analysis_refs", "inputs",
                        )
                        for k, v in config.items():
                            if k not in internal_fields:
                                effect_config[k] = v
                        effects.append(effect_config)
                        break  # One effect per source for now
            effects_per_source.append(effects)
        return effects_per_source

    def _extract_compositor_config(self, analysis_data: Dict) -> Dict:
        """Extract compositor configuration (blend mode + weights)."""
        # Look for blend_multi or similar composition nodes
        for node in self.compiled.nodes:
            if node.get("type") == "EFFECT":
                config = node.get("config", {})
                if config.get("effect") == "blend_multi":
                    return {
                        "mode": config.get("mode", "alpha"),
                        "weights": config.get("weights", []),
                    }
        # Default: equal blend across all video sources
        n_sources = len([p for p in self.sources.values()
                         if p.suffix.lower() in ('.mp4', '.webm', '.mov', '.avi', '.mkv')])
        return {
            "mode": "alpha",
            "weights": [1.0 / n_sources] * n_sources if n_sources > 0 else [1.0],
        }

    def run(
        self,
        output: str = "preview",
        duration: float = None,
        fps: float = None,
    ):
        """
        Run the recipe through streaming compositor.

        Everything streams: video frames read on-demand, audio analyzed in real-time.
        No pre-computation.

        Args:
            output: "preview", filename, or Output object
            duration: Duration in seconds (default: audio duration)
            fps: Frame rate (default from recipe, or 30)
        """
        # Build compositor with recipe executor for full pipeline
        # NOTE(review): StreamingRecipeExecutor is imported but not used here -
        # the SexpStreamingExecutor below is what actually runs.
        from .recipe_executor import StreamingRecipeExecutor
        compositor = self.build_compositor(analysis_data={}, fps=fps)
        # Use audio duration if not specified
        if duration is None:
            if compositor._audio_analyzer:
                duration = compositor._audio_analyzer.duration
                print(f"Using audio duration: {duration:.1f}s", file=sys.stderr)
            else:
                # Live mode - run until quit
                print("Live mode - press 'q' to quit", file=sys.stderr)
        # Create sexp executor that interprets the recipe
        from .sexp_executor import SexpStreamingExecutor
        executor = SexpStreamingExecutor(self.compiled, seed=42)
        compositor.run(output=output, duration=duration, recipe_executor=executor)
def run_recipe(
    recipe_path: str,
    output: str = "preview",
    duration: float = None,
    params: Dict = None,
    fps: float = None,
):
    """
    Run a recipe through the streaming compositor.

    Everything streams in real-time: video frames, audio analysis.
    No pre-computation - starts immediately.

    Example:
        run_recipe("effects/quick_test.sexp", output="preview", duration=30)
        run_recipe("effects/quick_test.sexp", output="preview", fps=5)  # Lower fps for slow systems
    """
    RecipeAdapter(recipe_path, params=params).run(
        output=output, duration=duration, fps=fps)
def run_recipe_piped(
    recipe_path: str,
    duration: float = None,
    params: Dict = None,
    fps: float = None,
):
    """Run a recipe and pipe raw frames directly to mpv."""
    from .output import PipeOutput
    from .sexp_executor import SexpStreamingExecutor

    adapter = RecipeAdapter(recipe_path, params=params)
    compositor = adapter.build_compositor(analysis_data={}, fps=fps)

    # Frame geometry comes from the first source, else a square default.
    if compositor.sources:
        w, h = compositor.sources[0]._size
    else:
        w, h = 720, 720

    pipe_out = PipeOutput(
        size=(w, h),
        fps=fps or adapter.compiled.encoding.get('fps', 30),
        audio_source=compositor._audio_source,
    )
    # The sexp executor interprets the compiled recipe frame-by-frame.
    executor = SexpStreamingExecutor(adapter.compiled, seed=42)
    compositor.run(output=pipe_out, duration=duration, recipe_executor=executor)
if __name__ == "__main__":
    import argparse

    # CLI entry point for the adapter: default output is an mpv pipe.
    cli = argparse.ArgumentParser(description="Run sexp recipe with streaming compositor")
    cli.add_argument("recipe", help="Path to .sexp recipe file")
    cli.add_argument("-o", "--output", default="pipe",
                     help="Output: 'pipe' (mpv), 'preview', or filename (default: pipe)")
    cli.add_argument("-d", "--duration", type=float, default=None,
                     help="Duration in seconds (default: audio duration)")
    cli.add_argument("--fps", type=float, default=None,
                     help="Frame rate (default: from recipe)")
    opts = cli.parse_args()

    if opts.output == "pipe":
        run_recipe_piped(opts.recipe, duration=opts.duration, fps=opts.fps)
    else:
        run_recipe(opts.recipe, output=opts.output,
                   duration=opts.duration, fps=opts.fps)

View File

@@ -0,0 +1,415 @@
"""
Streaming recipe executor.
Implements the full recipe logic for real-time streaming:
- Scans (state machines that evolve on beats)
- Process-pair template (two clips with sporadic effects, blended)
- Cycle-crossfade (dynamic composition cycling through video pairs)
"""
import random
import numpy as np
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, field
@dataclass
class ScanState:
    """State for a scan (beat-driven state machine)."""
    # Current scan value; shape depends on the scan (int counter or dict).
    value: Any = 0
    # Per-scan RNG so each scan evolves deterministically and independently.
    rng: random.Random = field(default_factory=random.Random)
class StreamingScans:
    """
    Real-time scan executor.

    Scans are state machines that evolve on each beat.
    They drive effect parameters like invert triggers, hue shifts, etc.
    """

    def __init__(self, seed: int = 42, n_sources: int = 4):
        self.master_seed = seed
        self.n_sources = n_sources
        self.scans: Dict[str, ScanState] = {}
        # Total beats seen so far.
        self.beat_count = 0
        # Wall-clock time of the current frame (set by the caller).
        self.current_time = 0.0
        self.last_beat_time = 0.0
        self._init_scans()

    def _init_scans(self):
        """Initialize all scans with their own RNG seeds."""
        scan_names = []
        # Per-pair scans (dynamic based on n_sources)
        for i in range(self.n_sources):
            scan_names.extend([
                f"inv_a_{i}", f"inv_b_{i}", f"hue_a_{i}", f"hue_b_{i}",
                f"ascii_a_{i}", f"ascii_b_{i}", f"pair_mix_{i}", f"pair_rot_{i}",
            ])
        # Global scans
        scan_names.extend(["whole_spin", "ripple_gate", "cycle"])
        # Seed each scan's RNG deterministically from the master seed.
        for i, name in enumerate(scan_names):
            rng = random.Random(self.master_seed + i)
            self.scans[name] = ScanState(value=self._init_value(name), rng=rng)

    def _init_value(self, name: str) -> Any:
        """Get initial value for a scan."""
        if name.startswith("inv_") or name.startswith("ascii_"):
            return 0  # Counter for remaining beats
        elif name.startswith("hue_"):
            return {"rem": 0, "hue": 0}
        elif name.startswith("pair_mix"):
            return {"rem": 0, "opacity": 0.5}
        elif name.startswith("pair_rot"):
            # Even pairs start spinning clockwise, odd pairs the other way.
            pair_idx = int(name.split("_")[-1])
            rot_dir = 1 if pair_idx % 2 == 0 else -1
            return {"beat": 0, "clen": 25, "dir": rot_dir, "angle": 0}
        elif name == "whole_spin":
            return {
                "phase": 0,  # 0 = waiting, 1 = spinning
                "beat": 0,  # beats into current phase
                "plen": 20,  # beats in this phase
                "dir": 1,  # spin direction
                "total_angle": 0.0,  # cumulative angle after all spins
                "spin_start_angle": 0.0,  # angle when current spin started
                "spin_start_time": 0.0,  # time when current spin started
                "spin_end_time": 0.0,  # estimated time when spin ends
            }
        elif name == "ripple_gate":
            return {"rem": 0, "cx": 0.5, "cy": 0.5}
        elif name == "cycle":
            return {"cycle": 0, "beat": 0, "clen": 60}
        return 0

    def on_beat(self):
        """Update all scans on a beat."""
        self.beat_count += 1
        # Estimate beat interval from last two beats
        beat_interval = self.current_time - self.last_beat_time if self.last_beat_time > 0 else 0.5
        self.last_beat_time = self.current_time
        for name, state in self.scans.items():
            state.value = self._step_scan(name, state.value, state.rng, beat_interval)

    def _step_scan(self, name: str, value: Any, rng: random.Random, beat_interval: float = 0.5) -> Any:
        """Step a scan forward by one beat.

        Each scan family has its own trigger probability and duration range;
        RNG call order must stay fixed for deterministic playback.
        """
        # Invert scan: 10% chance, lasts 1-5 beats
        if name.startswith("inv_"):
            if value > 0:
                return value - 1
            elif rng.random() < 0.1:
                return rng.randint(1, 5)
            return 0
        # Hue scan: 10% chance, random hue 30-330, lasts 1-5 beats
        elif name.startswith("hue_"):
            if value["rem"] > 0:
                return {"rem": value["rem"] - 1, "hue": value["hue"]}
            elif rng.random() < 0.1:
                return {"rem": rng.randint(1, 5), "hue": rng.uniform(30, 330)}
            return {"rem": 0, "hue": 0}
        # ASCII scan: 5% chance, lasts 1-3 beats
        elif name.startswith("ascii_"):
            if value > 0:
                return value - 1
            elif rng.random() < 0.05:
                return rng.randint(1, 3)
            return 0
        # Pair mix: changes every 1-11 beats
        elif name.startswith("pair_mix"):
            if value["rem"] > 0:
                return {"rem": value["rem"] - 1, "opacity": value["opacity"]}
            return {"rem": rng.randint(1, 11), "opacity": rng.choice([0, 0.5, 1.0])}
        # Pair rotation: full rotation every 20-30 beats
        elif name.startswith("pair_rot"):
            beat = value["beat"]
            clen = value["clen"]
            dir_ = value["dir"]
            angle = value["angle"]
            if beat + 1 < clen:
                # Advance by an even fraction of a full turn per beat.
                new_angle = angle + dir_ * (360 / clen)
                return {"beat": beat + 1, "clen": clen, "dir": dir_, "angle": new_angle}
            else:
                # Cycle done: pick a new length and reverse direction.
                return {"beat": 0, "clen": rng.randint(20, 30), "dir": -dir_, "angle": angle}
        # Whole spin: sporadic 720 degree spins (cumulative - stays rotated)
        elif name == "whole_spin":
            phase = value["phase"]
            beat = value["beat"]
            plen = value["plen"]
            dir_ = value["dir"]
            total_angle = value.get("total_angle", 0.0)
            spin_start_angle = value.get("spin_start_angle", 0.0)
            spin_start_time = value.get("spin_start_time", 0.0)
            spin_end_time = value.get("spin_end_time", 0.0)
            if phase == 1:
                # Currently spinning
                if beat + 1 < plen:
                    return {
                        "phase": 1, "beat": beat + 1, "plen": plen, "dir": dir_,
                        "total_angle": total_angle,
                        "spin_start_angle": spin_start_angle,
                        "spin_start_time": spin_start_time,
                        "spin_end_time": spin_end_time,
                    }
                else:
                    # Spin complete - update total_angle with final spin
                    new_total = spin_start_angle + dir_ * 720.0
                    return {
                        "phase": 0, "beat": 0, "plen": rng.randint(20, 40), "dir": dir_,
                        "total_angle": new_total,
                        "spin_start_angle": new_total,
                        "spin_start_time": self.current_time,
                        "spin_end_time": self.current_time,
                    }
            else:
                # Waiting phase
                if beat + 1 < plen:
                    return {
                        "phase": 0, "beat": beat + 1, "plen": plen, "dir": dir_,
                        "total_angle": total_angle,
                        "spin_start_angle": spin_start_angle,
                        "spin_start_time": spin_start_time,
                        "spin_end_time": spin_end_time,
                    }
                else:
                    # Start new spin; duration estimated from the beat interval
                    # so get_emit can interpolate smoothly in wall-clock time.
                    new_dir = 1 if rng.random() < 0.5 else -1
                    new_plen = rng.randint(10, 25)
                    spin_duration = new_plen * beat_interval
                    return {
                        "phase": 1, "beat": 0, "plen": new_plen, "dir": new_dir,
                        "total_angle": total_angle,
                        "spin_start_angle": total_angle,
                        "spin_start_time": self.current_time,
                        "spin_end_time": self.current_time + spin_duration,
                    }
        # Ripple gate: 5% chance, lasts 1-20 beats
        elif name == "ripple_gate":
            if value["rem"] > 0:
                return {"rem": value["rem"] - 1, "cx": value["cx"], "cy": value["cy"]}
            elif rng.random() < 0.05:
                return {"rem": rng.randint(1, 20),
                        "cx": rng.uniform(0.1, 0.9),
                        "cy": rng.uniform(0.1, 0.9)}
            return {"rem": 0, "cx": 0.5, "cy": 0.5}
        # Cycle: track which video pair is active
        elif name == "cycle":
            beat = value["beat"]
            clen = value["clen"]
            cycle = value["cycle"]
            if beat + 1 < clen:
                return {"cycle": cycle, "beat": beat + 1, "clen": clen}
            else:
                # Move to next pair, vary cycle length
                return {"cycle": (cycle + 1) % 4, "beat": 0,
                        "clen": 40 + (self.beat_count * 7) % 41}
        return value

    def get_emit(self, name: str) -> float:
        """Get emitted value for a scan.

        Note: despite the annotation, the "cycle" scan emits its state dict,
        not a float; callers index into it directly.
        """
        value = self.scans[name].value
        if name.startswith("inv_") or name.startswith("ascii_"):
            return 1.0 if value > 0 else 0.0
        elif name.startswith("hue_"):
            return value["hue"] if value["rem"] > 0 else 0.0
        elif name.startswith("pair_mix"):
            return value["opacity"]
        elif name.startswith("pair_rot"):
            return value["angle"]
        elif name == "whole_spin":
            # Smooth time-based interpolation during spin
            phase = value.get("phase", 0)
            if phase == 1:
                # Currently spinning - interpolate based on time
                spin_start_time = value.get("spin_start_time", 0.0)
                spin_end_time = value.get("spin_end_time", spin_start_time + 1.0)
                spin_start_angle = value.get("spin_start_angle", 0.0)
                dir_ = value.get("dir", 1)
                duration = spin_end_time - spin_start_time
                if duration > 0:
                    progress = (self.current_time - spin_start_time) / duration
                    progress = max(0.0, min(1.0, progress))  # clamp to 0-1
                else:
                    progress = 1.0
                return spin_start_angle + progress * 720.0 * dir_
            else:
                # Not spinning - return cumulative angle
                return value.get("total_angle", 0.0)
        elif name == "ripple_gate":
            return 1.0 if value["rem"] > 0 else 0.0
        elif name == "cycle":
            return value
        return 0.0
class StreamingRecipeExecutor:
    """
    Executes a recipe in streaming mode.

    Implements:
    - process-pair: two video clips with opposite effects, blended
    - cycle-crossfade: dynamic cycling through video pairs
    - Final effects: whole-spin rotation, ripple
    """

    def __init__(self, n_sources: int = 4, seed: int = 42):
        self.n_sources = n_sources
        self.scans = StreamingScans(seed, n_sources=n_sources)
        # Previous frame's beat flag, for rising-edge detection.
        self.last_beat_detected = False
        self.current_time = 0.0

    def on_frame(self, energy: float, is_beat: bool, t: float = 0.0):
        """Called each frame with current audio analysis."""
        self.current_time = t
        self.scans.current_time = t
        # Update scans on beat (rising edge only, so each beat fires once).
        if is_beat and not self.last_beat_detected:
            self.scans.on_beat()
        self.last_beat_detected = is_beat

    def get_effect_params(self, source_idx: int, clip: str, energy: float) -> Dict:
        """
        Get effect parameters for a source clip.

        Args:
            source_idx: Which video source (0-3)
            clip: "a" or "b" (each source has two clips)
            energy: Current audio energy (0-1)
        """
        suffix = f"_{source_idx}"
        # Rotation ranges alternate: clip a and b spin/zoom in opposite
        # directions, flipped again between even and odd sources.
        if source_idx % 2 == 0:
            rot_range = [0, 45] if clip == "a" else [0, -45]
            zoom_range = [1, 1.5] if clip == "a" else [1, 0.5]
        else:
            rot_range = [0, -45] if clip == "a" else [0, 45]
            zoom_range = [1, 0.5] if clip == "a" else [1, 1.5]
        return {
            "rotate_angle": rot_range[0] + energy * (rot_range[1] - rot_range[0]),
            "zoom_amount": zoom_range[0] + energy * (zoom_range[1] - zoom_range[0]),
            "invert_amount": self.scans.get_emit(f"inv_{clip}{suffix}"),
            "hue_degrees": self.scans.get_emit(f"hue_{clip}{suffix}"),
            "ascii_mix": 0,  # Disabled - too slow without GPU
            "ascii_char_size": 4 + energy * 28,  # 4-32
        }

    def get_pair_params(self, source_idx: int) -> Dict:
        """Get blend and rotation params for a video pair."""
        suffix = f"_{source_idx}"
        return {
            "blend_opacity": self.scans.get_emit(f"pair_mix{suffix}"),
            "pair_rotation": self.scans.get_emit(f"pair_rot{suffix}"),
        }

    def get_cycle_weights(self) -> List[float]:
        """Get blend weights for cycle-crossfade composition.

        The crossfade runs at 3x beat resolution (phase3): the active pair
        holds at 0.9, fades down through the middle third, while its
        successor fades up; everything else sits at 0.05. Weights are
        normalized to sum to 1.
        """
        cycle_state = self.scans.get_emit("cycle")
        active = cycle_state["cycle"]
        beat = cycle_state["beat"]
        clen = cycle_state["clen"]
        n = self.n_sources
        phase3 = beat * 3
        weights = []
        for p in range(n):
            prev = (p + n - 1) % n
            if active == p:
                if phase3 < clen:
                    w = 0.9
                elif phase3 < clen * 2:
                    w = 0.9 - ((phase3 - clen) / clen) * 0.85
                else:
                    w = 0.05
            elif active == prev:
                if phase3 < clen:
                    w = 0.05
                elif phase3 < clen * 2:
                    w = 0.05 + ((phase3 - clen) / clen) * 0.85
                else:
                    w = 0.9
            else:
                w = 0.05
            weights.append(w)
        # Normalize
        total = sum(weights)
        if total > 0:
            weights = [w / total for w in weights]
        return weights

    def get_cycle_zooms(self) -> List[float]:
        """Get zoom amounts for cycle-crossfade.

        Mirrors get_cycle_weights' phases: the outgoing pair grows past 1.0
        as it fades, the incoming pair starts large (3.0) and shrinks to 1.0,
        and inactive pairs stay tiny (0.1).
        """
        cycle_state = self.scans.get_emit("cycle")
        active = cycle_state["cycle"]
        beat = cycle_state["beat"]
        clen = cycle_state["clen"]
        n = self.n_sources
        phase3 = beat * 3
        zooms = []
        for p in range(n):
            prev = (p + n - 1) % n
            if active == p:
                if phase3 < clen:
                    z = 1.0
                elif phase3 < clen * 2:
                    z = 1.0 + ((phase3 - clen) / clen) * 1.0
                else:
                    z = 0.1
            elif active == prev:
                if phase3 < clen:
                    z = 3.0  # Start big
                elif phase3 < clen * 2:
                    z = 3.0 - ((phase3 - clen) / clen) * 2.0  # Shrink to 1.0
                else:
                    z = 1.0
            else:
                z = 0.1
            zooms.append(z)
        return zooms

    def get_final_effects(self, energy: float) -> Dict:
        """Get final composition effects (whole-spin, ripple)."""
        ripple_gate = self.scans.get_emit("ripple_gate")
        ripple_state = self.scans.scans["ripple_gate"].value
        return {
            "whole_spin_angle": self.scans.get_emit("whole_spin"),
            # Gate is 0/1, so amplitude is zero unless the ripple is active.
            "ripple_amplitude": ripple_gate * (5 + energy * 45),  # 5-50
            "ripple_cx": ripple_state["cx"],
            "ripple_cy": ripple_state["cy"],
        }

678
streaming/sexp_executor.py Normal file
View File

@@ -0,0 +1,678 @@
"""
Streaming S-expression executor.
Executes compiled sexp recipes in real-time by:
- Evaluating scan expressions on each beat
- Resolving bindings to get effect parameter values
- Applying effects frame-by-frame
- Evaluating SLICE_ON Lambda for cycle crossfade
"""
import random
import numpy as np
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, field
from .sexp_interp import SexpInterpreter, eval_slice_on_lambda
@dataclass
class ScanState:
    """Runtime state for a scan (one SCAN node of the compiled recipe)."""
    node_id: str           # id of the SCAN node in the compiled recipe
    name: Optional[str]    # optional human-readable name, used for lookups
    value: Any             # current accumulator value (scalar or dict of state vars)
    rng: random.Random     # per-scan RNG so random streams stay reproducible
    init_expr: dict        # compiled expression producing the initial value
    step_expr: dict        # compiled expression advancing `value` on each beat
    emit_expr: dict        # compiled expression producing the emitted output
class ExprEvaluator:
    """
    Evaluates compiled expression ASTs.

    Expressions are dicts with:
    - _expr: True (marks as expression)
    - op: operation name
    - args: list of arguments
    - name: for 'var' ops
    - keys: for 'dict' ops

    Anything that is not an expression dict evaluates to itself.
    """

    # Two-argument operators (comparisons and arithmetic) share one table.
    _BINOPS = {
        '<': lambda a, b: a < b,
        '>': lambda a, b: a > b,
        '<=': lambda a, b: a <= b,
        '>=': lambda a, b: a >= b,
        '=': lambda a, b: a == b,
        '!=': lambda a, b: a != b,
        '+': lambda a, b: a + b,
        '-': lambda a, b: a - b,
        '*': lambda a, b: a * b,
        '/': lambda a, b: a / b,
        'mod': lambda a, b: a % b,
    }

    def __init__(self, rng: random.Random = None):
        self.rng = rng or random.Random()

    def eval(self, expr: Any, env: Dict[str, Any]) -> Any:
        """Evaluate an expression in the given environment."""
        # Non-dicts are literal values; dicts without the marker are data.
        if not isinstance(expr, dict):
            return expr
        if not expr.get('_expr'):
            return expr

        op = expr.get('op')
        args = expr.get('args', [])

        if op == 'var':
            name = expr.get('name')
            if name not in env:
                raise KeyError(f"Unknown variable: {name}")
            return env[name]

        if op == 'dict':
            # Evaluate every argument (even extras) before zipping with keys.
            values = [self.eval(a, env) for a in args]
            return dict(zip(expr.get('keys', []), values))

        if op == 'get':
            container = self.eval(args[0], env)
            key = args[1]
            if isinstance(container, dict):
                return container.get(key)
            return container[key]

        if op == 'if':
            if self.eval(args[0], env):
                return self.eval(args[1], env)
            if len(args) > 2:
                return self.eval(args[2], env)
            return None

        binop = self._BINOPS.get(op)
        if binop is not None:
            return binop(self.eval(args[0], env), self.eval(args[1], env))

        # Random ops draw from this evaluator's RNG.
        if op == 'rand':
            return self.rng.random()
        if op == 'rand-int':
            return self.rng.randint(self.eval(args[0], env), self.eval(args[1], env))
        if op == 'rand-range':
            return self.rng.uniform(self.eval(args[0], env), self.eval(args[1], env))

        # Logic ops: note these return bools, not the last truthy operand.
        if op == 'and':
            return all(self.eval(a, env) for a in args)
        if op == 'or':
            return any(self.eval(a, env) for a in args)
        if op == 'not':
            return not self.eval(args[0], env)

        raise ValueError(f"Unknown operation: {op}")
class SexpStreamingExecutor:
"""
Executes a compiled sexp recipe in streaming mode.
Reads scan definitions, effect chains, and bindings from the
compiled recipe and executes them frame-by-frame.
"""
def __init__(self, compiled_recipe, seed: int = 42):
self.recipe = compiled_recipe
self.master_seed = seed
# Build node lookup
self.nodes = {n['id']: n for n in compiled_recipe.nodes}
# State (must be initialized before _init_scans)
self.beat_count = 0
self.current_time = 0.0
self.last_beat_time = 0.0
self.last_beat_detected = False
self.energy = 0.0
# Initialize scans
self.scans: Dict[str, ScanState] = {}
self.scan_outputs: Dict[str, Any] = {} # Current emit values by node_id
self._init_scans()
# Initialize SLICE_ON interpreter
self.sexp_interp = SexpInterpreter(random.Random(seed))
self._slice_on_lambda = None
self._slice_on_acc = None
self._slice_on_result = None # Last evaluation result {layers, compose, acc}
self._init_slice_on()
def _init_slice_on(self):
"""Initialize SLICE_ON Lambda for cycle crossfade."""
for node in self.recipe.nodes:
if node.get('type') == 'SLICE_ON':
config = node.get('config', {})
self._slice_on_lambda = config.get('fn')
init = config.get('init', {})
self._slice_on_acc = {
'cycle': init.get('cycle', 0),
'beat': init.get('beat', 0),
'clen': init.get('clen', 60),
}
# Evaluate initial state
self._eval_slice_on()
break
def _eval_slice_on(self):
"""Evaluate the SLICE_ON Lambda with current state."""
if not self._slice_on_lambda:
return
n = len(self._get_video_sources())
videos = list(range(n)) # Placeholder video indices
try:
result = eval_slice_on_lambda(
self._slice_on_lambda,
self._slice_on_acc,
self.beat_count,
0.0, # start time (not used for weights)
1.0, # end time (not used for weights)
videos,
self.sexp_interp,
)
self._slice_on_result = result
# Update accumulator for next beat
if 'acc' in result:
self._slice_on_acc = result['acc']
except Exception as e:
import sys
print(f"SLICE_ON eval error: {e}", file=sys.stderr)
def _init_scans(self):
"""Initialize all scan nodes from the recipe."""
seed_offset = 0
for node in self.recipe.nodes:
if node.get('type') == 'SCAN':
node_id = node['id']
config = node.get('config', {})
# Create RNG with unique seed
scan_seed = config.get('seed', self.master_seed + seed_offset)
rng = random.Random(scan_seed)
seed_offset += 1
# Evaluate initial value
init_expr = config.get('init', 0)
evaluator = ExprEvaluator(rng)
init_value = evaluator.eval(init_expr, {})
self.scans[node_id] = ScanState(
node_id=node_id,
name=node.get('name'),
value=init_value,
rng=rng,
init_expr=init_expr,
step_expr=config.get('step_expr', {}),
emit_expr=config.get('emit_expr', {}),
)
# Compute initial emit
self._update_emit(node_id)
def _update_emit(self, node_id: str):
"""Update the emit value for a scan."""
scan = self.scans[node_id]
evaluator = ExprEvaluator(scan.rng)
# Build environment from current state
env = self._build_scan_env(scan)
# Evaluate emit expression
emit_value = evaluator.eval(scan.emit_expr, env)
self.scan_outputs[node_id] = emit_value
def _build_scan_env(self, scan: ScanState) -> Dict[str, Any]:
"""Build environment for scan expression evaluation."""
env = {}
# Add state variables
if isinstance(scan.value, dict):
env.update(scan.value)
else:
env['acc'] = scan.value
# Add beat count
env['beat_count'] = self.beat_count
env['time'] = self.current_time
return env
def on_beat(self):
"""Update all scans on a beat."""
self.beat_count += 1
# Estimate beat interval
beat_interval = self.current_time - self.last_beat_time if self.last_beat_time > 0 else 0.5
self.last_beat_time = self.current_time
# Step each scan
for node_id, scan in self.scans.items():
evaluator = ExprEvaluator(scan.rng)
env = self._build_scan_env(scan)
# Evaluate step expression
new_value = evaluator.eval(scan.step_expr, env)
scan.value = new_value
# Update emit
self._update_emit(node_id)
# Step the cycle state
self._step_cycle()
def on_frame(self, energy: float, is_beat: bool, t: float = 0.0):
"""Called each frame with audio analysis."""
self.current_time = t
self.energy = energy
# Update scans on beat (edge detection)
if is_beat and not self.last_beat_detected:
self.on_beat()
self.last_beat_detected = is_beat
def resolve_binding(self, binding: dict) -> Any:
"""Resolve a binding to get the current value."""
if not isinstance(binding, dict) or not binding.get('_binding'):
return binding
source_id = binding.get('source')
feature = binding.get('feature', 'values')
range_map = binding.get('range')
# Get the raw value
if source_id in self.scan_outputs:
value = self.scan_outputs[source_id]
else:
# Might be an analyzer reference - use energy as fallback
value = self.energy
# Extract feature if value is a dict
if isinstance(value, dict) and feature in value:
value = value[feature]
# Apply range mapping
if range_map and isinstance(value, (int, float)):
lo, hi = range_map
value = lo + value * (hi - lo)
return value
def get_effect_params(self, effect_node: dict) -> Dict[str, Any]:
"""Get resolved parameters for an effect node."""
config = effect_node.get('config', {})
params = {}
for key, value in config.items():
# Skip internal fields
if key in ('effect', 'effect_path', 'effect_cid', 'effects_registry', 'analysis_refs'):
continue
# Resolve bindings
params[key] = self.resolve_binding(value)
return params
def get_scan_value(self, name: str) -> Any:
"""Get scan output by name."""
for node_id, scan in self.scans.items():
if scan.name == name:
return self.scan_outputs.get(node_id)
return None
def get_all_scan_values(self) -> Dict[str, Any]:
"""Get all named scan outputs."""
result = {}
for node_id, scan in self.scans.items():
if scan.name:
result[scan.name] = self.scan_outputs.get(node_id)
return result
# === Compositor interface methods ===
def _get_video_sources(self) -> List[str]:
"""Get list of video source node IDs."""
sources = []
for node in self.recipe.nodes:
if node.get('type') == 'SOURCE':
sources.append(node['id'])
# Filter to video only (exclude audio - last one is usually audio)
# Look at file extensions in the paths
return sources[:-1] if len(sources) > 1 else sources
def _trace_effect_chain(self, start_id: str, stop_at_blend: bool = True) -> List[dict]:
"""Trace effect chain from a node, returning effects in order."""
chain = []
current_id = start_id
for _ in range(20): # Max depth
# Find node that uses current as input
next_node = None
for node in self.recipe.nodes:
if current_id in node.get('inputs', []):
if node.get('type') == 'EFFECT':
effect_type = node.get('config', {}).get('effect')
chain.append(node)
if stop_at_blend and effect_type == 'blend':
return chain
next_node = node
break
elif node.get('type') == 'SEGMENT':
next_node = node
break
if next_node is None:
break
current_id = next_node['id']
return chain
def _find_clip_chains(self, source_idx: int) -> tuple:
"""Find effect chains for clip A and B from a source."""
sources = self._get_video_sources()
if source_idx >= len(sources):
return [], []
source_id = sources[source_idx]
# Find SEGMENT node
segment_id = None
for node in self.recipe.nodes:
if node.get('type') == 'SEGMENT' and source_id in node.get('inputs', []):
segment_id = node['id']
break
if not segment_id:
return [], []
# Find the two effect chains from segment (clip A and clip B)
chains = []
for node in self.recipe.nodes:
if segment_id in node.get('inputs', []) and node.get('type') == 'EFFECT':
chain = self._trace_effect_chain(segment_id)
# Get chain starting from this specific branch
branch_chain = [node]
current = node['id']
for _ in range(10):
found = False
for n in self.recipe.nodes:
if current in n.get('inputs', []) and n.get('type') == 'EFFECT':
branch_chain.append(n)
if n.get('config', {}).get('effect') == 'blend':
break
current = n['id']
found = True
break
if not found:
break
chains.append(branch_chain)
# Return first two chains as A and B
chain_a = chains[0] if len(chains) > 0 else []
chain_b = chains[1] if len(chains) > 1 else []
return chain_a, chain_b
def get_effect_params(self, source_idx: int, clip: str, energy: float) -> Dict:
"""Get effect parameters for a source clip (compositor interface)."""
# Get the correct chain for this clip
chain_a, chain_b = self._find_clip_chains(source_idx)
chain = chain_a if clip == 'a' else chain_b
# Default params
params = {
"rotate_angle": 0,
"zoom_amount": 1.0,
"invert_amount": 0,
"hue_degrees": 0,
"ascii_mix": 0,
"ascii_char_size": 8,
}
# Resolve from effects in chain
for eff in chain:
config = eff.get('config', {})
effect_type = config.get('effect')
if effect_type == 'rotate':
angle_binding = config.get('angle')
if angle_binding:
if isinstance(angle_binding, dict) and angle_binding.get('_binding'):
# Bound to analyzer - use energy with range
range_map = angle_binding.get('range')
if range_map:
lo, hi = range_map
params["rotate_angle"] = lo + energy * (hi - lo)
else:
params["rotate_angle"] = self.resolve_binding(angle_binding)
else:
params["rotate_angle"] = angle_binding if isinstance(angle_binding, (int, float)) else 0
elif effect_type == 'zoom':
amount_binding = config.get('amount')
if amount_binding:
if isinstance(amount_binding, dict) and amount_binding.get('_binding'):
range_map = amount_binding.get('range')
if range_map:
lo, hi = range_map
params["zoom_amount"] = lo + energy * (hi - lo)
else:
params["zoom_amount"] = self.resolve_binding(amount_binding)
else:
params["zoom_amount"] = amount_binding if isinstance(amount_binding, (int, float)) else 1.0
elif effect_type == 'invert':
amount_binding = config.get('amount')
if amount_binding:
val = self.resolve_binding(amount_binding)
params["invert_amount"] = val if isinstance(val, (int, float)) else 0
elif effect_type == 'hue_shift':
deg_binding = config.get('degrees')
if deg_binding:
val = self.resolve_binding(deg_binding)
params["hue_degrees"] = val if isinstance(val, (int, float)) else 0
elif effect_type == 'ascii_art':
mix_binding = config.get('mix')
if mix_binding:
val = self.resolve_binding(mix_binding)
params["ascii_mix"] = val if isinstance(val, (int, float)) else 0
size_binding = config.get('char_size')
if size_binding:
if isinstance(size_binding, dict) and size_binding.get('_binding'):
range_map = size_binding.get('range')
if range_map:
lo, hi = range_map
params["ascii_char_size"] = lo + energy * (hi - lo)
return params
def get_pair_params(self, source_idx: int) -> Dict:
"""Get blend and rotation params for a video pair (compositor interface)."""
params = {
"blend_opacity": 0.5,
"pair_rotation": 0,
}
# Find the blend node for this source
chain_a, _ = self._find_clip_chains(source_idx)
# The last effect in chain_a should be the blend
blend_node = None
for eff in reversed(chain_a):
if eff.get('config', {}).get('effect') == 'blend':
blend_node = eff
break
if blend_node:
config = blend_node.get('config', {})
opacity_binding = config.get('opacity')
if opacity_binding:
val = self.resolve_binding(opacity_binding)
if isinstance(val, (int, float)):
params["blend_opacity"] = val
# Find rotate after blend (pair rotation)
blend_id = blend_node['id']
for node in self.recipe.nodes:
if blend_id in node.get('inputs', []) and node.get('type') == 'EFFECT':
if node.get('config', {}).get('effect') == 'rotate':
angle_binding = node.get('config', {}).get('angle')
if angle_binding:
val = self.resolve_binding(angle_binding)
if isinstance(val, (int, float)):
params["pair_rotation"] = val
break
return params
def _get_cycle_state(self) -> dict:
"""Get current cycle state from SLICE_ON or internal tracking."""
if not hasattr(self, '_cycle_state'):
# Initialize from SLICE_ON node
for node in self.recipe.nodes:
if node.get('type') == 'SLICE_ON':
init = node.get('config', {}).get('init', {})
self._cycle_state = {
'cycle': init.get('cycle', 0),
'beat': init.get('beat', 0),
'clen': init.get('clen', 60),
}
break
else:
self._cycle_state = {'cycle': 0, 'beat': 0, 'clen': 60}
return self._cycle_state
def _step_cycle(self):
"""Step the cycle state forward on beat by evaluating SLICE_ON Lambda."""
# Use interpreter to evaluate the Lambda
self._eval_slice_on()
def get_cycle_weights(self) -> List[float]:
"""Get blend weights for cycle-crossfade from SLICE_ON result."""
n = len(self._get_video_sources())
if n == 0:
return [1.0]
# Get weights from interpreted result
if self._slice_on_result:
compose = self._slice_on_result.get('compose', {})
weights = compose.get('weights', [])
if weights and len(weights) == n:
# Normalize
total = sum(weights)
if total > 0:
return [w / total for w in weights]
# Fallback: equal weights
return [1.0 / n] * n
def get_cycle_zooms(self) -> List[float]:
"""Get zoom amounts for cycle-crossfade from SLICE_ON result."""
n = len(self._get_video_sources())
if n == 0:
return [1.0]
# Get zooms from interpreted result (layers -> effects -> zoom amount)
if self._slice_on_result:
layers = self._slice_on_result.get('layers', [])
if layers and len(layers) == n:
zooms = []
for layer in layers:
effects = layer.get('effects', [])
zoom_amt = 1.0
for eff in effects:
if eff.get('effect') == 'zoom' or (hasattr(eff.get('effect'), 'name') and eff.get('effect').name == 'zoom'):
zoom_amt = eff.get('amount', 1.0)
break
zooms.append(zoom_amt)
return zooms
# Fallback
return [1.0] * n
def _get_final_rotate_scan_id(self) -> str:
"""Find the scan ID that drives the final rotation (after SLICE_ON)."""
if hasattr(self, '_final_rotate_scan_id'):
return self._final_rotate_scan_id
# Find SLICE_ON node index
slice_on_idx = None
for i, node in enumerate(self.recipe.nodes):
if node.get('type') == 'SLICE_ON':
slice_on_idx = i
break
# Find rotate effect after SLICE_ON
if slice_on_idx is not None:
for node in self.recipe.nodes[slice_on_idx + 1:]:
if node.get('type') == 'EFFECT':
config = node.get('config', {})
if config.get('effect') == 'rotate':
angle_binding = config.get('angle', {})
if isinstance(angle_binding, dict) and angle_binding.get('_binding'):
self._final_rotate_scan_id = angle_binding.get('source')
return self._final_rotate_scan_id
self._final_rotate_scan_id = None
return None
def get_final_effects(self, energy: float) -> Dict:
"""Get final composition effects (compositor interface)."""
# Get named scans
scan_values = self.get_all_scan_values()
# Whole spin - get from the specific scan bound to final rotate effect
whole_spin = 0
final_rotate_scan_id = self._get_final_rotate_scan_id()
if final_rotate_scan_id and final_rotate_scan_id in self.scan_outputs:
val = self.scan_outputs[final_rotate_scan_id]
if isinstance(val, dict) and 'angle' in val:
whole_spin = val['angle']
elif isinstance(val, (int, float)):
whole_spin = val
# Ripple
ripple_gate = scan_values.get('ripple-gate', 0)
ripple_cx = scan_values.get('ripple-cx', 0.5)
ripple_cy = scan_values.get('ripple-cy', 0.5)
if isinstance(ripple_gate, dict):
ripple_gate = ripple_gate.get('gate', 0) if 'gate' in ripple_gate else 1
return {
"whole_spin_angle": whole_spin,
"ripple_amplitude": ripple_gate * (5 + energy * 45),
"ripple_cx": ripple_cx if isinstance(ripple_cx, (int, float)) else 0.5,
"ripple_cy": ripple_cy if isinstance(ripple_cy, (int, float)) else 0.5,
}

376
streaming/sexp_interp.py Normal file
View File

@@ -0,0 +1,376 @@
"""
S-expression interpreter for streaming execution.
Evaluates sexp expressions including:
- let bindings
- lambda definitions and calls
- Arithmetic, comparison, logic operators
- dict/list operations
- Random number generation
"""
import random
from typing import Any, Dict, List, Callable
from dataclasses import dataclass
@dataclass
class Lambda:
    """Runtime lambda value produced by evaluating a (lambda ...) form."""
    params: List[str]        # parameter names, in call order
    body: Any                # unevaluated body expression
    closure: Dict[str, Any]  # environment captured at definition time
class Symbol:
    """A bare symbolic name appearing inside an s-expression."""

    def __init__(self, name: str):
        self.name = name

    def __repr__(self) -> str:
        return "Symbol({})".format(self.name)
class SexpInterpreter:
"""
Interprets S-expressions in real-time.
Handles the full sexp language used in recipes.
"""
def __init__(self, rng: random.Random = None):
self.rng = rng or random.Random()
self.globals: Dict[str, Any] = {}
def eval(self, expr: Any, env: Dict[str, Any] = None) -> Any:
"""Evaluate an expression in the given environment."""
if env is None:
env = {}
# Literals
if isinstance(expr, (int, float, str, bool)) or expr is None:
return expr
# Symbol lookup
if isinstance(expr, Symbol) or (hasattr(expr, 'name') and hasattr(expr, '__class__') and expr.__class__.__name__ == 'Symbol'):
name = expr.name if hasattr(expr, 'name') else str(expr)
if name in env:
return env[name]
if name in self.globals:
return self.globals[name]
raise NameError(f"Undefined symbol: {name}")
# Compiled expression dict (from compiler)
if isinstance(expr, dict):
if expr.get('_expr'):
return self._eval_compiled_expr(expr, env)
# Plain dict - evaluate values that might be expressions
result = {}
for k, v in expr.items():
# Some keys should keep Symbol values as strings (effect names, modes)
if k in ('effect', 'mode') and hasattr(v, 'name'):
result[k] = v.name
else:
result[k] = self.eval(v, env)
return result
# List expression (sexp)
if isinstance(expr, (list, tuple)) and len(expr) > 0:
return self._eval_list(expr, env)
# Empty list
if isinstance(expr, (list, tuple)):
return []
return expr
def _eval_compiled_expr(self, expr: dict, env: Dict[str, Any]) -> Any:
"""Evaluate a compiled expression dict."""
op = expr.get('op')
args = expr.get('args', [])
if op == 'var':
name = expr.get('name')
if name in env:
return env[name]
if name in self.globals:
return self.globals[name]
raise NameError(f"Undefined: {name}")
elif op == 'dict':
keys = expr.get('keys', [])
values = [self.eval(a, env) for a in args]
return dict(zip(keys, values))
elif op == 'get':
obj = self.eval(args[0], env)
key = args[1]
return obj.get(key) if isinstance(obj, dict) else obj[key]
elif op == 'if':
cond = self.eval(args[0], env)
if cond:
return self.eval(args[1], env)
elif len(args) > 2:
return self.eval(args[2], env)
return None
# Comparison
elif op == '<':
return self.eval(args[0], env) < self.eval(args[1], env)
elif op == '>':
return self.eval(args[0], env) > self.eval(args[1], env)
elif op == '<=':
return self.eval(args[0], env) <= self.eval(args[1], env)
elif op == '>=':
return self.eval(args[0], env) >= self.eval(args[1], env)
elif op == '=':
return self.eval(args[0], env) == self.eval(args[1], env)
elif op == '!=':
return self.eval(args[0], env) != self.eval(args[1], env)
# Arithmetic
elif op == '+':
return self.eval(args[0], env) + self.eval(args[1], env)
elif op == '-':
return self.eval(args[0], env) - self.eval(args[1], env)
elif op == '*':
return self.eval(args[0], env) * self.eval(args[1], env)
elif op == '/':
return self.eval(args[0], env) / self.eval(args[1], env)
elif op == 'mod':
return self.eval(args[0], env) % self.eval(args[1], env)
# Random
elif op == 'rand':
return self.rng.random()
elif op == 'rand-int':
return self.rng.randint(self.eval(args[0], env), self.eval(args[1], env))
elif op == 'rand-range':
return self.rng.uniform(self.eval(args[0], env), self.eval(args[1], env))
# Logic
elif op == 'and':
return all(self.eval(a, env) for a in args)
elif op == 'or':
return any(self.eval(a, env) for a in args)
elif op == 'not':
return not self.eval(args[0], env)
else:
raise ValueError(f"Unknown op: {op}")
def _eval_list(self, expr: list, env: Dict[str, Any]) -> Any:
"""Evaluate a list expression (sexp form)."""
if len(expr) == 0:
return []
head = expr[0]
# Get head name
if isinstance(head, Symbol) or (hasattr(head, 'name') and hasattr(head, '__class__')):
head_name = head.name if hasattr(head, 'name') else str(head)
elif isinstance(head, str):
head_name = head
else:
# Not a symbol - check if it's a data list or function call
if isinstance(head, dict):
# List of dicts - evaluate each element as data
return [self.eval(item, env) for item in expr]
# Otherwise evaluate as function call
fn = self.eval(head, env)
args = [self.eval(a, env) for a in expr[1:]]
return self._call(fn, args, env)
# Special forms
if head_name == 'let':
return self._eval_let(expr, env)
elif head_name in ('lambda', 'fn'):
return self._eval_lambda(expr, env)
elif head_name == 'if':
return self._eval_if(expr, env)
elif head_name == 'dict':
return self._eval_dict(expr, env)
elif head_name == 'get':
obj = self.eval(expr[1], env)
key = self.eval(expr[2], env) if len(expr) > 2 else expr[2]
if isinstance(key, str):
return obj.get(key) if isinstance(obj, dict) else getattr(obj, key, None)
return obj[key]
elif head_name == 'len':
return len(self.eval(expr[1], env))
elif head_name == 'range':
start = self.eval(expr[1], env)
end = self.eval(expr[2], env) if len(expr) > 2 else start
if len(expr) == 2:
return list(range(end))
return list(range(start, end))
elif head_name == 'map':
fn = self.eval(expr[1], env)
lst = self.eval(expr[2], env)
return [self._call(fn, [x], env) for x in lst]
elif head_name == 'mod':
return self.eval(expr[1], env) % self.eval(expr[2], env)
# Arithmetic
elif head_name == '+':
return self.eval(expr[1], env) + self.eval(expr[2], env)
elif head_name == '-':
if len(expr) == 2:
return -self.eval(expr[1], env)
return self.eval(expr[1], env) - self.eval(expr[2], env)
elif head_name == '*':
return self.eval(expr[1], env) * self.eval(expr[2], env)
elif head_name == '/':
return self.eval(expr[1], env) / self.eval(expr[2], env)
# Comparison
elif head_name == '<':
return self.eval(expr[1], env) < self.eval(expr[2], env)
elif head_name == '>':
return self.eval(expr[1], env) > self.eval(expr[2], env)
elif head_name == '<=':
return self.eval(expr[1], env) <= self.eval(expr[2], env)
elif head_name == '>=':
return self.eval(expr[1], env) >= self.eval(expr[2], env)
elif head_name == '=':
return self.eval(expr[1], env) == self.eval(expr[2], env)
# Logic
elif head_name == 'and':
return all(self.eval(a, env) for a in expr[1:])
elif head_name == 'or':
return any(self.eval(a, env) for a in expr[1:])
elif head_name == 'not':
return not self.eval(expr[1], env)
# Function call
else:
fn = env.get(head_name) or self.globals.get(head_name)
if fn is None:
raise NameError(f"Undefined function: {head_name}")
args = [self.eval(a, env) for a in expr[1:]]
return self._call(fn, args, env)
def _eval_let(self, expr: list, env: Dict[str, Any]) -> Any:
"""Evaluate (let [bindings...] body)."""
bindings = expr[1]
body = expr[2]
# Create new environment with bindings
new_env = dict(env)
# Process bindings in pairs
i = 0
while i < len(bindings):
name = bindings[i]
if isinstance(name, Symbol) or hasattr(name, 'name'):
name = name.name if hasattr(name, 'name') else str(name)
value = self.eval(bindings[i + 1], new_env)
new_env[name] = value
i += 2
return self.eval(body, new_env)
def _eval_lambda(self, expr: list, env: Dict[str, Any]) -> Lambda:
"""Evaluate (lambda [params] body)."""
params_expr = expr[1]
body = expr[2]
# Extract parameter names
params = []
for p in params_expr:
if isinstance(p, Symbol) or hasattr(p, 'name'):
params.append(p.name if hasattr(p, 'name') else str(p))
else:
params.append(str(p))
return Lambda(params=params, body=body, closure=dict(env))
def _eval_if(self, expr: list, env: Dict[str, Any]) -> Any:
"""Evaluate (if cond then else)."""
cond = self.eval(expr[1], env)
if cond:
return self.eval(expr[2], env)
elif len(expr) > 3:
return self.eval(expr[3], env)
return None
def _eval_dict(self, expr: list, env: Dict[str, Any]) -> dict:
"""Evaluate (dict :key val ...)."""
result = {}
i = 1
while i < len(expr):
key = expr[i]
# Handle keyword syntax (:key) and Keyword objects
if hasattr(key, 'name'):
key = key.name
elif hasattr(key, '__class__') and key.__class__.__name__ == 'Keyword':
key = str(key).lstrip(':')
elif isinstance(key, str) and key.startswith(':'):
key = key[1:]
value = self.eval(expr[i + 1], env)
result[key] = value
i += 2
return result
def _call(self, fn: Any, args: List[Any], env: Dict[str, Any]) -> Any:
"""Call a function with arguments."""
if isinstance(fn, Lambda):
# Our own Lambda type
call_env = dict(fn.closure)
for param, arg in zip(fn.params, args):
call_env[param] = arg
return self.eval(fn.body, call_env)
elif hasattr(fn, 'params') and hasattr(fn, 'body'):
# Lambda from parser (artdag.sexp.parser.Lambda)
call_env = dict(env)
if hasattr(fn, 'closure') and fn.closure:
call_env.update(fn.closure)
# Get param names
params = []
for p in fn.params:
if hasattr(p, 'name'):
params.append(p.name)
else:
params.append(str(p))
for param, arg in zip(params, args):
call_env[param] = arg
return self.eval(fn.body, call_env)
elif callable(fn):
return fn(*args)
else:
raise TypeError(f"Not callable: {type(fn).__name__}")
def eval_slice_on_lambda(lambda_obj, acc: dict, i: int, start: float, end: float,
                         videos: list, interp: SexpInterpreter = None) -> dict:
    """
    Evaluate a SLICE_ON lambda function.

    Args:
        lambda_obj: The Lambda object from the compiled recipe
        acc: Current accumulator state
        i: Beat index
        start: Slice start time
        end: Slice end time
        videos: List of video inputs
        interp: Interpreter to use (a fresh one is created when omitted)

    Returns:
        Dict with 'layers', 'compose', 'acc' keys
    """
    if interp is None:
        interp = SexpInterpreter()
    # Expose videos globally so forms like (len videos) resolve inside
    # the lambda body.
    interp.globals['videos'] = videos
    # Seed the call environment with the lambda's captured closure, if any.
    closure = getattr(lambda_obj, 'closure', None)
    env = dict(closure) if closure else {}
    env['videos'] = videos
    return interp._call(lambda_obj, [acc, i, start, end], env)

706
streaming/sexp_to_cuda.py Normal file
View File

@@ -0,0 +1,706 @@
"""
Sexp to CUDA Kernel Compiler.
Compiles sexp frame pipelines to fused CUDA kernels for maximum performance.
Instead of interpreting sexp and launching 10+ kernels per frame,
generates a single kernel that does everything in one pass.
"""
import cupy as cp
import numpy as np
from typing import Dict, List, Any, Optional, Tuple
import hashlib
import sys
import logging
logger = logging.getLogger(__name__)
# Kernel cache
_COMPILED_KERNELS: Dict[str, Any] = {}
def compile_frame_pipeline(effects: List[dict], width: int, height: int) -> callable:
    """
    Compile a list of effects to a fused CUDA kernel.

    Args:
        effects: List of effect dicts like:
            [{'op': 'rotate', 'angle': 45.0},
             {'op': 'blend', 'alpha': 0.5, 'src2': <gpu_array>},
             {'op': 'hue_shift', 'degrees': 90.0},
             {'op': 'ripple', 'amplitude': 10.0, 'frequency': 8.0, ...}]
        width, height: Frame dimensions

    Returns:
        Callable that takes input frame and returns output frame
    """
    # Cache key covers dimensions plus every effect field except 'src2'
    # (GPU arrays have no stable textual form). NOTE: parameter *values*
    # are deliberately part of the key - the returned closure binds this
    # exact `effects` list for _build_params, so it must not be shared
    # with effect lists that merely have the same op sequence.
    ops_key = str([(e['op'], {k:v for k,v in e.items() if k != 'src2'}) for e in effects])
    cache_key = f"{width}x{height}_{hashlib.md5(ops_key.encode()).hexdigest()}"
    if cache_key in _COMPILED_KERNELS:
        return _COMPILED_KERNELS[cache_key]
    # Generate fused kernel code for this specific effect chain
    kernel_code = _generate_fused_kernel(effects, width, height)
    # Compile kernel (entry point name must match the generated source)
    kernel = cp.RawKernel(kernel_code, 'fused_pipeline')
    # Create wrapper function closing over the compiled kernel and effects
    def run_pipeline(frame: cp.ndarray, **dynamic_params) -> cp.ndarray:
        """Run the compiled pipeline on a frame.

        The frame is coerced to contiguous uint8 (the layout the raw
        kernel reads); per-frame values in `dynamic_params` override the
        static effect parameters.
        """
        if frame.dtype != cp.uint8:
            frame = cp.clip(frame, 0, 255).astype(cp.uint8)
        if not frame.flags['C_CONTIGUOUS']:
            frame = cp.ascontiguousarray(frame)
        output = cp.zeros_like(frame)
        block = (16, 16)  # 16x16 threads per block
        grid = ((width + 15) // 16, (height + 15) // 16)  # ceil-divide over the frame
        # Build parameter array consumed by the kernel via params[param_idx++]
        params = _build_params(effects, dynamic_params)
        kernel(grid, block, (frame, output, width, height, params))
        return output
    _COMPILED_KERNELS[cache_key] = run_pipeline
    return run_pipeline
def _generate_fused_kernel(effects: List[dict], width: int, height: int) -> str:
    """Generate CUDA C source for a single fused effects kernel.

    The generated ``fused_pipeline`` kernel processes one output pixel per
    thread in three stages:
      1. compose all geometric transforms (rotate, zoom, ripple) into one
         inverse-mapped sample position,
      2. take one bilinear sample from the source frame,
      3. apply all color transforms (invert, hue_shift, brightness) in
         list order.

    Per-frame scalars are read from the flat ``params`` float array in the
    exact order that _build_params() emits them (geometric ops first, then
    color ops, each in effect-list order).

    Args:
        effects: List of effect dicts, each with an 'op' key.
        width, height: Frame dimensions. Not interpolated into the source
            here — the kernel receives dimensions as launch arguments —
            but kept so all generators share one signature.

    Returns:
        CUDA C source string defining ``fused_pipeline``.

    Raises:
        ValueError: If any effect's 'op' is not in SUPPORTED_OPS.
    """
    # Validate all ops are supported
    SUPPORTED_OPS = {'rotate', 'zoom', 'ripple', 'invert', 'hue_shift', 'brightness'}
    for effect in effects:
        op = effect.get('op')
        if op not in SUPPORTED_OPS:
            raise ValueError(f"Unsupported CUDA kernel operation: '{op}'. Supported ops: {', '.join(sorted(SUPPORTED_OPS))}. Note: 'resize' must be handled separately before the fused kernel.")
    # Build the kernel.
    # Prologue: one thread per output pixel plus accumulators for the
    # composed geometric transform.
    code = r'''
extern "C" __global__
void fused_pipeline(
    const unsigned char* src,
    unsigned char* dst,
    int width, int height,
    const float* params
) {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= width || y >= height) return;
    // Start with source coordinates
    float src_x = (float)x;
    float src_y = (float)y;
    float cx = width / 2.0f;
    float cy = height / 2.0f;
    // Track accumulated transforms
    float total_cos = 1.0f, total_sin = 0.0f; // rotation
    float total_zoom = 1.0f; // zoom
    float ripple_dx = 0.0f, ripple_dy = 0.0f; // ripple displacement
    int param_idx = 0;
'''
    # First pass: emit geometric ops in list order. param_idx consumption
    # here must stay in lockstep with _build_params()'s first pass.
    for i, effect in enumerate(effects):
        op = effect['op']
        if op == 'rotate':
            code += f'''
    // Rotate {i}
    {{
        float angle = params[param_idx++] * 3.14159265f / 180.0f;
        float c = cosf(angle);
        float s = sinf(angle);
        // Compose with existing rotation
        float nc = total_cos * c - total_sin * s;
        float ns = total_cos * s + total_sin * c;
        total_cos = nc;
        total_sin = ns;
    }}
'''
        elif op == 'zoom':
            code += f'''
    // Zoom {i}
    {{
        float zoom = params[param_idx++];
        total_zoom *= zoom;
    }}
'''
        elif op == 'ripple':
            code += f'''
    // Ripple {i} - matching original formula: sin(dist/freq - phase) * exp(-dist*decay/maxdim)
    {{
        float amplitude = params[param_idx++];
        float frequency = params[param_idx++];
        float decay = params[param_idx++];
        float phase = params[param_idx++];
        float rcx = params[param_idx++];
        float rcy = params[param_idx++];
        float rdx = src_x - rcx;
        float rdy = src_y - rcy;
        float dist = sqrtf(rdx * rdx + rdy * rdy);
        float max_dim = (float)(width > height ? width : height);
        // Original formula: sin(dist / frequency - phase) * exp(-dist * decay / max_dim)
        float wave = sinf(dist / frequency - phase);
        float amp = amplitude * expf(-dist * decay / max_dim);
        if (dist > 0.001f) {{
            ripple_dx += rdx / dist * wave * amp;
            ripple_dy += rdy / dist * wave * amp;
        }}
    }}
'''
    # Apply all accumulated geometric transforms in one step, then do a
    # single bilinear sample. Out-of-bounds sample positions produce black.
    code += '''
    // Apply accumulated geometric transforms
    {
        // Translate to center
        float dx = src_x - cx;
        float dy = src_y - cy;
        // Apply rotation
        float rx = total_cos * dx + total_sin * dy;
        float ry = -total_sin * dx + total_cos * dy;
        // Apply zoom (inverse for sampling)
        rx /= total_zoom;
        ry /= total_zoom;
        // Translate back and apply ripple
        src_x = rx + cx - ripple_dx;
        src_y = ry + cy - ripple_dy;
    }
    // Sample source with bilinear interpolation
    float r, g, b;
    if (src_x < 0 || src_x >= width - 1 || src_y < 0 || src_y >= height - 1) {
        r = g = b = 0;
    } else {
        int x0 = (int)src_x;
        int y0 = (int)src_y;
        float fx = src_x - x0;
        float fy = src_y - y0;
        int idx00 = (y0 * width + x0) * 3;
        int idx10 = (y0 * width + x0 + 1) * 3;
        int idx01 = ((y0 + 1) * width + x0) * 3;
        int idx11 = ((y0 + 1) * width + x0 + 1) * 3;
        #define BILERP(c) \\
            (src[idx00 + c] * (1-fx) * (1-fy) + \\
             src[idx10 + c] * fx * (1-fy) + \\
             src[idx01 + c] * (1-fx) * fy + \\
             src[idx11 + c] * fx * fy)
        r = BILERP(0);
        g = BILERP(1);
        b = BILERP(2);
    }
'''
    # Second pass: emit color ops in list order. Must stay in lockstep with
    # _build_params()'s second pass.
    for i, effect in enumerate(effects):
        op = effect['op']
        if op == 'invert':
            code += f'''
    // Invert {i}
    {{
        float amount = params[param_idx++];
        if (amount > 0.5f) {{
            r = 255.0f - r;
            g = 255.0f - g;
            b = 255.0f - b;
        }}
    }}
'''
        elif op == 'hue_shift':
            code += f'''
    // Hue shift {i}
    {{
        float shift = params[param_idx++];
        if (fabsf(shift) > 0.01f) {{
            // RGB to HSV
            float rf = r / 255.0f;
            float gf = g / 255.0f;
            float bf = b / 255.0f;
            float max_c = fmaxf(rf, fmaxf(gf, bf));
            float min_c = fminf(rf, fminf(gf, bf));
            float delta = max_c - min_c;
            float h = 0, s = 0, v = max_c;
            if (delta > 0.00001f) {{
                s = delta / max_c;
                if (rf >= max_c) h = (gf - bf) / delta;
                else if (gf >= max_c) h = 2.0f + (bf - rf) / delta;
                else h = 4.0f + (rf - gf) / delta;
                h *= 60.0f;
                if (h < 0) h += 360.0f;
            }}
            h = fmodf(h + shift + 360.0f, 360.0f);
            // HSV to RGB
            float c = v * s;
            float x_val = c * (1 - fabsf(fmodf(h / 60.0f, 2.0f) - 1));
            float m = v - c;
            float r2, g2, b2;
            if (h < 60) {{ r2 = c; g2 = x_val; b2 = 0; }}
            else if (h < 120) {{ r2 = x_val; g2 = c; b2 = 0; }}
            else if (h < 180) {{ r2 = 0; g2 = c; b2 = x_val; }}
            else if (h < 240) {{ r2 = 0; g2 = x_val; b2 = c; }}
            else if (h < 300) {{ r2 = x_val; g2 = 0; b2 = c; }}
            else {{ r2 = c; g2 = 0; b2 = x_val; }}
            r = (r2 + m) * 255.0f;
            g = (g2 + m) * 255.0f;
            b = (b2 + m) * 255.0f;
        }}
    }}
'''
        elif op == 'brightness':
            code += f'''
    // Brightness {i}
    {{
        float factor = params[param_idx++];
        r *= factor;
        g *= factor;
        b *= factor;
    }}
'''
    # Epilogue: clamp to [0, 255] and store the interleaved RGB result.
    code += '''
    // Write output
    int dst_idx = (y * width + x) * 3;
    dst[dst_idx] = (unsigned char)fminf(255.0f, fmaxf(0.0f, r));
    dst[dst_idx + 1] = (unsigned char)fminf(255.0f, fmaxf(0.0f, g));
    dst[dst_idx + 2] = (unsigned char)fminf(255.0f, fmaxf(0.0f, b));
}
'''
    return code
# Debug counter so the first/periodic calls can be logged without flooding.
_BUILD_PARAMS_COUNT = 0


def _build_params(effects: List[dict], dynamic_params: dict) -> cp.ndarray:
    """Build the flat float32 parameter array consumed by the fused kernel.

    IMPORTANT: Parameters must be built in the same order the kernel consumes them:
    1. First all geometric transforms (rotate, zoom, ripple) in list order
    2. Then all color transforms (invert, hue_shift, brightness) in list order

    Fix: ``dynamic_params`` overrides are now honored for the color
    transforms too ('invert_amount', 'hue_shift_degrees',
    'brightness_factor') and for ripple frequency/decay. Previously only
    rotate/zoom/ripple-amplitude/ripple-phase could vary per frame; a
    time-varying brightness, for example, was silently ignored. Static
    values from the effect dicts remain the fallback, so existing callers
    see identical behavior.

    Args:
        effects: Effect dicts (already validated by the kernel generator).
        dynamic_params: Per-frame overrides keyed '<op>_<param>'.

    Returns:
        1-D cupy float32 array laid out exactly as the kernel reads it.
    """
    global _BUILD_PARAMS_COUNT
    _BUILD_PARAMS_COUNT += 1
    # ALWAYS log first few calls - use WARNING to ensure visibility in Celery logs
    if _BUILD_PARAMS_COUNT <= 3:
        logger.warning(f"[BUILD_PARAMS #{_BUILD_PARAMS_COUNT}] effects={[e['op'] for e in effects]}")
    params = []
    # First pass: geometric transforms (matches kernel's first loop)
    for effect in effects:
        op = effect['op']
        if op == 'rotate':
            params.append(float(dynamic_params.get('rotate_angle', effect.get('angle', 0))))
        elif op == 'zoom':
            params.append(float(dynamic_params.get('zoom_amount', effect.get('amount', 1.0))))
        elif op == 'ripple':
            amp = float(dynamic_params.get('ripple_amplitude', effect.get('amplitude', 10)))
            freq = float(dynamic_params.get('ripple_frequency', effect.get('frequency', 8)))
            decay = float(dynamic_params.get('ripple_decay', effect.get('decay', 2)))
            phase = float(dynamic_params.get('ripple_phase', effect.get('phase', 0)))
            cx = float(effect.get('center_x', 960))
            cy = float(effect.get('center_y', 540))
            params.extend([amp, freq, decay, phase, cx, cy])
            if _BUILD_PARAMS_COUNT <= 10 or _BUILD_PARAMS_COUNT % 500 == 0:
                logger.warning(f"[BUILD_PARAMS #{_BUILD_PARAMS_COUNT}] ripple amp={amp} freq={freq} decay={decay} phase={phase:.2f} cx={cx} cy={cy}")
    # Second pass: color transforms (matches kernel's second loop)
    for effect in effects:
        op = effect['op']
        if op == 'invert':
            amt = float(dynamic_params.get('invert_amount', effect.get('amount', 0)))
            params.append(amt)
            if _BUILD_PARAMS_COUNT <= 10 or _BUILD_PARAMS_COUNT % 500 == 0:
                logger.warning(f"[BUILD_PARAMS #{_BUILD_PARAMS_COUNT}] invert amount={amt}")
        elif op == 'hue_shift':
            deg = float(dynamic_params.get('hue_shift_degrees', effect.get('degrees', 0)))
            params.append(deg)
            if _BUILD_PARAMS_COUNT <= 10 or _BUILD_PARAMS_COUNT % 500 == 0:
                logger.warning(f"[BUILD_PARAMS #{_BUILD_PARAMS_COUNT}] hue_shift degrees={deg}")
        elif op == 'brightness':
            params.append(float(dynamic_params.get('brightness_factor', effect.get('factor', 1.0))))
    return cp.array(params, dtype=cp.float32)
def compile_autonomous_pipeline(effects: List[dict], width: int, height: int,
                                dynamic_expressions: dict = None) -> callable:
    """
    Compile a fully autonomous pipeline that computes ALL parameters on GPU.

    Python is removed from the hot path entirely: the generated kernel
    evaluates the time-based parameter expressions (sin, cos, ...) itself,
    on-device, from the frame timestamp.

    Args:
        effects: List of effect dicts.
        width, height: Frame dimensions.
        dynamic_expressions: Optional mapping of parameter name to an
            expression of ``t``, e.g.:
            {'rotate_angle': 't * 30',
             'ripple_phase': 't * 2',
             'brightness_factor': '0.8 + 0.4 * sin(t * 2)'}

    Returns:
        Callable taking (frame, frame_num, fps) and returning the output frame.
    """
    exprs = {} if dynamic_expressions is None else dynamic_expressions
    # Cache key covers frame geometry, effect configuration, and expressions.
    # 'src2' entries are excluded because they are not part of kernel codegen.
    ops_key = str([(e['op'], {k: v for k, v in e.items() if k != 'src2'}) for e in effects])
    expr_key = str(sorted(exprs.items()))
    digest = hashlib.md5((ops_key + expr_key).encode()).hexdigest()
    cache_key = f"auto_{width}x{height}_{digest}"
    cached = _COMPILED_KERNELS.get(cache_key)
    if cached is not None:
        return cached
    # Generate and compile the autonomous kernel once per configuration.
    kernel = cp.RawKernel(
        _generate_autonomous_kernel(effects, width, height, exprs),
        'autonomous_pipeline',
    )
    # Launch geometry is invariant for a given frame size, so hoist it out
    # of the per-frame wrapper.
    block_dims = (16, 16)
    grid_dims = ((width + 15) // 16, (height + 15) // 16)

    def run_autonomous(frame: cp.ndarray, frame_num: int, fps: float = 30.0) -> cp.ndarray:
        """Run the autonomous pipeline - no Python in the hot path!"""
        if frame.dtype != cp.uint8:
            frame = cp.clip(frame, 0, 255).astype(cp.uint8)
        if not frame.flags['C_CONTIGUOUS']:
            frame = cp.ascontiguousarray(frame)
        output = cp.zeros_like(frame)
        # Only frame_num and fps cross the Python/GPU boundary; the kernel
        # derives every dynamic parameter from t itself.
        t = float(frame_num) / float(fps)
        kernel(grid_dims, block_dims, (frame, output, np.int32(width), np.int32(height),
                                       np.float32(t), np.int32(frame_num)))
        return output

    _COMPILED_KERNELS[cache_key] = run_autonomous
    return run_autonomous
def _generate_autonomous_kernel(effects: List[dict], width: int, height: int,
                                dynamic_expressions: dict) -> str:
    """Generate CUDA kernel source that computes everything autonomously.

    Unlike _generate_fused_kernel, no ``params`` array is read: time-varying
    values are computed on-device from ``t`` using the expressions in
    ``dynamic_expressions``, and all other effect settings are baked into
    the source as float literals, so only (t, frame_num) cross the
    Python/GPU boundary per frame.

    NOTE(review): the ripple formula here
    (sinf(dist * frequency * 0.1f + phase), expf(-dist * decay * 0.01f))
    differs from the fused kernel's "original formula"
    (sin(dist / frequency - phase) * exp(-dist * decay / max_dim)) —
    confirm which is intended before relying on visual parity between the
    two paths. Also note 'invert' is handled by the fused path but has no
    branch in this generator's color pass.
    """
    # Map simple expressions to CUDA code
    def expr_to_cuda(expr: str) -> str:
        """Convert simple expression to CUDA."""
        # Naive substring replacement: fine for plain sin/cos/abs over t,
        # but an input already containing e.g. 'fabs(' would be mangled —
        # callers are expected to pass math-style expressions only.
        expr = expr.replace('sin(', 'sinf(')
        expr = expr.replace('cos(', 'cosf(')
        expr = expr.replace('abs(', 'fabsf(')
        return expr
    # Prologue: one thread per output pixel; t/frame_num are launch args.
    code = r'''
extern "C" __global__
void autonomous_pipeline(
    const unsigned char* src,
    unsigned char* dst,
    int width, int height,
    float t, int frame_num
) {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= width || y >= height) return;
    // Compute dynamic parameters from time (ALL ON GPU!)
'''
    # Add dynamic parameter calculations. Missing expressions fall back to
    # identity defaults (no rotation, zero phase, unit brightness/zoom).
    rotate_expr = dynamic_expressions.get('rotate_angle', '0.0f')
    ripple_phase_expr = dynamic_expressions.get('ripple_phase', '0.0f')
    brightness_expr = dynamic_expressions.get('brightness_factor', '1.0f')
    zoom_expr = dynamic_expressions.get('zoom_amount', '1.0f')
    code += f'''
    float rotate_angle = {expr_to_cuda(rotate_expr)};
    float ripple_phase = {expr_to_cuda(ripple_phase_expr)};
    float brightness_factor = {expr_to_cuda(brightness_expr)};
    float zoom_amount = {expr_to_cuda(zoom_expr)};
    // Start with source coordinates
    float src_x = (float)x;
    float src_y = (float)y;
    float cx = width / 2.0f;
    float cy = height / 2.0f;
    // Accumulated transforms
    float total_cos = 1.0f, total_sin = 0.0f;
    float total_zoom = 1.0f;
    float ripple_dx = 0.0f, ripple_dy = 0.0f;
'''
    # First pass: geometric ops in list order; static ripple settings are
    # baked in as literals (only the phase is dynamic).
    for i, effect in enumerate(effects):
        op = effect['op']
        if op == 'rotate':
            code += f'''
    // Rotate {i}
    {{
        float angle = rotate_angle * 3.14159265f / 180.0f;
        float c = cosf(angle);
        float s = sinf(angle);
        float nc = total_cos * c - total_sin * s;
        float ns = total_cos * s + total_sin * c;
        total_cos = nc;
        total_sin = ns;
    }}
'''
        elif op == 'zoom':
            code += f'''
    // Zoom {i}
    {{
        total_zoom *= zoom_amount;
    }}
'''
        elif op == 'ripple':
            amp = float(effect.get('amplitude', 10))
            freq = float(effect.get('frequency', 8))
            decay = float(effect.get('decay', 2))
            rcx = float(effect.get('center_x', width/2))
            rcy = float(effect.get('center_y', height/2))
            code += f'''
    // Ripple {i}
    {{
        float amplitude = {amp:.1f}f;
        float frequency = {freq:.1f}f;
        float decay_val = {decay:.1f}f;
        float rcx = {rcx:.1f}f;
        float rcy = {rcy:.1f}f;
        float rdx = src_x - rcx;
        float rdy = src_y - rcy;
        float dist = sqrtf(rdx * rdx + rdy * rdy);
        float wave = sinf(dist * frequency * 0.1f + ripple_phase);
        float amp = amplitude * expf(-dist * decay_val * 0.01f);
        if (dist > 0.001f) {{
            ripple_dx += rdx / dist * wave * amp;
            ripple_dy += rdy / dist * wave * amp;
        }}
    }}
'''
    # Apply the accumulated geometric transforms, then one bilinear sample.
    # Out-of-bounds sample positions produce black.
    code += '''
    // Apply accumulated transforms
    {
        float dx = src_x - cx;
        float dy = src_y - cy;
        float rx = total_cos * dx + total_sin * dy;
        float ry = -total_sin * dx + total_cos * dy;
        rx /= total_zoom;
        ry /= total_zoom;
        src_x = rx + cx - ripple_dx;
        src_y = ry + cy - ripple_dy;
    }
    // Bilinear sample
    float r, g, b;
    if (src_x < 0 || src_x >= width - 1 || src_y < 0 || src_y >= height - 1) {
        r = g = b = 0;
    } else {
        int x0 = (int)src_x;
        int y0 = (int)src_y;
        float fx = src_x - x0;
        float fy = src_y - y0;
        int idx00 = (y0 * width + x0) * 3;
        int idx10 = (y0 * width + x0 + 1) * 3;
        int idx01 = ((y0 + 1) * width + x0) * 3;
        int idx11 = ((y0 + 1) * width + x0 + 1) * 3;
        #define BILERP(c) \\
            (src[idx00 + c] * (1-fx) * (1-fy) + \\
             src[idx10 + c] * fx * (1-fy) + \\
             src[idx01 + c] * (1-fx) * fy + \\
             src[idx11 + c] * fx * fy)
        r = BILERP(0);
        g = BILERP(1);
        b = BILERP(2);
    }
'''
    # Second pass: color ops in list order. hue_shift is baked as a literal;
    # brightness reads the dynamic brightness_factor computed above.
    for i, effect in enumerate(effects):
        op = effect['op']
        if op == 'hue_shift':
            degrees = float(effect.get('degrees', 0))
            code += f'''
    // Hue shift {i}
    {{
        float shift = {degrees:.1f}f;
        float rf = r / 255.0f;
        float gf = g / 255.0f;
        float bf = b / 255.0f;
        float max_c = fmaxf(rf, fmaxf(gf, bf));
        float min_c = fminf(rf, fminf(gf, bf));
        float delta = max_c - min_c;
        float h = 0, s = 0, v = max_c;
        if (delta > 0.00001f) {{
            s = delta / max_c;
            if (rf >= max_c) h = (gf - bf) / delta;
            else if (gf >= max_c) h = 2.0f + (bf - rf) / delta;
            else h = 4.0f + (rf - gf) / delta;
            h *= 60.0f;
            if (h < 0) h += 360.0f;
        }}
        h = fmodf(h + shift + 360.0f, 360.0f);
        float c = v * s;
        float x_val = c * (1 - fabsf(fmodf(h / 60.0f, 2.0f) - 1));
        float m = v - c;
        float r2, g2, b2;
        if (h < 60) {{ r2 = c; g2 = x_val; b2 = 0; }}
        else if (h < 120) {{ r2 = x_val; g2 = c; b2 = 0; }}
        else if (h < 180) {{ r2 = 0; g2 = c; b2 = x_val; }}
        else if (h < 240) {{ r2 = 0; g2 = x_val; b2 = c; }}
        else if (h < 300) {{ r2 = x_val; g2 = 0; b2 = c; }}
        else {{ r2 = c; g2 = 0; b2 = x_val; }}
        r = (r2 + m) * 255.0f;
        g = (g2 + m) * 255.0f;
        b = (b2 + m) * 255.0f;
    }}
'''
        elif op == 'brightness':
            code += '''
    // Brightness
    {
        r *= brightness_factor;
        g *= brightness_factor;
        b *= brightness_factor;
    }
'''
    # Epilogue: clamp to [0, 255] and store the interleaved RGB result.
    code += '''
    // Write output
    int dst_idx = (y * width + x) * 3;
    dst[dst_idx] = (unsigned char)fminf(255.0f, fmaxf(0.0f, r));
    dst[dst_idx + 1] = (unsigned char)fminf(255.0f, fmaxf(0.0f, g));
    dst[dst_idx + 2] = (unsigned char)fminf(255.0f, fmaxf(0.0f, b));
}
'''
    return code
# Test the compiler: benchmark the param-passing fused kernel against the
# fully autonomous kernel on a synthetic 1080p frame.
if __name__ == '__main__':
    # Fix: `import math` used to sit inside the timed benchmark loop below,
    # adding per-iteration import-machinery overhead to the very measurement
    # the loop is supposed to make. Hoist it out of the timed region.
    import math
    import time
    print("[sexp_to_cuda] Testing fused kernel compiler...")
    print("=" * 60)
    # Define a test pipeline
    effects = [
        {'op': 'rotate', 'angle': 45.0},
        {'op': 'hue_shift', 'degrees': 30.0},
        {'op': 'ripple', 'amplitude': 15, 'frequency': 10, 'decay': 2, 'phase': 0, 'center_x': 960, 'center_y': 540},
        {'op': 'brightness', 'factor': 1.0},
    ]
    frame = cp.random.randint(0, 255, (1080, 1920, 3), dtype=cp.uint8)
    # ===== Test 1: Standard fused kernel (params passed from Python) =====
    print("\n[Test 1] Standard fused kernel (Python computes params)")
    pipeline = compile_frame_pipeline(effects, 1920, 1080)
    # Warmup (first call compiles the kernel)
    output = pipeline(frame)
    cp.cuda.Stream.null.synchronize()
    # Benchmark with Python param computation
    start = time.time()
    for i in range(100):
        # Simulate Python computing params (like sexp interpreter does);
        # `brightness` is deliberately computed-but-unused so the per-frame
        # Python cost matches what the interpreter would pay.
        t = i / 30.0
        angle = t * 30
        phase = t * 2
        brightness = 0.8 + 0.4 * math.sin(t * 2)
        output = pipeline(frame, rotate_angle=angle, ripple_phase=phase)
    cp.cuda.Stream.null.synchronize()
    elapsed = time.time() - start
    print(f"  Time: {elapsed/100*1000:.2f}ms per frame")
    print(f"  FPS: {100/elapsed:.0f}")
    # ===== Test 2: Autonomous kernel (GPU computes everything) =====
    print("\n[Test 2] Autonomous kernel (GPU computes ALL params)")
    dynamic_expressions = {
        'rotate_angle': 't * 30.0f',
        'ripple_phase': 't * 2.0f',
        'brightness_factor': '0.8f + 0.4f * sinf(t * 2.0f)',
    }
    auto_pipeline = compile_autonomous_pipeline(effects, 1920, 1080, dynamic_expressions)
    # Warmup (first call compiles the kernel)
    output = auto_pipeline(frame, 0, 30.0)
    cp.cuda.Stream.null.synchronize()
    # Benchmark - NO Python computation in loop!
    start = time.time()
    for i in range(100):
        output = auto_pipeline(frame, i, 30.0)  # Just pass frame_num!
    cp.cuda.Stream.null.synchronize()
    elapsed = time.time() - start
    print(f"  Time: {elapsed/100*1000:.2f}ms per frame")
    print(f"  FPS: {100/elapsed:.0f}")
    print("\n" + "=" * 60)
    print("Autonomous kernel eliminates Python from hot path!")

4628
streaming/sexp_to_jax.py Normal file

File diff suppressed because it is too large Load Diff

281
streaming/sources.py Normal file
View File

@@ -0,0 +1,281 @@
"""
Video and image sources with looping support.
"""
import numpy as np
import subprocess
import json
from pathlib import Path
from typing import Optional, Tuple
from abc import ABC, abstractmethod
class Source(ABC):
    """Interface for anything that can supply frames to the compositor."""

    @abstractmethod
    def read_frame(self, t: float) -> np.ndarray:
        """Return the frame for time ``t``, looping where applicable."""
        ...

    @property
    @abstractmethod
    def duration(self) -> float:
        """Total length of the source, in seconds."""
        ...

    @property
    @abstractmethod
    def size(self) -> Tuple[int, int]:
        """Produced frame size as (width, height)."""
        ...

    @property
    @abstractmethod
    def fps(self) -> float:
        """Nominal frame rate of the source."""
        ...
class VideoSource(Source):
    """
    Video file source with automatic looping.

    Reads frames on-demand from an ffmpeg subprocess, seeking as needed.
    When time exceeds duration, wraps around (loops).
    """
    def __init__(self, path: str, target_fps: float = 30):
        """Open a video file and probe its metadata.

        Args:
            path: Path to the video file.
            target_fps: Rate at which frames are decoded/requested.

        Raises:
            FileNotFoundError: If the file does not exist.
            RuntimeError: If ffprobe fails on the file.
            ValueError: If the file has no video stream.
        """
        self.path = Path(path)
        self.target_fps = target_fps
        # Initialize decode state first (before _probe which could fail),
        # so close()/__del__ never see missing attributes.
        self._process: Optional[subprocess.Popen] = None
        self._current_start: Optional[float] = None
        self._frame_buffer: Optional[np.ndarray] = None
        self._buffer_time: Optional[float] = None
        self._duration = None
        self._size = None
        self._fps = None
        if not self.path.exists():
            raise FileNotFoundError(f"Video not found: {path}")
        self._probe()

    def _probe(self):
        """Get video metadata (duration, size, fps) via ffprobe."""
        cmd = [
            "ffprobe", "-v", "quiet",
            "-print_format", "json",
            "-show_format", "-show_streams",
            str(self.path)
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        # Fix: previously a failed ffprobe run fell straight into
        # json.loads("") and raised an opaque JSONDecodeError.
        if result.returncode != 0 or not result.stdout:
            raise RuntimeError(f"ffprobe failed for {self.path}")
        data = json.loads(result.stdout)
        # Get duration
        self._duration = float(data["format"]["duration"])
        # Get video stream info
        for stream in data["streams"]:
            if stream["codec_type"] == "video":
                self._size = (int(stream["width"]), int(stream["height"]))
                # Parse fps from r_frame_rate (e.g., "30/1" or "30000/1001")
                fps_parts = stream.get("r_frame_rate", "30/1").split("/")
                self._fps = float(fps_parts[0]) / float(fps_parts[1])
                break
        # Fix: previously an audio-only file silently left _size = None,
        # causing a confusing TypeError later in _start_decode.
        if self._size is None:
            raise ValueError(f"No video stream found in {self.path}")

    @property
    def duration(self) -> float:
        return self._duration

    @property
    def size(self) -> Tuple[int, int]:
        return self._size

    @property
    def fps(self) -> float:
        return self._fps

    def _start_decode(self, start_time: float):
        """Start ffmpeg decode process from given time, replacing any old one."""
        if self._process:
            # Tear down the previous decoder; none of these failures matter
            # since we are replacing the process anyway, but we keep the
            # exceptions narrow (Exception, not bare except).
            try:
                self._process.stdout.close()
            except Exception:
                pass
            self._process.terminate()
            try:
                self._process.wait(timeout=1)
            except Exception:
                self._process.kill()
                self._process.wait()
        w, h = self._size
        cmd = [
            "ffmpeg", "-v", "quiet",
            "-ss", str(start_time),
            "-i", str(self.path),
            "-f", "rawvideo",
            "-pix_fmt", "rgb24",
            "-r", str(self.target_fps),
            "-"
        ]
        self._process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            bufsize=w * h * 3 * 4,  # Buffer a few frames
        )
        self._current_start = start_time
        self._buffer_time = start_time

    def read_frame(self, t: float) -> np.ndarray:
        """
        Read frame at time t.

        If t exceeds duration, wraps around (loops).
        Seeks if needed, otherwise reads sequentially.
        """
        # Wrap time for looping
        t_wrapped = t % self._duration
        # Check if we need to seek (loop point or large time jump)
        need_seek = (
            self._process is None or
            self._buffer_time is None or
            abs(t_wrapped - self._buffer_time) > 1.0 / self.target_fps * 2
        )
        if need_seek:
            self._start_decode(t_wrapped)
        # Read frame
        w, h = self._size
        frame_size = w * h * 3
        # Try to read with retries for seek settling
        for attempt in range(3):
            raw = self._process.stdout.read(frame_size)
            if len(raw) == frame_size:
                break
            # End of stream or seek not ready - restart from beginning.
            # NOTE(review): _buffer_time below is still set from t_wrapped
            # even after this restart from 0 - confirm the drift is acceptable.
            self._start_decode(0)
        if len(raw) < frame_size:
            # Still no data - return last frame or black
            if self._frame_buffer is not None:
                return self._frame_buffer.copy()
            return np.zeros((h, w, 3), dtype=np.uint8)
        frame = np.frombuffer(raw, dtype=np.uint8).reshape((h, w, 3))
        self._frame_buffer = frame  # Cache for fallback
        self._buffer_time = t_wrapped + 1.0 / self.target_fps
        return frame

    def close(self):
        """Clean up the ffmpeg subprocess, if any."""
        if self._process:
            self._process.terminate()
            self._process.wait()
            self._process = None

    def __del__(self):
        self.close()

    def __repr__(self):
        return f"VideoSource({self.path.name}, {self._size[0]}x{self._size[1]}, {self._duration:.1f}s)"
class ImageSource(Source):
    """
    Static image source (returns same frame for any time).

    Useful for backgrounds, overlays, etc.
    """
    def __init__(self, path: str):
        """Load the image once; frames are copies of it.

        Raises:
            FileNotFoundError: If the file does not exist.
            ValueError: If the file exists but cannot be decoded.
        """
        self.path = Path(path)
        if not self.path.exists():
            raise FileNotFoundError(f"Image not found: {path}")
        # Load image (cv2 imported lazily, matching the original style)
        import cv2
        frame = cv2.imread(str(self.path))
        # Fix: cv2.imread signals failure by returning None instead of
        # raising; previously that None crashed cvtColor with an opaque
        # cv2.error. Fail loudly with a clear message instead.
        if frame is None:
            raise ValueError(f"Could not decode image: {path}")
        self._frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        self._size = (self._frame.shape[1], self._frame.shape[0])

    @property
    def duration(self) -> float:
        return float('inf')  # Images last forever

    @property
    def size(self) -> Tuple[int, int]:
        return self._size

    @property
    def fps(self) -> float:
        return 30.0  # Arbitrary

    def read_frame(self, t: float) -> np.ndarray:
        """Return a copy of the loaded image regardless of ``t``."""
        return self._frame.copy()

    def __repr__(self):
        return f"ImageSource({self.path.name}, {self._size[0]}x{self._size[1]})"
class LiveSource(Source):
    """
    Live video capture source (webcam, capture card, etc.).

    Time parameter is ignored - always returns latest frame.
    """
    def __init__(self, device: int = 0, size: Tuple[int, int] = (1280, 720), fps: float = 30):
        """Open a capture device and request the given size/fps.

        Raises:
            RuntimeError: If the device cannot be opened.
        """
        import cv2
        self._cap = cv2.VideoCapture(device)
        # Fix: check isOpened() immediately. Previously the check ran only
        # after set()/get(), so a failed open silently produced
        # _size == (0, 0) before finally raising.
        if not self._cap.isOpened():
            raise RuntimeError(f"Could not open video device {device}")
        self._cap.set(cv2.CAP_PROP_FRAME_WIDTH, size[0])
        self._cap.set(cv2.CAP_PROP_FRAME_HEIGHT, size[1])
        self._cap.set(cv2.CAP_PROP_FPS, fps)
        # Get actual settings (the driver may not honor the request exactly)
        self._size = (
            int(self._cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(self._cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        )
        self._fps = self._cap.get(cv2.CAP_PROP_FPS)

    @property
    def duration(self) -> float:
        return float('inf')  # Live - no duration

    @property
    def size(self) -> Tuple[int, int]:
        return self._size

    @property
    def fps(self) -> float:
        return self._fps

    def read_frame(self, t: float) -> np.ndarray:
        """Read latest frame (t is ignored for live sources)."""
        import cv2
        ret, frame = self._cap.read()
        if not ret:
            # Capture hiccup: return a black frame rather than crashing
            return np.zeros((self._size[1], self._size[0], 3), dtype=np.uint8)
        return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    def close(self):
        """Release the capture device (safe to call more than once)."""
        # Fix: guard against a partially-constructed instance so __del__
        # cannot raise AttributeError if VideoCapture() itself failed.
        cap = getattr(self, "_cap", None)
        if cap is not None:
            cap.release()

    def __del__(self):
        self.close()

    def __repr__(self):
        return f"LiveSource({self._size[0]}x{self._size[1]}, {self._fps}fps)"

1098
streaming/stream_sexp.py Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff