Add IPFS HLS streaming and GPU optimizations
- Add IPFSHLSOutput class that uploads segments to IPFS as they're created - Update streaming task to use IPFS HLS output for distributed streaming - Add /ipfs-stream endpoint to get IPFS playlist URL - Update /stream endpoint to redirect to IPFS when available - Add GPU persistence mode (STREAMING_GPU_PERSIST=1) to keep frames on GPU - Add hardware video decoding (NVDEC) support for faster video processing - Add GPU-accelerated primitive libraries: blending_gpu, color_ops_gpu, geometry_gpu - Add streaming_gpu module with GPUFrame class for tracking CPU/GPU data location - Add Dockerfile.gpu for building GPU-enabled worker image Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -68,6 +68,8 @@ class NumpyBackend(Backend):
|
||||
|
||||
def load_effect(self, effect_path: Path) -> Any:
|
||||
"""Load an effect from sexp file."""
|
||||
if isinstance(effect_path, str):
|
||||
effect_path = Path(effect_path)
|
||||
effect_key = str(effect_path)
|
||||
if effect_key not in self._loaded_effects:
|
||||
interp = self._get_interpreter()
|
||||
@@ -260,23 +262,258 @@ class NumpyBackend(Backend):
|
||||
return np.clip(result, 0, 255).astype(np.uint8)
|
||||
|
||||
|
||||
class WGPUBackend(Backend):
    """
    GPU-based effect processing using wgpu/WebGPU compute shaders.

    Compiles sexp effects to WGSL at load time, executes on GPU.
    Achieves 30+ fps real-time processing on supported hardware.

    Requirements:
    - wgpu-py library
    - Vulkan-capable GPU (or software renderer)

    Falls back to NumpyBackend whenever the device cannot be created
    or an effect fails to compile.
    """

    def __init__(self, recipe_dir: Path = None):
        # Directory containing effect recipes; defaults to the CWD.
        self.recipe_dir = recipe_dir or Path(".")
        # wgpu device handle, created lazily by _ensure_device().
        self._device = None
        # effect path -> {'compiled', 'pipeline', 'name'} for GPU effects.
        self._loaded_effects: Dict[str, Any] = {}
        # CPU implementation used when the GPU path is unavailable.
        self._numpy_fallback = NumpyBackend(recipe_dir)
        # Reusable GPU buffers keyed by (width, height).
        self._buffer_pool: Dict[tuple, Dict] = {}
|
||||
|
||||
def _ensure_device(self):
    """Lazily initialize the wgpu device; safe to call repeatedly.

    On any failure (wgpu not installed, no usable adapter) the device
    stays None so callers fall back to the CPU path.
    """
    if self._device is not None:
        return

    try:
        import wgpu

        # Prefer a discrete GPU when more than one adapter exists.
        adapter = wgpu.gpu.request_adapter_sync(power_preference="high-performance")
        self._device = adapter.request_device_sync()
        print(f"[WGPUBackend] Using GPU: {adapter.info.get('device', 'unknown')}")
    except Exception as e:
        print(f"[WGPUBackend] GPU init failed: {e}, falling back to CPU")
        self._device = None
|
||||
|
||||
def load_effect(self, effect_path: Path) -> Any:
    """Load and compile an effect from a sexp file to WGSL.

    Returns the effect name on success (cached per path), or delegates
    to the numpy fallback's load_effect when the GPU is unavailable or
    compilation fails.
    """
    effect_key = str(effect_path)
    if effect_key in self._loaded_effects:
        # BUGFIX: the cache-miss path returns the effect *name*, but the
        # cache-hit path returned the whole info dict. Return the name
        # in both cases so callers see a consistent type.
        return self._loaded_effects[effect_key]['name']

    try:
        from sexp_effects.wgsl_compiler import compile_effect_file
        compiled = compile_effect_file(str(effect_path))

        self._ensure_device()
        if self._device is None:
            # GPU unavailable: delegate to the CPU implementation.
            return self._numpy_fallback.load_effect(effect_path)

        # Compile the generated WGSL into a compute pipeline.
        shader_module = self._device.create_shader_module(code=compiled.wgsl_code)
        pipeline = self._device.create_compute_pipeline(
            layout="auto",
            compute={"module": shader_module, "entry_point": "main"},
        )

        self._loaded_effects[effect_key] = {
            'compiled': compiled,
            'pipeline': pipeline,
            'name': compiled.name,
        }
        return compiled.name

    except Exception as e:
        print(f"[WGPUBackend] Failed to compile {effect_path}: {e}")
        # Fall back to numpy for this effect
        return self._numpy_fallback.load_effect(effect_path)
|
||||
|
||||
def _resolve_binding(self, value: Any, t: float, analysis_data: Dict) -> Any:
    """Resolve a parameter binding to its concrete value at time t.

    Binding semantics are identical on CPU and GPU, so this simply
    delegates to the numpy fallback backend's resolver.
    """
    fallback = self._numpy_fallback
    return fallback._resolve_binding(value, t, analysis_data)
|
||||
|
||||
def _get_or_create_buffers(self, w: int, h: int):
    """Return pooled GPU buffers for the given frame dimensions.

    Buffers are cached per (width, height) so consecutive frames of the
    same size reuse allocations instead of re-creating them every call.

    Returns a dict with 'staging', 'input', 'output', 'params' wgpu
    buffers plus 'size' (bytes per frame: one packed u32 per pixel).
    """
    import wgpu

    key = (w, h)
    cached = self._buffer_pool.get(key)
    if cached is not None:
        return cached

    size = w * h * 4  # u32 per pixel

    usage = wgpu.BufferUsage
    entry = {
        # Staging buffer for uploads (MAP_WRITE).
        'staging': self._device.create_buffer(
            size=size,
            usage=usage.MAP_WRITE | usage.COPY_SRC,
            mapped_at_creation=False,
        ),
        # Input buffer (STORAGE, receives data from staging).
        'input': self._device.create_buffer(
            size=size,
            usage=usage.STORAGE | usage.COPY_DST,
        ),
        # Output buffer (STORAGE + COPY_SRC for readback).
        'output': self._device.create_buffer(
            size=size,
            usage=usage.STORAGE | usage.COPY_SRC,
        ),
        # Uniform params block; 256 bytes covers dims/time + effect params.
        'params': self._device.create_buffer(
            size=256,
            usage=usage.UNIFORM | usage.COPY_DST,
        ),
        'size': size,
    }
    self._buffer_pool[key] = entry
    return entry
|
||||
|
||||
def _apply_effect_gpu(
    self,
    frame: np.ndarray,
    effect_name: str,
    params: Dict,
    t: float,
) -> Optional[np.ndarray]:
    """Apply a single effect to a frame on the GPU.

    Args:
        frame: HxWx3 uint8 RGB frame.
        effect_name: compiled effect name (as returned by load_effect).
        params: resolved parameter values keyed by param name.
        t: stream time in seconds (passed to the shader).

    Returns:
        Processed HxWx3 uint8 frame, or None when the effect has no
        compiled pipeline / no device exists (caller uses CPU fallback).
    """
    import struct

    # Locate the compiled pipeline by effect name (generator avoids the
    # explicit break-loop of the original).
    effect_info = next(
        (info for info in self._loaded_effects.values()
         if info.get('name') == effect_name),
        None,
    )
    if effect_info is None or self._device is None:
        return None

    compiled = effect_info['compiled']
    pipeline = effect_info['pipeline']

    h, w = frame.shape[:2]
    buffers = self._get_or_create_buffers(w, h)

    # Pack RGB into one u32 per pixel (0x00RRGGBB) for the shader.
    r = frame[:, :, 0].astype(np.uint32)
    g = frame[:, :, 1].astype(np.uint32)
    b = frame[:, :, 2].astype(np.uint32)
    packed = (r << 16) | (g << 8) | b
    input_data = np.ascontiguousarray(packed, dtype=np.uint32)

    # Upload via queue.write_buffer (avoids buffer re-creation).
    self._device.queue.write_buffer(buffers['input'], 0, input_data.tobytes())

    # Params struct: width(u32), height(u32), time(f32), then
    # effect-specific params in declaration order.
    param_values = [w, h, t]
    param_format = "IIf"
    for param in compiled.params:
        val = params.get(param.name, param.default)
        if val is None:
            val = 0
        if param.wgsl_type == 'f32':
            param_values.append(float(val))
            param_format += "f"
        elif param.wgsl_type == 'i32':
            param_values.append(int(val))
            param_format += "i"
        elif param.wgsl_type == 'u32':
            param_values.append(int(val))
            param_format += "I"

    # Pad to the 16-byte alignment WGSL uniform blocks require
    # (single expression instead of the original byte-append loop).
    param_bytes = struct.pack(param_format, *param_values)
    param_bytes += b'\x00' * (-len(param_bytes) % 16)
    self._device.queue.write_buffer(buffers['params'], 0, param_bytes)

    # Bind group must be rebuilt per effect (layouts differ per pipeline).
    bind_group = self._device.create_bind_group(
        layout=pipeline.get_bind_group_layout(0),
        entries=[
            {"binding": 0, "resource": {"buffer": buffers['input']}},
            {"binding": 1, "resource": {"buffer": buffers['output']}},
            {"binding": 2, "resource": {"buffer": buffers['params']}},
        ],
    )

    # One 16x16 workgroup per tile covering the frame.
    encoder = self._device.create_command_encoder()
    compute_pass = encoder.begin_compute_pass()
    compute_pass.set_pipeline(pipeline)
    compute_pass.set_bind_group(0, bind_group)
    compute_pass.dispatch_workgroups((w + 15) // 16, (h + 15) // 16, 1)
    compute_pass.end()
    self._device.queue.submit([encoder.finish()])

    # Read back and unpack u32 -> RGB. np.empty: every cell is assigned.
    result_data = self._device.queue.read_buffer(buffers['output'])
    result_packed = np.frombuffer(result_data, dtype=np.uint32).reshape(h, w)
    result = np.empty((h, w, 3), dtype=np.uint8)
    result[:, :, 0] = (result_packed >> 16) & 0xFF
    result[:, :, 1] = (result_packed >> 8) & 0xFF
    result[:, :, 2] = result_packed & 0xFF
    return result
|
||||
|
||||
def _apply_effect(
    self,
    frame: np.ndarray,
    effect_name: str,
    params: Dict,
    t: float,
    analysis_data: Dict,
) -> np.ndarray:
    """Apply a single effect to a frame, preferring the GPU path.

    Bindings are resolved to concrete values for the GPU call; the CPU
    fallback receives the raw params and applies its own resolution.
    """
    # Resolve bindings, skipping structural keys that are not params.
    skip_keys = ("effect", "effect_path", "cid", "analysis_refs")
    resolved_params = {"_time": t}
    for key, value in params.items():
        if key not in skip_keys:
            resolved_params[key] = self._resolve_binding(value, t, analysis_data)

    # GPU first; None means the effect could not run on the device.
    self._ensure_device()
    if self._device is not None:
        gpu_result = self._apply_effect_gpu(frame, effect_name, resolved_params, t)
        if gpu_result is not None:
            return gpu_result

    # CPU fallback.
    return self._numpy_fallback._apply_effect(
        frame, effect_name, params, t, analysis_data
    )
|
||||
|
||||
def process_frame(
|
||||
self,
|
||||
@@ -286,7 +523,34 @@ class GLSLBackend(Backend):
|
||||
t: float,
|
||||
analysis_data: Dict,
|
||||
) -> np.ndarray:
|
||||
pass
|
||||
"""Process frames through effects and composite."""
|
||||
if not frames:
|
||||
return np.zeros((720, 1280, 3), dtype=np.uint8)
|
||||
|
||||
processed = []
|
||||
|
||||
# Apply effects to each input frame
|
||||
for i, (frame, effects) in enumerate(zip(frames, effects_per_frame)):
|
||||
result = frame.copy()
|
||||
for effect_config in effects:
|
||||
effect_name = effect_config.get("effect", "")
|
||||
if effect_name:
|
||||
result = self._apply_effect(
|
||||
result, effect_name, effect_config, t, analysis_data
|
||||
)
|
||||
processed.append(result)
|
||||
|
||||
# Composite layers (use numpy backend for now)
|
||||
if len(processed) == 1:
|
||||
return processed[0]
|
||||
|
||||
return self._numpy_fallback._composite(
|
||||
processed, compositor_config, t, analysis_data
|
||||
)
|
||||
|
||||
|
||||
# Backwards-compatibility: older code imported the GPU backend as
# GLSLBackend, so keep that name pointing at the wgpu implementation.
GLSLBackend = WGPUBackend
|
||||
|
||||
|
||||
def get_backend(name: str = "numpy", **kwargs) -> Backend:
    """
    Get a backend by name.

    Args:
        name: "numpy" for the CPU backend, or "wgpu" / "gpu" / "glsl"
            (legacy alias) for the GPU backend
        **kwargs: Backend-specific options

    Returns:
        Backend instance

    Raises:
        ValueError: if name does not match a known backend.
    """
    # BUGFIX: the old standalone `elif name == "glsl"` branch shadowed
    # the combined ("wgpu", "glsl", "gpu") branch; with GLSLBackend now
    # aliased to WGPUBackend a single combined branch is correct.
    if name == "numpy":
        return NumpyBackend(**kwargs)
    elif name in ("wgpu", "glsl", "gpu"):
        return WGPUBackend(**kwargs)
    else:
        raise ValueError(f"Unknown backend: {name}")
|
||||
|
||||
@@ -5,14 +5,99 @@ Supports:
|
||||
- Display window (preview)
|
||||
- File output (recording)
|
||||
- Stream output (RTMP, etc.) - future
|
||||
- NVENC hardware encoding (auto-detected)
|
||||
- CuPy GPU arrays (auto-converted to numpy for output)
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import subprocess
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Tuple, Optional
|
||||
from typing import Tuple, Optional, List, Union
|
||||
from pathlib import Path
|
||||
|
||||
# Optional CuPy support: frames may arrive as GPU arrays when the
# GPU-persistence pipeline is active.
try:
    import cupy as cp
    CUPY_AVAILABLE = True
except ImportError:
    cp = None
    CUPY_AVAILABLE = False


def ensure_numpy(frame: Union[np.ndarray, 'cp.ndarray']) -> np.ndarray:
    """Return *frame* as a numpy array, downloading from the GPU if needed.

    Anything that is not a CuPy array is returned unchanged.
    """
    is_gpu_array = CUPY_AVAILABLE and isinstance(frame, cp.ndarray)
    return cp.asnumpy(frame) if is_gpu_array else frame
|
||||
|
||||
# Cached result of the NVENC probe (None = not yet probed).
_nvenc_available: Optional[bool] = None


def check_nvenc_available() -> bool:
    """Return True when ffmpeg reports the h264_nvenc encoder.

    The subprocess probe runs at most once per process; the result is
    cached in a module-level flag. Any failure (ffmpeg missing, timeout)
    is treated as "not available".
    """
    global _nvenc_available
    if _nvenc_available is None:
        try:
            probe = subprocess.run(
                ["ffmpeg", "-encoders"],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except Exception:
            _nvenc_available = False
        else:
            _nvenc_available = "h264_nvenc" in probe.stdout
    return _nvenc_available
|
||||
|
||||
|
||||
def get_encoder_params(codec: str, preset: str, crf: int) -> List[str]:
    """
    Get encoder-specific FFmpeg parameters.

    For NVENC (h264_nvenc, hevc_nvenc):
    - Uses -cq for constant quality (similar to CRF)
    - Presets: p1 (fastest) to p7 (slowest/best quality)
    - Mapping: fast->p4, medium->p5, slow->p6

    For libx264:
    - Uses -crf for constant rate factor
    - Presets: ultrafast, superfast, veryfast, faster, fast, medium,
      slow, slower, veryslow
    """
    if codec not in ("h264_nvenc", "hevc_nvenc"):
        # Standard libx264-style rate control.
        return ["-c:v", codec, "-preset", preset, "-crf", str(crf)]

    # Translate libx264 preset names onto NVENC's p1..p7 scale;
    # unknown presets land on the balanced p4.
    nvenc_preset = {
        "ultrafast": "p1",
        "superfast": "p2",
        "veryfast": "p3",
        "faster": "p3",
        "fast": "p4",
        "medium": "p5",
        "slow": "p6",
        "slower": "p6",
        "veryslow": "p7",
    }.get(preset, "p4")

    # NVENC constant-quality mode: 0 (best) .. 51 (worst), like CRF.
    return [
        "-c:v", codec,
        "-preset", nvenc_preset,
        "-cq", str(crf),
        "-rc", "vbr",
    ]
|
||||
|
||||
|
||||
class Output(ABC):
|
||||
"""Abstract base class for output targets."""
|
||||
@@ -91,6 +176,9 @@ class DisplayOutput(Output):
|
||||
if not self._is_open:
|
||||
return
|
||||
|
||||
# Convert GPU array to numpy if needed
|
||||
frame = ensure_numpy(frame)
|
||||
|
||||
# Ensure frame is correct format
|
||||
if frame.dtype != np.uint8:
|
||||
frame = np.clip(frame, 0, 255).astype(np.uint8)
|
||||
@@ -136,6 +224,9 @@ class DisplayOutput(Output):
|
||||
class FileOutput(Output):
|
||||
"""
|
||||
Write frames to a video file using ffmpeg.
|
||||
|
||||
Automatically uses NVENC hardware encoding when available,
|
||||
falling back to libx264 CPU encoding otherwise.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -143,7 +234,7 @@ class FileOutput(Output):
|
||||
path: str,
|
||||
size: Tuple[int, int],
|
||||
fps: float = 30,
|
||||
codec: str = "libx264",
|
||||
codec: str = "auto", # "auto", "h264_nvenc", "libx264"
|
||||
crf: int = 18,
|
||||
preset: str = "fast",
|
||||
audio_source: str = None,
|
||||
@@ -153,6 +244,11 @@ class FileOutput(Output):
|
||||
self.fps = fps
|
||||
self._is_open = True
|
||||
|
||||
# Auto-detect NVENC
|
||||
if codec == "auto":
|
||||
codec = "h264_nvenc" if check_nvenc_available() else "libx264"
|
||||
self.codec = codec
|
||||
|
||||
# Build ffmpeg command
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
@@ -170,12 +266,9 @@ class FileOutput(Output):
|
||||
# Explicitly map: video from input 0 (rawvideo), audio from input 1
|
||||
cmd.extend(["-map", "0:v", "-map", "1:a"])
|
||||
|
||||
cmd.extend([
|
||||
"-c:v", codec,
|
||||
"-preset", preset,
|
||||
"-crf", str(crf),
|
||||
"-pix_fmt", "yuv420p",
|
||||
])
|
||||
# Get encoder-specific params
|
||||
cmd.extend(get_encoder_params(codec, preset, crf))
|
||||
cmd.extend(["-pix_fmt", "yuv420p"])
|
||||
|
||||
# Add audio codec if we have audio
|
||||
if audio_source:
|
||||
@@ -201,11 +294,20 @@ class FileOutput(Output):
|
||||
self._is_open = False
|
||||
return
|
||||
|
||||
# Convert GPU array to numpy if needed
|
||||
frame = ensure_numpy(frame)
|
||||
|
||||
# Resize if needed
|
||||
if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
|
||||
import cv2
|
||||
frame = cv2.resize(frame, self.size)
|
||||
|
||||
# Ensure correct format
|
||||
if frame.dtype != np.uint8:
|
||||
frame = np.clip(frame, 0, 255).astype(np.uint8)
|
||||
if not frame.flags['C_CONTIGUOUS']:
|
||||
frame = np.ascontiguousarray(frame)
|
||||
|
||||
try:
|
||||
self._process.stdin.write(frame.tobytes())
|
||||
except BrokenPipeError:
|
||||
@@ -335,6 +437,9 @@ class PipeOutput(Output):
|
||||
self._is_open = False
|
||||
return
|
||||
|
||||
# Convert GPU array to numpy if needed
|
||||
frame = ensure_numpy(frame)
|
||||
|
||||
# Resize if needed
|
||||
if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
|
||||
import cv2
|
||||
@@ -371,3 +476,424 @@ class PipeOutput(Output):
|
||||
if self._process and self._process.poll() is not None:
|
||||
self._is_open = False
|
||||
return self._is_open
|
||||
|
||||
|
||||
class HLSOutput(Output):
    """
    Write frames as HLS stream (m3u8 playlist + .ts segments).

    This enables true live streaming where the browser can poll
    for new segments as they become available.

    Automatically uses NVENC hardware encoding when available.
    """

    def __init__(
        self,
        output_dir: str,
        size: Tuple[int, int],
        fps: float = 30,
        segment_duration: float = 4.0,  # 4s segments for stability
        codec: str = "auto",  # "auto", "h264_nvenc", "libx264"
        crf: int = 23,
        preset: str = "fast",  # Better quality than ultrafast
        audio_source: str = None,
    ):
        """Start an ffmpeg process that muxes raw RGB frames into HLS.

        Args:
            output_dir: directory receiving stream.m3u8 + segment files.
            size: (width, height) of the output video.
            fps: output frame rate.
            segment_duration: target seconds per HLS segment.
            codec: "auto" probes for NVENC, otherwise an explicit encoder.
            crf: quality target (CRF for x264, CQ for NVENC).
            preset: encoder preset in libx264 naming.
            audio_source: optional path of an audio file to mux in.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.size = size
        self.fps = fps
        self.segment_duration = segment_duration
        self._is_open = True

        # Auto-detect NVENC
        if codec == "auto":
            codec = "h264_nvenc" if check_nvenc_available() else "libx264"
        self.codec = codec

        # HLS playlist path
        self.playlist_path = self.output_dir / "stream.m3u8"

        # Raw RGB frames arrive on stdin.
        cmd = [
            "ffmpeg", "-y",
            "-f", "rawvideo",
            "-vcodec", "rawvideo",
            "-pix_fmt", "rgb24",
            "-s", f"{size[0]}x{size[1]}",
            "-r", str(fps),
            "-i", "-",
        ]

        # Optional audio input, explicitly mapped alongside the video.
        if audio_source:
            cmd.extend(["-i", str(audio_source)])
            cmd.extend(["-map", "0:v", "-map", "1:a"])

        # Keyframe interval - must be exactly segment_duration for clean cuts
        gop_size = int(fps * segment_duration)

        # Get encoder-specific params
        cmd.extend(get_encoder_params(codec, preset, crf))
        cmd.extend([
            "-pix_fmt", "yuv420p",
            # Force keyframes at exact intervals for clean segment boundaries
            "-g", str(gop_size),
            "-keyint_min", str(gop_size),
            "-sc_threshold", "0",  # Disable scene change detection
            "-force_key_frames", f"expr:gte(t,n_forced*{segment_duration})",
            # Reduce buffering for faster segment availability
            "-flush_packets", "1",
        ])

        # Add audio codec if we have audio
        if audio_source:
            cmd.extend(["-c:a", "aac", "-b:a", "128k"])

        # HLS specific options for smooth live streaming
        cmd.extend([
            "-f", "hls",
            "-hls_time", str(segment_duration),
            "-hls_list_size", "0",  # Keep all segments in playlist
            "-hls_flags", "independent_segments+append_list+split_by_time",
            "-hls_segment_type", "mpegts",
            "-hls_segment_filename", str(self.output_dir / "segment_%05d.ts"),
            str(self.playlist_path),
        ])

        import sys
        print(f"HLSOutput cmd: {' '.join(cmd)}", file=sys.stderr)
        self._process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stderr=None,  # Show errors for debugging
        )

        # Track segments for status reporting
        self.segments_written = 0
        self._last_segment_check = 0

    def write(self, frame: np.ndarray, t: float):
        """Write one RGB frame (at stream time t) to the HLS encoder."""
        if not self._is_open or self._process.poll() is not None:
            self._is_open = False
            return

        # Convert GPU array to numpy if needed
        frame = ensure_numpy(frame)

        # Resize if needed
        if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
            import cv2
            frame = cv2.resize(frame, self.size)

        # Ensure correct format
        if frame.dtype != np.uint8:
            frame = np.clip(frame, 0, 255).astype(np.uint8)
        if not frame.flags['C_CONTIGUOUS']:
            frame = np.ascontiguousarray(frame)

        try:
            self._process.stdin.write(frame.tobytes())
        except BrokenPipeError:
            # BUGFIX: bail out here — the encoder is gone, so counting
            # segments below would only report stale state.
            self._is_open = False
            return

        # Periodically count segments
        if t - self._last_segment_check > 1.0:
            self._last_segment_check = t
            self.segments_written = len(list(self.output_dir.glob("segment_*.ts")))

    def close(self):
        """Flush and stop ffmpeg, then finalize the playlist as VOD."""
        if self._process:
            self._process.stdin.close()
            self._process.wait()
        self._is_open = False

        # Final segment count
        self.segments_written = len(list(self.output_dir.glob("segment_*.ts")))

        # Mark playlist as ended (VOD mode). BUGFIX: only append the tag
        # if ffmpeg did not already write one on clean shutdown, otherwise
        # the playlist ends up with a duplicate #EXT-X-ENDLIST.
        if self.playlist_path.exists():
            content = self.playlist_path.read_text()
            if "#EXT-X-ENDLIST" not in content:
                with open(self.playlist_path, "a") as f:
                    f.write("#EXT-X-ENDLIST\n")

    @property
    def is_open(self) -> bool:
        # Open only while our flag is set AND ffmpeg is still running.
        return self._is_open and self._process.poll() is None
|
||||
|
||||
|
||||
class IPFSHLSOutput(Output):
    """
    Write frames as HLS stream with segments uploaded to IPFS.

    Each segment is uploaded to IPFS as it's created, enabling distributed
    streaming where clients can fetch segments from any IPFS gateway.

    The m3u8 playlist is continuously updated with IPFS URLs and can be
    fetched via get_playlist() or the playlist_cid property.
    """

    def __init__(
        self,
        output_dir: str,
        size: Tuple[int, int],
        fps: float = 30,
        segment_duration: float = 4.0,
        codec: str = "auto",
        crf: int = 23,
        preset: str = "fast",
        audio_source: str = None,
        ipfs_gateway: str = "https://ipfs.io/ipfs",
    ):
        """Start ffmpeg producing local HLS; segments are mirrored to IPFS.

        Args mirror HLSOutput, plus:
            ipfs_gateway: base URL prepended to CIDs in generated playlists.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.size = size
        self.fps = fps
        self.segment_duration = segment_duration
        self.ipfs_gateway = ipfs_gateway.rstrip("/")
        self._is_open = True

        # Auto-detect NVENC
        if codec == "auto":
            codec = "h264_nvenc" if check_nvenc_available() else "libx264"
        self.codec = codec

        # segment_number -> CID for every segment already on IPFS.
        self.segment_cids: dict = {}
        self._last_segment_checked = -1
        self._playlist_cid: Optional[str] = None

        # Import IPFS client (kept in __init__ so failures surface early).
        from ipfs_client import add_file, add_bytes
        self._ipfs_add_file = add_file
        self._ipfs_add_bytes = add_bytes

        # Local HLS paths
        self.local_playlist_path = self.output_dir / "stream.m3u8"

        # Raw RGB frames arrive on stdin.
        cmd = [
            "ffmpeg", "-y",
            "-f", "rawvideo",
            "-vcodec", "rawvideo",
            "-pix_fmt", "rgb24",
            "-s", f"{size[0]}x{size[1]}",
            "-r", str(fps),
            "-i", "-",
        ]

        # Add audio input if provided
        if audio_source:
            cmd.extend(["-i", str(audio_source)])
            cmd.extend(["-map", "0:v", "-map", "1:a"])

        # Keyframe interval
        gop_size = int(fps * segment_duration)

        # Get encoder-specific params
        cmd.extend(get_encoder_params(codec, preset, crf))
        cmd.extend([
            "-pix_fmt", "yuv420p",
            "-g", str(gop_size),
            "-keyint_min", str(gop_size),
            "-sc_threshold", "0",
            "-force_key_frames", f"expr:gte(t,n_forced*{segment_duration})",
            "-flush_packets", "1",
        ])

        # Add audio codec if we have audio
        if audio_source:
            cmd.extend(["-c:a", "aac", "-b:a", "128k"])

        # HLS options
        cmd.extend([
            "-f", "hls",
            "-hls_time", str(segment_duration),
            "-hls_list_size", "0",
            "-hls_flags", "independent_segments+append_list+split_by_time",
            "-hls_segment_type", "mpegts",
            "-hls_segment_filename", str(self.output_dir / "segment_%05d.ts"),
            str(self.local_playlist_path),
        ])

        import sys
        print(f"IPFSHLSOutput: starting ffmpeg", file=sys.stderr)
        self._process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stderr=None,
        )

    def _render_playlist(self, vod: bool = False, end: bool = False) -> str:
        """Render an m3u8 playlist over the current segment CIDs.

        Shared by the live update, final close, and get_playlist paths
        (previously three hand-rolled copies of the same logic).
        """
        lines = [
            "#EXTM3U",
            "#EXT-X-VERSION:3",
            f"#EXT-X-TARGETDURATION:{int(self.segment_duration) + 1}",
            "#EXT-X-MEDIA-SEQUENCE:0",
        ]
        if vod:
            lines.append("#EXT-X-PLAYLIST-TYPE:VOD")
        for seg_num in sorted(self.segment_cids):
            cid = self.segment_cids[seg_num]
            lines.append(f"#EXTINF:{self.segment_duration:.3f},")
            lines.append(f"{self.ipfs_gateway}/{cid}")
        if end:
            lines.append("#EXT-X-ENDLIST")
        return "\n".join(lines) + "\n"

    def _upload_new_segments(self):
        """Check for new finished segments and upload them to IPFS."""
        import sys
        import time

        uploaded_any = False
        for seg_path in sorted(self.output_dir.glob("segment_*.ts")):
            # Filename pattern is segment_NNNNN.ts.
            seg_num = int(seg_path.stem.split("_")[1])

            # Skip if already uploaded
            if seg_num in self.segment_cids:
                continue

            # Skip segments ffmpeg is still writing: size must be
            # non-zero and stable across a short delay.
            try:
                size1 = seg_path.stat().st_size
                if size1 == 0:
                    continue
                time.sleep(0.1)
                if seg_path.stat().st_size != size1:
                    continue
            except FileNotFoundError:
                continue

            # Upload to IPFS
            cid = self._ipfs_add_file(seg_path, pin=True)
            if cid:
                self.segment_cids[seg_num] = cid
                uploaded_any = True
                print(f"IPFS: segment_{seg_num:05d}.ts -> {cid}", file=sys.stderr)

        # IMPROVEMENT: publish the playlist once per batch instead of
        # re-uploading it after every individual segment.
        if uploaded_any:
            self._update_ipfs_playlist()

    def _update_ipfs_playlist(self):
        """Generate and upload the live (non-ended) IPFS-aware playlist."""
        if not self.segment_cids:
            return

        import sys

        playlist_content = self._render_playlist()
        cid = self._ipfs_add_bytes(playlist_content.encode("utf-8"), pin=True)
        if cid:
            self._playlist_cid = cid
            print(f"IPFS: playlist updated -> {cid} ({len(self.segment_cids)} segments)", file=sys.stderr)

    def write(self, frame: np.ndarray, t: float):
        """Write frame to HLS stream and upload segments to IPFS."""
        if not self._is_open or self._process.poll() is not None:
            self._is_open = False
            return

        # Convert GPU array to numpy if needed
        frame = ensure_numpy(frame)

        # Resize if needed
        if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
            import cv2
            frame = cv2.resize(frame, self.size)

        # Ensure correct format
        if frame.dtype != np.uint8:
            frame = np.clip(frame, 0, 255).astype(np.uint8)
        if not frame.flags['C_CONTIGUOUS']:
            frame = np.ascontiguousarray(frame)

        try:
            self._process.stdin.write(frame.tobytes())
        except BrokenPipeError:
            self._is_open = False
            return

        # Check for new segments once per segment boundary crossed.
        current_segment = int(t / self.segment_duration)
        if current_segment > self._last_segment_checked:
            self._last_segment_checked = current_segment
            self._upload_new_segments()

    def close(self):
        """Close the HLS stream and finalize IPFS uploads."""
        import sys

        if self._process:
            self._process.stdin.close()
            self._process.wait()
        self._is_open = False

        # Upload any remaining segments
        self._upload_new_segments()

        # Publish the final VOD playlist with #EXT-X-ENDLIST.
        if self.segment_cids:
            playlist_content = self._render_playlist(vod=True, end=True)
            cid = self._ipfs_add_bytes(playlist_content.encode("utf-8"), pin=True)
            if cid:
                self._playlist_cid = cid
                print(f"IPFS: final playlist -> {cid} ({len(self.segment_cids)} segments)", file=sys.stderr)

    @property
    def playlist_cid(self) -> Optional[str]:
        """Get the current playlist CID."""
        return self._playlist_cid

    @property
    def playlist_url(self) -> Optional[str]:
        """Get the full IPFS URL for the playlist."""
        if self._playlist_cid:
            return f"{self.ipfs_gateway}/{self._playlist_cid}"
        return None

    def get_playlist(self) -> str:
        """Get the current m3u8 playlist content with IPFS URLs."""
        if not self.segment_cids:
            return "#EXTM3U\n"
        # ENDLIST is appended once the stream is closed.
        return self._render_playlist(end=not self._is_open)

    @property
    def is_open(self) -> bool:
        # Open only while our flag is set AND ffmpeg is still running.
        return self._is_open and self._process.poll() is None
|
||||
@@ -159,36 +159,51 @@ class StreamInterpreter:
|
||||
return config
|
||||
|
||||
def _load_primitives(self, lib_name: str):
|
||||
"""Load primitives from a Python library file."""
|
||||
"""Load primitives from a Python library file.
|
||||
|
||||
Prefers GPU-accelerated versions (*_gpu.py) when available.
|
||||
"""
|
||||
import importlib.util
|
||||
|
||||
lib_paths = [
|
||||
self.primitive_lib_dir / f"{lib_name}.py",
|
||||
self.sexp_dir / "primitive_libs" / f"{lib_name}.py",
|
||||
self.sexp_dir.parent / "sexp_effects" / "primitive_libs" / f"{lib_name}.py",
|
||||
]
|
||||
# Try GPU version first, then fall back to CPU version
|
||||
lib_names_to_try = [f"{lib_name}_gpu", lib_name]
|
||||
|
||||
lib_path = None
|
||||
for p in lib_paths:
|
||||
if p.exists():
|
||||
lib_path = p
|
||||
actual_lib_name = lib_name
|
||||
|
||||
for try_lib in lib_names_to_try:
|
||||
lib_paths = [
|
||||
self.primitive_lib_dir / f"{try_lib}.py",
|
||||
self.sexp_dir / "primitive_libs" / f"{try_lib}.py",
|
||||
self.sexp_dir.parent / "sexp_effects" / "primitive_libs" / f"{try_lib}.py",
|
||||
]
|
||||
for p in lib_paths:
|
||||
if p.exists():
|
||||
lib_path = p
|
||||
actual_lib_name = try_lib
|
||||
break
|
||||
if lib_path:
|
||||
break
|
||||
|
||||
if not lib_path:
|
||||
print(f"Warning: primitive library '{lib_name}' not found", file=sys.stderr)
|
||||
return
|
||||
|
||||
spec = importlib.util.spec_from_file_location(lib_name, lib_path)
|
||||
spec = importlib.util.spec_from_file_location(actual_lib_name, lib_path)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(module)
|
||||
|
||||
# Check if this is a GPU-accelerated module
|
||||
is_gpu = actual_lib_name.endswith('_gpu')
|
||||
gpu_tag = " [GPU]" if is_gpu else ""
|
||||
|
||||
count = 0
|
||||
for name in dir(module):
|
||||
if name.startswith('prim_'):
|
||||
func = getattr(module, name)
|
||||
prim_name = name[5:]
|
||||
dash_name = prim_name.replace('_', '-')
|
||||
# Register ONLY with namespace (geometry:ripple-displace)
|
||||
# Register with original lib_name namespace (geometry:rotate, not geometry_gpu:rotate)
|
||||
# Don't overwrite if already registered (allows pre-registration of overrides)
|
||||
key = f"{lib_name}:{dash_name}"
|
||||
if key not in self.primitives:
|
||||
@@ -199,7 +214,7 @@ class StreamInterpreter:
|
||||
prims = getattr(module, 'PRIMITIVES')
|
||||
if isinstance(prims, dict):
|
||||
for name, func in prims.items():
|
||||
# Register ONLY with namespace
|
||||
# Register with original lib_name namespace
|
||||
# Don't overwrite if already registered
|
||||
dash_name = name.replace('_', '-')
|
||||
key = f"{lib_name}:{dash_name}"
|
||||
@@ -207,7 +222,7 @@ class StreamInterpreter:
|
||||
self.primitives[key] = func
|
||||
count += 1
|
||||
|
||||
print(f"Loaded primitives: {lib_name} ({count} functions)", file=sys.stderr)
|
||||
print(f"Loaded primitives: {lib_name} ({count} functions){gpu_tag}", file=sys.stderr)
|
||||
|
||||
def _load_effect(self, effect_path: Path):
|
||||
"""Load and register an effect from a .sexp file."""
|
||||
@@ -807,8 +822,11 @@ class StreamInterpreter:
|
||||
self._record_error(f"Primitive {op} error: {e}")
|
||||
raise RuntimeError(f"Primitive {op} failed: {e}")
|
||||
|
||||
# Unknown - return as-is
|
||||
return expr
|
||||
# Unknown function call - raise meaningful error
|
||||
raise RuntimeError(f"Unknown function or primitive: '{op}'. "
|
||||
f"Available primitives: {sorted(list(self.primitives.keys())[:10])}... "
|
||||
f"Available effects: {sorted(list(self.effects.keys())[:10])}... "
|
||||
f"Available macros: {sorted(list(self.macros.keys())[:10])}...")
|
||||
|
||||
def _step_scans(self, ctx: Context, env: dict):
|
||||
"""Step scans based on trigger evaluation."""
|
||||
@@ -833,9 +851,9 @@ class StreamInterpreter:
|
||||
"""Run the streaming pipeline."""
|
||||
# Import output classes - handle both package and direct execution
|
||||
try:
|
||||
from .output import PipeOutput, DisplayOutput, FileOutput
|
||||
from .output import PipeOutput, DisplayOutput, FileOutput, HLSOutput, IPFSHLSOutput
|
||||
except ImportError:
|
||||
from output import PipeOutput, DisplayOutput, FileOutput
|
||||
from output import PipeOutput, DisplayOutput, FileOutput, HLSOutput, IPFSHLSOutput
|
||||
|
||||
self._init()
|
||||
|
||||
@@ -871,6 +889,16 @@ class StreamInterpreter:
|
||||
out = PipeOutput(size=(w, h), fps=fps, audio_source=audio)
|
||||
elif output == "preview":
|
||||
out = DisplayOutput(size=(w, h), fps=fps, audio_source=audio)
|
||||
elif output.endswith("/hls"):
|
||||
# HLS output - output is a directory path ending in /hls
|
||||
hls_dir = output[:-4] # Remove /hls suffix
|
||||
out = HLSOutput(hls_dir, size=(w, h), fps=fps, audio_source=audio)
|
||||
elif output.endswith("/ipfs-hls"):
|
||||
# IPFS HLS output - segments uploaded to IPFS as they're created
|
||||
hls_dir = output[:-9] # Remove /ipfs-hls suffix
|
||||
import os
|
||||
ipfs_gateway = os.environ.get("IPFS_GATEWAY_URL", "https://ipfs.io/ipfs")
|
||||
out = IPFSHLSOutput(hls_dir, size=(w, h), fps=fps, audio_source=audio, ipfs_gateway=ipfs_gateway)
|
||||
else:
|
||||
out = FileOutput(output, size=(w, h), fps=fps, audio_source=audio)
|
||||
|
||||
@@ -916,6 +944,8 @@ class StreamInterpreter:
|
||||
|
||||
finally:
|
||||
out.close()
|
||||
# Store output for access to properties like playlist_cid
|
||||
self.output = out
|
||||
print("\nDone", file=sys.stderr)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user