Add IPFS HLS streaming and GPU optimizations

- Add IPFSHLSOutput class that uploads segments to IPFS as they're created
- Update streaming task to use IPFS HLS output for distributed streaming
- Add /ipfs-stream endpoint to get IPFS playlist URL
- Update /stream endpoint to redirect to IPFS when available
- Add GPU persistence mode (STREAMING_GPU_PERSIST=1) to keep frames on GPU
- Add hardware video decoding (NVDEC) support for faster video processing
- Add GPU-accelerated primitive libraries: blending_gpu, color_ops_gpu, geometry_gpu
- Add streaming_gpu module with GPUFrame class for tracking CPU/GPU data location
- Add Dockerfile.gpu for building GPU-enabled worker image

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
giles
2026-02-03 20:23:16 +00:00
parent 5bc655f8c8
commit 86830019ad
24 changed files with 4025 additions and 96 deletions

View File

@@ -68,6 +68,8 @@ class NumpyBackend(Backend):
def load_effect(self, effect_path: Path) -> Any:
"""Load an effect from sexp file."""
if isinstance(effect_path, str):
effect_path = Path(effect_path)
effect_key = str(effect_path)
if effect_key not in self._loaded_effects:
interp = self._get_interpreter()
@@ -260,23 +262,258 @@ class NumpyBackend(Backend):
return np.clip(result, 0, 255).astype(np.uint8)
class GLSLBackend(Backend):
class WGPUBackend(Backend):
"""
GPU-based effect processing using OpenGL/GLSL.
GPU-based effect processing using wgpu/WebGPU compute shaders.
Requires GPU with OpenGL 3.3+ support (or Mesa software renderer).
Achieves 30+ fps real-time processing.
Compiles sexp effects to WGSL at load time, executes on GPU.
Achieves 30+ fps real-time processing on supported hardware.
TODO: Implement when ready for GPU acceleration.
Requirements:
- wgpu-py library
- Vulkan-capable GPU (or software renderer)
"""
def __init__(self):
raise NotImplementedError(
"GLSL backend not yet implemented. Use NumpyBackend for now."
)
def __init__(self, recipe_dir: Path = None):
self.recipe_dir = recipe_dir or Path(".")
self._device = None
self._loaded_effects: Dict[str, Any] = {} # name -> compiled shader info
self._numpy_fallback = NumpyBackend(recipe_dir)
# Buffer pool for reuse - keyed by (width, height)
self._buffer_pool: Dict[tuple, Dict] = {}
def _ensure_device(self):
"""Lazy-initialize wgpu device."""
if self._device is not None:
return
try:
import wgpu
adapter = wgpu.gpu.request_adapter_sync(power_preference="high-performance")
self._device = adapter.request_device_sync()
print(f"[WGPUBackend] Using GPU: {adapter.info.get('device', 'unknown')}")
except Exception as e:
print(f"[WGPUBackend] GPU init failed: {e}, falling back to CPU")
self._device = None
def load_effect(self, effect_path: Path) -> Any:
pass
"""Load and compile an effect from sexp file to WGSL."""
effect_key = str(effect_path)
if effect_key in self._loaded_effects:
return self._loaded_effects[effect_key]
try:
from sexp_effects.wgsl_compiler import compile_effect_file
compiled = compile_effect_file(str(effect_path))
self._ensure_device()
if self._device is None:
# Fall back to numpy
return self._numpy_fallback.load_effect(effect_path)
# Create shader module
import wgpu
shader_module = self._device.create_shader_module(code=compiled.wgsl_code)
# Create compute pipeline
pipeline = self._device.create_compute_pipeline(
layout="auto",
compute={"module": shader_module, "entry_point": "main"}
)
self._loaded_effects[effect_key] = {
'compiled': compiled,
'pipeline': pipeline,
'name': compiled.name,
}
return compiled.name
except Exception as e:
print(f"[WGPUBackend] Failed to compile {effect_path}: {e}")
# Fall back to numpy for this effect
return self._numpy_fallback.load_effect(effect_path)
def _resolve_binding(self, value: Any, t: float, analysis_data: Dict) -> Any:
"""Resolve a parameter binding to its value at time t."""
# Delegate to numpy backend's implementation
return self._numpy_fallback._resolve_binding(value, t, analysis_data)
def _get_or_create_buffers(self, w: int, h: int):
"""Get or create reusable buffers for given dimensions."""
import wgpu
key = (w, h)
if key in self._buffer_pool:
return self._buffer_pool[key]
size = w * h * 4 # u32 per pixel
# Create staging buffer for uploads (MAP_WRITE)
staging_buffer = self._device.create_buffer(
size=size,
usage=wgpu.BufferUsage.MAP_WRITE | wgpu.BufferUsage.COPY_SRC,
mapped_at_creation=False,
)
# Create input buffer (STORAGE, receives data from staging)
input_buffer = self._device.create_buffer(
size=size,
usage=wgpu.BufferUsage.STORAGE | wgpu.BufferUsage.COPY_DST,
)
# Create output buffer (STORAGE + COPY_SRC for readback)
output_buffer = self._device.create_buffer(
size=size,
usage=wgpu.BufferUsage.STORAGE | wgpu.BufferUsage.COPY_SRC,
)
# Params buffer (uniform, 256 bytes should be enough)
params_buffer = self._device.create_buffer(
size=256,
usage=wgpu.BufferUsage.UNIFORM | wgpu.BufferUsage.COPY_DST,
)
self._buffer_pool[key] = {
'staging': staging_buffer,
'input': input_buffer,
'output': output_buffer,
'params': params_buffer,
'size': size,
}
return self._buffer_pool[key]
def _apply_effect_gpu(
self,
frame: np.ndarray,
effect_name: str,
params: Dict,
t: float,
) -> Optional[np.ndarray]:
"""Apply effect using GPU. Returns None if GPU not available."""
import wgpu
# Find the loaded effect
effect_info = None
for key, info in self._loaded_effects.items():
if info.get('name') == effect_name:
effect_info = info
break
if effect_info is None or self._device is None:
return None
compiled = effect_info['compiled']
pipeline = effect_info['pipeline']
h, w = frame.shape[:2]
# Get reusable buffers
buffers = self._get_or_create_buffers(w, h)
# Pack frame as u32 array (RGB -> packed u32)
r = frame[:, :, 0].astype(np.uint32)
g = frame[:, :, 1].astype(np.uint32)
b = frame[:, :, 2].astype(np.uint32)
packed = (r << 16) | (g << 8) | b
input_data = packed.flatten().astype(np.uint32)
# Upload input data via queue.write_buffer (more efficient than recreation)
self._device.queue.write_buffer(buffers['input'], 0, input_data.tobytes())
# Build params struct
import struct
param_values = [w, h] # width, height as u32
param_format = "II" # two u32
# Add time as f32
param_values.append(t)
param_format += "f"
# Add effect-specific params
for param in compiled.params:
val = params.get(param.name, param.default)
if val is None:
val = 0
if param.wgsl_type == 'f32':
param_values.append(float(val))
param_format += "f"
elif param.wgsl_type == 'i32':
param_values.append(int(val))
param_format += "i"
elif param.wgsl_type == 'u32':
param_values.append(int(val))
param_format += "I"
# Pad to 16-byte alignment
param_bytes = struct.pack(param_format, *param_values)
while len(param_bytes) % 16 != 0:
param_bytes += b'\x00'
self._device.queue.write_buffer(buffers['params'], 0, param_bytes)
# Create bind group (unfortunately this can't be easily reused with different effects)
bind_group = self._device.create_bind_group(
layout=pipeline.get_bind_group_layout(0),
entries=[
{"binding": 0, "resource": {"buffer": buffers['input']}},
{"binding": 1, "resource": {"buffer": buffers['output']}},
{"binding": 2, "resource": {"buffer": buffers['params']}},
]
)
# Dispatch compute
encoder = self._device.create_command_encoder()
compute_pass = encoder.begin_compute_pass()
compute_pass.set_pipeline(pipeline)
compute_pass.set_bind_group(0, bind_group)
# Workgroups: ceil(w/16) x ceil(h/16)
wg_x = (w + 15) // 16
wg_y = (h + 15) // 16
compute_pass.dispatch_workgroups(wg_x, wg_y, 1)
compute_pass.end()
self._device.queue.submit([encoder.finish()])
# Read back result
result_data = self._device.queue.read_buffer(buffers['output'])
result_packed = np.frombuffer(result_data, dtype=np.uint32).reshape(h, w)
# Unpack u32 -> RGB
result = np.zeros((h, w, 3), dtype=np.uint8)
result[:, :, 0] = ((result_packed >> 16) & 0xFF).astype(np.uint8)
result[:, :, 1] = ((result_packed >> 8) & 0xFF).astype(np.uint8)
result[:, :, 2] = (result_packed & 0xFF).astype(np.uint8)
return result
def _apply_effect(
self,
frame: np.ndarray,
effect_name: str,
params: Dict,
t: float,
analysis_data: Dict,
) -> np.ndarray:
"""Apply a single effect to a frame."""
# Resolve bindings in params
resolved_params = {"_time": t}
for key, value in params.items():
if key in ("effect", "effect_path", "cid", "analysis_refs"):
continue
resolved_params[key] = self._resolve_binding(value, t, analysis_data)
# Try GPU first
self._ensure_device()
if self._device is not None:
result = self._apply_effect_gpu(frame, effect_name, resolved_params, t)
if result is not None:
return result
# Fall back to numpy
return self._numpy_fallback._apply_effect(
frame, effect_name, params, t, analysis_data
)
def process_frame(
self,
@@ -286,7 +523,34 @@ class GLSLBackend(Backend):
t: float,
analysis_data: Dict,
) -> np.ndarray:
pass
"""Process frames through effects and composite."""
if not frames:
return np.zeros((720, 1280, 3), dtype=np.uint8)
processed = []
# Apply effects to each input frame
for i, (frame, effects) in enumerate(zip(frames, effects_per_frame)):
result = frame.copy()
for effect_config in effects:
effect_name = effect_config.get("effect", "")
if effect_name:
result = self._apply_effect(
result, effect_name, effect_config, t, analysis_data
)
processed.append(result)
# Composite layers (use numpy backend for now)
if len(processed) == 1:
return processed[0]
return self._numpy_fallback._composite(
processed, compositor_config, t, analysis_data
)
# Keep GLSLBackend as alias for backwards compatibility
GLSLBackend = WGPUBackend
def get_backend(name: str = "numpy", **kwargs) -> Backend:
@@ -294,7 +558,7 @@ def get_backend(name: str = "numpy", **kwargs) -> Backend:
Get a backend by name.
Args:
name: "numpy" or "glsl"
name: "numpy", "wgpu", or "glsl" (alias for wgpu)
**kwargs: Backend-specific options
Returns:
@@ -302,7 +566,7 @@ def get_backend(name: str = "numpy", **kwargs) -> Backend:
"""
if name == "numpy":
return NumpyBackend(**kwargs)
elif name == "glsl":
return GLSLBackend(**kwargs)
elif name in ("wgpu", "glsl", "gpu"):
return WGPUBackend(**kwargs)
else:
raise ValueError(f"Unknown backend: {name}")