Add IPFS HLS streaming and GPU optimizations

- Add IPFSHLSOutput class that uploads segments to IPFS as they're created
- Update streaming task to use IPFS HLS output for distributed streaming
- Add /ipfs-stream endpoint to get IPFS playlist URL
- Update /stream endpoint to redirect to IPFS when available
- Add GPU persistence mode (STREAMING_GPU_PERSIST=1) to keep frames on GPU
- Add hardware video decoding (NVDEC) support for faster video processing
- Add GPU-accelerated primitive libraries: blending_gpu, color_ops_gpu, geometry_gpu
- Add streaming_gpu module with GPUFrame class for tracking CPU/GPU data location
- Add Dockerfile.gpu for building GPU-enabled worker image

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
giles
2026-02-03 20:23:16 +00:00
parent 5bc655f8c8
commit 86830019ad
24 changed files with 4025 additions and 96 deletions

View File

@@ -3,13 +3,52 @@ Streaming primitives for video/audio processing.
These primitives handle video source reading and audio analysis,
keeping the interpreter completely generic.
GPU Acceleration:
- Set STREAMING_GPU_PERSIST=1 to output CuPy arrays (frames stay on GPU)
- Hardware video decoding (NVDEC) is used when available
- Dramatically improves performance on GPU nodes
"""
import os
import numpy as np
import subprocess
import json
from pathlib import Path
# Try to import CuPy for GPU acceleration
try:
import cupy as cp
CUPY_AVAILABLE = True
except ImportError:
cp = None
CUPY_AVAILABLE = False
# GPU persistence mode - output CuPy arrays instead of numpy
GPU_PERSIST = os.environ.get("STREAMING_GPU_PERSIST", "1") == "1" and CUPY_AVAILABLE
# Check for hardware decode support (cached)
_HWDEC_AVAILABLE = None
def _check_hwdec():
"""Check if NVIDIA hardware decode is available."""
global _HWDEC_AVAILABLE
if _HWDEC_AVAILABLE is not None:
return _HWDEC_AVAILABLE
try:
result = subprocess.run(["nvidia-smi"], capture_output=True, timeout=2)
if result.returncode != 0:
_HWDEC_AVAILABLE = False
return False
result = subprocess.run(["ffmpeg", "-hwaccels"], capture_output=True, text=True, timeout=5)
_HWDEC_AVAILABLE = "cuda" in result.stdout
except Exception:
_HWDEC_AVAILABLE = False
return _HWDEC_AVAILABLE
class VideoSource:
"""Video source with persistent streaming pipe for fast sequential reads."""
@@ -57,7 +96,10 @@ class VideoSource:
print(f"VideoSource: {self.path.name} duration={self._duration} size={self._frame_size}", file=sys.stderr)
def _start_stream(self, seek_time: float = 0):
"""Start or restart the ffmpeg streaming process."""
"""Start or restart the ffmpeg streaming process.
Uses NVIDIA hardware decoding (NVDEC) when available for better performance.
"""
if self._proc:
self._proc.kill()
self._proc = None
@@ -67,15 +109,23 @@ class VideoSource:
raise FileNotFoundError(f"Video file not found: {self.path}")
w, h = self._frame_size
cmd = [
"ffmpeg", "-v", "error", # Show errors instead of quiet
# Build ffmpeg command with optional hardware decode
cmd = ["ffmpeg", "-v", "error"]
# Use hardware decode if available (significantly faster)
if _check_hwdec():
cmd.extend(["-hwaccel", "cuda"])
cmd.extend([
"-ss", f"{seek_time:.3f}",
"-i", str(self.path),
"-f", "rawvideo", "-pix_fmt", "rgb24",
"-s", f"{w}x{h}",
"-r", str(self.fps), # Output at specified fps
"-"
]
])
self._proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
self._stream_time = seek_time
@@ -88,8 +138,11 @@ class VideoSource:
if err:
print(f"ffmpeg error for {self.path.name}: {err}", file=sys.stderr)
def _read_frame_from_stream(self) -> np.ndarray:
"""Read one frame from the stream."""
def _read_frame_from_stream(self):
"""Read one frame from the stream.
Returns CuPy array if GPU_PERSIST is enabled, numpy array otherwise.
"""
w, h = self._frame_size
frame_size = w * h * 3
@@ -100,7 +153,12 @@ class VideoSource:
if len(data) < frame_size:
return None
return np.frombuffer(data, dtype=np.uint8).reshape((h, w, 3)).copy()
frame = np.frombuffer(data, dtype=np.uint8).reshape((h, w, 3)).copy()
# Transfer to GPU if persistence mode enabled
if GPU_PERSIST:
return cp.asarray(frame)
return frame
def read(self) -> np.ndarray:
"""Read frame (uses last cached or t=0)."""
@@ -120,6 +178,9 @@ class VideoSource:
seek_time = t
if self._duration and self._duration > 0:
seek_time = t % self._duration
# If we're within 0.1s of the end, wrap to beginning to avoid EOF issues
if seek_time > self._duration - 0.1:
seek_time = 0.0
# Decide whether to seek or continue streaming
# Seek if: no stream, going backwards (more than 1 frame), or jumping more than 2 seconds ahead
@@ -138,24 +199,59 @@ class VideoSource:
self._start_stream(seek_time)
# Skip frames to reach target time
skip_retries = 0
while self._stream_time + self._frame_time <= seek_time:
frame = self._read_frame_from_stream()
if frame is None:
# Stream ended, restart from seek point
# Stream ended or failed - restart from seek point
import time
skip_retries += 1
if skip_retries > 3:
# Give up skipping, just start fresh at seek_time
self._start_stream(seek_time)
time.sleep(0.1)
break
self._start_stream(seek_time)
break
time.sleep(0.05)
continue
self._stream_time += self._frame_time
skip_retries = 0 # Reset on successful read
# Read the target frame
frame = self._read_frame_from_stream()
if frame is None:
# Read the target frame with retry logic
frame = None
max_retries = 3
for attempt in range(max_retries):
frame = self._read_frame_from_stream()
if frame is not None:
break
# Stream failed - try restarting
import sys
import time
print(f"RETRY {self.path.name}: attempt {attempt+1}/{max_retries} at t={t:.2f}", file=sys.stderr)
# Check for ffmpeg errors
if self._proc and self._proc.stderr:
err = self._proc.stderr.read(4096).decode('utf-8', errors='ignore')
if err:
raise RuntimeError(f"Failed to read video frame from {self.path.name}: {err}")
raise RuntimeError(f"Failed to read video frame from {self.path.name} at t={t:.2f} - file may be corrupted or inaccessible")
try:
import select
readable, _, _ = select.select([self._proc.stderr], [], [], 0.1)
if readable:
err = self._proc.stderr.read(4096).decode('utf-8', errors='ignore')
if err:
print(f"ffmpeg error: {err}", file=sys.stderr)
except:
pass
# Wait a bit and restart
time.sleep(0.1)
self._start_stream(seek_time)
# Give ffmpeg time to start
time.sleep(0.1)
if frame is None:
import sys
raise RuntimeError(f"Failed to read video frame from {self.path.name} at t={t:.2f} after {max_retries} retries")
else:
self._stream_time += self._frame_time