Add IPFS HLS streaming and GPU optimizations

- Add IPFSHLSOutput class that uploads segments to IPFS as they're created
- Update streaming task to use IPFS HLS output for distributed streaming
- Add /ipfs-stream endpoint to get IPFS playlist URL
- Update /stream endpoint to redirect to IPFS when available
- Add GPU persistence mode (STREAMING_GPU_PERSIST=1) to keep frames on GPU
- Add hardware video decoding (NVDEC) support for faster video processing
- Add GPU-accelerated primitive libraries: blending_gpu, color_ops_gpu, geometry_gpu
- Add streaming_gpu module with GPUFrame class for tracking CPU/GPU data location
- Add Dockerfile.gpu for building GPU-enabled worker image

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
giles
2026-02-03 20:23:16 +00:00
parent 5bc655f8c8
commit 86830019ad
24 changed files with 4025 additions and 96 deletions

View File

@@ -5,14 +5,99 @@ Supports:
- Display window (preview)
- File output (recording)
- Stream output (RTMP, etc.) - future
- NVENC hardware encoding (auto-detected)
- CuPy GPU arrays (auto-converted to numpy for output)
"""
import numpy as np
import subprocess
from abc import ABC, abstractmethod
from typing import Tuple, Optional
from typing import Tuple, Optional, List, Union
from pathlib import Path
# Try to import CuPy for GPU array support
try:
import cupy as cp
CUPY_AVAILABLE = True
except ImportError:
cp = None
CUPY_AVAILABLE = False
def ensure_numpy(frame: Union[np.ndarray, 'cp.ndarray']) -> np.ndarray:
    """Return ``frame`` as a NumPy array.

    A CuPy array is converted with ``cp.asnumpy``; any other input (and every
    input when CuPy could not be imported) is returned unchanged.
    """
    is_cupy_array = CUPY_AVAILABLE and isinstance(frame, cp.ndarray)
    if not is_cupy_array:
        return frame
    return cp.asnumpy(frame)
# Memoized result of the NVENC probe; stays None until the first probe has run.
_nvenc_available: Optional[bool] = None


def check_nvenc_available() -> bool:
    """Report whether ``ffmpeg -encoders`` lists the ``h264_nvenc`` encoder.

    The probe is executed at most once per process; its outcome is stored in
    the module-level ``_nvenc_available`` and returned directly on later
    calls. Any failure while probing (ffmpeg missing, timeout after 5 seconds,
    ...) is recorded as "not available".
    """
    global _nvenc_available

    if _nvenc_available is None:
        try:
            probe = subprocess.run(
                ["ffmpeg", "-encoders"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            found = "h264_nvenc" in probe.stdout
        except Exception:
            # Best-effort probe: whatever went wrong, fall back to CPU encoding.
            found = False
        _nvenc_available = found

    return _nvenc_available
def get_encoder_params(codec: str, preset: str, crf: int) -> List[str]:
    """
    Build the FFmpeg video-encoder arguments for ``codec``.

    NVENC codecs (``h264_nvenc``, ``hevc_nvenc``):
        - quality is passed with ``-cq`` (taking the CRF value unchanged)
          together with ``-rc vbr``
        - the libx264-style preset name is translated to an NVENC preset,
          p1 (fastest) to p7 (slowest/best quality); e.g. fast->p4,
          medium->p5, slow->p6. Unrecognised names fall back to p4.

    Every other codec (libx264 and friends):
        - ``preset`` is passed through as given
        - quality is passed with ``-crf``

    Returns:
        The argument list, ready to be appended to an ffmpeg command line.
    """
    # Software encoders take the preset and CRF verbatim.
    if codec not in ("h264_nvenc", "hevc_nvenc"):
        return ["-c:v", codec, "-preset", preset, "-crf", str(crf)]

    # libx264 preset name -> closest NVENC preset.
    x264_to_nvenc = {
        "ultrafast": "p1",
        "superfast": "p2",
        "veryfast": "p3",
        "faster": "p3",
        "fast": "p4",
        "medium": "p5",
        "slow": "p6",
        "slower": "p6",
        "veryslow": "p7",
    }
    hw_preset = x264_to_nvenc.get(preset, "p4")

    hw_args = ["-c:v", codec, "-preset", hw_preset]
    hw_args += ["-cq", str(crf)]  # Constant quality mode
    hw_args += ["-rc", "vbr"]  # Variable bitrate with quality target
    return hw_args
class Output(ABC):
"""Abstract base class for output targets."""
@@ -91,6 +176,9 @@ class DisplayOutput(Output):
if not self._is_open:
return
# Convert GPU array to numpy if needed
frame = ensure_numpy(frame)
# Ensure frame is correct format
if frame.dtype != np.uint8:
frame = np.clip(frame, 0, 255).astype(np.uint8)
@@ -136,6 +224,9 @@ class DisplayOutput(Output):
class FileOutput(Output):
"""
Write frames to a video file using ffmpeg.
Automatically uses NVENC hardware encoding when available,
falling back to libx264 CPU encoding otherwise.
"""
def __init__(
@@ -143,7 +234,7 @@ class FileOutput(Output):
path: str,
size: Tuple[int, int],
fps: float = 30,
codec: str = "libx264",
codec: str = "auto", # "auto", "h264_nvenc", "libx264"
crf: int = 18,
preset: str = "fast",
audio_source: str = None,
@@ -153,6 +244,11 @@ class FileOutput(Output):
self.fps = fps
self._is_open = True
# Auto-detect NVENC
if codec == "auto":
codec = "h264_nvenc" if check_nvenc_available() else "libx264"
self.codec = codec
# Build ffmpeg command
cmd = [
"ffmpeg", "-y",
@@ -170,12 +266,9 @@ class FileOutput(Output):
# Explicitly map: video from input 0 (rawvideo), audio from input 1
cmd.extend(["-map", "0:v", "-map", "1:a"])
cmd.extend([
"-c:v", codec,
"-preset", preset,
"-crf", str(crf),
"-pix_fmt", "yuv420p",
])
# Get encoder-specific params
cmd.extend(get_encoder_params(codec, preset, crf))
cmd.extend(["-pix_fmt", "yuv420p"])
# Add audio codec if we have audio
if audio_source:
@@ -201,11 +294,20 @@ class FileOutput(Output):
self._is_open = False
return
# Convert GPU array to numpy if needed
frame = ensure_numpy(frame)
# Resize if needed
if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
import cv2
frame = cv2.resize(frame, self.size)
# Ensure correct format
if frame.dtype != np.uint8:
frame = np.clip(frame, 0, 255).astype(np.uint8)
if not frame.flags['C_CONTIGUOUS']:
frame = np.ascontiguousarray(frame)
try:
self._process.stdin.write(frame.tobytes())
except BrokenPipeError:
@@ -335,6 +437,9 @@ class PipeOutput(Output):
self._is_open = False
return
# Convert GPU array to numpy if needed
frame = ensure_numpy(frame)
# Resize if needed
if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
import cv2
@@ -371,3 +476,424 @@ class PipeOutput(Output):
if self._process and self._process.poll() is not None:
self._is_open = False
return self._is_open
class HLSOutput(Output):
    """
    Write frames as HLS stream (m3u8 playlist + .ts segments).

    This enables true live streaming where the browser can poll
    for new segments as they become available.

    Frames are piped as raw RGB24 into an ffmpeg subprocess that encodes
    them and writes ``stream.m3u8`` plus ``segment_NNNNN.ts`` files into
    ``output_dir``. Automatically uses NVENC hardware encoding when
    available (``codec="auto"``).
    """

    def __init__(
        self,
        output_dir: str,
        size: Tuple[int, int],
        fps: float = 30,
        segment_duration: float = 4.0,  # 4s segments for stability
        codec: str = "auto",  # "auto", "h264_nvenc", "libx264"
        crf: int = 23,
        preset: str = "fast",  # Better quality than ultrafast
        audio_source: Optional[str] = None,
    ):
        """
        Create the output directory and start the ffmpeg HLS encoder.

        Args:
            output_dir: Directory receiving the playlist and segments
                (created if missing).
            size: Frame size as (width, height); also the raw-video input size.
            fps: Input frame rate handed to ffmpeg.
            segment_duration: Target length of each segment in seconds.
            codec: Video encoder, or "auto" to pick h264_nvenc when
                check_nvenc_available() reports it, else libx264.
            crf: Quality value forwarded to get_encoder_params().
            preset: Encoder preset forwarded to get_encoder_params().
            audio_source: Optional second ffmpeg input whose audio is muxed
                in as AAC.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.size = size
        self.fps = fps
        self.segment_duration = segment_duration
        self._is_open = True

        # Auto-detect NVENC
        if codec == "auto":
            codec = "h264_nvenc" if check_nvenc_available() else "libx264"
        self.codec = codec

        # HLS playlist path
        self.playlist_path = self.output_dir / "stream.m3u8"

        # Build ffmpeg command for HLS output
        cmd = [
            "ffmpeg", "-y",
            "-f", "rawvideo",
            "-vcodec", "rawvideo",
            "-pix_fmt", "rgb24",
            "-s", f"{size[0]}x{size[1]}",
            "-r", str(fps),
            "-i", "-",
        ]

        # Add audio input if provided
        if audio_source:
            cmd.extend(["-i", str(audio_source)])
            # Explicitly map: video from input 0 (rawvideo), audio from input 1
            cmd.extend(["-map", "0:v", "-map", "1:a"])

        # Keyframe interval - must be exactly segment_duration for clean cuts
        gop_size = int(fps * segment_duration)

        # Get encoder-specific params
        cmd.extend(get_encoder_params(codec, preset, crf))
        cmd.extend([
            "-pix_fmt", "yuv420p",
            # Force keyframes at exact intervals for clean segment boundaries
            "-g", str(gop_size),
            "-keyint_min", str(gop_size),
            "-sc_threshold", "0",  # Disable scene change detection
            "-force_key_frames", f"expr:gte(t,n_forced*{segment_duration})",
            # Reduce buffering for faster segment availability
            "-flush_packets", "1",
        ])

        # Add audio codec if we have audio
        if audio_source:
            cmd.extend(["-c:a", "aac", "-b:a", "128k"])

        # HLS specific options for smooth live streaming
        cmd.extend([
            "-f", "hls",
            "-hls_time", str(segment_duration),
            "-hls_list_size", "0",  # Keep all segments in playlist
            "-hls_flags", "independent_segments+append_list+split_by_time",
            "-hls_segment_type", "mpegts",
            "-hls_segment_filename", str(self.output_dir / "segment_%05d.ts"),
            str(self.playlist_path),
        ])

        import sys
        print(f"HLSOutput cmd: {' '.join(cmd)}", file=sys.stderr)

        self._process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stderr=None,  # Show errors for debugging
        )

        # Track segments for status reporting
        self.segments_written = 0
        self._last_segment_check = 0

    def _count_segments(self) -> int:
        """Count the segment_*.ts files currently present in output_dir."""
        return sum(1 for _ in self.output_dir.glob("segment_*.ts"))

    def write(self, frame: np.ndarray, t: float):
        """
        Write frame to HLS stream.

        The frame is converted to a contiguous uint8 NumPy array of the
        configured size before being piped to ffmpeg. Does nothing once the
        output is closed or ffmpeg has exited.

        Args:
            frame: Image array (NumPy, or CuPy when available).
            t: Stream time of the frame in seconds; used only to throttle
                the segment count refresh.
        """
        if not self._is_open or self._process.poll() is not None:
            self._is_open = False
            return

        # Convert GPU array to numpy if needed
        frame = ensure_numpy(frame)

        # Resize if needed
        if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
            import cv2
            frame = cv2.resize(frame, self.size)

        # Ensure correct format
        if frame.dtype != np.uint8:
            frame = np.clip(frame, 0, 255).astype(np.uint8)
        if not frame.flags['C_CONTIGUOUS']:
            frame = np.ascontiguousarray(frame)

        try:
            self._process.stdin.write(frame.tobytes())
        except BrokenPipeError:
            # ffmpeg went away; stop accepting frames.
            self._is_open = False

        # Periodically count segments (at most once per second of stream time)
        if t - self._last_segment_check > 1.0:
            self._last_segment_check = t
            self.segments_written = self._count_segments()

    def close(self):
        """
        Close the HLS stream.

        Closes ffmpeg's stdin, waits for it to exit, refreshes
        ``segments_written`` and makes sure the playlist ends with
        ``#EXT-X-ENDLIST``. Safe to call more than once.
        """
        if self._process:
            try:
                self._process.stdin.close()
            except OSError:
                # Closing flushes buffered frame data; if ffmpeg has already
                # exited that flush raises BrokenPipeError. We still have to
                # reap the process and finish the bookkeeping below.
                pass
            self._process.wait()
        self._is_open = False

        # Final segment count
        self.segments_written = self._count_segments()

        # Mark playlist as ended (VOD mode). ffmpeg's HLS muxer normally
        # writes the tag itself on a clean shutdown, so only add it when it
        # is missing (e.g. ffmpeg crashed) to avoid a duplicate tag.
        if self.playlist_path.exists():
            existing = self.playlist_path.read_text(encoding="utf-8", errors="replace")
            if "#EXT-X-ENDLIST" not in existing:
                with open(self.playlist_path, "a", encoding="utf-8") as f:
                    if existing and not existing.endswith("\n"):
                        f.write("\n")
                    f.write("#EXT-X-ENDLIST\n")

    @property
    def is_open(self) -> bool:
        """True while the output accepts frames and ffmpeg is still running."""
        return self._is_open and self._process.poll() is None
class IPFSHLSOutput(Output):
    """
    Write frames as HLS stream with segments uploaded to IPFS.

    Each segment is uploaded to IPFS once ffmpeg has finished writing it,
    enabling distributed streaming where clients can fetch segments from
    any IPFS gateway.

    The m3u8 playlist is continuously updated with IPFS URLs and can be
    fetched via get_playlist() or the playlist_cid property.

    NOTE: uploads run synchronously inside write()/close(), so a slow IPFS
    node delays the caller's frame loop.
    """

    def __init__(
        self,
        output_dir: str,
        size: Tuple[int, int],
        fps: float = 30,
        segment_duration: float = 4.0,
        codec: str = "auto",
        crf: int = 23,
        preset: str = "fast",
        audio_source: Optional[str] = None,
        ipfs_gateway: str = "https://ipfs.io/ipfs",
    ):
        """
        Create the output directory and start the ffmpeg HLS encoder.

        Args:
            output_dir: Directory receiving the local playlist and segments
                (created if missing).
            size: Frame size as (width, height); also the raw-video input size.
            fps: Input frame rate handed to ffmpeg.
            segment_duration: Target length of each segment in seconds.
            codec: Video encoder, or "auto" to pick h264_nvenc when
                check_nvenc_available() reports it, else libx264.
            crf: Quality value forwarded to get_encoder_params().
            preset: Encoder preset forwarded to get_encoder_params().
            audio_source: Optional second ffmpeg input whose audio is muxed
                in as AAC.
            ipfs_gateway: Base URL prepended to every CID in the playlist;
                a trailing slash is stripped.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.size = size
        self.fps = fps
        self.segment_duration = segment_duration
        self.ipfs_gateway = ipfs_gateway.rstrip("/")
        self._is_open = True

        # Auto-detect NVENC
        if codec == "auto":
            codec = "h264_nvenc" if check_nvenc_available() else "libx264"
        self.codec = codec

        # Track segment CIDs
        self.segment_cids: dict = {}  # segment_number -> cid
        # Stream time (seconds) of the last upload poll; None before the first.
        self._last_upload_check: Optional[float] = None
        self._playlist_cid: Optional[str] = None

        # Import IPFS client
        from ipfs_client import add_file, add_bytes
        self._ipfs_add_file = add_file
        self._ipfs_add_bytes = add_bytes

        # Local HLS paths
        self.local_playlist_path = self.output_dir / "stream.m3u8"

        # Build ffmpeg command for HLS output
        cmd = [
            "ffmpeg", "-y",
            "-f", "rawvideo",
            "-vcodec", "rawvideo",
            "-pix_fmt", "rgb24",
            "-s", f"{size[0]}x{size[1]}",
            "-r", str(fps),
            "-i", "-",
        ]

        # Add audio input if provided
        if audio_source:
            cmd.extend(["-i", str(audio_source)])
            cmd.extend(["-map", "0:v", "-map", "1:a"])

        # Keyframe interval
        gop_size = int(fps * segment_duration)

        # Get encoder-specific params
        cmd.extend(get_encoder_params(codec, preset, crf))
        cmd.extend([
            "-pix_fmt", "yuv420p",
            "-g", str(gop_size),
            "-keyint_min", str(gop_size),
            "-sc_threshold", "0",
            "-force_key_frames", f"expr:gte(t,n_forced*{segment_duration})",
            "-flush_packets", "1",
        ])

        # Add audio codec if we have audio
        if audio_source:
            cmd.extend(["-c:a", "aac", "-b:a", "128k"])

        # HLS options
        cmd.extend([
            "-f", "hls",
            "-hls_time", str(segment_duration),
            "-hls_list_size", "0",
            "-hls_flags", "independent_segments+append_list+split_by_time",
            "-hls_segment_type", "mpegts",
            "-hls_segment_filename", str(self.output_dir / "segment_%05d.ts"),
            str(self.local_playlist_path),
        ])

        import sys
        print("IPFSHLSOutput: starting ffmpeg", file=sys.stderr)

        self._process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stderr=None,
        )

    def _render_playlist(self, vod: bool = False, ended: bool = False) -> str:
        """Build the m3u8 text listing every uploaded segment by gateway URL.

        Args:
            vod: Include the ``#EXT-X-PLAYLIST-TYPE:VOD`` header tag.
            ended: Terminate the playlist with ``#EXT-X-ENDLIST``.
        """
        lines = [
            "#EXTM3U",
            "#EXT-X-VERSION:3",
            f"#EXT-X-TARGETDURATION:{int(self.segment_duration) + 1}",
            "#EXT-X-MEDIA-SEQUENCE:0",
        ]
        if vod:
            lines.append("#EXT-X-PLAYLIST-TYPE:VOD")

        # Add segments in order
        for seg_num in sorted(self.segment_cids):
            # Every entry carries the nominal segment duration.
            lines.append(f"#EXTINF:{self.segment_duration:.3f},")
            lines.append(f"{self.ipfs_gateway}/{self.segment_cids[seg_num]}")

        if ended:
            lines.append("#EXT-X-ENDLIST")
        return "\n".join(lines) + "\n"

    def _upload_new_segments(self):
        """Check for finished segments and upload them to IPFS.

        While ffmpeg is running, the highest-numbered segment file is treated
        as still being written and is left alone; this relies on ffmpeg's HLS
        muxer finishing one segment before it creates the next. Once ffmpeg
        has exited, every remaining segment is uploaded.
        """
        import sys

        # Index segment files by number; skip names that do not parse.
        found = {}
        for seg_path in self.output_dir.glob("segment_*.ts"):
            try:
                found[int(seg_path.stem.split("_")[1])] = seg_path
            except (IndexError, ValueError):
                continue
        if not found:
            return

        encoder_done = self._process is None or self._process.poll() is not None
        newest = max(found)

        for seg_num in sorted(found):
            # Skip if already uploaded
            if seg_num in self.segment_cids:
                continue

            # Skip the segment ffmpeg is still writing. A "file size is
            # stable" check is unreliable here: this method runs on the
            # thread that feeds ffmpeg, so the file stops growing while we
            # look at it.
            if seg_num == newest and not encoder_done:
                continue

            seg_path = found[seg_num]
            try:
                if seg_path.stat().st_size == 0:
                    continue  # Empty file, nothing to publish
            except FileNotFoundError:
                continue

            # Upload to IPFS
            cid = self._ipfs_add_file(seg_path, pin=True)
            if cid:
                self.segment_cids[seg_num] = cid
                print(f"IPFS: segment_{seg_num:05d}.ts -> {cid}", file=sys.stderr)

                # Update playlist after each segment upload
                self._update_ipfs_playlist()

    def _update_ipfs_playlist(self):
        """Generate and upload IPFS-aware m3u8 playlist."""
        if not self.segment_cids:
            return

        import sys

        playlist_content = self._render_playlist()

        # Upload playlist to IPFS
        cid = self._ipfs_add_bytes(playlist_content.encode("utf-8"), pin=True)
        if cid:
            self._playlist_cid = cid
            print(f"IPFS: playlist updated -> {cid} ({len(self.segment_cids)} segments)", file=sys.stderr)

    def write(self, frame: np.ndarray, t: float):
        """
        Write frame to HLS stream and upload segments to IPFS.

        The frame is converted to a contiguous uint8 NumPy array of the
        configured size before being piped to ffmpeg. Does nothing once the
        output is closed or ffmpeg has exited.

        Args:
            frame: Image array (NumPy, or CuPy when available).
            t: Stream time of the frame in seconds; used to throttle the
                check for finished segments.
        """
        if not self._is_open or self._process.poll() is not None:
            self._is_open = False
            return

        # Convert GPU array to numpy if needed
        frame = ensure_numpy(frame)

        # Resize if needed
        if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
            import cv2
            frame = cv2.resize(frame, self.size)

        # Ensure correct format
        if frame.dtype != np.uint8:
            frame = np.clip(frame, 0, 255).astype(np.uint8)
        if not frame.flags['C_CONTIGUOUS']:
            frame = np.ascontiguousarray(frame)

        try:
            self._process.stdin.write(frame.tobytes())
        except BrokenPipeError:
            self._is_open = False
            return

        # Check for finished segments about once per second of stream time
        if self._last_upload_check is None or t - self._last_upload_check >= 1.0:
            self._last_upload_check = t
            self._upload_new_segments()

    def close(self):
        """
        Close the HLS stream and finalize IPFS uploads.

        Closes ffmpeg's stdin, waits for it to exit, uploads every segment
        not yet published and then uploads a final VOD playlist ending in
        ``#EXT-X-ENDLIST``.
        """
        import sys

        if self._process:
            try:
                self._process.stdin.close()
            except OSError:
                # Closing flushes buffered frame data; if ffmpeg has already
                # exited that flush raises BrokenPipeError. We still have to
                # reap the process and publish what was produced.
                pass
            self._process.wait()
        self._is_open = False

        # Upload any remaining segments
        self._upload_new_segments()

        # Generate final playlist with #EXT-X-ENDLIST
        if self.segment_cids:
            playlist_content = self._render_playlist(vod=True, ended=True)
            cid = self._ipfs_add_bytes(playlist_content.encode("utf-8"), pin=True)
            if cid:
                self._playlist_cid = cid
                print(f"IPFS: final playlist -> {cid} ({len(self.segment_cids)} segments)", file=sys.stderr)

    @property
    def playlist_cid(self) -> Optional[str]:
        """Get the current playlist CID."""
        return self._playlist_cid

    @property
    def playlist_url(self) -> Optional[str]:
        """Get the full IPFS URL for the playlist."""
        if self._playlist_cid:
            return f"{self.ipfs_gateway}/{self._playlist_cid}"
        return None

    def get_playlist(self) -> str:
        """Get the current m3u8 playlist content with IPFS URLs.

        Returns a bare ``#EXTM3U`` header while no segment has been uploaded;
        ``#EXT-X-ENDLIST`` is appended once the output has been closed.
        """
        if not self.segment_cids:
            return "#EXTM3U\n"
        return self._render_playlist(ended=not self._is_open)

    @property
    def is_open(self) -> bool:
        """True while the output accepts frames and ffmpeg is still running."""
        return self._is_open and self._process.poll() is None