Add IPFS HLS streaming and GPU optimizations
- Add IPFSHLSOutput class that uploads segments to IPFS as they're created
- Update streaming task to use IPFS HLS output for distributed streaming
- Add /ipfs-stream endpoint to get IPFS playlist URL
- Update /stream endpoint to redirect to IPFS when available
- Add GPU persistence mode (STREAMING_GPU_PERSIST=1) to keep frames on GPU
- Add hardware video decoding (NVDEC) support for faster video processing
- Add GPU-accelerated primitive libraries: blending_gpu, color_ops_gpu, geometry_gpu
- Add streaming_gpu module with GPUFrame class for tracking CPU/GPU data location
- Add Dockerfile.gpu for building GPU-enabled worker image

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
44
Dockerfile.gpu
Normal file
44
Dockerfile.gpu
Normal file
@@ -0,0 +1,44 @@
|
||||
# GPU-enabled worker image
|
||||
# Based on NVIDIA CUDA with Python for CuPy support
|
||||
|
||||
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install Python 3.11 and system dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3.11 \
|
||||
python3.11-venv \
|
||||
python3-pip \
|
||||
git \
|
||||
ffmpeg \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& ln -sf /usr/bin/python3.11 /usr/bin/python3 \
|
||||
&& ln -sf /usr/bin/python3 /usr/bin/python
|
||||
|
||||
# Upgrade pip
|
||||
RUN python3 -m pip install --upgrade pip
|
||||
|
||||
# Install CPU dependencies first
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Install GPU-specific dependencies (CuPy for CUDA 12.x)
|
||||
RUN pip install --no-cache-dir cupy-cuda12x
|
||||
|
||||
# Copy application
|
||||
COPY . .
|
||||
|
||||
# Clone effects repo
|
||||
RUN git clone https://git.rose-ash.com/art-dag/effects.git /app/artdag-effects
|
||||
|
||||
# Create cache directory
|
||||
RUN mkdir -p /data/cache
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV PYTHONDONTWRITEBYTECODE=1
|
||||
ENV EFFECTS_PATH=/app/artdag-effects
|
||||
ENV PYTHONPATH=/app
|
||||
|
||||
# Default command runs celery worker
|
||||
CMD ["celery", "-A", "celery_app", "worker", "--loglevel=info", "-E", "-Q", "gpu,celery"]
|
||||
@@ -227,8 +227,9 @@ async def create_stream_run(
|
||||
logger.warning(f"Failed to store recipe in cache: {e}")
|
||||
# Continue anyway - run will still work, just won't appear in /recipes
|
||||
|
||||
# Submit Celery task
|
||||
task = run_stream.delay(
|
||||
# Submit Celery task to GPU queue for hardware-accelerated rendering
|
||||
task = run_stream.apply_async(
|
||||
kwargs=dict(
|
||||
run_id=run_id,
|
||||
recipe_sexp=request.recipe_sexp,
|
||||
output_name=request.output_name,
|
||||
@@ -237,6 +238,8 @@ async def create_stream_run(
|
||||
actor_id=actor_id,
|
||||
sources_sexp=request.sources_sexp,
|
||||
audio_sexp=request.audio_sexp,
|
||||
),
|
||||
queue='gpu',
|
||||
)
|
||||
|
||||
# Store in database for durability
|
||||
@@ -396,7 +399,7 @@ async def get_run(
|
||||
artifacts = []
|
||||
output_media_type = None
|
||||
if run.get("output_cid"):
|
||||
# Detect media type using magic bytes
|
||||
# Detect media type using magic bytes, fall back to database item_type
|
||||
output_cid = run["output_cid"]
|
||||
media_type = None
|
||||
try:
|
||||
@@ -408,6 +411,16 @@ async def get_run(
|
||||
output_media_type = media_type
|
||||
except Exception:
|
||||
pass
|
||||
# Fall back to database item_type if local detection failed
|
||||
if not media_type:
|
||||
try:
|
||||
import database
|
||||
item_types = await database.get_item_types(output_cid, run.get("actor_id"))
|
||||
if item_types:
|
||||
media_type = type_to_mime(item_types[0].get("type"))
|
||||
output_media_type = media_type
|
||||
except Exception:
|
||||
pass
|
||||
artifacts.append({
|
||||
"cid": output_cid,
|
||||
"step_name": "Output",
|
||||
@@ -964,17 +977,43 @@ async def stream_run_output(
|
||||
):
|
||||
"""Stream the video output of a running render.
|
||||
|
||||
Returns the partial video file as it's being written,
|
||||
allowing live preview of the render progress.
|
||||
For IPFS HLS streams, redirects to the IPFS gateway playlist.
|
||||
For local HLS streams, redirects to the m3u8 playlist.
|
||||
For legacy MP4 streams, returns the file directly.
|
||||
"""
|
||||
from fastapi.responses import StreamingResponse, FileResponse
|
||||
from fastapi.responses import StreamingResponse, FileResponse, RedirectResponse
|
||||
from pathlib import Path
|
||||
import os
|
||||
import database
|
||||
from celery_app import app as celery_app
|
||||
|
||||
await database.init_db()
|
||||
|
||||
# Check for IPFS HLS streaming first (distributed P2P streaming)
|
||||
pending = await database.get_pending_run(run_id)
|
||||
if pending and pending.get("celery_task_id"):
|
||||
task_id = pending["celery_task_id"]
|
||||
result = celery_app.AsyncResult(task_id)
|
||||
if result.ready() and isinstance(result.result, dict):
|
||||
ipfs_playlist_url = result.result.get("ipfs_playlist_url")
|
||||
if ipfs_playlist_url:
|
||||
logger.info(f"Redirecting to IPFS stream: {ipfs_playlist_url}")
|
||||
return RedirectResponse(url=ipfs_playlist_url, status_code=302)
|
||||
|
||||
# Check for the streaming output file in the shared cache
|
||||
cache_dir = os.environ.get("CACHE_DIR", "/data/cache")
|
||||
stream_path = Path(cache_dir) / "streaming" / run_id / "output.mp4"
|
||||
stream_dir = Path(cache_dir) / "streaming" / run_id
|
||||
|
||||
# Check for local HLS output
|
||||
hls_playlist = stream_dir / "stream.m3u8"
|
||||
if hls_playlist.exists():
|
||||
# Redirect to the HLS playlist endpoint
|
||||
return RedirectResponse(
|
||||
url=f"/runs/{run_id}/hls/stream.m3u8",
|
||||
status_code=302
|
||||
)
|
||||
|
||||
# Fall back to legacy MP4 streaming
|
||||
stream_path = stream_dir / "output.mp4"
|
||||
if not stream_path.exists():
|
||||
raise HTTPException(404, "Stream not available yet")
|
||||
|
||||
@@ -982,7 +1021,6 @@ async def stream_run_output(
|
||||
if file_size == 0:
|
||||
raise HTTPException(404, "Stream not ready")
|
||||
|
||||
# Return the file with headers that allow streaming of growing file
|
||||
return FileResponse(
|
||||
path=str(stream_path),
|
||||
media_type="video/mp4",
|
||||
@@ -992,3 +1030,139 @@ async def stream_run_output(
|
||||
"X-Content-Size": str(file_size),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{run_id}/hls/{filename:path}")
async def serve_hls_content(
    run_id: str,
    filename: str,
    request: Request,
):
    """Serve HLS playlist and segments for live streaming.

    Serves stream.m3u8 (playlist) and segment_*.ts files.
    The playlist updates as new segments are rendered.

    If files aren't found locally, proxies to the GPU worker (if configured).

    Args:
        run_id: Run whose streaming directory is served.
        filename: Playlist/segment path relative to that directory.
        request: Incoming request (not used by this handler).

    Returns:
        A FileResponse for a local file, or a StreamingResponse carrying the
        body fetched from the GPU worker.

    Raises:
        HTTPException: 403 if the requested path escapes the run's streaming
            directory, 400 if the file is neither .m3u8 nor .ts, 404 if the
            file is not available locally or via the GPU worker.
    """
    from fastapi.responses import FileResponse, StreamingResponse
    from pathlib import Path
    import os
    import httpx

    cache_dir = os.environ.get("CACHE_DIR", "/data/cache")
    stream_dir = Path(cache_dir) / "streaming" / run_id
    file_path = stream_dir / filename

    # Security: ensure we're only serving files within stream_dir.
    # The check is purely lexical (resolve() is non-strict), so it also runs
    # when the directory does not exist locally and the request would be
    # proxied. The 403 is raised outside any broad `except`, so it can no
    # longer be swallowed.
    if run_id in (".", "..") or "/" in run_id:
        raise HTTPException(403, "Invalid path")
    try:
        # relative_to() compares path components, not string prefixes, so a
        # sibling directory such as "<run_id>0" is rejected as well.
        relative_path = file_path.resolve().relative_to(stream_dir.resolve())
    except (ValueError, OSError, RuntimeError):
        raise HTTPException(403, "Invalid path") from None

    # Determine content type
    if filename.endswith(".m3u8"):
        media_type = "application/vnd.apple.mpegurl"
        # Playlists change while rendering: never cache them.
        headers = {
            "Cache-Control": "no-cache, no-store, must-revalidate",
            "Access-Control-Allow-Origin": "*",
        }
    elif filename.endswith(".ts"):
        media_type = "video/mp2t"
        headers = {
            "Cache-Control": "public, max-age=3600",
            "Access-Control-Allow-Origin": "*",
        }
    else:
        raise HTTPException(400, "Invalid file type")

    # Try local file first
    if file_path.is_file():
        return FileResponse(
            path=str(file_path),
            media_type=media_type,
            headers=headers,
        )

    # Fallback: proxy to GPU worker if configured
    gpu_worker_url = os.environ.get("GPU_WORKER_STREAM_URL")
    if gpu_worker_url:
        # Use the normalised relative path so no ".." segments reach the worker.
        proxy_url = f"{gpu_worker_url.rstrip('/')}/{run_id}/{relative_path.as_posix()}"
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                resp = await client.get(proxy_url)
                if resp.status_code == 200:
                    return StreamingResponse(
                        content=iter([resp.content]),
                        media_type=media_type,
                        headers=headers,
                    )
        except Exception as e:
            # Best effort: a proxy failure falls through to the 404 below.
            logger.warning(f"GPU worker proxy failed: {e}")

    raise HTTPException(404, f"File not found: {filename}")
|
||||
|
||||
|
||||
@router.get("/{run_id}/ipfs-stream")
async def get_ipfs_stream_info(run_id: str, request: Request):
    """Report where a run's output can be streamed from IPFS.

    For a pending run, the Celery task result is inspected for the IPFS
    playlist CID/URL. For a run that is no longer pending, the cached run
    record is checked for a stored IPFS CID instead.

    Raises:
        HTTPException: 404 when the run is unknown, when a pending run has no
            Celery task ID, or when a non-pending run has no IPFS CID.
    """
    from celery_app import app as celery_app
    import database
    import os

    await database.init_db()

    pending = await database.get_pending_run(run_id)

    if pending:
        task_id = pending.get("celery_task_id")
        if not task_id:
            raise HTTPException(404, "No task ID for this run")

        # Only read the task result once Celery reports the task as finished.
        async_result = celery_app.AsyncResult(task_id)
        task_result = async_result.result if async_result.ready() else None

        if isinstance(task_result, dict):
            playlist_cid = task_result.get("ipfs_playlist_cid")
            playlist_url = task_result.get("ipfs_playlist_url")
            if playlist_url:
                return {
                    "run_id": run_id,
                    "status": "completed",
                    "ipfs_playlist_cid": playlist_cid,
                    "ipfs_playlist_url": playlist_url,
                    "segment_count": task_result.get("ipfs_segment_count", 0),
                }

        # Still running, or finished without IPFS playlist info.
        return {
            "run_id": run_id,
            "status": pending.get("status", "pending"),
            "message": "IPFS streaming info not yet available"
        }

    # Not pending: look the run up among cached (completed) runs.
    run = await database.get_run_cache(run_id)
    if not run:
        raise HTTPException(404, "Run not found")

    ipfs_cid = run.get("ipfs_cid")
    if not ipfs_cid:
        raise HTTPException(404, "No IPFS stream info available")

    gateway = os.environ.get("IPFS_GATEWAY_URL", "https://ipfs.io/ipfs")
    return {
        "run_id": run_id,
        "status": "completed",
        "ipfs_video_url": f"{gateway}/{ipfs_cid}",
    }
|
||||
|
||||
@@ -100,30 +100,52 @@ class CacheService:
|
||||
|
||||
async def get_cache_item(self, cid: str, actor_id: str = None) -> Optional[Dict[str, Any]]:
|
||||
"""Get cached item with full metadata for display."""
|
||||
# Check if content exists
|
||||
if not self.cache.has_content(cid):
|
||||
return None
|
||||
|
||||
path = self.cache.get_by_cid(cid)
|
||||
if not path or not path.exists():
|
||||
return None
|
||||
|
||||
# Get metadata from database
|
||||
# Get metadata from database first
|
||||
meta = await self.db.load_item_metadata(cid, actor_id)
|
||||
cache_item = await self.db.get_cache_item(cid)
|
||||
|
||||
# Check if content exists locally
|
||||
path = self.cache.get_by_cid(cid) if self.cache.has_content(cid) else None
|
||||
|
||||
if path and path.exists():
|
||||
# Local file exists - detect type from file
|
||||
media_type = detect_media_type(path)
|
||||
mime_type = get_mime_type(path)
|
||||
size = path.stat().st_size
|
||||
else:
|
||||
# File not local - check database for type info
|
||||
# Try to get type from item_types table
|
||||
media_type = "unknown"
|
||||
mime_type = "application/octet-stream"
|
||||
size = 0
|
||||
|
||||
if actor_id:
|
||||
try:
|
||||
item_types = await self.db.get_item_types(cid, actor_id)
|
||||
if item_types:
|
||||
media_type = item_types[0].get("type", "unknown")
|
||||
if media_type == "video":
|
||||
mime_type = "video/mp4"
|
||||
elif media_type == "image":
|
||||
mime_type = "image/png"
|
||||
elif media_type == "audio":
|
||||
mime_type = "audio/mpeg"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# If no local path but we have IPFS CID, content is available remotely
|
||||
if not cache_item:
|
||||
return None
|
||||
|
||||
result = {
|
||||
"cid": cid,
|
||||
"path": str(path),
|
||||
"path": str(path) if path else None,
|
||||
"media_type": media_type,
|
||||
"mime_type": mime_type,
|
||||
"size": size,
|
||||
"ipfs_cid": cache_item.get("ipfs_cid") if cache_item else None,
|
||||
"meta": meta,
|
||||
"remote_only": path is None or not path.exists(),
|
||||
}
|
||||
|
||||
# Unpack meta fields to top level for template convenience
|
||||
|
||||
15
app/templates/cache/detail.html
vendored
15
app/templates/cache/detail.html
vendored
@@ -13,17 +13,32 @@
|
||||
<!-- Preview -->
|
||||
<div class="bg-gray-800 rounded-lg border border-gray-700 mb-6 overflow-hidden">
|
||||
{% if cache.mime_type and cache.mime_type.startswith('image/') %}
|
||||
{% if cache.remote_only and cache.ipfs_cid %}
|
||||
<img src="https://ipfs.io/ipfs/{{ cache.ipfs_cid }}" alt=""
|
||||
class="w-full max-h-96 object-contain bg-gray-900">
|
||||
{% else %}
|
||||
<img src="/cache/{{ cache.cid }}/raw" alt=""
|
||||
class="w-full max-h-96 object-contain bg-gray-900">
|
||||
{% endif %}
|
||||
|
||||
{% elif cache.mime_type and cache.mime_type.startswith('video/') %}
|
||||
{% if cache.remote_only and cache.ipfs_cid %}
|
||||
<video src="https://ipfs.io/ipfs/{{ cache.ipfs_cid }}" controls
|
||||
class="w-full max-h-96 bg-gray-900">
|
||||
</video>
|
||||
{% else %}
|
||||
<video src="/cache/{{ cache.cid }}/raw" controls
|
||||
class="w-full max-h-96 bg-gray-900">
|
||||
</video>
|
||||
{% endif %}
|
||||
|
||||
{% elif cache.mime_type and cache.mime_type.startswith('audio/') %}
|
||||
<div class="p-8 bg-gray-900">
|
||||
{% if cache.remote_only and cache.ipfs_cid %}
|
||||
<audio src="https://ipfs.io/ipfs/{{ cache.ipfs_cid }}" controls class="w-full"></audio>
|
||||
{% else %}
|
||||
<audio src="/cache/{{ cache.cid }}/raw" controls class="w-full"></audio>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
{% elif cache.mime_type == 'application/json' %}
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/cytoscape/3.23.0/cytoscape.min.js"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/dagre/0.8.5/dagre.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/cytoscape-dagre@2.5.0/cytoscape-dagre.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/hls.js@1.4.12/dist/hls.min.js"></script>
|
||||
{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
@@ -73,6 +74,174 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Live Stream Player (shown during rendering) -->
|
||||
{% if run.status == 'rendering' %}
|
||||
<div id="live-stream-container" class="mb-6 bg-gray-800 rounded-lg p-4">
|
||||
<div class="flex items-center justify-between mb-4">
|
||||
<h3 class="text-lg font-semibold flex items-center">
|
||||
<span class="w-3 h-3 bg-red-500 rounded-full mr-2 animate-pulse"></span>
|
||||
Live Preview
|
||||
</h3>
|
||||
<div id="stream-status" class="text-sm text-gray-400">Connecting...</div>
|
||||
</div>
|
||||
<div class="relative bg-black rounded-lg overflow-hidden" style="aspect-ratio: 16/9;">
|
||||
<video id="live-video" class="w-full h-full" controls autoplay muted playsinline></video>
|
||||
<div id="stream-loading" class="absolute inset-0 flex items-center justify-center bg-gray-900/80">
|
||||
<div class="text-center">
|
||||
<div class="animate-spin w-8 h-8 border-2 border-blue-500 border-t-transparent rounded-full mx-auto mb-2"></div>
|
||||
<div class="text-gray-400">Waiting for stream...</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="mt-2 text-xs text-gray-500">
|
||||
Stream URL: <code class="bg-gray-900 px-1 rounded">/runs/{{ run.run_id }}/hls/stream.m3u8</code>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
(function() {
|
||||
const video = document.getElementById('live-video');
|
||||
const statusEl = document.getElementById('stream-status');
|
||||
const loadingEl = document.getElementById('stream-loading');
|
||||
const hlsUrl = '/runs/{{ run.run_id }}/hls/stream.m3u8';
|
||||
let hls = null;
|
||||
let retryCount = 0;
|
||||
const maxRetries = 120; // Try for up to 4 minutes
|
||||
let segmentsLoaded = 0;
|
||||
|
||||
function initHls() {
|
||||
if (Hls.isSupported()) {
|
||||
hls = new Hls({
|
||||
// Stability over low latency - buffer more for smoother playback
|
||||
liveSyncDurationCount: 4, // Stay 4 segments behind live edge
|
||||
liveMaxLatencyDurationCount: 8, // Max 8 segments behind
|
||||
liveDurationInfinity: true, // Treat as infinite live stream
|
||||
|
||||
// Large buffers to absorb rendering speed variations
|
||||
maxBufferLength: 60, // Buffer up to 60s ahead
|
||||
maxMaxBufferLength: 120, // Allow even more if needed
|
||||
maxBufferSize: 60 * 1024 * 1024, // 60MB buffer
|
||||
maxBufferHole: 0.5, // Tolerate small gaps
|
||||
|
||||
// Back buffer for smooth seeking
|
||||
backBufferLength: 30,
|
||||
|
||||
// Playlist reload settings
|
||||
manifestLoadingTimeOut: 10000,
|
||||
manifestLoadingMaxRetry: 4,
|
||||
levelLoadingTimeOut: 10000,
|
||||
levelLoadingMaxRetry: 4,
|
||||
fragLoadingTimeOut: 20000,
|
||||
fragLoadingMaxRetry: 6,
|
||||
|
||||
// Start at lowest quality for faster start
|
||||
startLevel: 0,
|
||||
|
||||
// Enable smooth level switching
|
||||
abrEwmaDefaultEstimate: 500000,
|
||||
});
|
||||
|
||||
hls.on(Hls.Events.MANIFEST_PARSED, function(event, data) {
|
||||
loadingEl.classList.add('hidden');
|
||||
statusEl.textContent = 'Buffering...';
|
||||
statusEl.classList.remove('text-gray-400');
|
||||
statusEl.classList.add('text-yellow-400');
|
||||
video.play().catch(() => {});
|
||||
});
|
||||
|
||||
hls.on(Hls.Events.FRAG_LOADED, function(event, data) {
|
||||
retryCount = 0;
|
||||
segmentsLoaded++;
|
||||
statusEl.textContent = `Playing (${segmentsLoaded} segments)`;
|
||||
statusEl.classList.remove('text-yellow-400', 'text-gray-400');
|
||||
statusEl.classList.add('text-green-400');
|
||||
});
|
||||
|
||||
hls.on(Hls.Events.BUFFER_APPENDED, function() {
|
||||
// Hide loading once we have buffered content
|
||||
loadingEl.classList.add('hidden');
|
||||
});
|
||||
|
||||
hls.on(Hls.Events.ERROR, function(event, data) {
|
||||
console.log('HLS error:', data.type, data.details, data.fatal);
|
||||
|
||||
if (data.fatal) {
|
||||
switch (data.type) {
|
||||
case Hls.ErrorTypes.NETWORK_ERROR:
|
||||
if (retryCount < maxRetries) {
|
||||
retryCount++;
|
||||
statusEl.textContent = `Waiting for stream... (${retryCount})`;
|
||||
statusEl.classList.remove('text-green-400');
|
||||
statusEl.classList.add('text-yellow-400');
|
||||
// Exponential backoff with jitter
|
||||
const delay = Math.min(1000 * Math.pow(1.5, Math.min(retryCount, 6)), 10000);
|
||||
setTimeout(() => {
|
||||
hls.loadSource(hlsUrl);
|
||||
}, delay + Math.random() * 1000);
|
||||
} else {
|
||||
statusEl.textContent = 'Stream unavailable';
|
||||
statusEl.classList.add('text-red-400');
|
||||
}
|
||||
break;
|
||||
case Hls.ErrorTypes.MEDIA_ERROR:
|
||||
console.log('Media error, attempting recovery');
|
||||
hls.recoverMediaError();
|
||||
break;
|
||||
default:
|
||||
statusEl.textContent = 'Stream error';
|
||||
statusEl.classList.add('text-red-400');
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Non-fatal error - just log it
|
||||
if (data.details === 'bufferStalledError') {
|
||||
statusEl.textContent = 'Buffering...';
|
||||
statusEl.classList.remove('text-green-400');
|
||||
statusEl.classList.add('text-yellow-400');
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Handle video stalls
|
||||
video.addEventListener('waiting', function() {
|
||||
statusEl.textContent = 'Buffering...';
|
||||
statusEl.classList.remove('text-green-400');
|
||||
statusEl.classList.add('text-yellow-400');
|
||||
});
|
||||
|
||||
video.addEventListener('playing', function() {
|
||||
statusEl.textContent = `Playing (${segmentsLoaded} segments)`;
|
||||
statusEl.classList.remove('text-yellow-400');
|
||||
statusEl.classList.add('text-green-400');
|
||||
});
|
||||
|
||||
hls.loadSource(hlsUrl);
|
||||
hls.attachMedia(video);
|
||||
} else if (video.canPlayType('application/vnd.apple.mpegurl')) {
|
||||
// Native HLS support (Safari)
|
||||
video.src = hlsUrl;
|
||||
video.addEventListener('loadedmetadata', function() {
|
||||
loadingEl.classList.add('hidden');
|
||||
statusEl.textContent = 'Playing';
|
||||
video.play().catch(() => {});
|
||||
});
|
||||
} else {
|
||||
statusEl.textContent = 'HLS not supported';
|
||||
statusEl.classList.add('text-red-400');
|
||||
}
|
||||
}
|
||||
|
||||
// Start trying to connect
|
||||
initHls();
|
||||
|
||||
// Cleanup on page unload
|
||||
window.addEventListener('beforeunload', function() {
|
||||
if (hls) hls.destroy();
|
||||
});
|
||||
})();
|
||||
</script>
|
||||
{% endif %}
|
||||
|
||||
<!-- Tabs -->
|
||||
<div class="border-b border-gray-700 mb-6">
|
||||
<nav class="flex space-x-8">
|
||||
|
||||
15
database.py
15
database.py
@@ -1811,3 +1811,18 @@ async def delete_friendly_name(actor_id: str, cid: str) -> bool:
|
||||
actor_id, cid
|
||||
)
|
||||
return "DELETE 1" in result
|
||||
|
||||
|
||||
async def update_friendly_name_cid(actor_id: str, old_cid: str, new_cid: str) -> bool:
    """
    Re-point a friendly name from one CID to another.

    Used when an IPFS upload completes: the row's CID is switched from the
    local SHA256 hash to the IPFS CID so remote workers can fetch the asset
    via IPFS.

    Returns:
        True when the command status returned by the database contains
        "UPDATE 1", False otherwise.
    """
    query = "UPDATE friendly_names SET cid = $3 WHERE actor_id = $1 AND cid = $2"
    async with pool.acquire() as connection:
        status = await connection.execute(query, actor_id, old_cid, new_cid)
    return "UPDATE 1" in status
|
||||
|
||||
@@ -3,6 +3,10 @@ version: "3.8"
|
||||
services:
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
ports:
|
||||
- target: 6379
|
||||
published: 16379
|
||||
mode: host # Bypass swarm routing mesh
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
networks:
|
||||
@@ -11,6 +15,9 @@ services:
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
placement:
|
||||
constraints:
|
||||
- node.labels.gpu != true
|
||||
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
@@ -18,6 +25,10 @@ services:
|
||||
- POSTGRES_USER=artdag
|
||||
- POSTGRES_PASSWORD=artdag
|
||||
- POSTGRES_DB=artdag
|
||||
ports:
|
||||
- target: 5432
|
||||
published: 15432
|
||||
mode: host # Expose for GPU worker on different VPC
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
networks:
|
||||
@@ -26,12 +37,18 @@ services:
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
placement:
|
||||
constraints:
|
||||
- node.labels.gpu != true
|
||||
|
||||
ipfs:
|
||||
image: ipfs/kubo:latest
|
||||
ports:
|
||||
- "4001:4001" # Swarm TCP
|
||||
- "4001:4001/udp" # Swarm UDP
|
||||
- target: 5001
|
||||
published: 15001
|
||||
mode: host # API port for GPU worker on different VPC
|
||||
volumes:
|
||||
- ipfs_data:/data/ipfs
|
||||
- l1_cache:/data/cache:ro # Read-only access to cache for adding files
|
||||
@@ -42,6 +59,9 @@ services:
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
placement:
|
||||
constraints:
|
||||
- node.labels.gpu != true
|
||||
|
||||
l1-server:
|
||||
image: git.rose-ash.com/art-dag/l1-server:latest
|
||||
@@ -75,6 +95,9 @@ services:
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
placement:
|
||||
constraints:
|
||||
- node.labels.gpu != true
|
||||
|
||||
l1-worker:
|
||||
image: git.rose-ash.com/art-dag/l1-server:latest
|
||||
@@ -102,6 +125,9 @@ services:
|
||||
replicas: 2
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
placement:
|
||||
constraints:
|
||||
- node.labels.gpu != true
|
||||
|
||||
flower:
|
||||
image: mher/flower:2.0
|
||||
@@ -118,12 +144,60 @@ services:
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
placement:
|
||||
constraints:
|
||||
- node.labels.gpu != true
|
||||
|
||||
# GPU worker for streaming/rendering tasks
|
||||
# Build: docker build -f Dockerfile.gpu -t git.rose-ash.com/art-dag/l1-gpu-server:latest .
|
||||
# Requires: docker node update --label-add gpu=true <gpu-node-name>
|
||||
l1-gpu-worker:
|
||||
image: git.rose-ash.com/art-dag/l1-gpu-server:latest
|
||||
# For local dev, uncomment to build from Dockerfile.gpu:
|
||||
# build:
|
||||
# context: .
|
||||
# dockerfile: Dockerfile.gpu
|
||||
command: sh -c "cd /app && celery -A celery_app worker --loglevel=info -E -Q gpu,celery"
|
||||
environment:
|
||||
# GPU node is on different VPC - use public IPs for cross-node communication
|
||||
- REDIS_URL=redis://138.68.142.139:16379/5
|
||||
- DATABASE_URL=postgresql://artdag:artdag@138.68.142.139:15432/artdag
|
||||
# Connect to shared IPFS node on CPU (via public IP)
|
||||
- IPFS_API=/ip4/138.68.142.139/tcp/15001
|
||||
# Gateway fallback for resilience
|
||||
- IPFS_GATEWAYS=https://ipfs.io,https://cloudflare-ipfs.com,https://dweb.link
|
||||
# Local cache is ephemeral (tmpfs or local volume)
|
||||
- CACHE_DIR=/data/cache
|
||||
- C_FORCE_ROOT=true
|
||||
- ARTDAG_CLUSTER_KEY=${ARTDAG_CLUSTER_KEY:-}
|
||||
# GPU acceleration settings
|
||||
- NVIDIA_VISIBLE_DEVICES=all
|
||||
# Keep frames on GPU between operations for maximum performance
|
||||
- STREAMING_GPU_PERSIST=1
|
||||
volumes:
|
||||
# Local cache - ephemeral, just for working files
|
||||
- gpu_cache:/data/cache
|
||||
# Note: No source mount - GPU worker uses code from image
|
||||
depends_on:
|
||||
- redis
|
||||
- postgres
|
||||
- ipfs
|
||||
networks:
|
||||
- celery
|
||||
deploy:
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
placement:
|
||||
constraints:
|
||||
- node.labels.gpu == true
|
||||
|
||||
volumes:
|
||||
redis_data:
|
||||
postgres_data:
|
||||
ipfs_data:
|
||||
l1_cache:
|
||||
gpu_cache: # Ephemeral cache for GPU workers
|
||||
|
||||
networks:
|
||||
celery:
|
||||
|
||||
@@ -22,6 +22,16 @@ IPFS_API = os.getenv("IPFS_API", "/ip4/127.0.0.1/tcp/5001")
|
||||
# Connection timeout in seconds (increased for large files)
|
||||
IPFS_TIMEOUT = int(os.getenv("IPFS_TIMEOUT", "120"))
|
||||
|
||||
# Public IPFS gateways used as a fallback when the local node cannot serve a CID.
# Read from the comma-separated IPFS_GATEWAYS variable; entries are base URLs
# without the /ipfs/ suffix, and blank entries are discarded.
IPFS_GATEWAYS = [
    gateway.strip()
    for gateway in os.getenv(
        "IPFS_GATEWAYS",
        "https://ipfs.io,https://cloudflare-ipfs.com,https://dweb.link",
    ).split(",")
    if gateway.strip()
]

# Per-gateway timeout in seconds (default 30, shorter than the default API
# timeout so that fallback is faster)
GATEWAY_TIMEOUT = int(os.getenv("GATEWAY_TIMEOUT", "30"))
|
||||
|
||||
|
||||
def _multiaddr_to_url(multiaddr: str) -> str:
|
||||
"""Convert IPFS multiaddr to HTTP URL."""
|
||||
@@ -165,16 +175,50 @@ def get_file(cid: str, dest_path: Union[Path, str]) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def get_bytes(cid: str) -> Optional[bytes]:
|
||||
def get_bytes_from_gateway(cid: str) -> Optional[bytes]:
    """
    Fetch the content for a CID from public IPFS gateways (fallback path).

    Gateways from IPFS_GATEWAYS are tried in order; the first successful
    response wins. A failing gateway is logged and the next one is tried.

    Args:
        cid: IPFS CID to retrieve

    Returns:
        File content as bytes or None if all gateways fail
    """
    for gateway in IPFS_GATEWAYS:
        url = f"{gateway}/ipfs/{cid}"
        try:
            logger.info(f"Trying gateway: {url}")
            response = requests.get(url, timeout=GATEWAY_TIMEOUT)
            response.raise_for_status()
            payload = response.content
            logger.info(f"Retrieved from gateway {gateway}: {cid} ({len(payload)} bytes)")
            return payload
        except Exception as e:
            # Any failure (network, HTTP status, ...) moves on to the next gateway.
            logger.warning(f"Gateway {gateway} failed for {cid}: {e}")

    logger.error(f"All gateways failed for {cid}")
    return None
|
||||
|
||||
|
||||
def get_bytes(cid: str, use_gateway_fallback: bool = True) -> Optional[bytes]:
|
||||
"""
|
||||
Retrieve bytes data from IPFS.
|
||||
|
||||
Tries local IPFS node first, then falls back to public gateways
|
||||
if configured and use_gateway_fallback is True.
|
||||
|
||||
Args:
|
||||
cid: IPFS CID to retrieve
|
||||
use_gateway_fallback: If True, try public gateways on local failure
|
||||
|
||||
Returns:
|
||||
File content as bytes or None on failure
|
||||
"""
|
||||
# Try local IPFS node first
|
||||
try:
|
||||
url = f"{IPFS_BASE_URL}/api/v0/cat"
|
||||
params = {"arg": cid}
|
||||
@@ -186,6 +230,13 @@ def get_bytes(cid: str) -> Optional[bytes]:
|
||||
logger.info(f"Retrieved from IPFS: {cid} ({len(data)} bytes)")
|
||||
return data
|
||||
except Exception as e:
|
||||
logger.warning(f"Local IPFS failed for {cid}: {e}")
|
||||
|
||||
# Try gateway fallback
|
||||
if use_gateway_fallback and IPFS_GATEWAYS:
|
||||
logger.info(f"Trying gateway fallback for {cid}")
|
||||
return get_bytes_from_gateway(cid)
|
||||
|
||||
logger.error(f"Failed to get bytes from IPFS: {e}")
|
||||
return None
|
||||
|
||||
|
||||
77
scripts/cloud-init-gpu.sh
Normal file
77
scripts/cloud-init-gpu.sh
Normal file
@@ -0,0 +1,77 @@
|
||||
#!/bin/bash
|
||||
# Cloud-init startup script for GPU droplet (RTX 6000 Ada, etc.)
|
||||
# Paste this into DigitalOcean "User data" field when creating droplet
|
||||
|
||||
set -e
|
||||
export DEBIAN_FRONTEND=noninteractive
|
||||
exec > /var/log/artdag-setup.log 2>&1
|
||||
|
||||
echo "=== ArtDAG GPU Setup Started $(date) ==="
|
||||
|
||||
# Update system (non-interactive, keep existing configs)
|
||||
apt-get update
|
||||
apt-get -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" upgrade
|
||||
|
||||
# Install essentials
|
||||
apt-get install -y \
|
||||
python3 python3-venv python3-pip \
|
||||
git curl wget \
|
||||
ffmpeg \
|
||||
vulkan-tools \
|
||||
build-essential
|
||||
|
||||
# Create venv
|
||||
VENV_DIR="/opt/artdag-gpu"
|
||||
python3 -m venv "$VENV_DIR"
|
||||
source "$VENV_DIR/bin/activate"
|
||||
|
||||
# Install Python packages
|
||||
pip install --upgrade pip
|
||||
pip install \
|
||||
numpy \
|
||||
opencv-python-headless \
|
||||
wgpu \
|
||||
httpx \
|
||||
pyyaml \
|
||||
celery[redis] \
|
||||
fastapi \
|
||||
uvicorn \
|
||||
asyncpg
|
||||
|
||||
# Create code directory
|
||||
mkdir -p "$VENV_DIR/celery/sexp_effects/effects"
|
||||
mkdir -p "$VENV_DIR/celery/sexp_effects/primitive_libs"
|
||||
mkdir -p "$VENV_DIR/celery/streaming"
|
||||
|
||||
# Add SSH key for easier access (optional - add your key here)
|
||||
# echo "ssh-ed25519 AAAA... your-key" >> /root/.ssh/authorized_keys
|
||||
|
||||
# Test GPU
|
||||
echo "=== GPU Info ==="
|
||||
nvidia-smi || echo "nvidia-smi not available yet"
|
||||
|
||||
echo "=== NVENC Check ==="
|
||||
ffmpeg -encoders 2>/dev/null | grep -E "nvenc|cuda" || echo "NVENC not detected"
|
||||
|
||||
echo "=== wgpu Check ==="
|
||||
"$VENV_DIR/bin/python3" -c "
|
||||
import wgpu
|
||||
try:
|
||||
adapter = wgpu.gpu.request_adapter_sync(power_preference='high-performance')
|
||||
print(f'GPU: {adapter.info}')
|
||||
except Exception as e:
|
||||
print(f'wgpu error: {e}')
|
||||
" || echo "wgpu test failed"
|
||||
|
||||
# Add environment setup
|
||||
cat >> /etc/profile.d/artdag-gpu.sh << 'ENVEOF'
|
||||
export WGPU_BACKEND_TYPE=Vulkan
|
||||
export PATH="/opt/artdag-gpu/bin:$PATH"
|
||||
ENVEOF
|
||||
|
||||
# Mark setup complete
|
||||
touch /opt/artdag-gpu/.setup-complete
|
||||
echo "=== Setup Complete $(date) ==="
|
||||
echo "Venv: /opt/artdag-gpu"
|
||||
echo "Activate: source /opt/artdag-gpu/bin/activate"
|
||||
echo "Vulkan: export WGPU_BACKEND_TYPE=Vulkan"
|
||||
51
scripts/deploy-to-gpu.sh
Executable file
51
scripts/deploy-to-gpu.sh
Executable file
@@ -0,0 +1,51 @@
|
||||
#!/bin/bash
# Deploy art-dag GPU code to a remote droplet
# Usage: ./deploy-to-gpu.sh <droplet-ip>
#
# Copies the WGSL compiler, parser, interpreter, streaming backend, effect
# definitions and primitive libraries to the droplet over ssh/scp, then runs a
# small compile test on the remote side.

# -u catches typos in variable names, pipefail is harmless here and keeps the
# script safe if pipelines are added later.
set -euo pipefail

# ${1:-} keeps the usage message reachable under `set -u`.
if [ -z "${1:-}" ]; then
    echo "Usage: $0 <droplet-ip>"
    echo "Example: $0 159.223.7.100"
    exit 1
fi

DROPLET_IP="$1"
REMOTE_DIR="/opt/artdag-gpu/celery"
LOCAL_DIR="$(dirname "$0")/.."

echo "=== Deploying to $DROPLET_IP ==="

# Create remote directory
echo "[1/4] Creating remote directory..."
ssh "root@$DROPLET_IP" "mkdir -p $REMOTE_DIR/sexp_effects $REMOTE_DIR/streaming $REMOTE_DIR/scripts"

# Copy core files
echo "[2/4] Copying core files..."
scp "$LOCAL_DIR/sexp_effects/wgsl_compiler.py" "root@$DROPLET_IP:$REMOTE_DIR/sexp_effects/"
scp "$LOCAL_DIR/sexp_effects/parser.py" "root@$DROPLET_IP:$REMOTE_DIR/sexp_effects/"
scp "$LOCAL_DIR/sexp_effects/interpreter.py" "root@$DROPLET_IP:$REMOTE_DIR/sexp_effects/"
scp "$LOCAL_DIR/sexp_effects/__init__.py" "root@$DROPLET_IP:$REMOTE_DIR/sexp_effects/"
scp "$LOCAL_DIR/streaming/backends.py" "root@$DROPLET_IP:$REMOTE_DIR/streaming/"

# Copy effects
echo "[3/4] Copying effects..."
ssh "root@$DROPLET_IP" "mkdir -p $REMOTE_DIR/sexp_effects/effects $REMOTE_DIR/sexp_effects/primitive_libs"
# These copies stay non-fatal (the directories may legitimately be empty), but
# a failure is reported instead of being discarded, so a later test failure
# can be traced back to a missing upload.
scp -r "$LOCAL_DIR/sexp_effects/effects/"*.sexp "root@$DROPLET_IP:$REMOTE_DIR/sexp_effects/effects/" \
    || echo "WARNING: could not copy effect definitions (*.sexp)" >&2
scp -r "$LOCAL_DIR/sexp_effects/primitive_libs/"*.py "root@$DROPLET_IP:$REMOTE_DIR/sexp_effects/primitive_libs/" \
    || echo "WARNING: could not copy primitive libraries (*.py)" >&2

# Test
echo "[4/4] Testing deployment..."
ssh "root@$DROPLET_IP" "cd $REMOTE_DIR && /opt/artdag-gpu/bin/python3 -c '
import sys
sys.path.insert(0, \".\")
from sexp_effects.wgsl_compiler import compile_effect_file
result = compile_effect_file(\"sexp_effects/effects/invert.sexp\")
print(f\"Compiled effect: {result.name}\")
print(\"Deployment OK\")
'" || echo "Test failed - may need to run setup script first"

echo ""
echo "=== Deployment complete ==="
echo "SSH: ssh root@$DROPLET_IP"
echo "Test: ssh root@$DROPLET_IP 'cd $REMOTE_DIR && /opt/artdag-gpu/bin/python3 -c \"from streaming.backends import get_backend; b=get_backend(\\\"wgpu\\\"); print(b)\"'"
|
||||
108
scripts/setup-gpu-droplet.sh
Executable file
108
scripts/setup-gpu-droplet.sh
Executable file
@@ -0,0 +1,108 @@
|
||||
#!/bin/bash
# Setup script for GPU droplet with NVENC support
# Run as root on a fresh Ubuntu droplet with NVIDIA GPU
#
# Steps: system update, NVIDIA driver check/install, FFmpeg, Python venv,
# Python packages, art-dag checkout, and a wgpu/NVENC smoke test.

set -e

# Keep apt from stopping on interactive prompts during unattended setup.
export DEBIAN_FRONTEND=noninteractive

echo "=== ArtDAG GPU Droplet Setup ==="

# 1. System updates
echo "[1/7] Updating system..."
apt-get update
apt-get upgrade -y

# 2. Install NVIDIA drivers (if not already installed)
echo "[2/7] Checking NVIDIA drivers..."
if ! command -v nvidia-smi &> /dev/null; then
    echo "Installing NVIDIA drivers..."
    apt-get install -y nvidia-driver-535 nvidia-utils-535
    echo "NVIDIA drivers installed. Reboot required."
    echo "After reboot, run this script again."
    exit 0
fi

nvidia-smi
echo "NVIDIA drivers OK"

# 3. Install FFmpeg with NVENC support
echo "[3/7] Installing FFmpeg with NVENC..."
apt-get install -y ffmpeg

# Verify NVENC
if ffmpeg -encoders 2>/dev/null | grep -q nvenc; then
    echo "NVENC available:"
    ffmpeg -encoders 2>/dev/null | grep nvenc
else
    echo "WARNING: NVENC not available. GPU may not support hardware encoding."
fi

# 4. Install Python and create venv
echo "[4/7] Setting up Python environment..."
apt-get install -y python3 python3-venv python3-pip git

VENV_DIR="/opt/artdag-gpu"
python3 -m venv "$VENV_DIR"
source "$VENV_DIR/bin/activate"

# 5. Install Python dependencies
echo "[5/7] Installing Python packages..."
pip install --upgrade pip
# 'celery[redis]' is quoted so the shell cannot treat the brackets as a glob.
pip install \
    numpy \
    opencv-python-headless \
    wgpu \
    httpx \
    pyyaml \
    'celery[redis]' \
    fastapi \
    uvicorn

# 6. Clone/update art-dag code
echo "[6/7] Setting up art-dag code..."
ARTDAG_DIR="$VENV_DIR/celery"
if [ -d "$ARTDAG_DIR" ]; then
    echo "Updating existing code..."
    cd "$ARTDAG_DIR"
    # Best effort: the directory may exist without being a git checkout.
    git pull || true
else
    echo "Cloning art-dag..."
    git clone https://git.rose-ash.com/art-dag/celery.git "$ARTDAG_DIR" || {
        echo "Git clone failed. You may need to copy code manually."
    }
fi

# 7. Test GPU compute
# A failure inside the Python snippet exits non-zero, which aborts the script
# here because of `set -e`.
echo "[7/7] Testing GPU compute..."
"$VENV_DIR/bin/python3" << 'PYTEST'
import sys
try:
    import wgpu
    adapter = wgpu.gpu.request_adapter_sync(power_preference="high-performance")
    print(f"GPU Adapter: {adapter.info.get('device', 'unknown')}")
    device = adapter.request_device_sync()
    print("wgpu device created successfully")

    # Check for NVENC via FFmpeg
    import subprocess
    result = subprocess.run(['ffmpeg', '-encoders'], capture_output=True, text=True)
    if 'h264_nvenc' in result.stdout:
        print("NVENC H.264 encoder: AVAILABLE")
    else:
        print("NVENC H.264 encoder: NOT AVAILABLE")
    if 'hevc_nvenc' in result.stdout:
        print("NVENC HEVC encoder: AVAILABLE")
    else:
        print("NVENC HEVC encoder: NOT AVAILABLE")

except Exception as e:
    print(f"Error: {e}")
    sys.exit(1)
PYTEST

echo ""
echo "=== Setup Complete ==="
echo "Venv: $VENV_DIR"
echo "Code: $ARTDAG_DIR"
echo ""
echo "To activate: source $VENV_DIR/bin/activate"
echo "To test: cd $ARTDAG_DIR && python -c 'from streaming.backends import get_backend; print(get_backend(\"wgpu\"))'"
|
||||
@@ -385,9 +385,9 @@ def _serialize_pretty(expr: List, indent: int) -> str:
|
||||
|
||||
|
||||
def parse_file(path: str) -> Any:
|
||||
"""Parse an S-expression file."""
|
||||
"""Parse an S-expression file (supports multiple top-level expressions)."""
|
||||
with open(path, 'r') as f:
|
||||
return parse(f.read())
|
||||
return parse_all(f.read())
|
||||
|
||||
|
||||
def to_sexp(obj: Any) -> str:
|
||||
|
||||
220
sexp_effects/primitive_libs/blending_gpu.py
Normal file
220
sexp_effects/primitive_libs/blending_gpu.py
Normal file
@@ -0,0 +1,220 @@
|
||||
"""
|
||||
GPU-Accelerated Blending Primitives Library
|
||||
|
||||
Uses CuPy for CUDA-accelerated image blending and compositing.
|
||||
Keeps frames on GPU when STREAMING_GPU_PERSIST=1 for maximum performance.
|
||||
"""
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
# Try to import CuPy for GPU acceleration
|
||||
try:
|
||||
import cupy as cp
|
||||
GPU_AVAILABLE = True
|
||||
print("[blending_gpu] CuPy GPU acceleration enabled")
|
||||
except ImportError:
|
||||
cp = np
|
||||
GPU_AVAILABLE = False
|
||||
print("[blending_gpu] CuPy not available, using CPU fallback")
|
||||
|
||||
# GPU persistence mode - keep frames on GPU between operations
|
||||
GPU_PERSIST = os.environ.get("STREAMING_GPU_PERSIST", "1") == "1"
|
||||
if GPU_AVAILABLE and GPU_PERSIST:
|
||||
print("[blending_gpu] GPU persistence enabled - frames stay on GPU")
|
||||
|
||||
|
||||
def _to_gpu(img):
    """Return ``img`` as a CuPy array when CuPy is usable, otherwise unchanged.

    Arrays that are already CuPy arrays are passed through untouched.
    """
    needs_upload = GPU_AVAILABLE and not isinstance(img, cp.ndarray)
    return cp.asarray(img) if needs_upload else img
|
||||
|
||||
|
||||
def _to_cpu(img):
    """Convert a CuPy array to NumPy, but only when GPU persistence is off.

    With ``GPU_PERSIST`` enabled (or without CuPy) the argument is returned
    as-is.
    """
    if GPU_PERSIST or not GPU_AVAILABLE:
        return img
    if isinstance(img, cp.ndarray):
        return cp.asnumpy(img)
    return img
|
||||
|
||||
|
||||
def _get_xp(img):
    """Pick the array module matching ``img``: ``cp`` for CuPy arrays, else ``np``."""
    on_device = GPU_AVAILABLE and isinstance(img, cp.ndarray)
    return cp if on_device else np
|
||||
|
||||
|
||||
def prim_blend_images(a, b, alpha):
    """Linear cross-fade of two images: ``a * (1 - alpha) + b * alpha``.

    ``alpha`` is converted to float and clamped to [0, 1]. The result is cast
    to uint8.
    """
    alpha = max(0.0, min(1.0, float(alpha)))

    if not GPU_AVAILABLE:
        mixed = a.astype(float) * (1 - alpha) + b.astype(float) * alpha
        return mixed.astype(np.uint8)

    first = _to_gpu(a)
    second = _to_gpu(b)
    mixed = first.astype(cp.float32) * (1 - alpha) + second.astype(cp.float32) * alpha
    return _to_cpu(mixed.astype(cp.uint8))
|
||||
|
||||
|
||||
def _blend_mode_formula(a, b, mode, xp):
    """Evaluate one named blend formula on inputs already scaled to [0, 1].

    ``xp`` is the array module (numpy or cupy) used for the element-wise
    helpers. Unrecognised modes behave like "normal" and return ``b``.
    """
    if mode == "multiply":
        return a * b
    if mode == "screen":
        return 1 - (1 - a) * (1 - b)
    if mode == "overlay":
        # Branches on the first image.
        return xp.where(a < 0.5, 2 * a * b, 1 - 2 * (1 - a) * (1 - b))
    if mode == "soft-light":
        return xp.where(b < 0.5,
                        a - (1 - 2 * b) * a * (1 - a),
                        a + (2 * b - 1) * (xp.sqrt(a) - a))
    if mode == "hard-light":
        # Same formula as overlay, but branches on the second image.
        return xp.where(b < 0.5, 2 * a * b, 1 - 2 * (1 - a) * (1 - b))
    if mode == "color-dodge":
        # The 0.001 offset keeps the divisor away from zero.
        return xp.clip(a / (1 - b + 0.001), 0, 1)
    if mode == "color-burn":
        return 1 - xp.clip((1 - a) / (b + 0.001), 0, 1)
    if mode == "difference":
        return xp.abs(a - b)
    if mode == "exclusion":
        return a + b - 2 * a * b
    if mode == "add":
        return xp.clip(a + b, 0, 1)
    if mode == "subtract":
        return xp.clip(a - b, 0, 1)
    if mode == "darken":
        return xp.minimum(a, b)
    if mode == "lighten":
        return xp.maximum(a, b)
    # Default to normal (just return b)
    return b


def prim_blend_mode(a, b, mode):
    """Combine two images with a named Photoshop-style blend mode.

    Both inputs are scaled to [0, 1], blended, scaled back by 255 and cast to
    uint8. Supported mode strings: multiply, screen, overlay, soft-light,
    hard-light, color-dodge, color-burn, difference, exclusion, add,
    subtract, darken, lighten; anything else yields ``b``.
    """
    if GPU_AVAILABLE:
        xp = cp
        a_unit = _to_gpu(a).astype(cp.float32) / 255
        b_unit = _to_gpu(b).astype(cp.float32) / 255
    else:
        xp = np
        a_unit = a.astype(float) / 255
        b_unit = b.astype(float) / 255

    blended = _blend_mode_formula(a_unit, b_unit, mode, xp)
    return _to_cpu((blended * 255).astype(xp.uint8))
|
||||
|
||||
|
||||
def prim_mask(img, mask_img):
    """Apply grayscale mask to image (white=opaque, black=transparent).

    Args:
        img: Image array, either 2-D or 3-D with a trailing channel axis.
        mask_img: Mask array; when 3-D only channel 0 is used. Values are
            divided by 255 to obtain a weight.

    Returns:
        uint8 array of ``img`` multiplied by the mask weight. Goes through
        ``_to_cpu``, so it stays a CuPy array when GPU persistence is on.
    """
    if GPU_AVAILABLE:
        xp, ftype = cp, cp.float32
        img = _to_gpu(img)
        mask_img = _to_gpu(mask_img)
    else:
        # The CPU path keeps Python float (float64) arithmetic.
        xp, ftype = np, float

    weights = mask_img[:, :, 0] if mask_img.ndim == 3 else mask_img
    weights = weights.astype(ftype) / 255

    # Only add a channel axis when the image has one; adding it for a 2-D
    # image would broadcast (H, W) against (H, W, 1) and give a wrong shape
    # or a broadcasting error.
    if img.ndim == 3:
        weights = weights[:, :, xp.newaxis]

    masked = (img.astype(ftype) * weights).astype(xp.uint8)
    return _to_cpu(masked)
|
||||
|
||||
|
||||
def prim_alpha_composite(base, overlay, alpha_channel):
    """Composite overlay onto base using alpha channel.

    Args:
        base: Background image array (2-D, or 3-D with a channel axis).
        overlay: Foreground image array with the same shape as ``base``.
        alpha_channel: Per-pixel opacity; when 3-D only channel 0 is used.
            Values are divided by 255 to obtain a weight.

    Returns:
        uint8 array ``base * (1 - alpha) + overlay * alpha``. Goes through
        ``_to_cpu``, so it stays a CuPy array when GPU persistence is on.
    """
    if GPU_AVAILABLE:
        xp, ftype = cp, cp.float32
        base = _to_gpu(base)
        overlay = _to_gpu(overlay)
        alpha_channel = _to_gpu(alpha_channel)
    else:
        # The CPU path keeps Python float (float64) arithmetic.
        xp, ftype = np, float

    alpha = alpha_channel[:, :, 0] if alpha_channel.ndim == 3 else alpha_channel
    alpha = alpha.astype(ftype) / 255

    # Only add a channel axis when the images have one; for 2-D images the
    # extra axis would break broadcasting.
    if base.ndim == 3:
        alpha = alpha[:, :, xp.newaxis]

    composed = base.astype(ftype) * (1 - alpha) + overlay.astype(ftype) * alpha
    return _to_cpu(composed.astype(xp.uint8))
|
||||
|
||||
|
||||
def prim_overlay(base, overlay, x, y, alpha=1.0):
    """Paste ``overlay`` onto a copy of ``base`` with its top-left at (x, y).

    The overlay is clipped to the base bounds; the visible part is mixed with
    the underlying pixels as ``dst * (1 - alpha) + src * alpha`` and written
    back as uint8. ``base`` itself is not modified.
    """
    if GPU_AVAILABLE:
        background = _to_gpu(base)
        patch = _to_gpu(overlay)
        ftype, utype = cp.float32, cp.uint8
    else:
        background = base
        patch = overlay
        ftype, utype = float, np.uint8

    canvas = background.copy()

    x, y = int(x), int(y)
    patch_h, patch_w = patch.shape[:2]
    base_h, base_w = background.shape[:2]

    # Clip to bounds: (src_*) index into the overlay, (dst_*) into the canvas.
    src_left, src_top = max(0, -x), max(0, -y)
    dst_left, dst_top = max(0, x), max(0, y)
    src_right = min(patch_w, base_w - x)
    src_bottom = min(patch_h, base_h - y)

    if src_right > src_left and src_bottom > src_top:
        rows = slice(dst_top, dst_top + (src_bottom - src_top))
        cols = slice(dst_left, dst_left + (src_right - src_left))
        src = patch[src_top:src_bottom, src_left:src_right]
        dst = canvas[rows, cols]
        mixed = dst.astype(ftype) * (1 - alpha) + src.astype(ftype) * alpha
        canvas[rows, cols] = mixed.astype(utype)

    return _to_cpu(canvas) if GPU_AVAILABLE else canvas
|
||||
|
||||
|
||||
# Registry mapping primitive names to the implementations in this module.
PRIMITIVES = {
    # Blending of two full frames
    'blend-images': prim_blend_images,
    'blend-mode': prim_blend_mode,
    # Mask / alpha driven compositing
    'mask': prim_mask,
    'alpha-composite': prim_alpha_composite,
    # Positioned overlay
    'overlay': prim_overlay,
}
|
||||
280
sexp_effects/primitive_libs/color_ops_gpu.py
Normal file
280
sexp_effects/primitive_libs/color_ops_gpu.py
Normal file
@@ -0,0 +1,280 @@
|
||||
"""
|
||||
GPU-Accelerated Color Operations Library
|
||||
|
||||
Uses CuPy for CUDA-accelerated color transforms.
|
||||
|
||||
Performance Mode:
|
||||
- Set STREAMING_GPU_PERSIST=1 to keep frames on GPU between operations
|
||||
- This dramatically improves performance by avoiding CPU<->GPU transfers
|
||||
"""
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
# Try to import CuPy for GPU acceleration
|
||||
try:
|
||||
import cupy as cp
|
||||
GPU_AVAILABLE = True
|
||||
print("[color_ops_gpu] CuPy GPU acceleration enabled")
|
||||
except ImportError:
|
||||
cp = np
|
||||
GPU_AVAILABLE = False
|
||||
print("[color_ops_gpu] CuPy not available, using CPU fallback")
|
||||
|
||||
# GPU persistence mode - keep frames on GPU between operations
|
||||
GPU_PERSIST = os.environ.get("STREAMING_GPU_PERSIST", "1") == "1"
|
||||
if GPU_AVAILABLE and GPU_PERSIST:
|
||||
print("[color_ops_gpu] GPU persistence enabled - frames stay on GPU")
|
||||
|
||||
|
||||
def _to_gpu(img):
    """Upload ``img`` to the GPU when CuPy is available; otherwise return it as-is.

    Inputs that are already CuPy arrays are returned unchanged.
    """
    if not GPU_AVAILABLE:
        return img
    if isinstance(img, cp.ndarray):
        return img
    return cp.asarray(img)
|
||||
|
||||
|
||||
def _to_cpu(img):
    """Bring a CuPy array back to NumPy unless GPU persistence is enabled.

    Returns the input unchanged when ``GPU_PERSIST`` is set, when CuPy is
    unavailable, or when ``img`` is not a CuPy array.
    """
    should_download = (
        not GPU_PERSIST and GPU_AVAILABLE and isinstance(img, cp.ndarray)
    )
    return cp.asnumpy(img) if should_download else img
|
||||
|
||||
|
||||
def prim_invert(img):
    """Return the colour negative of ``img`` (``255 - img``)."""
    if not GPU_AVAILABLE:
        return 255 - img
    return _to_cpu(255 - _to_gpu(img))
|
||||
|
||||
|
||||
def prim_grayscale(img):
    """Replace every channel with the weighted luminance of the first three.

    Inputs that are not 3-D are returned unchanged. The output is a
    3-channel uint8 image whose channels all hold the same gray value.
    """
    if img.ndim != 3:
        return img

    if GPU_AVAILABLE:
        xp = cp
        pixels = _to_gpu(img.astype(np.float32))
    else:
        xp = np
        pixels = img

    # Standard luminance weights
    luma = 0.299 * pixels[:, :, 0] + 0.587 * pixels[:, :, 1] + 0.114 * pixels[:, :, 2]
    luma = xp.clip(luma, 0, 255).astype(xp.uint8)

    # Stack to 3 channels
    stacked = xp.stack([luma, luma, luma], axis=2)
    return _to_cpu(stacked) if GPU_AVAILABLE else stacked
|
||||
|
||||
|
||||
def prim_brightness(img, factor=1.0):
    """Scale pixel values by ``factor``, clamp to [0, 255] and return uint8."""
    if not GPU_AVAILABLE:
        scaled = img.astype(np.float32) * factor
        return np.clip(scaled, 0, 255).astype(np.uint8)

    scaled = _to_gpu(img.astype(np.float32)) * factor
    return _to_cpu(cp.clip(scaled, 0, 255).astype(cp.uint8))
|
||||
|
||||
|
||||
def prim_contrast(img, factor=1.0):
    """Stretch pixel values around 128 by ``factor``; clamp and return uint8."""
    if not GPU_AVAILABLE:
        stretched = (img.astype(np.float32) - 128) * factor + 128
        return np.clip(stretched, 0, 255).astype(np.uint8)

    pixels = _to_gpu(img.astype(np.float32))
    stretched = (pixels - 128) * factor + 128
    return _to_cpu(cp.clip(stretched, 0, 255).astype(cp.uint8))
|
||||
|
||||
|
||||
# CUDA kernel for HSV hue shift
|
||||
if GPU_AVAILABLE:
    # In-place hue rotation. The kernel indexes the buffer as packed
    # 3-channel pixels in row-major order, idx = (y * width + x) * 3, one
    # thread per pixel, so callers must pass a C-contiguous uint8 array.
    _hue_shift_kernel = cp.RawKernel(r'''
extern "C" __global__
void hue_shift(unsigned char* img, int width, int height, float shift) {
    int x = blockDim.x * blockIdx.x + threadIdx.x;
    int y = blockDim.y * blockIdx.y + threadIdx.y;

    if (x >= width || y >= height) return;

    int idx = (y * width + x) * 3;

    // Get RGB
    float r = img[idx] / 255.0f;
    float g = img[idx + 1] / 255.0f;
    float b = img[idx + 2] / 255.0f;

    // RGB to HSV
    float max_c = fmaxf(r, fmaxf(g, b));
    float min_c = fminf(r, fminf(g, b));
    float delta = max_c - min_c;

    float h = 0.0f, s = 0.0f, v = max_c;

    if (delta > 0.00001f) {
        s = delta / max_c;

        if (max_c == r) {
            h = 60.0f * fmodf((g - b) / delta, 6.0f);
        } else if (max_c == g) {
            h = 60.0f * ((b - r) / delta + 2.0f);
        } else {
            h = 60.0f * ((r - g) / delta + 4.0f);
        }

        if (h < 0) h += 360.0f;
    }

    // Shift hue
    h = fmodf(h + shift, 360.0f);
    if (h < 0) h += 360.0f;

    // HSV to RGB
    float c = v * s;
    float x_val = c * (1.0f - fabsf(fmodf(h / 60.0f, 2.0f) - 1.0f));
    float m = v - c;

    float r_out, g_out, b_out;
    if (h < 60) {
        r_out = c; g_out = x_val; b_out = 0;
    } else if (h < 120) {
        r_out = x_val; g_out = c; b_out = 0;
    } else if (h < 180) {
        r_out = 0; g_out = c; b_out = x_val;
    } else if (h < 240) {
        r_out = 0; g_out = x_val; b_out = c;
    } else if (h < 300) {
        r_out = x_val; g_out = 0; b_out = c;
    } else {
        r_out = c; g_out = 0; b_out = x_val;
    }

    // Round to nearest (+0.5f) instead of truncating, so float error in the
    // RGB->HSV->RGB round trip cannot lower a channel by one level.
    img[idx] = (unsigned char)fminf(255.0f, (r_out + m) * 255.0f + 0.5f);
    img[idx + 1] = (unsigned char)fminf(255.0f, (g_out + m) * 255.0f + 0.5f);
    img[idx + 2] = (unsigned char)fminf(255.0f, (b_out + m) * 255.0f + 0.5f);
}
''', 'hue_shift')
|
||||
|
||||
|
||||
def prim_hue_shift(img, shift=0.0):
    """Rotate the hue of a 3-channel image by ``shift`` degrees.

    Anything that is not a 3-D array with exactly three channels is returned
    unchanged. Without CuPy the work is done with OpenCV's HSV conversion
    (hue stored as degrees / 2, wrapped at 180); with CuPy the
    ``_hue_shift_kernel`` CUDA kernel runs on a copy of the image.
    """
    if not (img.ndim == 3 and img.shape[2] == 3):
        return img

    if GPU_AVAILABLE:
        height, width = img.shape[:2]
        # The kernel writes in place, so work on a private copy.
        work = _to_gpu(img.astype(np.uint8)).copy()

        threads = (16, 16)
        blocks = (
            (width + threads[0] - 1) // threads[0],
            (height + threads[1] - 1) // threads[1],
        )
        kernel_args = (work, np.int32(width), np.int32(height), np.float32(shift))
        _hue_shift_kernel(blocks, threads, kernel_args)
        return _to_cpu(work)

    import cv2
    hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    hsv[:, :, 0] = (hsv[:, :, 0].astype(np.float32) + shift / 2) % 180
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
|
||||
|
||||
|
||||
def prim_saturate(img, factor=1.0):
    """Scale colour saturation by ``factor``.

    Non-3-D inputs are returned unchanged. The CPU path scales the S channel
    in OpenCV's HSV space; the GPU path instead mixes each pixel with its
    luminance, so the two paths are not numerically identical.
    """
    if img.ndim != 3:
        return img

    if GPU_AVAILABLE:
        # GPU version - simple desaturation blend
        pixels = _to_gpu(img.astype(np.float32))
        luma = 0.299 * pixels[:, :, 0] + 0.587 * pixels[:, :, 1] + 0.114 * pixels[:, :, 2]
        luma = luma[:, :, cp.newaxis]

        if factor < 1.0:
            # Desaturate: blend toward gray
            mixed = pixels * factor + luma * (1 - factor)
        else:
            # Oversaturate: extrapolate away from gray
            mixed = luma + (pixels - luma) * factor

        return _to_cpu(cp.clip(mixed, 0, 255).astype(cp.uint8))

    import cv2
    hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV).astype(np.float32)
    hsv[:, :, 1] = np.clip(hsv[:, :, 1] * factor, 0, 255)
    return cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB)
|
||||
|
||||
|
||||
def prim_blend(img1, img2, alpha=0.5):
    """Mix two images as ``img1 * (1 - alpha) + img2 * alpha``; clamp to uint8."""
    if not GPU_AVAILABLE:
        mixed = img1.astype(np.float32) * (1 - alpha) + img2.astype(np.float32) * alpha
        return np.clip(mixed, 0, 255).astype(np.uint8)

    first = _to_gpu(img1.astype(np.float32))
    second = _to_gpu(img2.astype(np.float32))
    mixed = first * (1 - alpha) + second * alpha
    return _to_cpu(cp.clip(mixed, 0, 255).astype(cp.uint8))
|
||||
|
||||
|
||||
def prim_add(img1, img2):
    """Saturating addition of two images.

    The sum is computed in int16 so it cannot wrap, then clamped to
    [0, 255] and cast to uint8.
    """
    if not GPU_AVAILABLE:
        total = img1.astype(np.int16) + img2.astype(np.int16)
        return np.clip(total, 0, 255).astype(np.uint8)

    total = _to_gpu(img1).astype(np.int16) + _to_gpu(img2).astype(np.int16)
    clamped = cp.clip(total, 0, 255)
    return _to_cpu(clamped.astype(cp.uint8))
|
||||
|
||||
|
||||
def prim_multiply(img1, img2):
    """Multiply blend: ``img1 * img2 / 255``, clamped and cast to uint8."""
    if not GPU_AVAILABLE:
        product = (img1.astype(np.float32) * img2.astype(np.float32)) / 255.0
        return np.clip(product, 0, 255).astype(np.uint8)

    product = (_to_gpu(img1).astype(np.float32) * _to_gpu(img2).astype(np.float32)) / 255.0
    return _to_cpu(cp.clip(product, 0, 255).astype(cp.uint8))
|
||||
|
||||
|
||||
def prim_screen(img1, img2):
    """Screen blend: ``1 - (1 - a) * (1 - b)`` on inputs scaled to [0, 1].

    The result is scaled back by 255, clamped and cast to uint8.
    """
    if GPU_AVAILABLE:
        xp = cp
        top = _to_gpu(img1).astype(np.float32) / 255.0
        bottom = _to_gpu(img2).astype(np.float32) / 255.0
    else:
        xp = np
        top = img1.astype(np.float32) / 255.0
        bottom = img2.astype(np.float32) / 255.0

    screened = 1.0 - (1.0 - top) * (1.0 - bottom)
    out = xp.clip(screened * 255, 0, 255).astype(xp.uint8)
    return _to_cpu(out) if GPU_AVAILABLE else out
|
||||
|
||||
|
||||
# Import CPU primitives as fallbacks
|
||||
def _get_cpu_primitives():
    """Return the ``PRIMITIVES`` mapping of the CPU ``color_ops`` module.

    The import is done inside the function rather than at module top level.
    """
    from sexp_effects.primitive_libs.color_ops import PRIMITIVES as cpu_primitives
    return cpu_primitives
|
||||
|
||||
|
||||
# Export functions - start with CPU primitives, then override with GPU versions
|
||||
# Start from a copy of the CPU registry, then let the GPU-accelerated
# versions defined in this module take precedence for the names they cover.
PRIMITIVES = {
    **_get_cpu_primitives(),
    'invert': prim_invert,
    'grayscale': prim_grayscale,
    'brightness': prim_brightness,
    'contrast': prim_contrast,
    'hue-shift': prim_hue_shift,
    'saturate': prim_saturate,
    'blend': prim_blend,
    'add': prim_add,
    'multiply': prim_multiply,
    'screen': prim_screen,
}
|
||||
409
sexp_effects/primitive_libs/geometry_gpu.py
Normal file
409
sexp_effects/primitive_libs/geometry_gpu.py
Normal file
@@ -0,0 +1,409 @@
|
||||
"""
|
||||
GPU-Accelerated Geometry Primitives Library
|
||||
|
||||
Uses CuPy for CUDA-accelerated image transforms.
|
||||
Falls back to CPU if GPU unavailable.
|
||||
|
||||
Performance Mode:
|
||||
- Set STREAMING_GPU_PERSIST=1 to keep frames on GPU between operations
|
||||
- This dramatically improves performance by avoiding CPU<->GPU transfers
|
||||
- Frames only transfer to CPU at final output
|
||||
"""
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
# Try to import CuPy for GPU acceleration
|
||||
try:
|
||||
import cupy as cp
|
||||
from cupyx.scipy import ndimage as cpndimage
|
||||
GPU_AVAILABLE = True
|
||||
print("[geometry_gpu] CuPy GPU acceleration enabled")
|
||||
except ImportError:
|
||||
cp = np
|
||||
GPU_AVAILABLE = False
|
||||
print("[geometry_gpu] CuPy not available, using CPU fallback")
|
||||
|
||||
# GPU persistence mode - keep frames on GPU between operations
|
||||
# Enabled by default; set STREAMING_GPU_PERSIST to any value other than "1" to copy results back to the CPU after each operation
|
||||
GPU_PERSIST = os.environ.get("STREAMING_GPU_PERSIST", "1") == "1"
|
||||
if GPU_AVAILABLE and GPU_PERSIST:
|
||||
print("[geometry_gpu] GPU persistence enabled - frames stay on GPU")
|
||||
|
||||
|
||||
def _to_gpu(img):
    """Return a CuPy version of ``img`` when CuPy is available.

    CuPy inputs, and every input when CuPy is missing, are returned unchanged.
    """
    already_there = not GPU_AVAILABLE or isinstance(img, cp.ndarray)
    return img if already_there else cp.asarray(img)
|
||||
|
||||
|
||||
def _to_cpu(img):
    """Download a CuPy array to NumPy only when GPU persistence is disabled.

    In every other case the argument is handed back untouched.
    """
    if GPU_PERSIST:
        return img
    if GPU_AVAILABLE and isinstance(img, cp.ndarray):
        return cp.asnumpy(img)
    return img
|
||||
|
||||
|
||||
def _ensure_output_format(img):
    """Return ``img`` in the output form implied by ``GPU_PERSIST``.

    Thin alias for ``_to_cpu``.
    """
    converted = _to_cpu(img)
    return converted
|
||||
|
||||
|
||||
def prim_rotate(img, angle, cx=None, cy=None):
    """Rotate ``img`` by ``angle`` degrees, keeping the original frame size.

    ``cx`` / ``cy`` default to the image centre. The CPU path (OpenCV)
    rotates about that point; the GPU path uses
    ``cupyx.scipy.ndimage.rotate`` with ``reshape=False`` and linear
    interpolation.
    """
    if GPU_AVAILABLE:
        frame = _to_gpu(img)
        height, width = frame.shape[:2]

        # NOTE(review): the centre is resolved here but never handed to
        # ndimage.rotate, so a custom (cx, cy) has no effect on this path.
        if cx is None:
            cx = width / 2
        if cy is None:
            cy = height / 2

        # NOTE(review): the original author flagged that scipy's angle
        # convention may differ from OpenCV's - confirm the direction matches.
        turned = cpndimage.rotate(frame, angle, reshape=False, order=1)
        return _to_cpu(turned)

    # Fallback to OpenCV
    import cv2
    height, width = img.shape[:2]
    pivot_x = width / 2 if cx is None else cx
    pivot_y = height / 2 if cy is None else cy
    matrix = cv2.getRotationMatrix2D((pivot_x, pivot_y), angle, 1.0)
    return cv2.warpAffine(img, matrix, (width, height))
|
||||
|
||||
|
||||
def prim_scale(img, sx, sy, cx=None, cy=None):
    """Scale ``img`` by (sx, sy) while keeping the original frame size.

    The CPU path builds an affine matrix that scales about (cx, cy),
    defaulting to the image centre. The GPU path zooms with
    ``cupyx.scipy.ndimage.zoom`` and then centre-crops or zero-pads the
    result back to the input shape.
    """
    if GPU_AVAILABLE:
        frame = _to_gpu(img)
        height, width = frame.shape[:2]

        # NOTE(review): resolved but unused below - the crop/pad step is
        # always centred, so a custom (cx, cy) has no effect on this path.
        if cx is None:
            cx = width / 2
        if cy is None:
            cy = height / 2

        # Leave the channel axis (if any) unscaled.
        factors = (sy, sx, 1) if frame.ndim == 3 else (sy, sx)
        zoomed = cpndimage.zoom(frame, factors, order=1)

        # Crop/pad to original size
        zoomed_h, zoomed_w = zoomed.shape[:2]
        canvas = cp.zeros_like(frame)

        # Positive offset on the source side means crop, on the destination
        # side means pad.
        from_y = max(0, (zoomed_h - height) // 2)
        from_x = max(0, (zoomed_w - width) // 2)
        to_y = max(0, (height - zoomed_h) // 2)
        to_x = max(0, (width - zoomed_w) // 2)

        rows = min(height - to_y, zoomed_h - from_y)
        cols = min(width - to_x, zoomed_w - from_x)

        canvas[to_y:to_y + rows, to_x:to_x + cols] = \
            zoomed[from_y:from_y + rows, from_x:from_x + cols]
        return _to_cpu(canvas)

    import cv2
    height, width = img.shape[:2]
    if cx is None:
        cx = width / 2
    if cy is None:
        cy = height / 2
    matrix = np.float32([
        [sx, 0, cx * (1 - sx)],
        [0, sy, cy * (1 - sy)]
    ])
    return cv2.warpAffine(img, matrix, (width, height))
|
||||
|
||||
|
||||
def prim_translate(img, dx, dy):
    """Shift ``img`` by ``dx`` pixels horizontally and ``dy`` vertically.

    Uses ``cv2.warpAffine`` on the CPU and ``cupyx.scipy.ndimage.shift``
    (linear interpolation) on the GPU; the frame size is unchanged.
    """
    if GPU_AVAILABLE:
        frame = _to_gpu(img)
        # ndimage.shift takes one offset per axis: rows, columns and, for
        # colour images, a zero offset so channels are not shifted.
        offsets = (dy, dx, 0) if frame.ndim == 3 else (dy, dx)
        moved = cpndimage.shift(frame, offsets, order=1)
        return _to_cpu(moved)

    import cv2
    height, width = img.shape[:2]
    matrix = np.float32([[1, 0, dx], [0, 1, dy]])
    return cv2.warpAffine(img, matrix, (width, height))
|
||||
|
||||
|
||||
def prim_flip_h(img):
    """Mirror the image left-to-right (reverse axis 1)."""
    if not GPU_AVAILABLE:
        return np.flip(img, axis=1)
    mirrored = cp.flip(_to_gpu(img), axis=1)
    return _to_cpu(mirrored)
|
||||
|
||||
|
||||
def prim_flip_v(img):
    """Mirror the image top-to-bottom (reverse axis 0)."""
    if not GPU_AVAILABLE:
        return np.flip(img, axis=0)
    mirrored = cp.flip(_to_gpu(img), axis=0)
    return _to_cpu(mirrored)
|
||||
|
||||
|
||||
def prim_flip(img, direction="horizontal"):
    """Flip ``img`` along the requested direction.

    Accepted values: "horizontal"/"h", "vertical"/"v", and "both"/"hv"/"vh".
    Any other value returns the image unchanged.
    """
    if direction in ("horizontal", "h"):
        return prim_flip_h(img)
    if direction in ("vertical", "v"):
        return prim_flip_v(img)
    if direction not in ("both", "hv", "vh"):
        return img

    # Both axes: flip vertically first, then horizontally.
    if GPU_AVAILABLE:
        upside_down = cp.flip(_to_gpu(img), axis=0)
        return _to_cpu(cp.flip(upside_down, axis=1))
    return np.flip(np.flip(img, axis=0), axis=1)
|
||||
|
||||
|
||||
# CUDA kernel for ripple effect
|
||||
if GPU_AVAILABLE:
    # Radial ripple warp, one thread per output pixel. Both buffers are
    # indexed as packed row-major pixels, (y * width + x) * channels + c, so
    # callers must pass C-contiguous uint8 arrays of identical shape.
    _ripple_kernel = cp.RawKernel(r'''
extern "C" __global__
void ripple(const unsigned char* src, unsigned char* dst,
            int width, int height, int channels,
            float amplitude, float frequency, float decay,
            float speed, float time, float cx, float cy) {
    int x = blockDim.x * blockIdx.x + threadIdx.x;
    int y = blockDim.y * blockIdx.y + threadIdx.y;

    if (x >= width || y >= height) return;

    // Distance from center
    float dx = x - cx;
    float dy = y - cy;
    float dist = sqrtf(dx * dx + dy * dy);

    // Ripple displacement
    float wave = sinf(dist * frequency * 0.1f - time * speed) * amplitude;
    float falloff = expf(-dist * decay * 0.01f);
    float displacement = wave * falloff;

    // Direction from center
    float len = dist + 0.0001f;  // Avoid division by zero
    float dir_x = dx / len;
    float dir_y = dy / len;

    // Source coordinates
    float src_x = x - dir_x * displacement;
    float src_y = y - dir_y * displacement;

    // Clamp to bounds
    src_x = fmaxf(0.0f, fminf(width - 1.0f, src_x));
    src_y = fmaxf(0.0f, fminf(height - 1.0f, src_y));

    // Bilinear interpolation
    int x0 = (int)src_x;
    int y0 = (int)src_y;
    int x1 = min(x0 + 1, width - 1);
    int y1 = min(y0 + 1, height - 1);

    float fx = src_x - x0;
    float fy = src_y - y0;

    for (int c = 0; c < channels; c++) {
        float v00 = src[(y0 * width + x0) * channels + c];
        float v10 = src[(y0 * width + x1) * channels + c];
        float v01 = src[(y1 * width + x0) * channels + c];
        float v11 = src[(y1 * width + x1) * channels + c];

        float v0 = v00 * (1 - fx) + v10 * fx;
        float v1 = v01 * (1 - fx) + v11 * fx;
        float val = v0 * (1 - fy) + v1 * fy;

        // Clamp, then round to nearest (+0.5f) instead of truncating so the
        // interpolated value is not biased downwards.
        dst[(y * width + x) * channels + c] =
            (unsigned char)(fminf(255.0f, fmaxf(0.0f, val)) + 0.5f);
    }
}
''', 'ripple')
|
||||
|
||||
|
||||
def prim_ripple(img, amplitude=10.0, frequency=8.0, decay=2.0, speed=5.0,
                time=0.0, center_x=None, center_y=None):
    """Apply a radial ripple distortion to an image.

    Every output pixel samples the input at a position shifted along the ray
    from the ripple centre. The shift is
    ``sin(dist * frequency * 0.1 - time * speed) * amplitude``
    attenuated by ``exp(-dist * decay * 0.01)``, where ``dist`` is the pixel's
    distance from the centre. Sampling is bilinear and coordinates that fall
    outside the image are clamped to the nearest edge pixel on both the CPU
    and the GPU path.

    Args:
        img: 2-D (grayscale) or 3-D (height, width, channels) image array.
        amplitude: Peak displacement in pixels.
        frequency: Spatial frequency of the wave (scaled by 0.1 internally).
        decay: Falloff rate with distance (scaled by 0.01 internally).
        speed: Multiplier applied to ``time`` to advance the wave phase.
        time: Animation time.
        center_x: Ripple centre x; defaults to ``w / 2``.
        center_y: Ripple centre y; defaults to ``h / 2``.

    Returns:
        The distorted image. The GPU path converts the input to uint8 and
        returns whatever ``_to_cpu`` produces; the CPU path returns the
        result of ``cv2.remap``.
    """
    h, w = img.shape[:2]
    channels = img.shape[2] if img.ndim == 3 else 1

    if center_x is None:
        center_x = w / 2
    if center_y is None:
        center_y = h / 2

    if not GPU_AVAILABLE:
        # CPU fallback using coordinate mapping
        import cv2
        y_coords, x_coords = np.mgrid[0:h, 0:w].astype(np.float32)

        dx = x_coords - center_x
        dy = y_coords - center_y
        dist = np.sqrt(dx**2 + dy**2)

        wave = np.sin(dist * frequency * 0.1 - time * speed) * amplitude
        falloff = np.exp(-dist * decay * 0.01)
        displacement = wave * falloff

        length = dist + 0.0001  # Avoid division by zero at the centre pixel
        dir_x = dx / length
        dir_y = dy / length

        map_x = (x_coords - dir_x * displacement).astype(np.float32)
        map_y = (y_coords - dir_y * displacement).astype(np.float32)

        # Replicate edge pixels so out-of-range samples match the GPU kernel,
        # which clamps source coordinates instead of producing black borders.
        return cv2.remap(img, map_x, map_y, cv2.INTER_LINEAR,
                         borderMode=cv2.BORDER_REPLICATE)

    # GPU implementation
    # The raw kernel indexes the buffer linearly, so it must be C-contiguous.
    img_gpu = cp.ascontiguousarray(_to_gpu(img.astype(np.uint8)))
    if img_gpu.ndim == 2:
        img_gpu = img_gpu[:, :, cp.newaxis]
        channels = 1

    dst = cp.zeros_like(img_gpu)

    # One thread per output pixel, rounded up to whole 16x16 blocks
    block = (16, 16)
    grid = ((w + block[0] - 1) // block[0], (h + block[1] - 1) // block[1])

    _ripple_kernel(grid, block, (
        img_gpu, dst,
        np.int32(w), np.int32(h), np.int32(channels),
        np.float32(amplitude), np.float32(frequency), np.float32(decay),
        np.float32(speed), np.float32(time),
        np.float32(center_x), np.float32(center_y)
    ))

    result = _to_cpu(dst)
    if channels == 1:
        # Drop the channel axis that was added for single-channel input
        result = result[:, :, 0]
    return result
|
||||
|
||||
|
||||
# CUDA kernel for fast rotation with bilinear interpolation
if GPU_AVAILABLE:
    # Each thread produces one output pixel: it maps the pixel back into the
    # source image with the supplied cos/sin pair and samples bilinearly.
    # Pixels whose source position lies outside the image are written as 0.
    _rotate_kernel = cp.RawKernel(r'''
extern "C" __global__
void rotate_img(const unsigned char* src, unsigned char* dst,
                int width, int height, int channels,
                float cos_a, float sin_a, float cx, float cy) {
    int x = blockDim.x * blockIdx.x + threadIdx.x;
    int y = blockDim.y * blockIdx.y + threadIdx.y;

    if (x >= width || y >= height) return;

    // Translate to center, rotate, translate back
    float dx = x - cx;
    float dy = y - cy;

    float src_x = cos_a * dx + sin_a * dy + cx;
    float src_y = -sin_a * dx + cos_a * dy + cy;

    // Check bounds. The last row/column (src == size - 1) is a valid sample;
    // rejecting it would blank the image border even for a zero-degree turn.
    if (src_x < 0 || src_x > width - 1 || src_y < 0 || src_y > height - 1) {
        for (int c = 0; c < channels; c++) {
            dst[(y * width + x) * channels + c] = 0;
        }
        return;
    }

    // Bilinear interpolation
    int x0 = (int)src_x;
    int y0 = (int)src_y;
    // Clamp the far neighbour so sampling on the last row/column stays in range
    int x1 = min(x0 + 1, width - 1);
    int y1 = min(y0 + 1, height - 1);

    float fx = src_x - x0;
    float fy = src_y - y0;

    for (int c = 0; c < channels; c++) {
        float v00 = src[(y0 * width + x0) * channels + c];
        float v10 = src[(y0 * width + x1) * channels + c];
        float v01 = src[(y1 * width + x0) * channels + c];
        float v11 = src[(y1 * width + x1) * channels + c];

        float v0 = v00 * (1 - fx) + v10 * fx;
        float v1 = v01 * (1 - fx) + v11 * fx;
        float val = v0 * (1 - fy) + v1 * fy;

        dst[(y * width + x) * channels + c] = (unsigned char)fminf(255.0f, fmaxf(0.0f, val));
    }
}
''', 'rotate_img')
|
||||
|
||||
|
||||
def prim_rotate_gpu(img, angle, cx=None, cy=None):
    """Rotate an image with the custom CUDA rotation kernel.

    Delegates to ``prim_rotate`` when no GPU is available. Otherwise the
    image is converted to uint8, rotated by ``angle`` degrees about
    ``(cx, cy)`` (defaulting to ``(w / 2, h / 2)``) and passed back through
    ``_to_cpu``. Single-channel results are returned as 2-D arrays.
    """
    if not GPU_AVAILABLE:
        return prim_rotate(img, angle, cx, cy)

    height, width = img.shape[:2]
    n_channels = img.shape[2] if img.ndim == 3 else 1

    cx = width / 2 if cx is None else cx
    cy = height / 2 if cy is None else cy

    source = _to_gpu(img.astype(np.uint8))
    if source.ndim == 2:
        # The kernel expects an explicit channel axis
        source = source[:, :, cp.newaxis]
        n_channels = 1

    output = cp.zeros_like(source)

    # The kernel takes the angle as a precomputed cos/sin pair (radians)
    theta = np.radians(angle)
    cos_theta = np.cos(theta)
    sin_theta = np.sin(theta)

    # One thread per pixel, rounded up to whole 16x16 blocks
    threads = (16, 16)
    blocks = (
        (width + threads[0] - 1) // threads[0],
        (height + threads[1] - 1) // threads[1],
    )

    kernel_args = (
        source, output,
        np.int32(width), np.int32(height), np.int32(n_channels),
        np.float32(cos_theta), np.float32(sin_theta),
        np.float32(cx), np.float32(cy),
    )
    _rotate_kernel(blocks, threads, kernel_args)

    rotated = _to_cpu(output)
    return rotated[:, :, 0] if n_channels == 1 else rotated
|
||||
|
||||
|
||||
# CPU primitives serve as fallbacks for everything that is not GPU-accelerated
def _get_cpu_primitives():
    """Return the PRIMITIVES table of the CPU geometry module.

    The import is done inside the function, so the CPU module is only loaded
    when this is called.
    """
    from sexp_effects.primitive_libs.geometry import PRIMITIVES as cpu_primitives
    return cpu_primitives
|
||||
|
||||
|
||||
# Export table: a copy of the CPU primitives with the GPU-aware versions
# layered on top (later keys win).
PRIMITIVES = {
    **_get_cpu_primitives(),
    'translate': prim_translate,
    'rotate-img': prim_rotate_gpu if GPU_AVAILABLE else prim_rotate,
    'scale-img': prim_scale,
    'flip-h': prim_flip_h,
    'flip-v': prim_flip_v,
    'flip': prim_flip,
    # Note: ripple-displace uses CPU version (different API - returns coords, not image)
}
|
||||
@@ -8,10 +8,16 @@ import cv2
|
||||
|
||||
|
||||
def prim_width(img):
    """Return the width of an image array (the size of its second axis).

    Raises:
        TypeError: if ``img`` is a list or tuple rather than an array.
    """
    if not isinstance(img, (list, tuple)):
        return img.shape[1]
    raise TypeError(f"image:width expects an image array, got {type(img).__name__} with {len(img)} elements")
|
||||
|
||||
|
||||
def prim_height(img):
    """Return the height of an image array (the size of its first axis).

    Mirrors ``prim_width``: lists and tuples are rejected with a short
    message that reports the type and length only, instead of printing debug
    output and embedding the whole sequence in the error text.

    Raises:
        TypeError: if ``img`` is a list or tuple rather than an array.
    """
    if isinstance(img, (list, tuple)):
        raise TypeError(f"image:height expects an image array, got {type(img).__name__} with {len(img)} elements")
    return img.shape[0]
|
||||
|
||||
|
||||
|
||||
@@ -3,13 +3,52 @@ Streaming primitives for video/audio processing.
|
||||
|
||||
These primitives handle video source reading and audio analysis,
|
||||
keeping the interpreter completely generic.
|
||||
|
||||
GPU Acceleration:
|
||||
- Set STREAMING_GPU_PERSIST=1 to output CuPy arrays (frames stay on GPU)
|
||||
- Hardware video decoding (NVDEC) is used when available
|
||||
- Dramatically improves performance on GPU nodes
|
||||
"""
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import subprocess
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# Try to import CuPy for GPU acceleration
|
||||
try:
|
||||
import cupy as cp
|
||||
CUPY_AVAILABLE = True
|
||||
except ImportError:
|
||||
cp = None
|
||||
CUPY_AVAILABLE = False
|
||||
|
||||
# GPU persistence mode - output CuPy arrays instead of numpy
|
||||
GPU_PERSIST = os.environ.get("STREAMING_GPU_PERSIST", "1") == "1" and CUPY_AVAILABLE
|
||||
|
||||
# Cached result of the hardware-decode probe; None means "not probed yet"
_HWDEC_AVAILABLE = None


def _check_hwdec():
    """Report whether ffmpeg CUDA hardware decoding looks usable.

    The probe runs once: ``nvidia-smi`` must exit with status 0 and
    ``ffmpeg -hwaccels`` must list ``cuda``. Any exception raised while
    probing counts as "not available". The answer is stored in the
    module-level cache and returned on every later call.
    """
    global _HWDEC_AVAILABLE
    if _HWDEC_AVAILABLE is None:
        try:
            gpu_probe = subprocess.run(["nvidia-smi"], capture_output=True, timeout=2)
            if gpu_probe.returncode == 0:
                accel_probe = subprocess.run(["ffmpeg", "-hwaccels"], capture_output=True, text=True, timeout=5)
                _HWDEC_AVAILABLE = "cuda" in accel_probe.stdout
            else:
                _HWDEC_AVAILABLE = False
        except Exception:
            # Missing binaries and timeouts all mean hardware decode is unusable
            _HWDEC_AVAILABLE = False
    return _HWDEC_AVAILABLE
|
||||
|
||||
|
||||
class VideoSource:
|
||||
"""Video source with persistent streaming pipe for fast sequential reads."""
|
||||
@@ -57,7 +96,10 @@ class VideoSource:
|
||||
print(f"VideoSource: {self.path.name} duration={self._duration} size={self._frame_size}", file=sys.stderr)
|
||||
|
||||
def _start_stream(self, seek_time: float = 0):
|
||||
"""Start or restart the ffmpeg streaming process."""
|
||||
"""Start or restart the ffmpeg streaming process.
|
||||
|
||||
Uses NVIDIA hardware decoding (NVDEC) when available for better performance.
|
||||
"""
|
||||
if self._proc:
|
||||
self._proc.kill()
|
||||
self._proc = None
|
||||
@@ -67,15 +109,23 @@ class VideoSource:
|
||||
raise FileNotFoundError(f"Video file not found: {self.path}")
|
||||
|
||||
w, h = self._frame_size
|
||||
cmd = [
|
||||
"ffmpeg", "-v", "error", # Show errors instead of quiet
|
||||
|
||||
# Build ffmpeg command with optional hardware decode
|
||||
cmd = ["ffmpeg", "-v", "error"]
|
||||
|
||||
# Use hardware decode if available (significantly faster)
|
||||
if _check_hwdec():
|
||||
cmd.extend(["-hwaccel", "cuda"])
|
||||
|
||||
cmd.extend([
|
||||
"-ss", f"{seek_time:.3f}",
|
||||
"-i", str(self.path),
|
||||
"-f", "rawvideo", "-pix_fmt", "rgb24",
|
||||
"-s", f"{w}x{h}",
|
||||
"-r", str(self.fps), # Output at specified fps
|
||||
"-"
|
||||
]
|
||||
])
|
||||
|
||||
self._proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
self._stream_time = seek_time
|
||||
|
||||
@@ -88,8 +138,11 @@ class VideoSource:
|
||||
if err:
|
||||
print(f"ffmpeg error for {self.path.name}: {err}", file=sys.stderr)
|
||||
|
||||
def _read_frame_from_stream(self) -> np.ndarray:
|
||||
"""Read one frame from the stream."""
|
||||
def _read_frame_from_stream(self):
|
||||
"""Read one frame from the stream.
|
||||
|
||||
Returns CuPy array if GPU_PERSIST is enabled, numpy array otherwise.
|
||||
"""
|
||||
w, h = self._frame_size
|
||||
frame_size = w * h * 3
|
||||
|
||||
@@ -100,7 +153,12 @@ class VideoSource:
|
||||
if len(data) < frame_size:
|
||||
return None
|
||||
|
||||
return np.frombuffer(data, dtype=np.uint8).reshape((h, w, 3)).copy()
|
||||
frame = np.frombuffer(data, dtype=np.uint8).reshape((h, w, 3)).copy()
|
||||
|
||||
# Transfer to GPU if persistence mode enabled
|
||||
if GPU_PERSIST:
|
||||
return cp.asarray(frame)
|
||||
return frame
|
||||
|
||||
def read(self) -> np.ndarray:
|
||||
"""Read frame (uses last cached or t=0)."""
|
||||
@@ -120,6 +178,9 @@ class VideoSource:
|
||||
seek_time = t
|
||||
if self._duration and self._duration > 0:
|
||||
seek_time = t % self._duration
|
||||
# If we're within 0.1s of the end, wrap to beginning to avoid EOF issues
|
||||
if seek_time > self._duration - 0.1:
|
||||
seek_time = 0.0
|
||||
|
||||
# Decide whether to seek or continue streaming
|
||||
# Seek if: no stream, going backwards (more than 1 frame), or jumping more than 2 seconds ahead
|
||||
@@ -138,24 +199,59 @@ class VideoSource:
|
||||
self._start_stream(seek_time)
|
||||
|
||||
# Skip frames to reach target time
|
||||
skip_retries = 0
|
||||
while self._stream_time + self._frame_time <= seek_time:
|
||||
frame = self._read_frame_from_stream()
|
||||
if frame is None:
|
||||
# Stream ended, restart from seek point
|
||||
# Stream ended or failed - restart from seek point
|
||||
import time
|
||||
skip_retries += 1
|
||||
if skip_retries > 3:
|
||||
# Give up skipping, just start fresh at seek_time
|
||||
self._start_stream(seek_time)
|
||||
time.sleep(0.1)
|
||||
break
|
||||
self._start_stream(seek_time)
|
||||
time.sleep(0.05)
|
||||
continue
|
||||
self._stream_time += self._frame_time
|
||||
skip_retries = 0 # Reset on successful read
|
||||
|
||||
# Read the target frame
|
||||
# Read the target frame with retry logic
|
||||
frame = None
|
||||
max_retries = 3
|
||||
for attempt in range(max_retries):
|
||||
frame = self._read_frame_from_stream()
|
||||
if frame is None:
|
||||
if frame is not None:
|
||||
break
|
||||
|
||||
# Stream failed - try restarting
|
||||
import sys
|
||||
import time
|
||||
print(f"RETRY {self.path.name}: attempt {attempt+1}/{max_retries} at t={t:.2f}", file=sys.stderr)
|
||||
|
||||
# Check for ffmpeg errors
|
||||
if self._proc and self._proc.stderr:
|
||||
try:
|
||||
import select
|
||||
readable, _, _ = select.select([self._proc.stderr], [], [], 0.1)
|
||||
if readable:
|
||||
err = self._proc.stderr.read(4096).decode('utf-8', errors='ignore')
|
||||
if err:
|
||||
raise RuntimeError(f"Failed to read video frame from {self.path.name}: {err}")
|
||||
raise RuntimeError(f"Failed to read video frame from {self.path.name} at t={t:.2f} - file may be corrupted or inaccessible")
|
||||
print(f"ffmpeg error: {err}", file=sys.stderr)
|
||||
except:
|
||||
pass
|
||||
|
||||
# Wait a bit and restart
|
||||
time.sleep(0.1)
|
||||
self._start_stream(seek_time)
|
||||
|
||||
# Give ffmpeg time to start
|
||||
time.sleep(0.1)
|
||||
|
||||
if frame is None:
|
||||
import sys
|
||||
raise RuntimeError(f"Failed to read video frame from {self.path.name} at t={t:.2f} after {max_retries} retries")
|
||||
else:
|
||||
self._stream_time += self._frame_time
|
||||
|
||||
|
||||
502
sexp_effects/primitive_libs/streaming_gpu.py
Normal file
502
sexp_effects/primitive_libs/streaming_gpu.py
Normal file
@@ -0,0 +1,502 @@
|
||||
"""
|
||||
GPU-Accelerated Streaming Primitives
|
||||
|
||||
Provides GPU-native video source and frame processing.
|
||||
Frames stay on GPU memory throughout the pipeline for maximum performance.
|
||||
|
||||
Architecture:
|
||||
- GPUFrame: Wrapper that tracks whether data is on CPU or GPU
|
||||
- GPUVideoSource: Hardware-accelerated decode to GPU memory
|
||||
- GPU primitives operate directly on GPU frames
|
||||
- Transfer to CPU only at final output
|
||||
|
||||
Requirements:
|
||||
- CuPy for CUDA support
|
||||
- FFmpeg with NVDEC support (for hardware decode)
|
||||
- NVIDIA GPU with CUDA capability
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import subprocess
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from typing import Optional, Tuple, Union
|
||||
|
||||
# Try to import CuPy
|
||||
try:
|
||||
import cupy as cp
|
||||
GPU_AVAILABLE = True
|
||||
except ImportError:
|
||||
cp = None
|
||||
GPU_AVAILABLE = False
|
||||
|
||||
# Memoised probe result; stays None until check_hwdec_available() first runs
_HWDEC_AVAILABLE: Optional[bool] = None


def check_hwdec_available() -> bool:
    """Report whether NVIDIA hardware decode appears to be available.

    Two external commands are probed on the first call: ``nvidia-smi`` must
    exit with status 0, and the output of ``ffmpeg -hwaccels`` must contain
    ``cuda``. Any exception during probing yields False. The result is
    cached in ``_HWDEC_AVAILABLE`` and reused by later calls.
    """
    global _HWDEC_AVAILABLE
    if _HWDEC_AVAILABLE is None:
        available = False
        try:
            # A GPU is considered present only if nvidia-smi succeeds
            smi = subprocess.run(["nvidia-smi"], capture_output=True, timeout=2)
            if smi.returncode == 0:
                # ffmpeg must also advertise the cuda hwaccel
                hwaccels = subprocess.run(
                    ["ffmpeg", "-hwaccels"],
                    capture_output=True,
                    text=True,
                    timeout=5,
                )
                available = "cuda" in hwaccels.stdout
        except Exception:
            available = False
        _HWDEC_AVAILABLE = available
    return _HWDEC_AVAILABLE
|
||||
|
||||
|
||||
class GPUFrame:
    """
    Frame container that tracks data location (CPU/GPU).

    Holds up to two copies of the same frame: a numpy array and a CuPy array.
    The copy that was not supplied at construction is created lazily the
    first time the ``cpu`` / ``gpu`` property is read and is kept afterwards,
    so repeated access does not transfer again.
    """

    def __init__(self, data: Union[np.ndarray, 'cp.ndarray'], on_gpu: Optional[bool] = None):
        """Wrap ``data`` as a frame.

        Args:
            data: numpy array, CuPy array, or anything ``np.asarray`` accepts.
            on_gpu: True to store on the GPU (honoured only when CuPy is
                available), False to store on the CPU, None to keep the data
                wherever it already lives.
        """
        self._cpu_data: Optional[np.ndarray] = None
        self._gpu_data = None  # Optional[cp.ndarray]

        # Only consult ``cp`` when CuPy imported; it is None otherwise
        data_is_gpu = bool(GPU_AVAILABLE and isinstance(data, cp.ndarray))

        if on_gpu is None:
            # Auto-detect based on type
            on_gpu = data_is_gpu

        if on_gpu and GPU_AVAILABLE:
            self._gpu_data = data if data_is_gpu else cp.asarray(data)
        elif data_is_gpu:
            self._cpu_data = cp.asnumpy(data)
        else:
            # Covers lists/tuples and the "GPU requested but unavailable" case
            # without touching ``cp``, which may be None here.
            self._cpu_data = np.asarray(data)

    @property
    def cpu(self) -> np.ndarray:
        """Get frame as numpy array (transfers from GPU if needed)."""
        if self._cpu_data is None:
            if self._gpu_data is not None and GPU_AVAILABLE:
                self._cpu_data = cp.asnumpy(self._gpu_data)
            else:
                raise ValueError("No frame data available")
        return self._cpu_data

    @property
    def gpu(self):
        """Get frame as CuPy array (transfers to GPU if needed).

        Raises:
            RuntimeError: if CuPy is not available.
            ValueError: if the frame holds no data at all.
        """
        if not GPU_AVAILABLE:
            raise RuntimeError("GPU not available")
        if self._gpu_data is None:
            if self._cpu_data is not None:
                self._gpu_data = cp.asarray(self._cpu_data)
            else:
                raise ValueError("No frame data available")
        return self._gpu_data

    @property
    def is_on_gpu(self) -> bool:
        """Check if data is currently on GPU."""
        return self._gpu_data is not None

    @property
    def shape(self) -> Tuple[int, ...]:
        """Get frame shape (read from the GPU copy when one exists)."""
        if self._gpu_data is not None:
            return self._gpu_data.shape
        return self._cpu_data.shape

    @property
    def dtype(self):
        """Get frame dtype (read from the GPU copy when one exists)."""
        if self._gpu_data is not None:
            return self._gpu_data.dtype
        return self._cpu_data.dtype

    def numpy(self) -> np.ndarray:
        """Alias for cpu property."""
        return self.cpu

    def cupy(self):
        """Alias for gpu property."""
        return self.gpu

    def free_cpu(self):
        """Drop the CPU copy, but only if a GPU copy exists to fall back on."""
        if self._gpu_data is not None:
            self._cpu_data = None

    def free_gpu(self):
        """Drop the GPU copy, but only if a CPU copy exists to fall back on."""
        if self._cpu_data is not None:
            self._gpu_data = None
|
||||
|
||||
|
||||
class GPUVideoSource:
    """
    Video source that decodes through a persistent ffmpeg subprocess.

    ffmpeg is started with ``-hwaccel cuda`` when GPU mode is in effect and
    streams raw rgb24 frames over a pipe. Each frame is read into host memory
    and wrapped in a GPUFrame, which uploads it to the GPU in GPU mode.

    GPU mode requires the caller's ``prefer_gpu`` request, an importable CuPy
    and a successful ``check_hwdec_available()`` probe; otherwise frames stay
    on the CPU.

    The object can be used as a context manager; leaving the ``with`` block
    calls ``close()``.
    """

    def __init__(self, path: str, fps: float = 30, prefer_gpu: bool = True):
        self.path = Path(path)
        self.fps = fps
        self.prefer_gpu = prefer_gpu and GPU_AVAILABLE and check_hwdec_available()

        self._frame_size: Optional[Tuple[int, int]] = None
        self._duration: Optional[float] = None
        self._proc = None
        self._stream_time = 0.0  # timestamp of the next frame in the pipe
        self._frame_time = 1.0 / fps
        self._last_read_time = -1
        self._cached_frame: Optional[GPUFrame] = None

        # Get video info
        self._probe_video()

        print(f"[GPUVideoSource] {self.path.name}: {self._frame_size}, "
              f"duration={self._duration:.1f}s, gpu={self.prefer_gpu}", file=sys.stderr)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        return False

    def _probe_video(self):
        """Probe the file with ffprobe and fill in frame size and duration.

        Falls back to 720x720 and 60 seconds when ffprobe yields no usable
        metadata, including the case where it prints nothing parseable.
        """
        cmd = ["ffprobe", "-v", "quiet", "-print_format", "json",
               "-show_streams", "-show_format", str(self.path)]
        result = subprocess.run(cmd, capture_output=True, text=True)
        try:
            info = json.loads(result.stdout)
        except json.JSONDecodeError:
            # ffprobe printed nothing parseable; the defaults below take over
            info = {}

        for stream in info.get("streams", []):
            if stream.get("codec_type") == "video":
                self._frame_size = (stream.get("width", 720), stream.get("height", 720))
                if "duration" in stream:
                    self._duration = float(stream["duration"])
                elif "tags" in stream and "DURATION" in stream["tags"]:
                    # Tag format is H:M:S(.fraction)
                    dur_str = stream["tags"]["DURATION"]
                    parts = dur_str.split(":")
                    if len(parts) == 3:
                        h, m, s = parts
                        self._duration = int(h) * 3600 + int(m) * 60 + float(s)
                break  # only the first video stream is used

        if self._duration is None and "format" in info:
            if "duration" in info["format"]:
                self._duration = float(info["format"]["duration"])

        if not self._frame_size:
            self._frame_size = (720, 720)
        if not self._duration:
            self._duration = 60.0

    def _stop_stream(self):
        """Kill the current ffmpeg process, close its pipes and reap it."""
        proc, self._proc = self._proc, None
        if proc is None:
            return
        proc.kill()
        for pipe in (proc.stdout, proc.stderr):
            if pipe is not None:
                pipe.close()
        # Reap the child so repeated seeks do not accumulate zombie processes
        proc.wait()

    def _start_stream(self, seek_time: float = 0):
        """Start (or restart) the ffmpeg decode process at ``seek_time``.

        Raises:
            FileNotFoundError: if the video file does not exist.
        """
        self._stop_stream()

        if not self.path.exists():
            raise FileNotFoundError(f"Video file not found: {self.path}")

        w, h = self._frame_size

        # Build ffmpeg command
        cmd = ["ffmpeg", "-v", "error"]

        # Hardware decode if available
        if self.prefer_gpu:
            cmd.extend(["-hwaccel", "cuda"])

        cmd.extend([
            "-ss", f"{seek_time:.3f}",
            "-i", str(self.path),
            "-f", "rawvideo",
            "-pix_fmt", "rgb24",
            "-s", f"{w}x{h}",
            "-r", str(self.fps),
            "-"
        ])

        # NOTE(review): stderr is piped but never drained in this class
        self._proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        self._stream_time = seek_time

    def _read_frame_raw(self) -> Optional[np.ndarray]:
        """Read one rgb24 frame from the ffmpeg pipe.

        Returns None when there is no running process or the pipe delivers
        fewer bytes than a full frame.
        """
        w, h = self._frame_size
        frame_size = w * h * 3

        if not self._proc or self._proc.poll() is not None:
            return None

        data = self._proc.stdout.read(frame_size)
        if len(data) < frame_size:
            return None

        # Copy so the frame owns writable memory independent of ``data``
        return np.frombuffer(data, dtype=np.uint8).reshape((h, w, 3)).copy()

    def read_at(self, t: float) -> Optional[GPUFrame]:
        """
        Read frame at specific time.

        ``t`` wraps around the video duration. The running stream is reused
        for small forward steps and restarted for backward jumps or jumps of
        more than two seconds. If no frame can be read, the previously cached
        frame (possibly None) is returned.

        Returns GPUFrame with data on GPU if GPU mode enabled.
        """
        # Cache check
        if t == self._last_read_time and self._cached_frame is not None:
            return self._cached_frame

        # Loop time for shorter videos
        seek_time = t
        if self._duration and self._duration > 0:
            seek_time = t % self._duration
            # Within 0.1s of the end: wrap to the start
            if seek_time > self._duration - 0.1:
                seek_time = 0.0

        # Determine if we need to seek
        need_seek = (
            self._proc is None or
            self._proc.poll() is not None or
            seek_time < self._stream_time - self._frame_time or
            seek_time > self._stream_time + 2.0
        )

        if need_seek:
            self._start_stream(seek_time)

        # Skip frames to reach target
        while self._stream_time + self._frame_time <= seek_time:
            frame = self._read_frame_raw()
            if frame is None:
                # Stream ended while skipping: restart directly at the target
                self._start_stream(seek_time)
                break
            self._stream_time += self._frame_time

        # Read target frame
        frame_np = self._read_frame_raw()
        if frame_np is None:
            return self._cached_frame

        self._stream_time += self._frame_time
        self._last_read_time = t

        # Create GPUFrame - transfer to GPU if in GPU mode
        self._cached_frame = GPUFrame(frame_np, on_gpu=self.prefer_gpu)

        # Free CPU copy if on GPU (saves memory)
        if self.prefer_gpu and self._cached_frame.is_on_gpu:
            self._cached_frame.free_cpu()

        return self._cached_frame

    def read(self) -> Optional[GPUFrame]:
        """Return the cached frame, or the frame at t=0 if nothing was read yet."""
        if self._cached_frame is not None:
            return self._cached_frame
        return self.read_at(0)

    @property
    def size(self) -> Tuple[int, int]:
        """Frame size as (width, height)."""
        return self._frame_size

    @property
    def duration(self) -> float:
        """Video duration in seconds (60.0 if it could not be probed)."""
        return self._duration

    def close(self):
        """Close the video source and release the ffmpeg process."""
        self._stop_stream()
|
||||
|
||||
|
||||
# GPU-aware primitive functions
|
||||
|
||||
def gpu_blend(frame_a: GPUFrame, frame_b: GPUFrame, alpha: float = 0.5) -> GPUFrame:
    """
    Blend two frames as ``a * alpha + b * (1 - alpha)``.

    When CuPy is available both frames are accessed through their ``gpu``
    property (uploading them if necessary) and the result stays on the GPU;
    otherwise the blend runs in numpy. The result is clipped to [0, 255]
    before the uint8 cast, matching gpu_brightness and gpu_composite, so an
    ``alpha`` outside [0, 1] saturates instead of wrapping around.

    Args:
        frame_a: Frame weighted by ``alpha``.
        frame_b: Frame weighted by ``1 - alpha``.
        alpha: Blend weight of ``frame_a``.

    Returns:
        A new uint8 GPUFrame.
    """
    if not GPU_AVAILABLE:
        a = frame_a.cpu.astype(np.float32)
        b = frame_b.cpu.astype(np.float32)
        result = np.clip(a * alpha + b * (1 - alpha), 0, 255).astype(np.uint8)
        return GPUFrame(result, on_gpu=False)

    a = frame_a.gpu.astype(cp.float32)
    b = frame_b.gpu.astype(cp.float32)
    result = cp.clip(a * alpha + b * (1 - alpha), 0, 255).astype(cp.uint8)
    return GPUFrame(result, on_gpu=True)
|
||||
|
||||
|
||||
def gpu_resize(frame: GPUFrame, size: Tuple[int, int]) -> GPUFrame:
    """Resize a frame to ``size`` given as (width, height).

    Frames that are not on the GPU, or any frame when CuPy is unavailable,
    are resized with ``cv2.resize``. GPU frames are resized with
    ``cupyx.scipy.ndimage.zoom`` using order-1 interpolation.
    """
    import cv2

    if not (GPU_AVAILABLE and frame.is_on_gpu):
        return GPUFrame(cv2.resize(frame.cpu, size), on_gpu=False)

    # CuPy doesn't have built-in resize, use scipy zoom
    from cupyx.scipy import ndimage as cpndimage

    pixels = frame.gpu
    src_h, src_w = pixels.shape[:2]
    dst_w, dst_h = size

    # Per-axis zoom factors; the channel axis, if any, is left unscaled
    factors = (dst_h / src_h, dst_w / src_w)
    if pixels.ndim == 3:
        factors = factors + (1,)

    return GPUFrame(cpndimage.zoom(pixels, factors, order=1), on_gpu=True)
|
||||
|
||||
|
||||
def gpu_rotate(frame: GPUFrame, angle: float) -> GPUFrame:
    """Rotate a frame by ``angle`` without changing its size.

    GPU frames go through ``cupyx.scipy.ndimage.rotate`` (order 1, no
    reshape). Everything else is rotated on the CPU with an OpenCV affine
    warp about ``(w // 2, h // 2)``.
    """
    if GPU_AVAILABLE and frame.is_on_gpu:
        from cupyx.scipy import ndimage as cpndimage
        turned = cpndimage.rotate(frame.gpu, angle, reshape=False, order=1)
        return GPUFrame(turned, on_gpu=True)

    import cv2
    pixels = frame.cpu
    height, width = pixels.shape[:2]
    matrix = cv2.getRotationMatrix2D((width // 2, height // 2), angle, 1.0)
    turned = cv2.warpAffine(pixels, matrix, (width, height))
    return GPUFrame(turned, on_gpu=False)
|
||||
|
||||
|
||||
def gpu_brightness(frame: GPUFrame, factor: float) -> GPUFrame:
    """Scale pixel values by ``factor``, clipping the result to [0, 255].

    The computation runs with CuPy when the frame is on the GPU and with
    numpy otherwise; the returned uint8 frame stays on the same device.
    """
    on_device = bool(GPU_AVAILABLE and frame.is_on_gpu)
    if on_device:
        xp, pixels = cp, frame.gpu
    else:
        xp, pixels = np, frame.cpu

    scaled = xp.clip(pixels.astype(xp.float32) * factor, 0, 255)
    return GPUFrame(scaled.astype(xp.uint8), on_gpu=on_device)
|
||||
|
||||
|
||||
def gpu_composite(frames: list, weights: list = None) -> GPUFrame:
    """
    Composite multiple frames as a weighted sum.

    A single frame is returned unchanged. Missing weights default to an
    equal share per frame, and weights are normalised when their sum is
    positive. The first frame defines the output shape; frames of a
    different shape are resized to match. The sum is computed with CuPy if
    CuPy is available and at least one frame is on the GPU, otherwise with
    numpy, and is clipped to [0, 255] before the uint8 cast.

    Raises:
        ValueError: if ``frames`` is empty.
    """
    if not frames:
        raise ValueError("No frames to composite")

    count = len(frames)
    if count == 1:
        return frames[0]

    if weights is None:
        weights = [1.0 / count] * count

    # Normalize weights
    weight_sum = sum(weights)
    if weight_sum > 0:
        weights = [value / weight_sum for value in weights]

    if GPU_AVAILABLE and any(f.is_on_gpu for f in frames):
        # GPU accumulation; reading .gpu uploads any CPU-only frame
        reference = frames[0].gpu.shape
        accum = cp.zeros(reference, dtype=cp.float32)

        for frame, weight in zip(frames, weights):
            layer = frame.gpu.astype(cp.float32)
            if layer.shape != reference:
                # Resize to match
                from cupyx.scipy import ndimage as cpndimage
                ref_h, ref_w = reference[:2]
                layer_h, layer_w = layer.shape[:2]
                if layer.ndim == 3:
                    factors = (ref_h / layer_h, ref_w / layer_w, 1)
                else:
                    factors = (ref_h / layer_h, ref_w / layer_w)
                layer = cpndimage.zoom(layer, factors, order=1)
            accum += layer * weight

        return GPUFrame(cp.clip(accum, 0, 255).astype(cp.uint8), on_gpu=True)

    # CPU accumulation
    import cv2
    reference = frames[0].cpu.shape
    accum = np.zeros(reference, dtype=np.float32)

    for frame, weight in zip(frames, weights):
        layer = frame.cpu.astype(np.float32)
        if layer.shape != reference:
            # cv2.resize takes (width, height)
            layer = cv2.resize(layer, (reference[1], reference[0]))
        accum += layer * weight

    return GPUFrame(np.clip(accum, 0, 255).astype(np.uint8), on_gpu=False)
|
||||
|
||||
|
||||
# Primitive registration for streaming interpreter
|
||||
|
||||
def get_primitives():
    """
    Build the primitive table for registration with the interpreter.

    The image primitives accept either GPUFrame objects or raw arrays and
    always hand back the numpy (``.cpu``) view of the result.
    """
    def _as_frame(value):
        """Wrap raw arrays in a GPUFrame; pass existing frames through."""
        return value if isinstance(value, GPUFrame) else GPUFrame(value)

    def prim_make_video_source_gpu(path: str, fps: float = 30):
        """Create GPU-accelerated video source."""
        return GPUVideoSource(path, fps, prefer_gpu=True)

    def prim_gpu_blend(a, b, alpha=0.5):
        """Blend two frames."""
        blended = gpu_blend(_as_frame(a), _as_frame(b), alpha)
        return blended.cpu  # Return numpy for compatibility

    def prim_gpu_rotate(img, angle):
        """Rotate image."""
        return gpu_rotate(_as_frame(img), angle).cpu

    def prim_gpu_brightness(img, factor):
        """Adjust brightness."""
        return gpu_brightness(_as_frame(img), factor).cpu

    table = {}
    table['streaming-gpu:make-video-source'] = prim_make_video_source_gpu
    table['gpu:blend'] = prim_gpu_blend
    table['gpu:rotate'] = prim_gpu_rotate
    table['gpu:brightness'] = prim_gpu_brightness
    return table
|
||||
|
||||
|
||||
# Export
# Public names of this module, i.e. what ``from ... import *`` pulls in:
# the CuPy availability flag, the frame/source classes, the GPU-aware frame
# operations, the interpreter registration hook and the hwdec probe.
__all__ = [
    'GPU_AVAILABLE',
    'GPUFrame',
    'GPUVideoSource',
    'gpu_blend',
    'gpu_resize',
    'gpu_rotate',
    'gpu_brightness',
    'gpu_composite',
    'get_primitives',
    'check_hwdec_available',
]
|
||||
715
sexp_effects/wgsl_compiler.py
Normal file
715
sexp_effects/wgsl_compiler.py
Normal file
@@ -0,0 +1,715 @@
|
||||
"""
|
||||
S-Expression to WGSL Compiler
|
||||
|
||||
Compiles sexp effect definitions to WGSL compute shaders for GPU execution.
|
||||
The compilation happens at effect upload time (AOT), not at runtime.
|
||||
|
||||
Architecture:
|
||||
- Parse sexp AST
|
||||
- Analyze primitives used
|
||||
- Generate WGSL compute shader
|
||||
|
||||
Shader Categories:
|
||||
1. Per-pixel ops: brightness, invert, grayscale, sepia (1 thread per pixel)
|
||||
2. Geometric transforms: rotate, scale, wave, ripple (coordinate remap + sample)
|
||||
3. Neighborhood ops: blur, sharpen, edge detect (sample neighbors)
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Any, Optional, Tuple, Set
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
import math
|
||||
|
||||
from .parser import parse, parse_all, Symbol, Keyword
|
||||
|
||||
|
||||
@dataclass
class WGSLParam:
    """A shader parameter (uniform).

    Plain record of a parameter's name, its WGSL type and its default value.
    """
    name: str  # parameter name
    wgsl_type: str  # f32, i32, u32, vec2f, etc.
    default: Any  # default value for the parameter
|
||||
|
||||
|
||||
@dataclass
class CompiledEffect:
    """Result of compiling an sexp effect to WGSL.

    Bundles the generated shader source with its parameters and the metadata
    flags listed below.
    """
    name: str  # effect name
    wgsl_code: str  # generated WGSL source
    params: List[WGSLParam]  # shader parameters (uniforms)
    workgroup_size: Tuple[int, int, int] = (16, 16, 1)  # defaults to 16x16x1
    # Metadata for runtime
    uses_time: bool = False
    uses_sampling: bool = False  # Needs texture sampler
    category: str = "per_pixel"  # per_pixel, geometric, neighborhood
|
||||
|
||||
|
||||
@dataclass
class CompilerContext:
    """Mutable state carried through the compilation of one effect."""
    effect_name: str = ""
    params: Dict[str, WGSLParam] = field(default_factory=dict)
    locals: Dict[str, str] = field(default_factory=dict)  # local var -> wgsl expr
    required_libs: Set[str] = field(default_factory=set)
    uses_time: bool = False
    uses_sampling: bool = False
    temp_counter: int = 0

    def fresh_temp(self) -> str:
        """Return a new temporary name: ``_t1``, ``_t2``, ... per context."""
        index = self.temp_counter + 1
        self.temp_counter = index
        return f"_t{index}"
|
||||
|
||||
|
||||
class SexpToWGSLCompiler:
    """
    Compiles S-expression effect definitions to WGSL compute shaders.

    Every call to compile() starts from a fresh CompilerContext, so a single
    instance can be reused for several effects.

    The generated shader reads packed 0x00RRGGBB pixels from a storage buffer
    at binding 0, writes packed pixels to binding 1, and takes a `Params`
    uniform at binding 2 laid out as width, height, time followed by the
    effect's own parameters in declaration order.
    """

    # Map sexp types to WGSL types
    TYPE_MAP = {
        'int': 'i32',
        'float': 'f32',
        'bool': 'u32',  # WGSL doesn't have bool in storage
        'string': None,  # Strings handled specially
    }

    # Per-pixel primitives that can be compiled directly
    PER_PIXEL_PRIMITIVES = {
        'color_ops:invert-img',
        'color_ops:grayscale',
        'color_ops:sepia',
        'color_ops:adjust',
        'color_ops:adjust-brightness',
        'color_ops:shift-hsv',
        'color_ops:quantize',
    }

    # Geometric primitives (coordinate remapping)
    GEOMETRIC_PRIMITIVES = {
        'geometry:scale-img',
        'geometry:rotate-img',
        'geometry:translate',
        'geometry:flip-h',
        'geometry:flip-v',
        'geometry:remap',
    }

    def __init__(self):
        self.ctx: Optional["CompilerContext"] = None

    # ------------------------------------------------------------------
    # Entry points
    # ------------------------------------------------------------------

    def compile_file(self, path: str) -> "CompiledEffect":
        """Compile an effect from a .sexp file."""
        # Effect sources are text; read them as UTF-8 regardless of locale.
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()
        return self.compile(parse_all(content))

    def compile_string(self, sexp_code: str) -> "CompiledEffect":
        """Compile an effect from an sexp string."""
        return self.compile(parse_all(sexp_code))

    def compile(self, expr: Any) -> "CompiledEffect":
        """Compile a parsed sexp expression.

        Args:
            expr: Either one top-level form or a list of top-level forms
                (require-primitives, define-effect).

        Returns:
            The CompiledEffect holding the WGSL source and runtime metadata.

        Raises:
            ValueError: If no define-effect body was found.
        """
        self.ctx = CompilerContext()

        # Handle multiple top-level expressions (require-primitives, define-effect)
        if isinstance(expr, list) and expr and isinstance(expr[0], list):
            for toplevel in expr:
                self._process_toplevel(toplevel)
        else:
            self._process_toplevel(expr)

        # Generate the WGSL shader
        wgsl = self._generate_wgsl()

        # Determine category based on primitives used
        category = self._determine_category()

        return CompiledEffect(
            name=self.ctx.effect_name,
            wgsl_code=wgsl,
            params=list(self.ctx.params.values()),
            uses_time=self.ctx.uses_time,
            uses_sampling=self.ctx.uses_sampling,
            category=category,
        )

    # ------------------------------------------------------------------
    # Top-level forms
    # ------------------------------------------------------------------

    def _process_toplevel(self, expr: Any):
        """Process a top-level expression; unknown forms are ignored."""
        if not isinstance(expr, list) or not expr:
            return

        head = expr[0]
        if not isinstance(head, Symbol):
            return

        if head.name == 'require-primitives':
            # Track required primitive libraries
            for lib in expr[1:]:
                lib_name = lib.name if isinstance(lib, Symbol) else str(lib)
                self.ctx.required_libs.add(lib_name)
        elif head.name == 'define-effect':
            self._compile_effect_def(expr)

    def _compile_effect_def(self, expr: list):
        """Record name, parameters and body of a define-effect form.

        Expected shape: (define-effect name :params (...) body). Keywords
        other than :params are skipped together with their value; the last
        non-keyword item becomes the body.
        """
        self.ctx.effect_name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1])

        # Parse :params and body
        i = 2
        body = None
        while i < len(expr):
            item = expr[i]
            if isinstance(item, Keyword) and item.name == 'params':
                self._parse_params(expr[i + 1])
                i += 2
            elif isinstance(item, Keyword):
                i += 2  # Skip other keywords
            else:
                body = item
                i += 1

        # Compare against None: a falsy body (e.g. the literal 0) is still a body.
        if body is not None:
            self.ctx.body_expr = body

    def _parse_params(self, params_list: list):
        """Parse the :params block into WGSLParam entries on the context.

        Each entry looks like (name :type T :default V). The type defaults to
        'float' and the default value to 0. Parameters whose type maps to None
        in TYPE_MAP (strings) are not added to the uniform struct.
        """
        for param_def in params_list:
            # Skip anything that is not a non-empty list.
            if not isinstance(param_def, list) or not param_def:
                continue

            name = param_def[0].name if isinstance(param_def[0], Symbol) else str(param_def[0])

            # Parse keyword args
            param_type = 'float'
            default = 0

            i = 1
            while i < len(param_def):
                item = param_def[i]
                if isinstance(item, Keyword):
                    if i + 1 < len(param_def):
                        val = param_def[i + 1]
                        if item.name == 'type':
                            param_type = val.name if isinstance(val, Symbol) else str(val)
                        elif item.name == 'default':
                            default = val
                    i += 2
                else:
                    i += 1

            wgsl_type = self.TYPE_MAP.get(param_type, 'f32')
            if wgsl_type:
                self.ctx.params[name] = WGSLParam(name, wgsl_type, default)

    def _determine_category(self) -> str:
        """Determine shader category based on the required libraries.

        Membership is tested in a fixed order ('geometry' first, then
        'filters') so the answer does not depend on set iteration order when
        both libraries are required.
        """
        libs = self.ctx.required_libs
        if 'geometry' in libs:
            return 'geometric'
        if 'filters' in libs:
            return 'neighborhood'
        return 'per_pixel'

    # ------------------------------------------------------------------
    # Shader assembly
    # ------------------------------------------------------------------

    def _generate_wgsl(self) -> str:
        """Generate the complete WGSL shader code.

        Raises:
            ValueError: If the context holds no effect body.
        """
        body_expr = getattr(self.ctx, 'body_expr', None)
        if body_expr is None:
            raise ValueError(
                f"effect {self.ctx.effect_name!r} has no body to compile "
                "(missing define-effect form?)"
            )

        # Compile the body FIRST: doing so sets ctx.uses_sampling / ctx.uses_time
        # and may register libraries, which decides which helpers are emitted.
        body_code = self._compile_expr(body_expr)

        lines = [
            f"// WGSL Shader: {self.ctx.effect_name}",
            "// Auto-generated from sexp effect definition",
            "",
            # Bindings
            "@group(0) @binding(0) var<storage, read> input: array<u32>;",
            "@group(0) @binding(1) var<storage, read_write> output: array<u32>;",
            "",
            # Params struct: fixed header followed by effect parameters
            "struct Params {",
            "    width: u32,",
            "    height: u32,",
            "    time: f32,",
        ]
        lines.extend(
            f"    {param.name}: {param.wgsl_type},"
            for param in self.ctx.params.values()
        )
        lines.append("}")
        lines.append("@group(0) @binding(2) var<uniform> params: Params;")
        lines.append("")

        # Helper functions
        lines.extend(self._generate_helpers())
        lines.append("")

        # Main compute shader: one invocation per pixel, bounds-checked
        lines.extend([
            "@compute @workgroup_size(16, 16, 1)",
            "fn main(@builtin(global_invocation_id) gid: vec3<u32>) {",
            "    let x = gid.x;",
            "    let y = gid.y;",
            "    if (x >= params.width || y >= params.height) { return; }",
            "    let idx = y * params.width + x;",
            "",
            f"    // Effect: {self.ctx.effect_name}",
            body_code,
            "}",
        ])

        return "\n".join(lines)

    def _generate_helpers(self) -> List[str]:
        """Generate WGSL helper functions as a list of source lines.

        pack/unpack helpers are always emitted; bilinear sampling and HSV
        conversion only when the context says they are needed.
        """
        helpers = [
            # Pack/unpack RGB from u32
            "fn unpack_rgb(packed: u32) -> vec3<f32> {",
            "    let r = f32((packed >> 16u) & 0xFFu) / 255.0;",
            "    let g = f32((packed >> 8u) & 0xFFu) / 255.0;",
            "    let b = f32(packed & 0xFFu) / 255.0;",
            "    return vec3<f32>(r, g, b);",
            "}",
            "",
            "fn pack_rgb(rgb: vec3<f32>) -> u32 {",
            "    let r = u32(clamp(rgb.r, 0.0, 1.0) * 255.0);",
            "    let g = u32(clamp(rgb.g, 0.0, 1.0) * 255.0);",
            "    let b = u32(clamp(rgb.b, 0.0, 1.0) * 255.0);",
            "    return (r << 16u) | (g << 8u) | b;",
            "}",
            "",
        ]

        # Bilinear sampling for geometric transforms
        if self.ctx.uses_sampling or 'geometry' in self.ctx.required_libs:
            helpers.extend([
                "fn sample_bilinear(sx: f32, sy: f32) -> vec3<f32> {",
                "    let w = f32(params.width);",
                "    let h = f32(params.height);",
                "    let cx = clamp(sx, 0.0, w - 1.001);",
                "    let cy = clamp(sy, 0.0, h - 1.001);",
                "    let x0 = u32(cx);",
                "    let y0 = u32(cy);",
                "    let x1 = min(x0 + 1u, params.width - 1u);",
                "    let y1 = min(y0 + 1u, params.height - 1u);",
                "    let fx = cx - f32(x0);",
                "    let fy = cy - f32(y0);",
                "    let c00 = unpack_rgb(input[y0 * params.width + x0]);",
                "    let c10 = unpack_rgb(input[y0 * params.width + x1]);",
                "    let c01 = unpack_rgb(input[y1 * params.width + x0]);",
                "    let c11 = unpack_rgb(input[y1 * params.width + x1]);",
                "    let top = mix(c00, c10, fx);",
                "    let bot = mix(c01, c11, fx);",
                "    return mix(top, bot, fy);",
                "}",
                "",
            ])

        # HSV conversion for color effects
        if 'color_ops' in self.ctx.required_libs or 'color' in self.ctx.required_libs:
            helpers.extend([
                "fn rgb_to_hsv(rgb: vec3<f32>) -> vec3<f32> {",
                "    let mx = max(max(rgb.r, rgb.g), rgb.b);",
                "    let mn = min(min(rgb.r, rgb.g), rgb.b);",
                "    let d = mx - mn;",
                "    var h = 0.0;",
                "    if (d > 0.0) {",
                "        if (mx == rgb.r) { h = (rgb.g - rgb.b) / d; }",
                "        else if (mx == rgb.g) { h = 2.0 + (rgb.b - rgb.r) / d; }",
                "        else { h = 4.0 + (rgb.r - rgb.g) / d; }",
                "        h = h / 6.0;",
                "        if (h < 0.0) { h = h + 1.0; }",
                "    }",
                "    let s = select(0.0, d / mx, mx > 0.0);",
                "    return vec3<f32>(h, s, mx);",
                "}",
                "",
                "fn hsv_to_rgb(hsv: vec3<f32>) -> vec3<f32> {",
                "    let h = hsv.x * 6.0;",
                "    let s = hsv.y;",
                "    let v = hsv.z;",
                "    let c = v * s;",
                "    let x = c * (1.0 - abs(h % 2.0 - 1.0));",
                "    let m = v - c;",
                "    var rgb: vec3<f32>;",
                "    if (h < 1.0) { rgb = vec3<f32>(c, x, 0.0); }",
                "    else if (h < 2.0) { rgb = vec3<f32>(x, c, 0.0); }",
                "    else if (h < 3.0) { rgb = vec3<f32>(0.0, c, x); }",
                "    else if (h < 4.0) { rgb = vec3<f32>(0.0, x, c); }",
                "    else if (h < 5.0) { rgb = vec3<f32>(x, 0.0, c); }",
                "    else { rgb = vec3<f32>(c, 0.0, x); }",
                "    return rgb + vec3<f32>(m, m, m);",
                "}",
                "",
            ])

        return helpers

    # ------------------------------------------------------------------
    # Statement-level compilation
    # ------------------------------------------------------------------

    @staticmethod
    def _emit(ind: str, *statements: str) -> str:
        """Join WGSL source lines into one string, each prefixed with *ind*."""
        return "\n".join(f"{ind}{stmt}" for stmt in statements)

    def _arg_f32(self, args: list, index: int, default: str) -> str:
        """Return primitive argument *index* coerced to f32, or *default*.

        Wrapping in f32() lets i32/u32 uniforms be used where the shader
        template does float arithmetic.
        """
        if len(args) > index:
            return f"f32({self._expr_to_wgsl(args[index])})"
        return default

    def _compile_expr(self, expr: Any, indent: int = 4) -> str:
        """Compile an sexp expression to WGSL code.

        Special forms and primitive calls yield indented statements;
        arithmetic, comparison, or, max and min yield an inline expression
        string without indentation. Anything unrecognised becomes a comment.
        """
        ind = " " * indent

        # Literals
        if isinstance(expr, (int, float)):
            return f"{ind}// literal: {expr}"

        if isinstance(expr, str):
            return f'{ind}// string: "{expr}"'

        # Symbol reference
        if isinstance(expr, Symbol):
            name = expr.name
            if name == 'frame':
                return f"{ind}let rgb = unpack_rgb(input[idx]);"
            if name == 't' or name == '_time':
                self.ctx.uses_time = True
                return f"{ind}let t = params.time;"
            if name in self.ctx.params:
                return f"{ind}let {name} = params.{name};"
            if name in self.ctx.locals:
                return f"{ind}// local: {name}"
            return f"{ind}// unknown symbol: {name}"

        # List (function call or special form)
        if isinstance(expr, list) and expr:
            head = expr[0]

            if isinstance(head, Symbol):
                form = head.name

                # Special forms
                if form == 'let' or form == 'let*':
                    return self._compile_let(expr, indent)

                if form == 'if':
                    return self._compile_if(expr, indent)

                if form == 'or':
                    # (or a b) - return a if truthy, else b
                    return self._compile_or(expr, indent)

                # Primitive calls
                if ':' in form:
                    return self._compile_primitive_call(expr, indent)

                # Arithmetic
                if form in ('+', '-', '*', '/'):
                    return self._compile_arithmetic(expr, indent)

                if form in ('>', '<', '>=', '<=', '='):
                    return self._compile_comparison(expr, indent)

                if form == 'max':
                    return self._compile_builtin('max', expr[1:], indent)

                if form == 'min':
                    return self._compile_builtin('min', expr[1:], indent)

        return f"{ind}// unhandled: {expr}"

    def _compile_let(self, expr: list, indent: int) -> str:
        """Compile let/let* binding form.

        Each binding becomes a WGSL `let` statement and is recorded in
        ctx.locals; expr[2] is then compiled as the body.
        """
        ind = " " * indent
        bindings = expr[1]
        body = expr[2]

        # Parse bindings (Clojure style: [x 1 y 2] or Scheme style: ((x 1) (y 2)))
        pairs = []
        if bindings and isinstance(bindings[0], Symbol):
            # Clojure style: flat name/value sequence
            for i in range(0, len(bindings) - 1, 2):
                target = bindings[i]
                name = target.name if isinstance(target, Symbol) else str(target)
                pairs.append((name, bindings[i + 1]))
        else:
            # Scheme style: one (name value) list per binding
            for binding in bindings:
                target = binding[0]
                name = target.name if isinstance(target, Symbol) else str(target)
                pairs.append((name, binding[1]))

        # Compile bindings
        lines = []
        for name, value in pairs:
            val_code = self._expr_to_wgsl(value)
            lines.append(f"{ind}let {name} = {val_code};")
            self.ctx.locals[name] = val_code

        # Compile body
        lines.append(self._compile_body(body, indent))

        return "\n".join(lines)

    def _compile_body(self, body: Any, indent: int) -> str:
        """Compile the body of an effect (the final image expression).

        Primitive calls, nested let/let* and if forms are compiled; a bare
        `frame` copies the input pixel; anything else becomes a comment.
        """
        ind = " " * indent

        if isinstance(body, list) and body:
            head = body[0]
            if isinstance(head, Symbol):
                # Most effects end with a primitive call that produces the output
                if ':' in head.name:
                    return self._compile_primitive_call(body, indent)
                # Nested binding / conditional forms would otherwise be
                # reduced to a comment and the shader would write nothing.
                if head.name in ('let', 'let*'):
                    return self._compile_let(body, indent)
                if head.name == 'if':
                    return self._compile_if(body, indent)

        # If body is just 'frame', pass through
        if isinstance(body, Symbol) and body.name == 'frame':
            return f"{ind}output[idx] = input[idx];"

        return f"{ind}// body: {body}"

    def _compile_primitive_call(self, expr: list, indent: int) -> str:
        """Compile a primitive function call into statements writing output[idx].

        The image argument (args[0]) is not compiled: every primitive reads
        the `input` buffer directly. Remaining arguments are coerced to f32.
        Unknown primitives compile to a pass-through copy.
        """
        ind = " " * indent
        head = expr[0]
        prim_name = head.name if isinstance(head, Symbol) else str(head)
        args = expr[1:]

        # Per-pixel color operations
        if prim_name == 'color_ops:invert-img':
            return self._emit(
                ind,
                "let rgb = unpack_rgb(input[idx]);",
                "let result = vec3<f32>(1.0, 1.0, 1.0) - rgb;",
                "output[idx] = pack_rgb(result);",
            )

        if prim_name == 'color_ops:grayscale':
            return self._emit(
                ind,
                "let rgb = unpack_rgb(input[idx]);",
                "let gray = 0.299 * rgb.r + 0.587 * rgb.g + 0.114 * rgb.b;",
                "let result = vec3<f32>(gray, gray, gray);",
                "output[idx] = pack_rgb(result);",
            )

        if prim_name == 'color_ops:adjust-brightness':
            amount = self._arg_f32(args, 1, "0.0")
            return self._emit(
                ind,
                "let rgb = unpack_rgb(input[idx]);",
                f"let adj = {amount} / 255.0;",
                "let result = clamp(rgb + vec3<f32>(adj, adj, adj), "
                "vec3<f32>(0.0, 0.0, 0.0), vec3<f32>(1.0, 1.0, 1.0));",
                "output[idx] = pack_rgb(result);",
            )

        if prim_name == 'color_ops:adjust':
            # (adjust img brightness contrast)
            brightness = self._arg_f32(args, 1, "0.0")
            contrast = self._arg_f32(args, 2, "1.0")
            return self._emit(
                ind,
                "let rgb = unpack_rgb(input[idx]);",
                "let centered = rgb - vec3<f32>(0.5, 0.5, 0.5);",
                f"let contrasted = centered * {contrast};",
                "let brightened = contrasted + vec3<f32>(0.5, 0.5, 0.5) "
                f"+ vec3<f32>({brightness}/255.0);",
                "let result = clamp(brightened, vec3<f32>(0.0), vec3<f32>(1.0));",
                "output[idx] = pack_rgb(result);",
            )

        if prim_name == 'color_ops:sepia':
            intensity = self._arg_f32(args, 1, "1.0")
            return self._emit(
                ind,
                "let rgb = unpack_rgb(input[idx]);",
                "let sepia_r = 0.393 * rgb.r + 0.769 * rgb.g + 0.189 * rgb.b;",
                "let sepia_g = 0.349 * rgb.r + 0.686 * rgb.g + 0.168 * rgb.b;",
                "let sepia_b = 0.272 * rgb.r + 0.534 * rgb.g + 0.131 * rgb.b;",
                "let sepia = vec3<f32>(sepia_r, sepia_g, sepia_b);",
                f"let result = mix(rgb, sepia, {intensity});",
                "output[idx] = pack_rgb(clamp(result, vec3<f32>(0.0), vec3<f32>(1.0)));",
            )

        if prim_name == 'color_ops:shift-hsv':
            # The template calls rgb_to_hsv/hsv_to_rgb; register the library
            # so those helpers are emitted even without require-primitives.
            self.ctx.required_libs.add('color_ops')
            h_shift = self._arg_f32(args, 1, "0.0")
            s_mult = self._arg_f32(args, 2, "1.0")
            v_mult = self._arg_f32(args, 3, "1.0")
            return self._emit(
                ind,
                "let rgb = unpack_rgb(input[idx]);",
                "var hsv = rgb_to_hsv(rgb);",
                f"hsv.x = fract(hsv.x + {h_shift} / 360.0);",
                f"hsv.y = clamp(hsv.y * {s_mult}, 0.0, 1.0);",
                f"hsv.z = clamp(hsv.z * {v_mult}, 0.0, 1.0);",
                "let result = hsv_to_rgb(hsv);",
                "output[idx] = pack_rgb(result);",
            )

        if prim_name == 'color_ops:quantize':
            levels = self._arg_f32(args, 1, "8.0")
            return self._emit(
                ind,
                "let rgb = unpack_rgb(input[idx]);",
                f"let lvl = max(2.0, {levels});",
                "let result = floor(rgb * lvl) / lvl;",
                "output[idx] = pack_rgb(result);",
            )

        # Geometric transforms
        if prim_name == 'geometry:scale-img':
            scale_x = self._arg_f32(args, 1, "1.0")
            scale_y = self._arg_f32(args, 2, scale_x)  # uniform scale by default
            self.ctx.uses_sampling = True
            return self._emit(
                ind,
                "let w = f32(params.width);",
                "let h = f32(params.height);",
                "let cx = w / 2.0;",
                "let cy = h / 2.0;",
                "let sx = f32(x) - cx;",
                "let sy = f32(y) - cy;",
                f"let src_x = sx / {scale_x} + cx;",
                f"let src_y = sy / {scale_y} + cy;",
                "let result = sample_bilinear(src_x, src_y);",
                "output[idx] = pack_rgb(result);",
            )

        if prim_name == 'geometry:rotate-img':
            angle = self._arg_f32(args, 1, "0.0")
            self.ctx.uses_sampling = True
            return self._emit(
                ind,
                "let w = f32(params.width);",
                "let h = f32(params.height);",
                "let cx = w / 2.0;",
                "let cy = h / 2.0;",
                f"let angle_rad = {angle} * 3.14159265 / 180.0;",
                "let cos_a = cos(-angle_rad);",
                "let sin_a = sin(-angle_rad);",
                "let dx = f32(x) - cx;",
                "let dy = f32(y) - cy;",
                "let src_x = dx * cos_a - dy * sin_a + cx;",
                "let src_y = dx * sin_a + dy * cos_a + cy;",
                "let result = sample_bilinear(src_x, src_y);",
                "output[idx] = pack_rgb(result);",
            )

        if prim_name == 'geometry:flip-h':
            return self._emit(
                ind,
                "let src_idx = y * params.width + (params.width - 1u - x);",
                "output[idx] = input[src_idx];",
            )

        if prim_name == 'geometry:flip-v':
            return self._emit(
                ind,
                "let src_idx = (params.height - 1u - y) * params.width + x;",
                "output[idx] = input[src_idx];",
            )

        # Image library
        if prim_name == 'image:blur':
            radius = self._expr_to_wgsl(args[1]) if len(args) > 1 else "5"
            # Box blur approximation (separable would be better).
            # The radius is clamped at 0: a negative radius would skip the
            # loop entirely and divide the empty sum by a zero count.
            return self._emit(
                ind,
                f"let radius = max(i32({radius}), 0);",
                "var sum = vec3<f32>(0.0, 0.0, 0.0);",
                "var count = 0.0;",
                "for (var dy = -radius; dy <= radius; dy = dy + 1) {",
                "    for (var dx = -radius; dx <= radius; dx = dx + 1) {",
                "        let sx = i32(x) + dx;",
                "        let sy = i32(y) + dy;",
                "        if (sx >= 0 && sx < i32(params.width) && sy >= 0 && sy < i32(params.height)) {",
                "            let sidx = u32(sy) * params.width + u32(sx);",
                "            sum = sum + unpack_rgb(input[sidx]);",
                "            count = count + 1.0;",
                "        }",
                "    }",
                "}",
                "let result = sum / count;",
                "output[idx] = pack_rgb(result);",
            )

        # Fallback - passthrough
        return self._emit(
            ind,
            f"// Unimplemented primitive: {prim_name}",
            "output[idx] = input[idx];",
        )

    def _compile_if(self, expr: list, indent: int) -> str:
        """Compile (if cond then [else]) into a WGSL if / else statement."""
        ind = " " * indent
        cond = self._expr_to_wgsl(expr[1])
        then_expr = expr[2]
        else_expr = expr[3] if len(expr) > 3 else None

        lines = [
            f"{ind}if ({cond}) {{",
            self._compile_body(then_expr, indent + 4),
        ]
        # `is not None` so that a falsy else-branch (e.g. literal 0) is kept.
        if else_expr is not None:
            lines.append(f"{ind}}} else {{")
            lines.append(self._compile_body(else_expr, indent + 4))
        lines.append(f"{ind}}}")

        return "\n".join(lines)

    # ------------------------------------------------------------------
    # Inline expression compilation
    # ------------------------------------------------------------------

    def _compile_or(self, expr: list, indent: int) -> str:
        """Compile or expression - returns first truthy value."""
        # For numeric context, (or a b) means "a if a != 0 else b"
        a = self._expr_to_wgsl(expr[1])
        b = self._expr_to_wgsl(expr[2]) if len(expr) > 2 else "0.0"
        return f"select({b}, {a}, {a} != 0.0)"

    def _compile_arithmetic(self, expr: list, indent: int) -> str:
        """Compile arithmetic expression to inline WGSL.

        A single operand is returned as-is, negated for '-'. Several operands
        are joined with the operator inside parentheses.
        """
        op = expr[0].name
        operands = [self._expr_to_wgsl(arg) for arg in expr[1:]]

        if len(operands) == 1:
            if op == '-':
                return f"(-{operands[0]})"
            return operands[0]

        return f"({f' {op} '.join(operands)})"

    def _compile_comparison(self, expr: list, indent: int) -> str:
        """Compile a binary comparison; sexp '=' maps to WGSL '=='."""
        op = expr[0].name
        if op == '=':
            op = '=='
        a = self._expr_to_wgsl(expr[1])
        b = self._expr_to_wgsl(expr[2])
        return f"({a} {op} {b})"

    def _compile_builtin(self, fn: str, args: list, indent: int) -> str:
        """Compile a call to the WGSL builtin *fn* with the given arguments."""
        compiled_args = [self._expr_to_wgsl(arg) for arg in args]
        return f"{fn}({', '.join(compiled_args)})"

    def _expr_to_wgsl(self, expr: Any) -> str:
        """Convert an expression to inline WGSL code.

        Numbers are always rendered as float literals. Expressions that are
        not recognised yield a `/* unknown: ... */` comment.
        """
        if isinstance(expr, (int, float)):
            # Render every number through float() so ints gain a decimal point
            return f"{float(expr)}"

        if isinstance(expr, str):
            return f'"{expr}"'

        if isinstance(expr, Symbol):
            name = expr.name
            if name == 'frame':
                return "rgb"  # Assume rgb is already loaded
            if name == 't' or name == '_time':
                self.ctx.uses_time = True
                return "params.time"
            if name == 'pi':
                return "3.14159265"
            if name in self.ctx.params:
                return f"params.{name}"
            # Locals and unknown symbols are both referenced by their own name
            return name

        if isinstance(expr, list) and expr:
            head = expr[0]
            if isinstance(head, Symbol):
                form = head.name

                # Arithmetic
                if form in ('+', '-', '*', '/'):
                    return self._compile_arithmetic(expr, 0)

                # Comparison
                if form in ('>', '<', '>=', '<=', '='):
                    return self._compile_comparison(expr, 0)

                # Builtins
                if form in ('max', 'min', 'abs', 'floor', 'ceil', 'sin', 'cos', 'sqrt'):
                    return self._compile_builtin(form, expr[1:], 0)

                if form == 'or':
                    return self._compile_or(expr, 0)

                # Image dimension queries
                if form == 'image:width':
                    return "f32(params.width)"
                if form == 'image:height':
                    return "f32(params.height)"

        return f"/* unknown: {expr} */"
|
||||
|
||||
|
||||
def compile_effect(sexp_code: str) -> CompiledEffect:
    """Compile sexp effect source text with a one-off SexpToWGSLCompiler."""
    return SexpToWGSLCompiler().compile_string(sexp_code)
|
||||
|
||||
|
||||
def compile_effect_file(path: str) -> CompiledEffect:
    """Compile the sexp effect stored at *path* with a one-off SexpToWGSLCompiler."""
    return SexpToWGSLCompiler().compile_file(path)
|
||||
@@ -68,6 +68,8 @@ class NumpyBackend(Backend):
|
||||
|
||||
def load_effect(self, effect_path: Path) -> Any:
|
||||
"""Load an effect from sexp file."""
|
||||
if isinstance(effect_path, str):
|
||||
effect_path = Path(effect_path)
|
||||
effect_key = str(effect_path)
|
||||
if effect_key not in self._loaded_effects:
|
||||
interp = self._get_interpreter()
|
||||
@@ -260,23 +262,258 @@ class NumpyBackend(Backend):
|
||||
return np.clip(result, 0, 255).astype(np.uint8)
|
||||
|
||||
|
||||
class GLSLBackend(Backend):
|
||||
class WGPUBackend(Backend):
|
||||
"""
|
||||
GPU-based effect processing using OpenGL/GLSL.
|
||||
GPU-based effect processing using wgpu/WebGPU compute shaders.
|
||||
|
||||
Requires GPU with OpenGL 3.3+ support (or Mesa software renderer).
|
||||
Achieves 30+ fps real-time processing.
|
||||
Compiles sexp effects to WGSL at load time, executes on GPU.
|
||||
Achieves 30+ fps real-time processing on supported hardware.
|
||||
|
||||
TODO: Implement when ready for GPU acceleration.
|
||||
Requirements:
|
||||
- wgpu-py library
|
||||
- Vulkan-capable GPU (or software renderer)
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
raise NotImplementedError(
|
||||
"GLSL backend not yet implemented. Use NumpyBackend for now."
|
||||
)
|
||||
def __init__(self, recipe_dir: Path = None):
    """Set up an idle backend; the GPU device is only requested on first use.

    Args:
        recipe_dir: Directory associated with the recipe; the current
            directory is used when it is omitted or otherwise falsy.
    """
    # CPU backend used whenever GPU init or shader compilation fails.
    # It receives the argument exactly as given (possibly None).
    cpu_backend = NumpyBackend(recipe_dir)

    self.recipe_dir = recipe_dir if recipe_dir else Path(".")
    self._device = None
    # effect path (as str) -> compiled shader info
    self._loaded_effects: Dict[str, Any] = {}
    self._numpy_fallback = cpu_backend
    # Reusable GPU buffers, keyed by (width, height)
    self._buffer_pool: Dict[tuple, Dict] = {}
|
||||
|
||||
def _ensure_device(self):
|
||||
"""Lazy-initialize wgpu device."""
|
||||
if self._device is not None:
|
||||
return
|
||||
|
||||
try:
|
||||
import wgpu
|
||||
adapter = wgpu.gpu.request_adapter_sync(power_preference="high-performance")
|
||||
self._device = adapter.request_device_sync()
|
||||
print(f"[WGPUBackend] Using GPU: {adapter.info.get('device', 'unknown')}")
|
||||
except Exception as e:
|
||||
print(f"[WGPUBackend] GPU init failed: {e}, falling back to CPU")
|
||||
self._device = None
|
||||
|
||||
def load_effect(self, effect_path: Path) -> Any:
    """Load and compile an effect from sexp file to WGSL.

    Args:
        effect_path: Location of the effect's sexp source.

    Returns:
        The compiled effect's name when the GPU pipeline was built (on the
        first load and on every later cache hit alike). When no GPU device
        is available or compilation fails, whatever the numpy fallback
        backend's ``load_effect`` returns.
    """
    effect_key = str(effect_path)
    cached = self._loaded_effects.get(effect_key)
    if cached is not None:
        # Return the name, matching what the first load returned; the
        # cache entry itself is internal bookkeeping.
        return cached['name'] if isinstance(cached, dict) else cached

    try:
        from sexp_effects.wgsl_compiler import compile_effect_file
        compiled = compile_effect_file(str(effect_path))

        self._ensure_device()
        if self._device is None:
            # Fall back to numpy
            return self._numpy_fallback.load_effect(effect_path)

        # Create shader module
        shader_module = self._device.create_shader_module(code=compiled.wgsl_code)

        # Create compute pipeline
        pipeline = self._device.create_compute_pipeline(
            layout="auto",
            compute={"module": shader_module, "entry_point": "main"}
        )

        self._loaded_effects[effect_key] = {
            'compiled': compiled,
            'pipeline': pipeline,
            'name': compiled.name,
        }
        return compiled.name

    except Exception as e:
        # Deliberate best-effort: any compile/pipeline error degrades to CPU.
        print(f"[WGPUBackend] Failed to compile {effect_path}: {e}")
        # Fall back to numpy for this effect
        return self._numpy_fallback.load_effect(effect_path)
|
||||
|
||||
def _resolve_binding(self, value: Any, t: float, analysis_data: Dict) -> Any:
|
||||
"""Resolve a parameter binding to its value at time t."""
|
||||
# Delegate to numpy backend's implementation
|
||||
return self._numpy_fallback._resolve_binding(value, t, analysis_data)
|
||||
|
||||
def _get_or_create_buffers(self, w: int, h: int):
    """Return the buffer set for a w x h frame, allocating it on first request.

    The result is a dict with the keys 'staging', 'input', 'output',
    'params' and 'size' (byte size of one frame at one u32 per pixel). It is
    cached in ``self._buffer_pool`` under ``(w, h)``.
    """
    import wgpu

    pool_key = (w, h)
    try:
        return self._buffer_pool[pool_key]
    except KeyError:
        pass  # first request for these dimensions: allocate below

    usage = wgpu.BufferUsage
    frame_bytes = w * h * 4  # u32 per pixel
    device = self._device

    buffer_set = {
        # Staging buffer for uploads (MAP_WRITE)
        'staging': device.create_buffer(
            size=frame_bytes,
            usage=usage.MAP_WRITE | usage.COPY_SRC,
            mapped_at_creation=False,
        ),
        # Input buffer (STORAGE, receives data from staging)
        'input': device.create_buffer(
            size=frame_bytes,
            usage=usage.STORAGE | usage.COPY_DST,
        ),
        # Output buffer (STORAGE + COPY_SRC for readback)
        'output': device.create_buffer(
            size=frame_bytes,
            usage=usage.STORAGE | usage.COPY_SRC,
        ),
        # Params buffer (uniform, 256 bytes should be enough)
        'params': device.create_buffer(
            size=256,
            usage=usage.UNIFORM | usage.COPY_DST,
        ),
        'size': frame_bytes,
    }
    self._buffer_pool[pool_key] = buffer_set
    return buffer_set
|
||||
|
||||
def _apply_effect_gpu(
    self,
    frame: np.ndarray,
    effect_name: str,
    params: Dict,
    t: float,
) -> Optional[np.ndarray]:
    """Apply one compiled effect to a frame with the GPU compute pipeline.

    Channels 0-2 of the frame are packed into one u32 per pixel
    (0x00RRGGBB), written to the pooled input buffer, processed by the
    effect's compute pipeline and read back from the pooled output buffer.

    Args:
        frame: Image indexed as [row, column, channel]; only channels 0-2
            are read (presumably uint8 RGB - confirm with callers).
        effect_name: Name matched against the 'name' entry of
            self._loaded_effects values.
        params: Parameter values keyed by parameter name; missing names
            fall back to the compiled parameter's default, then to 0.
        t: Time value written into the params struct as an f32.

    Returns:
        A new (h, w, 3) uint8 array, or None when there is no device or
        the effect has not been loaded (the caller then falls back to CPU).
    """
    import struct

    # No unconditional `import wgpu` here: the body never uses the module,
    # and importing it would raise instead of returning None on hosts
    # without wgpu installed.
    if self._device is None:
        return None

    effect_info = next(
        (
            info
            for info in self._loaded_effects.values()
            if info.get('name') == effect_name
        ),
        None,
    )
    if effect_info is None:
        return None

    compiled = effect_info['compiled']
    pipeline = effect_info['pipeline']

    h, w = frame.shape[:2]

    # Get reusable buffers
    buffers = self._get_or_create_buffers(w, h)

    # Pack frame as u32 array (RGB -> packed u32)
    r = frame[:, :, 0].astype(np.uint32)
    g = frame[:, :, 1].astype(np.uint32)
    b = frame[:, :, 2].astype(np.uint32)
    packed = (r << 16) | (g << 8) | b

    # tobytes() already emits C-order bytes, so no flatten/astype copy is needed.
    self._device.queue.write_buffer(buffers['input'], 0, packed.tobytes())

    # Build params struct: width (u32), height (u32), time (f32), then the
    # effect-specific parameters in declaration order.
    struct_codes = {
        'f32': ('f', float),
        'i32': ('i', int),
        'u32': ('I', int),
    }
    param_format = "IIf"
    param_values = [w, h, t]

    for param in compiled.params:
        val = params.get(param.name, param.default)
        if val is None:
            val = 0
        entry = struct_codes.get(param.wgsl_type)
        if entry is None:
            # Other WGSL types are not packed (same as before).
            continue
        code, convert = entry
        param_values.append(convert(val))
        param_format += code

    # Pad to 16-byte alignment
    param_bytes = struct.pack(param_format, *param_values)
    param_bytes += b'\x00' * (-len(param_bytes) % 16)

    self._device.queue.write_buffer(buffers['params'], 0, param_bytes)

    # Bind group is rebuilt per call because the layout comes from the
    # effect-specific pipeline.
    bind_group = self._device.create_bind_group(
        layout=pipeline.get_bind_group_layout(0),
        entries=[
            {"binding": 0, "resource": {"buffer": buffers['input']}},
            {"binding": 1, "resource": {"buffer": buffers['output']}},
            {"binding": 2, "resource": {"buffer": buffers['params']}},
        ]
    )

    # Dispatch compute
    encoder = self._device.create_command_encoder()
    compute_pass = encoder.begin_compute_pass()
    compute_pass.set_pipeline(pipeline)
    compute_pass.set_bind_group(0, bind_group)

    # Workgroups: ceil(w/16) x ceil(h/16)
    # (assumes the shaders declare a 16x16 workgroup size - TODO confirm)
    wg_x = (w + 15) // 16
    wg_y = (h + 15) // 16
    compute_pass.dispatch_workgroups(wg_x, wg_y, 1)
    compute_pass.end()

    self._device.queue.submit([encoder.finish()])

    # Read back result
    result_data = self._device.queue.read_buffer(buffers['output'])
    result_packed = np.frombuffer(result_data, dtype=np.uint32).reshape(h, w)

    # Unpack u32 -> RGB (every channel is assigned, so empty() is safe)
    result = np.empty((h, w, 3), dtype=np.uint8)
    result[:, :, 0] = ((result_packed >> 16) & 0xFF).astype(np.uint8)
    result[:, :, 1] = ((result_packed >> 8) & 0xFF).astype(np.uint8)
    result[:, :, 2] = (result_packed & 0xFF).astype(np.uint8)

    return result
|
||||
|
||||
def _apply_effect(
    self,
    frame: np.ndarray,
    effect_name: str,
    params: Dict,
    t: float,
    analysis_data: Dict,
) -> np.ndarray:
    """Run one effect on a frame, trying the GPU path before the CPU one.

    Every entry of ``params`` except the bookkeeping keys is passed through
    ``self._resolve_binding`` and handed to the GPU implementation together
    with a ``_time`` entry. When no device exists or the GPU implementation
    returns None, the numpy fallback backend is called with the original,
    unresolved ``params``.
    """
    bookkeeping_keys = ("effect", "effect_path", "cid", "analysis_refs")

    resolved_params = {"_time": t}
    resolved_params.update(
        (name, self._resolve_binding(raw, t, analysis_data))
        for name, raw in params.items()
        if name not in bookkeeping_keys
    )

    # GPU attempt
    self._ensure_device()
    gpu_result = None
    if self._device is not None:
        gpu_result = self._apply_effect_gpu(frame, effect_name, resolved_params, t)
    if gpu_result is not None:
        return gpu_result

    # CPU path
    fallback = self._numpy_fallback
    return fallback._apply_effect(frame, effect_name, params, t, analysis_data)
|
||||
|
||||
def process_frame(
|
||||
self,
|
||||
@@ -286,7 +523,34 @@ class GLSLBackend(Backend):
|
||||
t: float,
|
||||
analysis_data: Dict,
|
||||
) -> np.ndarray:
|
||||
pass
|
||||
"""Process frames through effects and composite."""
|
||||
if not frames:
|
||||
return np.zeros((720, 1280, 3), dtype=np.uint8)
|
||||
|
||||
processed = []
|
||||
|
||||
# Apply effects to each input frame
|
||||
for i, (frame, effects) in enumerate(zip(frames, effects_per_frame)):
|
||||
result = frame.copy()
|
||||
for effect_config in effects:
|
||||
effect_name = effect_config.get("effect", "")
|
||||
if effect_name:
|
||||
result = self._apply_effect(
|
||||
result, effect_name, effect_config, t, analysis_data
|
||||
)
|
||||
processed.append(result)
|
||||
|
||||
# Composite layers (use numpy backend for now)
|
||||
if len(processed) == 1:
|
||||
return processed[0]
|
||||
|
||||
return self._numpy_fallback._composite(
|
||||
processed, compositor_config, t, analysis_data
|
||||
)
|
||||
|
||||
|
||||
# Keep GLSLBackend as alias for backwards compatibility
|
||||
GLSLBackend = WGPUBackend
|
||||
|
||||
|
||||
def get_backend(name: str = "numpy", **kwargs) -> Backend:
|
||||
@@ -294,7 +558,7 @@ def get_backend(name: str = "numpy", **kwargs) -> Backend:
|
||||
Get a backend by name.
|
||||
|
||||
Args:
|
||||
name: "numpy" or "glsl"
|
||||
name: "numpy", "wgpu", or "glsl" (alias for wgpu)
|
||||
**kwargs: Backend-specific options
|
||||
|
||||
Returns:
|
||||
@@ -302,7 +566,7 @@ def get_backend(name: str = "numpy", **kwargs) -> Backend:
|
||||
"""
|
||||
if name == "numpy":
|
||||
return NumpyBackend(**kwargs)
|
||||
elif name == "glsl":
|
||||
return GLSLBackend(**kwargs)
|
||||
elif name in ("wgpu", "glsl", "gpu"):
|
||||
return WGPUBackend(**kwargs)
|
||||
else:
|
||||
raise ValueError(f"Unknown backend: {name}")
|
||||
|
||||
@@ -5,14 +5,99 @@ Supports:
|
||||
- Display window (preview)
|
||||
- File output (recording)
|
||||
- Stream output (RTMP, etc.) - future
|
||||
- NVENC hardware encoding (auto-detected)
|
||||
- CuPy GPU arrays (auto-converted to numpy for output)
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import subprocess
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Tuple, Optional
|
||||
from typing import Tuple, Optional, List, Union
|
||||
from pathlib import Path
|
||||
|
||||
# Try to import CuPy for GPU array support
|
||||
try:
|
||||
import cupy as cp
|
||||
CUPY_AVAILABLE = True
|
||||
except ImportError:
|
||||
cp = None
|
||||
CUPY_AVAILABLE = False
|
||||
|
||||
|
||||
def ensure_numpy(frame: Union[np.ndarray, 'cp.ndarray']) -> np.ndarray:
    """Return ``frame`` as a numpy array, copying it off the GPU if it is a CuPy array.

    Anything that is not a CuPy array (or any input when CuPy is not
    installed) is returned unchanged.
    """
    if not CUPY_AVAILABLE:
        return frame
    if not isinstance(frame, cp.ndarray):
        return frame
    return cp.asnumpy(frame)
|
||||
|
||||
# Cached result of the NVENC probe; None means "not probed yet".
_nvenc_available: Optional[bool] = None


def _probe_nvenc() -> bool:
    """Return True when ffmpeg lists h264_nvenc and a one-frame test encode succeeds."""
    listing = subprocess.run(
        ["ffmpeg", "-hide_banner", "-encoders"],
        capture_output=True,
        text=True,
        timeout=5,
    )
    if "h264_nvenc" not in listing.stdout:
        return False

    # The encoder being compiled into ffmpeg says nothing about the host
    # having a usable GPU/driver, so encode a single synthetic frame and
    # trust the exit status.
    trial = subprocess.run(
        [
            "ffmpeg", "-hide_banner", "-loglevel", "error",
            "-f", "lavfi", "-i", "color=c=black:s=256x256:r=1",
            "-frames:v", "1",
            "-c:v", "h264_nvenc",
            "-f", "null", "-",
        ],
        capture_output=True,
        timeout=10,
    )
    return trial.returncode == 0


def check_nvenc_available() -> bool:
    """Check if NVENC hardware encoding is actually usable on this host.

    The probe runs once per process; later calls return the cached result.
    Any failure to run ffmpeg (missing binary, timeout, undecodable output)
    is treated as "not available" so callers fall back to CPU encoding.
    """
    global _nvenc_available
    if _nvenc_available is not None:
        return _nvenc_available

    try:
        _nvenc_available = _probe_nvenc()
    except (OSError, subprocess.SubprocessError, ValueError):
        # Best-effort detection: never let the probe break output setup.
        _nvenc_available = False

    return _nvenc_available
|
||||
|
||||
|
||||
def get_encoder_params(codec: str, preset: str, crf: int) -> List[str]:
    """
    Get encoder-specific FFmpeg parameters.

    For NVENC (h264_nvenc, hevc_nvenc):
    - Uses -cq for constant quality (similar to CRF) in VBR mode
    - Sets -b:v 0 so the quality target is not capped by the encoder's
      default target bitrate
    - Presets: p1 (fastest) to p7 (slowest/best quality)
    - libx264 preset names are mapped onto p1..p7; unknown names use p4

    For every other codec (e.g. libx264):
    - Uses -crf for constant rate factor
    - Passes the preset name through unchanged

    Args:
        codec: FFmpeg video encoder name.
        preset: libx264-style preset name.
        crf: Quality value (lower is better quality).

    Returns:
        Flat list of FFmpeg command-line arguments.
    """
    if codec not in ("h264_nvenc", "hevc_nvenc"):
        # Standard libx264-style params
        return [
            "-c:v", codec,
            "-preset", preset,
            "-crf", str(crf),
        ]

    # Map libx264 presets to NVENC presets
    nvenc_preset_map = {
        "ultrafast": "p1",
        "superfast": "p2",
        "veryfast": "p3",
        "faster": "p3",
        "fast": "p4",
        "medium": "p5",
        "slow": "p6",
        "slower": "p6",
        "veryslow": "p7",
    }
    nvenc_preset = nvenc_preset_map.get(preset, "p4")

    return [
        "-c:v", codec,
        "-preset", nvenc_preset,
        "-cq", str(crf),  # Constant quality target
        "-rc", "vbr",     # Variable bitrate with quality target
        "-b:v", "0",      # Lift the default bitrate cap so -cq governs
    ]
|
||||
|
||||
|
||||
class Output(ABC):
|
||||
"""Abstract base class for output targets."""
|
||||
@@ -91,6 +176,9 @@ class DisplayOutput(Output):
|
||||
if not self._is_open:
|
||||
return
|
||||
|
||||
# Convert GPU array to numpy if needed
|
||||
frame = ensure_numpy(frame)
|
||||
|
||||
# Ensure frame is correct format
|
||||
if frame.dtype != np.uint8:
|
||||
frame = np.clip(frame, 0, 255).astype(np.uint8)
|
||||
@@ -136,6 +224,9 @@ class DisplayOutput(Output):
|
||||
class FileOutput(Output):
|
||||
"""
|
||||
Write frames to a video file using ffmpeg.
|
||||
|
||||
Automatically uses NVENC hardware encoding when available,
|
||||
falling back to libx264 CPU encoding otherwise.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -143,7 +234,7 @@ class FileOutput(Output):
|
||||
path: str,
|
||||
size: Tuple[int, int],
|
||||
fps: float = 30,
|
||||
codec: str = "libx264",
|
||||
codec: str = "auto", # "auto", "h264_nvenc", "libx264"
|
||||
crf: int = 18,
|
||||
preset: str = "fast",
|
||||
audio_source: str = None,
|
||||
@@ -153,6 +244,11 @@ class FileOutput(Output):
|
||||
self.fps = fps
|
||||
self._is_open = True
|
||||
|
||||
# Auto-detect NVENC
|
||||
if codec == "auto":
|
||||
codec = "h264_nvenc" if check_nvenc_available() else "libx264"
|
||||
self.codec = codec
|
||||
|
||||
# Build ffmpeg command
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
@@ -170,12 +266,9 @@ class FileOutput(Output):
|
||||
# Explicitly map: video from input 0 (rawvideo), audio from input 1
|
||||
cmd.extend(["-map", "0:v", "-map", "1:a"])
|
||||
|
||||
cmd.extend([
|
||||
"-c:v", codec,
|
||||
"-preset", preset,
|
||||
"-crf", str(crf),
|
||||
"-pix_fmt", "yuv420p",
|
||||
])
|
||||
# Get encoder-specific params
|
||||
cmd.extend(get_encoder_params(codec, preset, crf))
|
||||
cmd.extend(["-pix_fmt", "yuv420p"])
|
||||
|
||||
# Add audio codec if we have audio
|
||||
if audio_source:
|
||||
@@ -201,11 +294,20 @@ class FileOutput(Output):
|
||||
self._is_open = False
|
||||
return
|
||||
|
||||
# Convert GPU array to numpy if needed
|
||||
frame = ensure_numpy(frame)
|
||||
|
||||
# Resize if needed
|
||||
if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
|
||||
import cv2
|
||||
frame = cv2.resize(frame, self.size)
|
||||
|
||||
# Ensure correct format
|
||||
if frame.dtype != np.uint8:
|
||||
frame = np.clip(frame, 0, 255).astype(np.uint8)
|
||||
if not frame.flags['C_CONTIGUOUS']:
|
||||
frame = np.ascontiguousarray(frame)
|
||||
|
||||
try:
|
||||
self._process.stdin.write(frame.tobytes())
|
||||
except BrokenPipeError:
|
||||
@@ -335,6 +437,9 @@ class PipeOutput(Output):
|
||||
self._is_open = False
|
||||
return
|
||||
|
||||
# Convert GPU array to numpy if needed
|
||||
frame = ensure_numpy(frame)
|
||||
|
||||
# Resize if needed
|
||||
if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
|
||||
import cv2
|
||||
@@ -371,3 +476,424 @@ class PipeOutput(Output):
|
||||
if self._process and self._process.poll() is not None:
|
||||
self._is_open = False
|
||||
return self._is_open
|
||||
|
||||
|
||||
class HLSOutput(Output):
    """
    Write frames as HLS stream (m3u8 playlist + .ts segments).

    Raw RGB frames are piped to an ffmpeg subprocess that writes
    ``stream.m3u8`` and ``segment_NNNNN.ts`` files into ``output_dir``, so a
    client can poll for new segments as they become available.

    Automatically uses NVENC hardware encoding when available.
    """

    def __init__(
        self,
        output_dir: str,
        size: Tuple[int, int],
        fps: float = 30,
        segment_duration: float = 4.0,  # 4s segments for stability
        codec: str = "auto",  # "auto", "h264_nvenc", "libx264"
        crf: int = 23,
        preset: str = "fast",  # Better quality than ultrafast
        audio_source: Optional[str] = None,
    ):
        """Create the output directory and start the ffmpeg HLS encoder.

        Args:
            output_dir: Directory that receives the playlist and segments.
            size: Frame size as (width, height).
            fps: Input frame rate.
            segment_duration: Target segment length in seconds.
            codec: Encoder name, or "auto" to pick NVENC when available.
            crf: Quality value passed to get_encoder_params.
            preset: Preset name passed to get_encoder_params.
            audio_source: Optional audio input muxed in as the second input.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.size = size
        self.fps = fps
        self.segment_duration = segment_duration
        self._is_open = True

        # Auto-detect NVENC
        if codec == "auto":
            codec = "h264_nvenc" if check_nvenc_available() else "libx264"
        self.codec = codec

        # HLS playlist path
        self.playlist_path = self.output_dir / "stream.m3u8"

        # Build ffmpeg command for HLS output
        cmd = [
            "ffmpeg", "-y",
            "-f", "rawvideo",
            "-vcodec", "rawvideo",
            "-pix_fmt", "rgb24",
            "-s", f"{size[0]}x{size[1]}",
            "-r", str(fps),
            "-i", "-",
        ]

        # Add audio input if provided
        if audio_source:
            cmd.extend(["-i", str(audio_source)])
            cmd.extend(["-map", "0:v", "-map", "1:a"])

        # Keyframe interval - one GOP per segment for clean cuts.
        # Clamped to 1 so a tiny fps*duration product never yields "-g 0".
        gop_size = max(1, int(fps * segment_duration))

        # Get encoder-specific params
        cmd.extend(get_encoder_params(codec, preset, crf))
        cmd.extend([
            "-pix_fmt", "yuv420p",
            # Force keyframes at exact intervals for clean segment boundaries
            "-g", str(gop_size),
            "-keyint_min", str(gop_size),
            "-sc_threshold", "0",  # Disable scene change detection
            "-force_key_frames", f"expr:gte(t,n_forced*{segment_duration})",
            # Reduce buffering for faster segment availability
            "-flush_packets", "1",
        ])

        # Add audio codec if we have audio
        if audio_source:
            cmd.extend(["-c:a", "aac", "-b:a", "128k"])

        # HLS specific options for smooth live streaming
        cmd.extend([
            "-f", "hls",
            "-hls_time", str(segment_duration),
            "-hls_list_size", "0",  # Keep all segments in playlist
            "-hls_flags", "independent_segments+append_list+split_by_time",
            "-hls_segment_type", "mpegts",
            "-hls_segment_filename", str(self.output_dir / "segment_%05d.ts"),
            str(self.playlist_path),
        ])

        import sys
        print(f"HLSOutput cmd: {' '.join(cmd)}", file=sys.stderr)
        self._process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stderr=None,  # Show errors for debugging
        )

        # Track segments for status reporting
        self.segments_written = 0
        self._last_segment_check = 0

    def _count_segments(self) -> int:
        """Count the segment files currently present in the output directory."""
        return sum(1 for _ in self.output_dir.glob("segment_*.ts"))

    def write(self, frame: np.ndarray, t: float):
        """Write one frame to the HLS stream.

        Becomes a no-op once the stream is closed or ffmpeg has exited.
        """
        if not self._is_open or self._process.poll() is not None:
            self._is_open = False
            return

        # Convert GPU array to numpy if needed
        frame = ensure_numpy(frame)

        # Resize if needed
        if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
            import cv2
            frame = cv2.resize(frame, self.size)

        # Ensure correct format
        if frame.dtype != np.uint8:
            frame = np.clip(frame, 0, 255).astype(np.uint8)
        if not frame.flags['C_CONTIGUOUS']:
            frame = np.ascontiguousarray(frame)

        try:
            self._process.stdin.write(frame.tobytes())
        except BrokenPipeError:
            # ffmpeg went away; stop accepting frames.
            self._is_open = False
            return

        # Periodically count segments
        if t - self._last_segment_check > 1.0:
            self._last_segment_check = t
            self.segments_written = self._count_segments()

    def close(self):
        """Close the HLS stream and make sure the playlist is terminated."""
        if self._process:
            try:
                self._process.stdin.close()
            except OSError:
                # ffmpeg already exited; there is nothing left to flush into it.
                pass
            self._process.wait()
        self._is_open = False

        # Final segment count
        self.segments_written = self._count_segments()

        # Mark playlist as ended. ffmpeg's HLS muxer normally writes the
        # end tag itself on a clean shutdown, so only add it when missing
        # (a second tag would make the playlist malformed).
        if self.playlist_path.exists():
            existing = self.playlist_path.read_text()
            if "#EXT-X-ENDLIST" not in existing:
                with open(self.playlist_path, "a") as f:
                    if existing and not existing.endswith("\n"):
                        f.write("\n")
                    f.write("#EXT-X-ENDLIST\n")

    @property
    def is_open(self) -> bool:
        """True while the stream is open and ffmpeg is still running."""
        return self._is_open and self._process.poll() is None
|
||||
|
||||
|
||||
class IPFSHLSOutput(Output):
    """
    Write frames as HLS stream with segments uploaded to IPFS.

    Raw RGB frames are piped to an ffmpeg subprocess that writes HLS
    segments into ``output_dir``. Finished segments are added to IPFS and an
    m3u8 playlist that points at ``<ipfs_gateway>/<cid>`` URLs is re-uploaded
    after every segment, so clients can fetch segments from an IPFS gateway.

    The current playlist is available via get_playlist() or, once uploaded,
    via the playlist_cid / playlist_url properties.
    """

    def __init__(
        self,
        output_dir: str,
        size: Tuple[int, int],
        fps: float = 30,
        segment_duration: float = 4.0,
        codec: str = "auto",
        crf: int = 23,
        preset: str = "fast",
        audio_source: Optional[str] = None,
        ipfs_gateway: str = "https://ipfs.io/ipfs",
    ):
        """Create the output directory and start the ffmpeg HLS encoder.

        Args:
            output_dir: Directory that receives the local playlist and segments.
            size: Frame size as (width, height).
            fps: Input frame rate.
            segment_duration: Target segment length in seconds.
            codec: Encoder name, or "auto" to pick NVENC when available.
            crf: Quality value passed to get_encoder_params.
            preset: Preset name passed to get_encoder_params.
            audio_source: Optional audio input muxed in as the second input.
            ipfs_gateway: Base URL prepended to each CID in the playlist.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.size = size
        self.fps = fps
        self.segment_duration = segment_duration
        self.ipfs_gateway = ipfs_gateway.rstrip("/")
        self._is_open = True

        # Auto-detect NVENC
        if codec == "auto":
            codec = "h264_nvenc" if check_nvenc_available() else "libx264"
        self.codec = codec

        # Track segment CIDs
        self.segment_cids: dict = {}  # segment_number -> cid
        self._last_segment_checked = -1
        self._playlist_cid: Optional[str] = None

        # Import IPFS client
        from ipfs_client import add_file, add_bytes
        self._ipfs_add_file = add_file
        self._ipfs_add_bytes = add_bytes

        # Local HLS paths
        self.local_playlist_path = self.output_dir / "stream.m3u8"

        # Build ffmpeg command for HLS output
        cmd = [
            "ffmpeg", "-y",
            "-f", "rawvideo",
            "-vcodec", "rawvideo",
            "-pix_fmt", "rgb24",
            "-s", f"{size[0]}x{size[1]}",
            "-r", str(fps),
            "-i", "-",
        ]

        # Add audio input if provided
        if audio_source:
            cmd.extend(["-i", str(audio_source)])
            cmd.extend(["-map", "0:v", "-map", "1:a"])

        # Keyframe interval - one GOP per segment.
        # Clamped to 1 so a tiny fps*duration product never yields "-g 0".
        gop_size = max(1, int(fps * segment_duration))

        # Get encoder-specific params
        cmd.extend(get_encoder_params(codec, preset, crf))
        cmd.extend([
            "-pix_fmt", "yuv420p",
            "-g", str(gop_size),
            "-keyint_min", str(gop_size),
            "-sc_threshold", "0",
            "-force_key_frames", f"expr:gte(t,n_forced*{segment_duration})",
            "-flush_packets", "1",
        ])

        # Add audio codec if we have audio
        if audio_source:
            cmd.extend(["-c:a", "aac", "-b:a", "128k"])

        # HLS options
        cmd.extend([
            "-f", "hls",
            "-hls_time", str(segment_duration),
            "-hls_list_size", "0",
            "-hls_flags", "independent_segments+append_list+split_by_time",
            "-hls_segment_type", "mpegts",
            "-hls_segment_filename", str(self.output_dir / "segment_%05d.ts"),
            str(self.local_playlist_path),
        ])

        import sys
        print("IPFSHLSOutput: starting ffmpeg", file=sys.stderr)
        self._process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stderr=None,
        )

    def _build_playlist(self, ended: bool, vod: bool = False) -> str:
        """Render the m3u8 text for every uploaded segment, in segment order.

        Args:
            ended: Append #EXT-X-ENDLIST.
            vod: Add #EXT-X-PLAYLIST-TYPE:VOD to the header.
        """
        lines = [
            "#EXTM3U",
            "#EXT-X-VERSION:3",
            f"#EXT-X-TARGETDURATION:{int(self.segment_duration) + 1}",
            "#EXT-X-MEDIA-SEQUENCE:0",
        ]
        if vod:
            lines.append("#EXT-X-PLAYLIST-TYPE:VOD")

        for seg_num in sorted(self.segment_cids):
            lines.append(f"#EXTINF:{self.segment_duration:.3f},")
            lines.append(f"{self.ipfs_gateway}/{self.segment_cids[seg_num]}")

        if ended:
            lines.append("#EXT-X-ENDLIST")

        return "\n".join(lines) + "\n"

    def _upload_new_segments(self):
        """Upload finished segments that are not on IPFS yet."""
        import sys

        # Collect (segment_number, path) pairs, ignoring unexpected file names.
        numbered = []
        for seg_path in self.output_dir.glob("segment_*.ts"):
            try:
                numbered.append((int(seg_path.stem.split("_")[1]), seg_path))
            except (IndexError, ValueError):
                continue
        numbered.sort()

        # While ffmpeg is running, the highest-numbered file is the segment
        # it is still appending to. A "did the size change while we slept"
        # check cannot detect that here: write() is what feeds ffmpeg, so no
        # frames arrive during the sleep and an unfinished segment looks
        # stable. Skip the newest file instead; it is picked up on a later
        # call, or in close() once ffmpeg has exited.
        if numbered and self._process.poll() is None:
            numbered.pop()

        for seg_num, seg_path in numbered:
            # Skip if already uploaded
            if seg_num in self.segment_cids:
                continue

            try:
                if seg_path.stat().st_size == 0:
                    continue  # Empty file, nothing to publish
            except FileNotFoundError:
                continue

            # Upload to IPFS
            cid = self._ipfs_add_file(seg_path, pin=True)
            if cid:
                self.segment_cids[seg_num] = cid
                print(f"IPFS: segment_{seg_num:05d}.ts -> {cid}", file=sys.stderr)

                # Update playlist after each segment upload
                self._update_ipfs_playlist()

    def _update_ipfs_playlist(self):
        """Generate and upload IPFS-aware m3u8 playlist."""
        if not self.segment_cids:
            return

        import sys

        playlist_content = self._build_playlist(ended=False)

        # Upload playlist to IPFS
        cid = self._ipfs_add_bytes(playlist_content.encode("utf-8"), pin=True)
        if cid:
            self._playlist_cid = cid
            print(f"IPFS: playlist updated -> {cid} ({len(self.segment_cids)} segments)", file=sys.stderr)

    def write(self, frame: np.ndarray, t: float):
        """Write frame to HLS stream and upload segments to IPFS.

        Becomes a no-op once the stream is closed or ffmpeg has exited.
        """
        if not self._is_open or self._process.poll() is not None:
            self._is_open = False
            return

        # Convert GPU array to numpy if needed
        frame = ensure_numpy(frame)

        # Resize if needed
        if frame.shape[1] != self.size[0] or frame.shape[0] != self.size[1]:
            import cv2
            frame = cv2.resize(frame, self.size)

        # Ensure correct format
        if frame.dtype != np.uint8:
            frame = np.clip(frame, 0, 255).astype(np.uint8)
        if not frame.flags['C_CONTIGUOUS']:
            frame = np.ascontiguousarray(frame)

        try:
            self._process.stdin.write(frame.tobytes())
        except BrokenPipeError:
            self._is_open = False
            return

        # Look for finished segments once per segment interval of stream time
        current_segment = int(t / self.segment_duration)
        if current_segment > self._last_segment_checked:
            self._last_segment_checked = current_segment
            self._upload_new_segments()

    def close(self):
        """Close the HLS stream and finalize IPFS uploads."""
        import sys

        if self._process:
            try:
                self._process.stdin.close()
            except OSError:
                # ffmpeg already exited; there is nothing left to flush into it.
                pass
            self._process.wait()
        self._is_open = False

        # Upload any remaining segments (ffmpeg has exited, so all are final)
        self._upload_new_segments()

        # Generate final playlist with #EXT-X-ENDLIST
        if self.segment_cids:
            playlist_content = self._build_playlist(ended=True, vod=True)

            cid = self._ipfs_add_bytes(playlist_content.encode("utf-8"), pin=True)
            if cid:
                self._playlist_cid = cid
                print(f"IPFS: final playlist -> {cid} ({len(self.segment_cids)} segments)", file=sys.stderr)

    @property
    def playlist_cid(self) -> Optional[str]:
        """Get the current playlist CID."""
        return self._playlist_cid

    @property
    def playlist_url(self) -> Optional[str]:
        """Get the full IPFS URL for the playlist."""
        if self._playlist_cid:
            return f"{self.ipfs_gateway}/{self._playlist_cid}"
        return None

    def get_playlist(self) -> str:
        """Get the current m3u8 playlist content with IPFS URLs.

        Returns a bare "#EXTM3U" header until the first segment is uploaded;
        the end tag is included once the output has been closed.
        """
        if not self.segment_cids:
            return "#EXTM3U\n"
        return self._build_playlist(ended=not self._is_open)

    @property
    def is_open(self) -> bool:
        """True while the stream is open and ffmpeg is still running."""
        return self._is_open and self._process.poll() is None
|
||||
@@ -159,36 +159,51 @@ class StreamInterpreter:
|
||||
return config
|
||||
|
||||
def _load_primitives(self, lib_name: str):
|
||||
"""Load primitives from a Python library file."""
|
||||
"""Load primitives from a Python library file.
|
||||
|
||||
Prefers GPU-accelerated versions (*_gpu.py) when available.
|
||||
"""
|
||||
import importlib.util
|
||||
|
||||
lib_paths = [
|
||||
self.primitive_lib_dir / f"{lib_name}.py",
|
||||
self.sexp_dir / "primitive_libs" / f"{lib_name}.py",
|
||||
self.sexp_dir.parent / "sexp_effects" / "primitive_libs" / f"{lib_name}.py",
|
||||
]
|
||||
# Try GPU version first, then fall back to CPU version
|
||||
lib_names_to_try = [f"{lib_name}_gpu", lib_name]
|
||||
|
||||
lib_path = None
|
||||
actual_lib_name = lib_name
|
||||
|
||||
for try_lib in lib_names_to_try:
|
||||
lib_paths = [
|
||||
self.primitive_lib_dir / f"{try_lib}.py",
|
||||
self.sexp_dir / "primitive_libs" / f"{try_lib}.py",
|
||||
self.sexp_dir.parent / "sexp_effects" / "primitive_libs" / f"{try_lib}.py",
|
||||
]
|
||||
for p in lib_paths:
|
||||
if p.exists():
|
||||
lib_path = p
|
||||
actual_lib_name = try_lib
|
||||
break
|
||||
if lib_path:
|
||||
break
|
||||
|
||||
if not lib_path:
|
||||
print(f"Warning: primitive library '{lib_name}' not found", file=sys.stderr)
|
||||
return
|
||||
|
||||
spec = importlib.util.spec_from_file_location(lib_name, lib_path)
|
||||
spec = importlib.util.spec_from_file_location(actual_lib_name, lib_path)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(module)
|
||||
|
||||
# Check if this is a GPU-accelerated module
|
||||
is_gpu = actual_lib_name.endswith('_gpu')
|
||||
gpu_tag = " [GPU]" if is_gpu else ""
|
||||
|
||||
count = 0
|
||||
for name in dir(module):
|
||||
if name.startswith('prim_'):
|
||||
func = getattr(module, name)
|
||||
prim_name = name[5:]
|
||||
dash_name = prim_name.replace('_', '-')
|
||||
# Register ONLY with namespace (geometry:ripple-displace)
|
||||
# Register with original lib_name namespace (geometry:rotate, not geometry_gpu:rotate)
|
||||
# Don't overwrite if already registered (allows pre-registration of overrides)
|
||||
key = f"{lib_name}:{dash_name}"
|
||||
if key not in self.primitives:
|
||||
@@ -199,7 +214,7 @@ class StreamInterpreter:
|
||||
prims = getattr(module, 'PRIMITIVES')
|
||||
if isinstance(prims, dict):
|
||||
for name, func in prims.items():
|
||||
# Register ONLY with namespace
|
||||
# Register with original lib_name namespace
|
||||
# Don't overwrite if already registered
|
||||
dash_name = name.replace('_', '-')
|
||||
key = f"{lib_name}:{dash_name}"
|
||||
@@ -207,7 +222,7 @@ class StreamInterpreter:
|
||||
self.primitives[key] = func
|
||||
count += 1
|
||||
|
||||
print(f"Loaded primitives: {lib_name} ({count} functions)", file=sys.stderr)
|
||||
print(f"Loaded primitives: {lib_name} ({count} functions){gpu_tag}", file=sys.stderr)
|
||||
|
||||
def _load_effect(self, effect_path: Path):
|
||||
"""Load and register an effect from a .sexp file."""
|
||||
@@ -807,8 +822,11 @@ class StreamInterpreter:
|
||||
self._record_error(f"Primitive {op} error: {e}")
|
||||
raise RuntimeError(f"Primitive {op} failed: {e}")
|
||||
|
||||
# Unknown - return as-is
|
||||
return expr
|
||||
# Unknown function call - raise meaningful error
|
||||
raise RuntimeError(f"Unknown function or primitive: '{op}'. "
|
||||
f"Available primitives: {sorted(list(self.primitives.keys())[:10])}... "
|
||||
f"Available effects: {sorted(list(self.effects.keys())[:10])}... "
|
||||
f"Available macros: {sorted(list(self.macros.keys())[:10])}...")
|
||||
|
||||
def _step_scans(self, ctx: Context, env: dict):
|
||||
"""Step scans based on trigger evaluation."""
|
||||
@@ -833,9 +851,9 @@ class StreamInterpreter:
|
||||
"""Run the streaming pipeline."""
|
||||
# Import output classes - handle both package and direct execution
|
||||
try:
|
||||
from .output import PipeOutput, DisplayOutput, FileOutput
|
||||
from .output import PipeOutput, DisplayOutput, FileOutput, HLSOutput, IPFSHLSOutput
|
||||
except ImportError:
|
||||
from output import PipeOutput, DisplayOutput, FileOutput
|
||||
from output import PipeOutput, DisplayOutput, FileOutput, HLSOutput, IPFSHLSOutput
|
||||
|
||||
self._init()
|
||||
|
||||
@@ -871,6 +889,16 @@ class StreamInterpreter:
|
||||
out = PipeOutput(size=(w, h), fps=fps, audio_source=audio)
|
||||
elif output == "preview":
|
||||
out = DisplayOutput(size=(w, h), fps=fps, audio_source=audio)
|
||||
elif output.endswith("/hls"):
|
||||
# HLS output - output is a directory path ending in /hls
|
||||
hls_dir = output[:-4] # Remove /hls suffix
|
||||
out = HLSOutput(hls_dir, size=(w, h), fps=fps, audio_source=audio)
|
||||
elif output.endswith("/ipfs-hls"):
|
||||
# IPFS HLS output - segments uploaded to IPFS as they're created
|
||||
hls_dir = output[:-9] # Remove /ipfs-hls suffix
|
||||
import os
|
||||
ipfs_gateway = os.environ.get("IPFS_GATEWAY_URL", "https://ipfs.io/ipfs")
|
||||
out = IPFSHLSOutput(hls_dir, size=(w, h), fps=fps, audio_source=audio, ipfs_gateway=ipfs_gateway)
|
||||
else:
|
||||
out = FileOutput(output, size=(w, h), fps=fps, audio_source=audio)
|
||||
|
||||
@@ -916,6 +944,8 @@ class StreamInterpreter:
|
||||
|
||||
finally:
|
||||
out.close()
|
||||
# Store output for access to properties like playlist_cid
|
||||
self.output = out
|
||||
print("\nDone", file=sys.stderr)
|
||||
|
||||
|
||||
|
||||
@@ -69,6 +69,16 @@ def upload_to_ipfs(self, local_cid: str, actor_id: str) -> Optional[str]:
|
||||
database.update_cache_item_ipfs_cid(local_cid, ipfs_cid)
|
||||
)
|
||||
|
||||
# Update friendly_names table to use IPFS CID instead of local hash
|
||||
# This ensures assets can be fetched by remote workers via IPFS
|
||||
try:
|
||||
loop.run_until_complete(
|
||||
database.update_friendly_name_cid(actor_id, local_cid, ipfs_cid)
|
||||
)
|
||||
logger.info(f"Friendly name updated: {local_cid[:16]}... -> {ipfs_cid[:16]}...")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to update friendly name CID: {e}")
|
||||
|
||||
# Create index from IPFS CID to local cache
|
||||
cache_mgr._set_content_index(ipfs_cid, local_cid)
|
||||
|
||||
|
||||
@@ -83,6 +83,28 @@ def resolve_asset(ref: str, actor_id: Optional[str] = None) -> Optional[Path]:
|
||||
print(f"RESOLVE_ASSET: SUCCESS - resolved to {path}", file=sys.stderr)
|
||||
logger.info(f"Resolved '{ref}' via friendly name to {path}")
|
||||
return path
|
||||
|
||||
# File not in local cache - try fetching from IPFS
|
||||
# The CID from friendly_names is an IPFS CID
|
||||
print(f"RESOLVE_ASSET: file not local, trying IPFS fetch for {cid}", file=sys.stderr)
|
||||
import ipfs_client
|
||||
content = ipfs_client.get_bytes(cid, use_gateway_fallback=True)
|
||||
if content:
|
||||
# Save to local cache
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.sexp') as tmp:
|
||||
tmp.write(content)
|
||||
tmp_path = Path(tmp.name)
|
||||
# Store in cache
|
||||
cached_file, _ = cache_mgr.put(tmp_path, node_type="effect", skip_ipfs=True)
|
||||
# Index by IPFS CID for future lookups
|
||||
cache_mgr._set_content_index(cid, cached_file.cid)
|
||||
print(f"RESOLVE_ASSET: fetched from IPFS and cached at {cached_file.path}", file=sys.stderr)
|
||||
logger.info(f"Fetched '{ref}' from IPFS and cached at {cached_file.path}")
|
||||
return cached_file.path
|
||||
else:
|
||||
print(f"RESOLVE_ASSET: IPFS fetch failed for {cid}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
print(f"RESOLVE_ASSET: ERROR - {e}", file=sys.stderr)
|
||||
logger.warning(f"Failed to resolve friendly name '{ref}': {e}")
|
||||
@@ -260,7 +282,8 @@ def run_stream(
|
||||
cache_dir = Path(os.environ.get("CACHE_DIR", "/data/cache"))
|
||||
stream_dir = cache_dir / "streaming" / run_id
|
||||
stream_dir.mkdir(parents=True, exist_ok=True)
|
||||
output_path = stream_dir / "output.mp4" # Always mp4 for streaming
|
||||
# Use IPFS HLS output for distributed streaming - segments uploaded to IPFS
|
||||
output_path = str(stream_dir) + "/ipfs-hls" # /ipfs-hls suffix triggers IPFS HLS mode
|
||||
|
||||
# Create symlinks to effect directories so relative paths work
|
||||
(work_dir / "sexp_effects").symlink_to(sexp_effects_dir)
|
||||
@@ -320,15 +343,50 @@ def run_stream(
|
||||
|
||||
self.update_state(state='CACHING', meta={'progress': 90})
|
||||
|
||||
# Validate output file (must be > 1KB to have actual frames)
|
||||
if output_path.exists() and output_path.stat().st_size < 1024:
|
||||
raise RuntimeError(f"Output file is too small ({output_path.stat().st_size} bytes) - rendering likely failed")
|
||||
# Get IPFS playlist CID if available (from IPFSHLSOutput)
|
||||
ipfs_playlist_cid = None
|
||||
ipfs_playlist_url = None
|
||||
segment_cids = {}
|
||||
if hasattr(interp, 'output') and hasattr(interp.output, 'playlist_cid'):
|
||||
ipfs_playlist_cid = interp.output.playlist_cid
|
||||
ipfs_playlist_url = interp.output.playlist_url
|
||||
segment_cids = getattr(interp.output, 'segment_cids', {})
|
||||
logger.info(f"IPFS HLS: playlist={ipfs_playlist_cid}, segments={len(segment_cids)}")
|
||||
|
||||
# HLS output creates stream.m3u8 and segment_*.ts files in stream_dir
|
||||
hls_playlist = stream_dir / "stream.m3u8"
|
||||
|
||||
# Validate HLS output (must have playlist and at least one segment)
|
||||
if not hls_playlist.exists():
|
||||
raise RuntimeError("HLS playlist not created - rendering likely failed")
|
||||
|
||||
segments = list(stream_dir.glob("segment_*.ts"))
|
||||
if not segments:
|
||||
raise RuntimeError("No HLS segments created - rendering likely failed")
|
||||
|
||||
logger.info(f"HLS rendering complete: {len(segments)} segments created, IPFS playlist: {ipfs_playlist_cid}")
|
||||
|
||||
# Mux HLS segments into a single MP4 for persistent cache storage
|
||||
final_mp4 = stream_dir / "output.mp4"
|
||||
import subprocess
|
||||
mux_cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-i", str(hls_playlist),
|
||||
"-c", "copy", # Just copy streams, no re-encoding
|
||||
str(final_mp4)
|
||||
]
|
||||
logger.info(f"Muxing HLS to MP4: {' '.join(mux_cmd)}")
|
||||
result = subprocess.run(mux_cmd, capture_output=True, text=True)
|
||||
if result.returncode != 0:
|
||||
logger.warning(f"HLS mux failed: {result.stderr}")
|
||||
# Fall back to using the first segment for caching
|
||||
final_mp4 = segments[0]
|
||||
|
||||
# Store output in cache
|
||||
if output_path.exists():
|
||||
if final_mp4.exists():
|
||||
cache_mgr = get_cache_manager()
|
||||
cached_file, ipfs_cid = cache_mgr.put(
|
||||
source_path=output_path,
|
||||
source_path=final_mp4,
|
||||
node_type="STREAM_OUTPUT",
|
||||
node_id=f"stream_{task_id}",
|
||||
)
|
||||
@@ -365,6 +423,15 @@ def run_stream(
|
||||
ipfs_cid=ipfs_cid,
|
||||
actor_id=actor_id,
|
||||
))
|
||||
# Register output as video type so frontend displays it correctly
|
||||
_resolve_loop.run_until_complete(database.add_item_type(
|
||||
cid=cached_file.cid,
|
||||
actor_id=actor_id,
|
||||
item_type="video",
|
||||
path=str(cached_file.path),
|
||||
description=f"Stream output from run {run_id}",
|
||||
))
|
||||
logger.info(f"Registered output {cached_file.cid} as video type")
|
||||
# Update pending run status
|
||||
_resolve_loop.run_until_complete(database.update_pending_run_status(
|
||||
run_id=run_id,
|
||||
@@ -381,6 +448,10 @@ def run_stream(
|
||||
"output_cid": cached_file.cid,
|
||||
"ipfs_cid": ipfs_cid,
|
||||
"output_path": str(cached_file.path),
|
||||
# IPFS HLS streaming info
|
||||
"ipfs_playlist_cid": ipfs_playlist_cid,
|
||||
"ipfs_playlist_url": ipfs_playlist_url,
|
||||
"ipfs_segment_count": len(segment_cids),
|
||||
}
|
||||
else:
|
||||
# Update pending run status to failed - reuse module loop
|
||||
|
||||
Reference in New Issue
Block a user