Fix GPU encoding black frames and improve debug logging
Some checks are pending
GPU Worker CI/CD / test (push) Waiting to run
GPU Worker CI/CD / deploy (push) Blocked by required conditions

- Add CUDA sync before encoding to ensure RGB->NV12 kernel completes
- Add debug logging for frame data validation (sum check)
- Handle GPUFrame objects in GPUHLSOutput.write()
- Fix cv2.resize for CuPy arrays (use cupyx.scipy.ndimage.zoom)
- Fix fused pipeline parameter ordering (geometric first, color second)
- Add raindrop-style ripple with random position/freq/decay/amp
- Generate final VOD playlist with #EXT-X-ENDLIST

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
giles
2026-02-04 16:33:12 +00:00
parent b15e381f81
commit 9a8a701492
8 changed files with 471 additions and 37 deletions

View File

@@ -1137,6 +1137,52 @@ async def serve_hls_content(
raise HTTPException(404, f"File not found: {filename}")
@router.get("/{run_id}/playlist.m3u8")
async def get_playlist(run_id: str, request: Request):
    """Serve the latest HLS playlist for a streaming run.

    Looks up the run's current playlist CID and returns the playlist body
    fetched from the IPFS API, so HLS players can poll one stable URL
    instead of following changing IPFS CIDs.

    Args:
        run_id: Identifier of the pending run.
        request: Incoming request (not read here; kept for the route signature).

    Returns:
        The playlist text served as ``application/vnd.apple.mpegurl`` with
        caching disabled and a permissive CORS header.

    Raises:
        HTTPException: 404 when the run or its playlist CID is unknown;
            502 when the IPFS request fails or answers with a non-200 status.
    """
    import database
    import os
    import httpx
    from fastapi.responses import Response

    await database.init_db()
    pending = await database.get_pending_run(run_id)
    if not pending:
        raise HTTPException(404, "Run not found")

    ipfs_playlist_cid = pending.get("ipfs_playlist_cid")
    if not ipfs_playlist_cid:
        raise HTTPException(404, "Playlist not yet available")

    # Fetch playlist from the IPFS node configured via IPFS_API_URL
    ipfs_api = os.environ.get("IPFS_API_URL", "http://celery_ipfs:5001")
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            # Pass the CID through `params` so it is URL-encoded rather than
            # spliced verbatim into the query string.
            resp = await client.post(
                f"{ipfs_api}/api/v0/cat",
                params={"arg": ipfs_playlist_cid},
            )
    except httpx.RequestError as e:
        raise HTTPException(502, f"IPFS error: {e}") from e

    if resp.status_code != 200:
        raise HTTPException(502, "Failed to fetch playlist from IPFS")

    return Response(
        content=resp.text,
        media_type="application/vnd.apple.mpegurl",
        headers={
            # Players poll this URL; every poll must reach the server.
            "Cache-Control": "no-cache, no-store, must-revalidate",
            "Pragma": "no-cache",
            "Expires": "0",
            "Access-Control-Allow-Origin": "*",
        },
    )
@router.get("/{run_id}/ipfs-stream")
async def get_ipfs_stream_info(run_id: str, request: Request):
"""Get IPFS streaming info for a run.

View File

@@ -103,15 +103,34 @@
const video = document.getElementById('live-video');
const statusEl = document.getElementById('stream-status');
const loadingEl = document.getElementById('stream-loading');
const hlsUrl = '/runs/{{ run.run_id }}/hls/stream.m3u8';
// Use dynamic playlist endpoint with cache busting
const baseUrl = '/runs/{{ run.run_id }}/playlist.m3u8';
function getHlsUrl() {
    // A fresh timestamp on each call makes every playlist URL unique,
    // so a cached copy is never reused.
    const stamp = Date.now();
    return `${baseUrl}?_t=${stamp}`;
}
let hls = null;
let retryCount = 0;
const maxRetries = 120; // Try for up to 4 minutes
let segmentsLoaded = 0;
// Playlist loader that stamps every manifest/level request with a fresh
// `_t` query parameter before delegating to the default hls.js loader.
class CacheBustingPlaylistLoader extends Hls.DefaultConfig.loader {
    load(context, config, callbacks) {
        const isPlaylistRequest = ['manifest', 'level'].includes(context.type);
        if (isPlaylistRequest) {
            // Resolve against the page origin so relative URLs parse too.
            const bustedUrl = new URL(context.url, window.location.origin);
            bustedUrl.searchParams.set('_t', Date.now());
            context.url = bustedUrl.toString();
        }
        super.load(context, config, callbacks);
    }
}
function initHls() {
if (Hls.isSupported()) {
hls = new Hls({
// Custom loader to bust cache on playlist requests
pLoader: CacheBustingPlaylistLoader,
// Stay far behind live edge - rendering is slow (~0.1x speed)
// 10 segments = 40s of buffer before catching up
liveSyncDurationCount: 10, // Stay 10 segments behind live edge
@@ -177,7 +196,7 @@
// Exponential backoff with jitter
const delay = Math.min(1000 * Math.pow(1.5, Math.min(retryCount, 6)), 10000);
setTimeout(() => {
hls.loadSource(hlsUrl);
hls.loadSource(getHlsUrl());
}, delay + Math.random() * 1000);
} else {
statusEl.textContent = 'Stream unavailable';
@@ -246,11 +265,11 @@
}
}, 1000);
hls.loadSource(hlsUrl);
hls.loadSource(getHlsUrl());
hls.attachMedia(video);
} else if (video.canPlayType('application/vnd.apple.mpegurl')) {
// Native HLS support (Safari)
video.src = hlsUrl;
video.src = getHlsUrl();
video.addEventListener('loadedmetadata', function() {
loadingEl.classList.add('hidden');
statusEl.textContent = 'Playing';

223
recipes/woods-lowres.sexp Normal file
View File

@@ -0,0 +1,223 @@
;; Woods Recipe - OPTIMIZED VERSION
;;
;; Uses fused-pipeline for GPU acceleration when available,
;; falls back to individual primitives on CPU.
;;
;; Key optimizations:
;; 1. Uses streaming_gpu primitives with fast CUDA kernels
;; 2. Uses fused-pipeline to batch effects into single kernel passes
;; 3. GPU persistence - frames stay on GPU throughout pipeline
(stream "woods-lowres"
:fps 30
:width 640
:height 360
:seed 42
;; Load standard primitives (includes proper asset resolution)
;; Auto-selects GPU versions when available, falls back to CPU
(include :name "tpl-standard-primitives")
;; === SOURCES (using streaming: which has proper asset resolution) ===
;; Eight video sources, "woods-1" .. "woods-8". process-pair-fast indexes into
;; this vector (modulo its length), and the cycle scan wraps :active at its length.
;; NOTE(review): the second argument (30) presumably is the source fps - confirm
;; against streaming:make-video-source.
(def sources [
(streaming:make-video-source "woods-1" 30)
(streaming:make-video-source "woods-2" 30)
(streaming:make-video-source "woods-3" 30)
(streaming:make-video-source "woods-4" 30)
(streaming:make-video-source "woods-5" 30)
(streaming:make-video-source "woods-6" 30)
(streaming:make-video-source "woods-7" 30)
(streaming:make-video-source "woods-8" 30)
])
;; Per-pair config: one map per pair index, read by process-pair-fast.
;;   :dir             sign multiplier applied to both rotation angles
;;   :rot-a / :rot-b  rotation scale for source A / B
;;                    (angle = dir * pair-angle * rot * 0.01)
;;   :zoom-a / :zoom-b  zoom reached at audio energy 1 for source A / B
;;                      (energy 0 maps to zoom 1)
(def pair-configs [
{:dir -1 :rot-a 45 :rot-b -45 :zoom-a 1.5 :zoom-b 0.5}
{:dir 1 :rot-a 45 :rot-b -45 :zoom-a 1.5 :zoom-b 0.5}
{:dir 1 :rot-a 45 :rot-b -45 :zoom-a 1.5 :zoom-b 0.5}
{:dir -1 :rot-a -45 :rot-b 45 :zoom-a 0.5 :zoom-b 1.5}
{:dir -1 :rot-a 45 :rot-b -45 :zoom-a 1.5 :zoom-b 0.5}
{:dir 1 :rot-a 30 :rot-b -30 :zoom-a 1.3 :zoom-b 0.7}
{:dir -1 :rot-a -45 :rot-b 45 :zoom-a 0.5 :zoom-b 1.5}
{:dir 1 :rot-a 45 :rot-b -45 :zoom-a 1.5 :zoom-b 0.5}
])
;; Audio
(def music (streaming:make-audio-analyzer "woods-audio"))
(audio-playback "woods-audio")
;; === SCANS ===
;; Cycle state
;;   :active  index of the pair currently shown
;;   :beat    steps elapsed in the current cycle
;;   :clen    length of the current cycle in steps (starts at 16)
;; While beat+1 < clen only :beat advances. Otherwise :active moves to the next
;; index (wrapping at the number of sources), :beat resets to 0 and a new :clen
;; is computed as 8 + ((beat-count * 7) mod 17).
;; NOTE(review): presumably the scan steps once per detected beat - confirm
;; against the scan / streaming:audio-beat implementation.
(scan cycle (streaming:audio-beat music t)
:init {:active 0 :beat 0 :clen 16}
:step (if (< (+ beat 1) clen)
(dict :active active :beat (+ beat 1) :clen clen)
(dict :active (mod (+ active 1) (len sources)) :beat 0
:clen (+ 8 (mod (* (streaming:audio-beat-count music t) 7) 17)))))
;; Spin scan - accumulates the global rotation angle used by the frame pipeline.
;; Each step:
;;   - :dir is negated when (core:rand) < 0.05, otherwise kept
;;   - :speed is re-picked as 1 + (core:rand-int 1 4) when (core:rand) < 0.1,
;;     otherwise kept
;;   - :angle grows by new-dir * new-speed (it is never wrapped)
;; NOTE(review): the thresholds read as 5% / 10% chances assuming core:rand is
;; uniform on [0, 1) - confirm.
(scan spin (streaming:audio-beat music t)
:init {:angle 0 :dir 1 :speed 2}
:step (let [new-dir (if (< (core:rand) 0.05) (* dir -1) dir)
new-speed (if (< (core:rand) 0.1) (+ 1 (core:rand-int 1 4)) speed)]
(dict :angle (+ angle (* new-dir new-speed))
:dir new-dir
:speed new-speed)))
;; Ripple scan - raindrop style, all params randomized
;; Higher freq = bigger gaps between waves (formula is dist/freq)
;;   :gate      countdown; the frame pipeline multiplies ripple amplitude by it,
;;              so a gate of 0 means no ripple
;;   :cx / :cy  ripple centre (initially 320,180 - the middle of the 640x360 frame)
;;   :freq, :decay, :amp-mult  per-ripple wave parameters
;; Each step the gate is either re-armed to 2 + (core:rand-int 0 4) when
;; (core:rand) < 0.2, or decremented towards 0. A ripple counts as "triggered"
;; only when the new gate is strictly greater than the old one; only then are
;; centre, freq, decay and amp-mult re-randomized, otherwise they are kept.
(scan ripple-state (streaming:audio-beat music t)
:init {:gate 0 :cx 320 :cy 180 :freq 20 :decay 6 :amp-mult 1.0}
:step (let [new-gate (if (< (core:rand) 0.2) (+ 2 (core:rand-int 0 4)) (core:max 0 (- gate 1)))
triggered (> new-gate gate)
new-cx (if triggered (core:rand-int 50 590) cx)
new-cy (if triggered (core:rand-int 50 310) cy)
new-freq (if triggered (+ 15 (core:rand-int 0 20)) freq)
new-decay (if triggered (+ 5 (core:rand-int 0 4)) decay)
new-amp-mult (if triggered (+ 0.8 (* (core:rand) 1.2)) amp-mult)]
(dict :gate new-gate :cx new-cx :cy new-cy :freq new-freq :decay new-decay :amp-mult new-amp-mult)))
;; Pair states - one state map per entry in sources, all updated on every step.
;;   :inv-a / :inv-b          invert countdowns (invert is on while > 0)
;;   :hue-a / :hue-b          hue-shift countdowns (shift is on while > 0)
;;   :hue-a-val / :hue-b-val  hue shift in degrees, re-picked as 30 + rand*300
;;                            whenever the matching countdown increases
;;   :mix / :mix-rem          blend ratio and the steps left before it is re-picked
;;   :angle                   accumulated per-pair angle
;;   :rot-beat / :rot-clen    position in / length of the current rotation cycle
;; Countdowns are re-armed to 1 + (core:rand-int 1 4) when (core:rand) < 0.1,
;; otherwise decremented towards 0.
(scan pairs (streaming:audio-beat music t)
:init {:states (map (core:range (len sources)) (lambda (_)
{:inv-a 0 :inv-b 0 :hue-a 0 :hue-b 0 :hue-a-val 0 :hue-b-val 0 :mix 0.5 :mix-rem 5 :angle 0 :rot-beat 0 :rot-clen 25}))}
:step (dict :states (map states (lambda (p)
(let [new-inv-a (if (< (core:rand) 0.1) (+ 1 (core:rand-int 1 4)) (core:max 0 (- (get p :inv-a) 1)))
new-inv-b (if (< (core:rand) 0.1) (+ 1 (core:rand-int 1 4)) (core:max 0 (- (get p :inv-b) 1)))
old-hue-a (get p :hue-a)
old-hue-b (get p :hue-b)
new-hue-a (if (< (core:rand) 0.1) (+ 1 (core:rand-int 1 4)) (core:max 0 (- old-hue-a 1)))
new-hue-b (if (< (core:rand) 0.1) (+ 1 (core:rand-int 1 4)) (core:max 0 (- old-hue-b 1)))
new-hue-a-val (if (> new-hue-a old-hue-a) (+ 30 (* (core:rand) 300)) (get p :hue-a-val))
new-hue-b-val (if (> new-hue-b old-hue-b) (+ 30 (* (core:rand) 300)) (get p :hue-b-val))
mix-rem (get p :mix-rem)
old-mix (get p :mix)
;; The mix ratio is held while mix-rem counts down; at 0 a new hold length
;; and a new ratio (core:rand-int 0 2) * 0.5 are chosen.
new-mix-rem (if (> mix-rem 0) (- mix-rem 1) (+ 1 (core:rand-int 1 10)))
new-mix (if (> mix-rem 0) old-mix (* (core:rand-int 0 2) 0.5))
rot-beat (get p :rot-beat)
rot-clen (get p :rot-clen)
old-angle (get p :angle)
;; The rotation cycle restarts when rot-beat+1 reaches rot-clen, picking a
;; new length of 20 + (core:rand-int 0 10).
new-rot-beat (if (< (+ rot-beat 1) rot-clen) (+ rot-beat 1) 0)
new-rot-clen (if (< (+ rot-beat 1) rot-clen) rot-clen (+ 20 (core:rand-int 0 10)))
;; The angle step uses the cycle length from before this update.
new-angle (+ old-angle (/ 360 rot-clen))]
(dict :inv-a new-inv-a :inv-b new-inv-b
:hue-a new-hue-a :hue-b new-hue-b
:hue-a-val new-hue-a-val :hue-b-val new-hue-b-val
:mix new-mix :mix-rem new-mix-rem
:angle new-angle :rot-beat new-rot-beat :rot-clen new-rot-clen))))))
;; === OPTIMIZED PROCESS-PAIR MACRO ===
;; Uses fused-pipeline to batch rotate+hue+invert into single kernel
;;
;; (process-pair-fast idx) reads the two sources for pair idx, runs each
;; through its own effect list and returns the blend of the two results.
;; idx selects the entry in pair-configs and in the pairs scan state; the
;; sources used are 2*idx and 2*idx+1, modulo the number of sources.
;; The body also uses `t` and `e`, which are not bound here: `e` is the audio
;; energy bound in the frame pipeline's let, so the macro must be expanded
;; where both are in scope.
(defmacro process-pair-fast (idx)
(let [;; Get sources for this pair (with safe modulo indexing)
num-sources (len sources)
src-a (nth sources (mod (* idx 2) num-sources))
src-b (nth sources (mod (+ (* idx 2) 1) num-sources))
cfg (nth pair-configs idx)
pstate (nth (bind pairs :states) idx)
;; Read frames (GPU decode, stays on GPU)
frame-a (streaming:source-read src-a t)
frame-b (streaming:source-read src-b t)
;; Get state values
dir (get cfg :dir)
rot-max-a (get cfg :rot-a)
rot-max-b (get cfg :rot-b)
zoom-max-a (get cfg :zoom-a)
zoom-max-b (get cfg :zoom-b)
pair-angle (get pstate :angle)
inv-a-on (> (get pstate :inv-a) 0)
inv-b-on (> (get pstate :inv-b) 0)
hue-a-on (> (get pstate :hue-a) 0)
hue-b-on (> (get pstate :hue-b) 0)
hue-a-val (get pstate :hue-a-val)
hue-b-val (get pstate :hue-b-val)
mix-ratio (get pstate :mix)
;; Calculate rotation angles
angle-a (* dir pair-angle rot-max-a 0.01)
angle-b (* dir pair-angle rot-max-b 0.01)
;; Energy-driven zoom (maps audio energy 0-1 to 1-max)
zoom-a (core:map-range e 0 1 1 zoom-max-a)
zoom-b (core:map-range e 0 1 1 zoom-max-b)
;; Define effect pipelines for each source
;; These get compiled to single CUDA kernels!
;; First resize to target resolution, then apply effects
;; (640x360 matches the stream's :width / :height; hue_shift and invert
;; are always listed and switched off via 0 degrees / amount 0)
effects-a [{:op "resize" :width 640 :height 360}
{:op "zoom" :amount zoom-a}
{:op "rotate" :angle angle-a}
{:op "hue_shift" :degrees (if hue-a-on hue-a-val 0)}
{:op "invert" :amount (if inv-a-on 1 0)}]
effects-b [{:op "resize" :width 640 :height 360}
{:op "zoom" :amount zoom-b}
{:op "rotate" :angle angle-b}
{:op "hue_shift" :degrees (if hue-b-on hue-b-val 0)}
{:op "invert" :amount (if inv-b-on 1 0)}]
;; Apply fused pipelines (single kernel per source!)
processed-a (streaming:fused-pipeline frame-a effects-a)
processed-b (streaming:fused-pipeline frame-b effects-b)]
;; Blend the two processed frames
(blending:blend-images processed-a processed-b mix-ratio)))
;; === FRAME PIPELINE ===
;; Per-frame expression: renders the active pair, crossfades into the next
;; pair during the last third of each cycle, then applies the global spin
;; rotation and the ripple as one final fused pipeline.
(frame
(let [now t
e (streaming:audio-energy music now)
;; Get cycle state
active (bind cycle :active)
beat-pos (bind cycle :beat)
clen (bind cycle :clen)
;; Transition logic
;; fading holds while 3*beat-pos lies in [2*clen, 3*clen), i.e. the last
;; third of the cycle; fade-amt then rises from 0 towards 1.
phase3 (* beat-pos 3)
fading (and (>= phase3 (* clen 2)) (< phase3 (* clen 3)))
fade-amt (if fading (/ (- phase3 (* clen 2)) clen) 0)
next-idx (mod (+ active 1) (len sources))
;; Process active pair with fused pipeline
active-frame (process-pair-fast active)
;; Crossfade with zoom during transition
;; Old pair: zooms out (1.0 -> 2.0) and fades out
;; New pair: starts small (0.1), zooms in (-> 1.0) and fades in
result (if fading
(let [next-frame (process-pair-fast next-idx)
;; Active zooms out as it fades
active-zoom (+ 1.0 fade-amt)
active-zoomed (streaming:fused-pipeline active-frame
[{:op "zoom" :amount active-zoom}])
;; Next starts small and zooms in
next-zoom (+ 0.1 (* fade-amt 0.9))
next-zoomed (streaming:fused-pipeline next-frame
[{:op "zoom" :amount next-zoom}])]
(blending:blend-images active-zoomed next-zoomed fade-amt))
active-frame)
;; Final effects pipeline (fused!)
spin-angle (bind spin :angle)
;; Ripple params - all randomized per ripple trigger
;; Amplitude scales with the gate countdown, so it is 0 when no ripple is active.
rip-gate (bind ripple-state :gate)
rip-amp-mult (bind ripple-state :amp-mult)
rip-amp (* rip-gate rip-amp-mult (core:map-range e 0 1 50 200))
rip-cx (bind ripple-state :cx)
rip-cy (bind ripple-state :cy)
rip-freq (bind ripple-state :freq)
rip-decay (bind ripple-state :decay)
;; Fused final effects
final-effects [{:op "rotate" :angle spin-angle}
{:op "ripple" :amplitude rip-amp :frequency rip-freq :decay rip-decay
:phase (* now 5) :center_x rip-cx :center_y rip-cy}]]
;; Apply final fused pipeline
;; The keyword args repeat values already present in final-effects.
;; NOTE(review): presumably these are the per-frame dynamic parameters of
;; the fused pipeline (overriding the values in the effect list) - confirm
;; against streaming:fused-pipeline.
(streaming:fused-pipeline result final-effects
:rotate_angle spin-angle
:ripple_phase (* now 5)
:ripple_amplitude rip-amp))))

View File

@@ -894,7 +894,7 @@ def prim_fused_pipeline(img, effects_list, **dynamic_params):
Returns:
Processed image as GPU array
Supported ops: rotate, zoom, ripple, invert, hue_shift, brightness
Supported ops: rotate, zoom, ripple, invert, hue_shift, brightness, resize
"""
global _FUSED_CALL_COUNT
_FUSED_CALL_COUNT += 1
@@ -904,8 +904,34 @@ def prim_fused_pipeline(img, effects_list, **dynamic_params):
# Normalize effects list - convert Keyword keys to strings
effects_list = [_normalize_effect_dict(e) for e in effects_list]
# Handle resize separately - it changes dimensions so must happen before fused kernel
resize_ops = [e for e in effects_list if e.get('op') == 'resize']
other_effects = [e for e in effects_list if e.get('op') != 'resize']
# Apply resize first if needed
if resize_ops:
for resize_op in resize_ops:
target_w = int(resize_op.get('width', 640))
target_h = int(resize_op.get('height', 360))
# Wrap in GPUFrame if needed
if isinstance(img, GPUFrame):
img = gpu_resize(img, (target_w, target_h))
img = img.gpu if img.is_on_gpu else img.cpu
else:
frame = GPUFrame(img, on_gpu=hasattr(img, '__cuda_array_interface__'))
img = gpu_resize(frame, (target_w, target_h))
img = img.gpu if img.is_on_gpu else img.cpu
# If no other effects, just return the resized image
if not other_effects:
return img
# Update effects list to exclude resize ops
effects_list = other_effects
if not _FUSED_KERNELS_AVAILABLE:
# Fallback: apply effects one by one
print(f"[FUSED FALLBACK] Using fallback path for {len(effects_list)} effects", file=sys.stderr)
# Wrap in GPUFrame if needed (GPU functions expect GPUFrame objects)
if isinstance(img, GPUFrame):
result = img
@@ -922,20 +948,27 @@ def prim_fused_pipeline(img, effects_list, **dynamic_params):
result = gpu_zoom(result, amount)
elif op == 'hue_shift':
degrees = effect.get('degrees', 0)
result = gpu_hue_shift(result, degrees)
if abs(degrees) > 0.1: # Only apply if significant shift
result = gpu_hue_shift(result, degrees)
elif op == 'ripple':
result = gpu_ripple(result,
amplitude=dynamic_params.get('ripple_amplitude', effect.get('amplitude', 10)),
frequency=effect.get('frequency', 8),
decay=effect.get('decay', 2),
phase=dynamic_params.get('ripple_phase', effect.get('phase', 0)),
cx=effect.get('center_x'),
cy=effect.get('center_y'))
amplitude = dynamic_params.get('ripple_amplitude', effect.get('amplitude', 10))
if amplitude > 0.1: # Only apply if amplitude is significant
result = gpu_ripple(result,
amplitude=amplitude,
frequency=effect.get('frequency', 8),
decay=effect.get('decay', 2),
phase=dynamic_params.get('ripple_phase', effect.get('phase', 0)),
cx=effect.get('center_x'),
cy=effect.get('center_y'))
elif op == 'brightness':
factor = effect.get('factor', 1.0)
result = gpu_contrast(result, factor, 0)
elif op == 'invert':
result = gpu_invert(result)
amount = effect.get('amount', 0)
if amount > 0.5: # Only invert if amount > 0.5
result = gpu_invert(result)
else:
raise ValueError(f"Unsupported fused pipeline operation: '{op}'. Supported ops: rotate, zoom, hue_shift, ripple, brightness, invert, resize")
# Return raw array, not GPUFrame (downstream expects arrays with .flags attribute)
if isinstance(result, GPUFrame):
return result.gpu if result.is_on_gpu else result.cpu

View File

@@ -99,10 +99,13 @@ class GPUEncoder:
self._init_frame_buffer()
# Create encoder with low-latency settings (no B-frames for immediate output)
# Use H264 codec explicitly, with SPS/PPS headers for browser compatibility
self.encoder = nvc.CreateEncoder(
width, height, "NV12", usecpuinputbuffer=False,
codec="h264", # Explicit H.264 (not HEVC)
bf=0, # No B-frames - immediate output
lowLatency=1, # Low latency mode
repeatSPSPPS=1, # Include SPS/PPS with each IDR frame
idrPeriod=30, # IDR frame every 30 frames (1 sec at 30fps)
)
# CUDA kernel grid/block config
@@ -189,10 +192,25 @@ class GPUEncoder:
if not frame_gpu.flags['C_CONTIGUOUS']:
frame_gpu = cp.ascontiguousarray(frame_gpu)
# Debug: check input frame has actual data (first few frames only)
if self._frame_count < 3:
frame_sum = float(cp.sum(frame_gpu))
print(f"[GPUEncoder] Frame {self._frame_count}: shape={frame_gpu.shape}, dtype={frame_gpu.dtype}, sum={frame_sum:.0f}", file=sys.stderr)
if frame_sum < 1000:
print(f"[GPUEncoder] WARNING: Frame appears to be mostly black!", file=sys.stderr)
# Convert RGB to NV12 on GPU
kernel = _get_rgb_to_nv12_kernel()
kernel(self._grid, self._block, (frame_gpu, self._y_plane, self._uv_plane, self.width, self.height))
# CRITICAL: Synchronize CUDA to ensure kernel completes before encoding
cp.cuda.Stream.null.synchronize()
# Debug: check Y plane has data after conversion (first few frames only)
if self._frame_count < 3:
y_sum = float(cp.sum(self._y_plane))
print(f"[GPUEncoder] Frame {self._frame_count}: Y plane sum={y_sum:.0f}", file=sys.stderr)
# Encode (GPU to GPU)
result = self.encoder.Encode(self._template_frame)
self._frame_count += 1
@@ -312,6 +330,11 @@ class GPUHLSOutput:
if not self._is_open:
return
# Handle GPUFrame objects (from streaming_gpu primitives)
if hasattr(frame, 'gpu') and hasattr(frame, 'is_on_gpu'):
# It's a GPUFrame - extract the underlying array
frame = frame.gpu if frame.is_on_gpu else frame.cpu
# GPU encode
encoded = self._gpu_encoder.encode_frame(frame)
@@ -439,8 +462,44 @@ class GPUHLSOutput:
self._upload_queue.put(None) # Signal shutdown
self._upload_thread.join(timeout=30)
# Generate final playlist with #EXT-X-ENDLIST for VOD playback
self._generate_final_playlist()
self._gpu_encoder.close()
def _generate_final_playlist(self):
    """Build and upload the final VOD playlist for a completed stream.

    Lists every uploaded segment in ascending segment order, terminates the
    playlist with ``#EXT-X-ENDLIST``, uploads it (pinned) via
    ``_ipfs_add_bytes`` and stores the resulting CID in ``_playlist_cid``.
    When the upload yields a CID and an ``_on_playlist_update`` callback is
    set, the callback is invoked with that CID. Does nothing when no segment
    has been recorded.
    """
    with self._upload_lock:
        if not self.segment_cids:
            return

        # Use /ipfs-ts/ path for segments to get correct MIME type (video/mp2t).
        # Only the LAST "/ipfs" is rewritten: a plain str.replace() would also
        # mangle a gateway host that starts with "ipfs" (for example
        # "https://ipfs.io/ipfs" would become "https://ipfs-ts.io/ipfs-ts").
        head, found, tail = self.ipfs_gateway.rpartition("/ipfs")
        segment_gateway = f"{head}/ipfs-ts{tail}" if found else self.ipfs_gateway

        lines = [
            "#EXTM3U",
            "#EXT-X-VERSION:3",
            f"#EXT-X-TARGETDURATION:{int(self.segment_duration) + 1}",
            "#EXT-X-MEDIA-SEQUENCE:0",
            "#EXT-X-PLAYLIST-TYPE:VOD",  # Mark as VOD for completed streams
        ]
        for seg_num in sorted(self.segment_cids):
            lines.append(f"#EXTINF:{self.segment_duration:.3f},")
            lines.append(f"{segment_gateway}/{self.segment_cids[seg_num]}")

        # Mark stream as complete - critical for VOD playback
        lines.append("#EXT-X-ENDLIST")
        playlist_content = "\n".join(lines) + "\n"

        # Upload final playlist
        self._playlist_cid = self._ipfs_add_bytes(playlist_content.encode(), pin=True)
        if self._playlist_cid:
            print(f"[GPUHLSOutput] Final VOD playlist: {self._playlist_cid} ({len(self.segment_cids)} segments)", file=sys.stderr)
            if self._on_playlist_update:
                self._on_playlist_update(self._playlist_cid)
@property
def is_open(self) -> bool:
    """Return the current value of the ``_is_open`` flag."""
    return self._is_open

View File

@@ -11,6 +11,9 @@ import numpy as np
from typing import Dict, List, Any, Optional, Tuple
import hashlib
import sys
import logging
logger = logging.getLogger(__name__)
# Kernel cache
_COMPILED_KERNELS: Dict[str, Any] = {}
@@ -72,6 +75,13 @@ def compile_frame_pipeline(effects: List[dict], width: int, height: int) -> call
def _generate_fused_kernel(effects: List[dict], width: int, height: int) -> str:
"""Generate CUDA kernel code for fused effects pipeline."""
# Validate all ops are supported
SUPPORTED_OPS = {'rotate', 'zoom', 'ripple', 'invert', 'hue_shift', 'brightness'}
for effect in effects:
op = effect.get('op')
if op not in SUPPORTED_OPS:
raise ValueError(f"Unsupported CUDA kernel operation: '{op}'. Supported ops: {', '.join(sorted(SUPPORTED_OPS))}. Note: 'resize' must be handled separately before the fused kernel.")
# Build the kernel
code = r'''
extern "C" __global__
@@ -129,7 +139,7 @@ void fused_pipeline(
'''
elif op == 'ripple':
code += f'''
// Ripple {i}
// Ripple {i} - matching original formula: sin(dist/freq - phase) * exp(-dist*decay/maxdim)
{{
float amplitude = params[param_idx++];
float frequency = params[param_idx++];
@@ -141,9 +151,11 @@ void fused_pipeline(
float rdx = src_x - rcx;
float rdy = src_y - rcy;
float dist = sqrtf(rdx * rdx + rdy * rdy);
float max_dim = (float)(width > height ? width : height);
float wave = sinf(dist * frequency * 0.1f + phase);
float amp = amplitude * expf(-dist * decay * 0.01f);
// Original formula: sin(dist / frequency - phase) * exp(-dist * decay / max_dim)
float wave = sinf(dist / frequency - phase);
float amp = amplitude * expf(-dist * decay / max_dim);
if (dist > 0.001f) {{
ripple_dx += rdx / dist * wave * amp;
@@ -288,10 +300,25 @@ void fused_pipeline(
return code
_BUILD_PARAMS_COUNT = 0
def _build_params(effects: List[dict], dynamic_params: dict) -> cp.ndarray:
"""Build parameter array for kernel."""
"""Build parameter array for kernel.
IMPORTANT: Parameters must be built in the same order the kernel consumes them:
1. First all geometric transforms (rotate, zoom, ripple) in list order
2. Then all color transforms (invert, hue_shift, brightness) in list order
"""
global _BUILD_PARAMS_COUNT
_BUILD_PARAMS_COUNT += 1
# ALWAYS log first few calls - use WARNING to ensure visibility in Celery logs
if _BUILD_PARAMS_COUNT <= 3:
logger.warning(f"[BUILD_PARAMS #{_BUILD_PARAMS_COUNT}] effects={[e['op'] for e in effects]}")
params = []
# First pass: geometric transforms (matches kernel's first loop)
for effect in effects:
op = effect['op']
@@ -300,16 +327,30 @@ def _build_params(effects: List[dict], dynamic_params: dict) -> cp.ndarray:
elif op == 'zoom':
params.append(float(dynamic_params.get('zoom_amount', effect.get('amount', 1.0))))
elif op == 'ripple':
params.append(float(dynamic_params.get('ripple_amplitude', effect.get('amplitude', 10))))
params.append(float(effect.get('frequency', 8)))
params.append(float(effect.get('decay', 2)))
params.append(float(dynamic_params.get('ripple_phase', effect.get('phase', 0))))
params.append(float(effect.get('center_x', 960)))
params.append(float(effect.get('center_y', 540)))
elif op == 'invert':
params.append(float(effect.get('amount', 0)))
amp = float(dynamic_params.get('ripple_amplitude', effect.get('amplitude', 10)))
freq = float(effect.get('frequency', 8))
decay = float(effect.get('decay', 2))
phase = float(dynamic_params.get('ripple_phase', effect.get('phase', 0)))
cx = float(effect.get('center_x', 960))
cy = float(effect.get('center_y', 540))
params.extend([amp, freq, decay, phase, cx, cy])
if _BUILD_PARAMS_COUNT <= 10 or _BUILD_PARAMS_COUNT % 500 == 0:
logger.warning(f"[BUILD_PARAMS #{_BUILD_PARAMS_COUNT}] ripple amp={amp} freq={freq} decay={decay} phase={phase:.2f} cx={cx} cy={cy}")
# Second pass: color transforms (matches kernel's second loop)
for effect in effects:
op = effect['op']
if op == 'invert':
amt = float(effect.get('amount', 0))
params.append(amt)
if _BUILD_PARAMS_COUNT <= 10 or _BUILD_PARAMS_COUNT % 500 == 0:
logger.warning(f"[BUILD_PARAMS #{_BUILD_PARAMS_COUNT}] invert amount={amt}")
elif op == 'hue_shift':
params.append(float(effect.get('degrees', 0)))
deg = float(effect.get('degrees', 0))
params.append(deg)
if _BUILD_PARAMS_COUNT <= 10 or _BUILD_PARAMS_COUNT % 500 == 0:
logger.warning(f"[BUILD_PARAMS #{_BUILD_PARAMS_COUNT}] hue_shift degrees={deg}")
elif op == 'brightness':
params.append(float(effect.get('factor', 1.0)))

View File

@@ -1028,7 +1028,24 @@ class StreamInterpreter:
if result is not None:
import cv2
if result.shape[:2] != (h, w):
result = cv2.resize(result, (w, h))
# Handle CuPy arrays - cv2 can't resize them directly
if hasattr(result, '__cuda_array_interface__'):
# Use GPU resize via cupyx.scipy
try:
import cupy as cp
from cupyx.scipy import ndimage as cpndimage
curr_h, curr_w = result.shape[:2]
zoom_y = h / curr_h
zoom_x = w / curr_w
if result.ndim == 3:
result = cpndimage.zoom(result, (zoom_y, zoom_x, 1), order=1)
else:
result = cpndimage.zoom(result, (zoom_y, zoom_x), order=1)
except ImportError:
# Fallback to CPU resize
result = cv2.resize(cp.asnumpy(result), (w, h))
else:
result = cv2.resize(result, (w, h))
out.write(result, self.ctx.t)
# Progress

View File

@@ -144,8 +144,7 @@ class StreamInterpreter:
"""Load a config file and process its definitions."""
config_path = Path(config_path) # Accept str or Path
if not config_path.exists():
print(f"Warning: config file not found: {config_path}", file=sys.stderr)
return
raise FileNotFoundError(f"Config file not found: {config_path}")
text = config_path.read_text()
ast = parse_all(text)
@@ -221,8 +220,7 @@ class StreamInterpreter:
break
if not lib_path:
print(f"Warning: primitive library '{lib_name}' not found", file=sys.stderr)
return
raise FileNotFoundError(f"Primitive library '{lib_name}' not found. Searched paths: {lib_paths}")
spec = importlib.util.spec_from_file_location(actual_lib_name, lib_path)
module = importlib.util.module_from_spec(spec)
@@ -262,8 +260,7 @@ class StreamInterpreter:
def _load_effect(self, effect_path: Path):
"""Load and register an effect from a .sexp file."""
if not effect_path.exists():
print(f"Warning: effect file not found: {effect_path}", file=sys.stderr)
return
raise FileNotFoundError(f"Effect/include file not found: {effect_path}")
text = effect_path.read_text()
ast = parse_all(text)
@@ -938,8 +935,7 @@ class StreamInterpreter:
audio = str(resolved)
print(f"Lazy resolved audio: {audio}", file=sys.stderr)
else:
print(f"WARNING: Audio file not found: {audio}", file=sys.stderr)
audio = None
raise FileNotFoundError(f"Audio file not found: {audio}")
if output == "pipe":
out = PipeOutput(size=(w, h), fps=fps, audio_source=audio)
elif output == "preview":