diff --git a/sexp_effects/primitive_libs/streaming_gpu.py b/sexp_effects/primitive_libs/streaming_gpu.py
index 78a3f6c..8a9fc85 100644
--- a/sexp_effects/primitive_libs/streaming_gpu.py
+++ b/sexp_effects/primitive_libs/streaming_gpu.py
@@ -845,9 +845,11 @@ PRIMITIVES = _get_cpu_primitives().copy()
 # Try to import fused kernel compiler
 _FUSED_KERNELS_AVAILABLE = False
 _compile_frame_pipeline = None
+_compile_autonomous_pipeline = None
 try:
     if GPU_AVAILABLE:
         from streaming.sexp_to_cuda import compile_frame_pipeline as _compile_frame_pipeline
+        from streaming.sexp_to_cuda import compile_autonomous_pipeline as _compile_autonomous_pipeline
         _FUSED_KERNELS_AVAILABLE = True
         print("[streaming_gpu] Fused CUDA kernel compiler loaded", file=sys.stderr)
 except ImportError as e:
@@ -953,6 +955,138 @@ def prim_fused_pipeline(img, effects_list, **dynamic_params):
     return pipeline(gpu_img, **dynamic_params)
 
 
+# Autonomous pipeline cache (separate from fused)
+_AUTONOMOUS_PIPELINE_CACHE = {}
+
+# Expressions already reported as unevaluable by the CPU fallback (warn once)
+_AUTONOMOUS_FALLBACK_WARNED = set()
+
+
+def _eval_cuda_expression(expr, t, frame_num):
+    """
+    Evaluate one CUDA-style scalar expression in Python (CPU fallback only).
+
+    Args:
+        expr: Expression in CUDA syntax, e.g. '0.8f + 0.4f * sinf(t * 2.0f)'.
+            Non-string values are returned unchanged.
+        t: Current time in seconds
+        frame_num: Current frame number
+
+    Returns:
+        The evaluated value, or 0 if the expression cannot be evaluated
+    """
+    import math
+    import re
+
+    if not isinstance(expr, str):
+        return expr
+
+    # Strip the float suffix from numeric literals only ('30.0f' -> '30.0').
+    # Removing every 'f' would also mangle identifiers such as frame_num.
+    py_expr = re.sub(
+        r'(?<![\w.])((?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)[fF]\b', r'\1', expr)
+
+    # Expose math functions under both the plain and the float-suffixed
+    # CUDA spelling (sin / sinf) so no textual renaming is needed.
+    names = {'t': t, 'frame_num': frame_num, 'math': math,
+             'abs': abs, 'min': min, 'max': max,
+             'fminf': min, 'fmaxf': max}
+    for fn in ('sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'atan2',
+               'sinh', 'cosh', 'tanh', 'exp', 'log', 'log2', 'log10',
+               'sqrt', 'pow', 'floor', 'ceil', 'fabs', 'fmod'):
+        names[fn] = names[fn + 'f'] = getattr(math, fn)
+
+    try:
+        # SECURITY: eval() runs with builtins removed and only the names
+        # above in scope, but expressions must still come from trusted
+        # pipeline definitions - this is not a sandbox.
+        return eval(py_expr, {'__builtins__': {}}, names)
+    except (SyntaxError, NameError, AttributeError, TypeError,
+            ValueError, ArithmeticError) as e:
+        # Best effort: keep rendering with a neutral value, but say so once
+        if expr not in _AUTONOMOUS_FALLBACK_WARNED:
+            _AUTONOMOUS_FALLBACK_WARNED.add(expr)
+            print(f"[streaming_gpu] Cannot evaluate expression {expr!r} "
+                  f"on CPU, using 0: {e}", file=sys.stderr)
+        return 0
+
+
+def prim_autonomous_pipeline(img, effects_list, dynamic_expressions, frame_num, fps=30.0):
+    """
+    Apply a fully autonomous CUDA kernel pipeline.
+
+    This computes ALL parameters on GPU - including time-based expressions
+    like sin(t), t*30, etc. Zero Python in the hot path!
+
+    When the fused kernel compiler is unavailable, the expressions are
+    evaluated in Python instead and handed to prim_fused_pipeline.
+
+    Args:
+        img: Input image (GPU array or numpy array)
+        effects_list: List of effect dicts
+        dynamic_expressions: Dict mapping param names to CUDA expressions:
+            {'rotate_angle': 't * 30.0f',
+             'ripple_phase': 't * 2.0f',
+             'brightness_factor': '0.8f + 0.4f * sinf(t * 2.0f)'}
+        frame_num: Current frame number
+        fps: Frames per second (default 30)
+
+    Returns:
+        Processed image as GPU array
+
+    Raises:
+        ValueError: If img has no shape attribute (GPU path only)
+
+    Note: Expressions use CUDA syntax - use sinf() not sin(), etc.
+    """
+    # Normalize effects and expressions
+    effects_list = [_normalize_effect_dict(e) for e in effects_list]
+    dynamic_expressions = {
+        (k.name if hasattr(k, 'name') else str(k)): v
+        for k, v in dynamic_expressions.items()
+    }
+
+    if not _FUSED_KERNELS_AVAILABLE or _compile_autonomous_pipeline is None:
+        # Fallback to regular fused pipeline with Python-computed params
+        t = float(frame_num) / float(fps)
+        dynamic_params = {
+            key: _eval_cuda_expression(expr, t, frame_num)
+            for key, expr in dynamic_expressions.items()
+        }
+        return prim_fused_pipeline(img, effects_list, **dynamic_params)
+
+    # Get image dimensions
+    if hasattr(img, 'shape'):
+        h, w = img.shape[:2]
+    else:
+        raise ValueError("Image must have shape attribute")
+
+    # Create cache key
+    import hashlib
+    ops_key = str([(e['op'], {k:v for k,v in e.items() if k != 'src2'}) for e in effects_list])
+    expr_key = str(sorted(dynamic_expressions.items()))
+    cache_key = f"auto_{w}x{h}_{hashlib.md5((ops_key + expr_key).encode()).hexdigest()}"
+
+    # Compile or get cached pipeline
+    if cache_key not in _AUTONOMOUS_PIPELINE_CACHE:
+        _AUTONOMOUS_PIPELINE_CACHE[cache_key] = _compile_autonomous_pipeline(
+            effects_list, w, h, dynamic_expressions)
+
+    pipeline = _AUTONOMOUS_PIPELINE_CACHE[cache_key]
+
+    # Ensure image is on GPU
+    if hasattr(img, '__cuda_array_interface__'):
+        gpu_img = img
+    elif GPU_AVAILABLE:
+        gpu_img = cp.asarray(img)
+    else:
+        gpu_img = img
+
+    # Run - just pass frame_num and fps, kernel does the rest!
+    return pipeline(gpu_img, int(frame_num), float(fps))
+
+
 # Add GPU-specific primitives
 PRIMITIVES['fused-pipeline'] = prim_fused_pipeline
+PRIMITIVES['autonomous-pipeline'] = prim_autonomous_pipeline
 # (The GPU video source will be added by create_cid_primitives in the task)
diff --git a/test_autonomous.sexp b/test_autonomous.sexp
new file mode 100644
index 0000000..fbb1835
--- /dev/null
+++ b/test_autonomous.sexp
@@ -0,0 +1,36 @@
+;; Autonomous Pipeline Test
+;;
+;; Uses the autonomous-pipeline primitive which computes ALL parameters
+;; on GPU - including sin/cos expressions. Zero Python in the hot path!
+
+(stream "autonomous_test"
+  :fps 30
+  :width 1920
+  :height 1080
+  :seed 42
+
+  ;; Load primitives
+  (require-primitives "streaming_gpu")
+  (require-primitives "image")
+
+  ;; Effects pipeline (what effects to apply)
+  (def effects
+    [{:op "rotate" :angle 0}
+     {:op "hue_shift" :degrees 30}
+     {:op "ripple" :amplitude 15 :frequency 10 :decay 2 :phase 0 :center_x 960 :center_y 540}
+     {:op "brightness" :factor 1.0}])
+
+  ;; Dynamic expressions (computed on GPU!)
+  ;; These use CUDA syntax: sinf(), cosf(), t (time), frame_num
+  (def expressions
+    {:rotate_angle "t * 30.0f"
+     :ripple_phase "t * 2.0f"
+     :brightness_factor "0.8f + 0.4f * sinf(t * 2.0f)"})
+
+  ;; Frame pipeline - creates image and applies autonomous pipeline
+  (frame
+    (let [;; Create base gradient (still needs Python for now)
+          base (image:make-image 1920 1080 [128 100 200])]
+
+      ;; Apply autonomous pipeline - ALL EFFECTS + ALL MATH ON GPU!
+      (streaming_gpu:autonomous-pipeline base effects expressions frame-num 30.0))))