Add autonomous-pipeline primitive for zero-Python hot path
This commit is contained in:
@@ -845,9 +845,11 @@ PRIMITIVES = _get_cpu_primitives().copy()
|
|||||||
# Try to import fused kernel compiler
|
# Try to import fused kernel compiler
|
||||||
_FUSED_KERNELS_AVAILABLE = False
|
_FUSED_KERNELS_AVAILABLE = False
|
||||||
_compile_frame_pipeline = None
|
_compile_frame_pipeline = None
|
||||||
|
_compile_autonomous_pipeline = None
|
||||||
try:
|
try:
|
||||||
if GPU_AVAILABLE:
|
if GPU_AVAILABLE:
|
||||||
from streaming.sexp_to_cuda import compile_frame_pipeline as _compile_frame_pipeline
|
from streaming.sexp_to_cuda import compile_frame_pipeline as _compile_frame_pipeline
|
||||||
|
from streaming.sexp_to_cuda import compile_autonomous_pipeline as _compile_autonomous_pipeline
|
||||||
_FUSED_KERNELS_AVAILABLE = True
|
_FUSED_KERNELS_AVAILABLE = True
|
||||||
print("[streaming_gpu] Fused CUDA kernel compiler loaded", file=sys.stderr)
|
print("[streaming_gpu] Fused CUDA kernel compiler loaded", file=sys.stderr)
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
@@ -953,6 +955,87 @@ def prim_fused_pipeline(img, effects_list, **dynamic_params):
|
|||||||
return pipeline(gpu_img, **dynamic_params)
|
return pipeline(gpu_img, **dynamic_params)
|
||||||
|
|
||||||
|
|
||||||
|
# Autonomous pipeline cache (separate from fused)
|
||||||
|
_AUTONOMOUS_PIPELINE_CACHE = {}
|
||||||
|
|
||||||
|
|
||||||
|
def _eval_cuda_expr_on_host(expr, t, frame_num):
    """Best-effort evaluation of a CUDA-style scalar expression in Python.

    Used only by the CPU fallback of ``prim_autonomous_pipeline``.

    Args:
        expr: Expression string in CUDA syntax (e.g. ``'0.8f + 0.4f * sinf(t)'``).
            A plain ``int``/``float`` is returned unchanged.
        t: Time in seconds, exposed to the expression as ``t``.
        frame_num: Frame index, exposed to the expression as ``frame_num``.

    Returns:
        The evaluated value, or ``0`` (with a ``RuntimeWarning``) when the
        expression cannot be evaluated on the host.
    """
    import math
    import re
    import warnings

    if isinstance(expr, (int, float)):
        return expr

    # Resolve CUDA math names through the namespace instead of rewriting the
    # expression text: textual replacement of 'sin' / 'f' corrupts identifiers
    # such as `asinf`, `floorf` or `frame_num`.
    namespace = {
        '__builtins__': {},
        'math': math,
        't': t,
        'frame_num': frame_num,
        'abs': abs,
        'min': min,
        'max': max,
        'fmin': min,
        'fminf': min,
        'fmax': max,
        'fmaxf': max,
    }
    for name in ('sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'atan2',
                 'sinh', 'cosh', 'tanh', 'exp', 'log', 'log2', 'log10',
                 'sqrt', 'pow', 'fabs', 'floor', 'ceil', 'fmod'):
        fn = getattr(math, name)
        namespace[name] = fn        # double-precision spelling, e.g. sin()
        namespace[name + 'f'] = fn  # single-precision spelling, e.g. sinf()

    try:
        # Drop the C float-literal suffix only when it terminates a numeric
        # literal ("30.0f" -> "30.0"); identifiers are left untouched.
        host_expr = re.sub(
            r'(?<![A-Za-z_0-9.])((?:\d+\.\d*|\.\d+|\d+)(?:[eE][+-]?\d+)?)[fF]\b',
            r'\1',
            expr,
        )
        # SECURITY: eval() of recipe-provided text. Emptying __builtins__ only
        # narrows the surface, it is NOT a sandbox - do not feed this untrusted
        # expressions.
        # NOTE: C-only constructs (ternary `?:`, casts, integer division
        # semantics) are not translated and end up in the fallback below.
        return eval(host_expr, namespace)
    except Exception as exc:
        # Keep the original best-effort contract (parameter becomes 0) but make
        # the failure visible; warnings de-duplicates identical messages.
        warnings.warn(
            f"[streaming_gpu] could not evaluate expression {expr!r} on CPU "
            f"({exc!r}); using 0",
            RuntimeWarning,
            stacklevel=2,
        )
        return 0


def prim_autonomous_pipeline(img, effects_list, dynamic_expressions, frame_num, fps=30.0):
    """
    Apply an autonomous CUDA kernel pipeline to one frame.

    The compiled pipeline receives only ``frame_num`` and ``fps``; the
    per-frame parameter expressions (``sinf(t)``, ``t * 30.0f``, ...) are
    handed to the kernel compiler rather than evaluated here. Python still
    normalises the inputs and computes the cache key on every call.

    When the fused-kernel compiler is unavailable, the expressions are
    evaluated in Python (best effort) and the frame is routed through
    ``prim_fused_pipeline`` instead.

    Args:
        img: Input image; must expose ``.shape`` (GPU array or numpy array).
        effects_list: List of effect dicts (each needs an ``'op'`` entry).
        dynamic_expressions: Dict mapping param names to CUDA expressions:
            {'rotate_angle': 't * 30.0f',
             'ripple_phase': 't * 2.0f',
             'brightness_factor': '0.8f + 0.4f * sinf(t * 2.0f)'}
            Keys may be strings or objects with a ``.name`` attribute.
        frame_num: Current frame number.
        fps: Frames per second (default 30).

    Returns:
        Whatever the compiled pipeline (or ``prim_fused_pipeline`` in the
        fallback) returns for this frame.

    Raises:
        ValueError: If ``img`` has no ``shape`` attribute (GPU path only).

    Note: Expressions use CUDA syntax - use sinf() not sin(), etc.
    """
    # Normalize effects and expression keys (keyword objects -> plain names)
    effects_list = [_normalize_effect_dict(e) for e in effects_list]
    dynamic_expressions = {
        (k.name if hasattr(k, 'name') else str(k)): v
        for k, v in dynamic_expressions.items()
    }

    if not _FUSED_KERNELS_AVAILABLE or _compile_autonomous_pipeline is None:
        # Fallback to regular fused pipeline with Python-computed params
        t = float(frame_num) / float(fps)
        dynamic_params = {
            key: _eval_cuda_expr_on_host(expr, t, frame_num)
            for key, expr in dynamic_expressions.items()
        }
        return prim_fused_pipeline(img, effects_list, **dynamic_params)

    # Get image dimensions
    if not hasattr(img, 'shape'):
        raise ValueError("Image must have shape attribute")
    h, w = img.shape[:2]

    # Create cache key: frame size + effect chain (minus 'src2' entries)
    # + expressions
    import hashlib
    ops_key = str([(e['op'], {k: v for k, v in e.items() if k != 'src2'}) for e in effects_list])
    expr_key = str(sorted(dynamic_expressions.items()))
    cache_key = f"auto_{w}x{h}_{hashlib.md5((ops_key + expr_key).encode()).hexdigest()}"

    # Compile or get cached pipeline
    if cache_key not in _AUTONOMOUS_PIPELINE_CACHE:
        _AUTONOMOUS_PIPELINE_CACHE[cache_key] = _compile_autonomous_pipeline(
            effects_list, w, h, dynamic_expressions)

    pipeline = _AUTONOMOUS_PIPELINE_CACHE[cache_key]

    # Ensure image is on GPU
    if hasattr(img, '__cuda_array_interface__'):
        gpu_img = img
    elif GPU_AVAILABLE:
        gpu_img = cp.asarray(img)
    else:
        gpu_img = img

    # Run - just pass frame_num and fps, kernel does the rest!
    return pipeline(gpu_img, int(frame_num), float(fps))
|
||||||
|
|
||||||
|
|
||||||
# Add GPU-specific primitives
|
# Add GPU-specific primitives
|
||||||
PRIMITIVES['fused-pipeline'] = prim_fused_pipeline
|
PRIMITIVES['fused-pipeline'] = prim_fused_pipeline
|
||||||
|
PRIMITIVES['autonomous-pipeline'] = prim_autonomous_pipeline
|
||||||
# (The GPU video source will be added by create_cid_primitives in the task)
|
# (The GPU video source will be added by create_cid_primitives in the task)
|
||||||
|
|||||||
36
test_autonomous.sexp
Normal file
36
test_autonomous.sexp
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
;; Autonomous Pipeline Test
;;
;; Exercises the `autonomous-pipeline` primitive from the streaming_gpu
;; primitive set. The primitive is given a static effect chain plus a map of
;; CUDA expression strings; the expressions (including sin/cos) are meant to be
;; evaluated by the GPU kernel rather than by Python on each frame.

(stream "autonomous_test"
  :fps 30
  :width 1920
  :height 1080
  :seed 42

  ;; Load the primitive sets used below
  (require-primitives "streaming_gpu")
  (require-primitives "image")

  ;; Effects pipeline: which effects to apply and their static parameters
  (def effects
    [{:op "rotate" :angle 0}
     {:op "hue_shift" :degrees 30}
     {:op "ripple" :amplitude 15 :frequency 10 :decay 2 :phase 0 :center_x 960 :center_y 540}
     {:op "brightness" :factor 1.0}])

  ;; Dynamic expressions, written in CUDA syntax: sinf(), cosf(), float
  ;; literals with an `f` suffix, and the variables t (time) and frame_num.
  ;; NOTE(review): keys look like "<op>_<param>" overrides for the effects
  ;; above (e.g. :rotate_angle -> rotate's :angle) - confirm against the
  ;; kernel compiler.
  (def expressions
    {:rotate_angle "t * 30.0f"
     :ripple_phase "t * 2.0f"
     :brightness_factor "0.8f + 0.4f * sinf(t * 2.0f)"})

  ;; Frame pipeline - creates the base image and applies the autonomous pipeline
  (frame
    (let [;; Create base image (still needs Python for now).
          ;; NOTE(review): originally described as a "gradient"; make-image
          ;; with a single colour triple presumably yields a solid fill -
          ;; TODO confirm.
          base (image:make-image 1920 1080 [128 100 200])]

      ;; Apply the autonomous pipeline. The trailing 30.0 is the fps argument;
      ;; it is hard-coded here and must be kept in sync with :fps above.
      (streaming_gpu:autonomous-pipeline base effects expressions frame-num 30.0))))
|
||||||
Reference in New Issue
Block a user