#!/usr/bin/env python3
"""
Test autonomous pipeline with pre-allocated buffer.

The input frame is allocated on the GPU once and reused for every pipeline
call, and the time-varying effect parameters are handed to the compiler as
expression strings instead of being recomputed in Python for each frame.
The intent is to keep per-frame Python work out of the hot path.
"""

import sys
import time

# Must run before the project import below so that `streaming` is importable.
sys.path.insert(0, '/app')

import cupy as cp
from streaming.sexp_to_cuda import compile_autonomous_pipeline

# Approximate per-frame cost (ms) quoted for the original Python sexp
# implementation; used only for the speedup line printed at the end.
BASELINE_MS_PER_FRAME = 150


def test_autonomous_prealloc(
    width: int = 1920,
    height: int = 1080,
    n_frames: int = 300,
    fps: float = 30.0,
) -> None:
    """Benchmark the compiled autonomous pipeline on one reused GPU frame.

    Compiles a fixed rotate / hue-shift / ripple / brightness effect chain,
    runs one warmup call, then times ``n_frames`` calls and prints the
    per-frame time, the achieved frame rate and a comparison against the
    quoted baseline.

    Args:
        width: Frame width in pixels.
        height: Frame height in pixels.
        n_frames: Number of timed pipeline calls.
        fps: Frame rate passed to the pipeline and used as the real-time
            target in the report.

    Raises:
        ValueError: If ``width``, ``height``, ``n_frames`` or ``fps`` is not
            positive (the report divides by ``n_frames`` and ``fps``).
    """
    if width <= 0 or height <= 0:
        raise ValueError(
            f"width and height must be positive, got {width}x{height}"
        )
    if n_frames <= 0:
        raise ValueError(f"n_frames must be positive, got {n_frames}")
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps}")

    separator = "=" * 60

    print(f"Testing {n_frames} frames at {width}x{height}")
    print(separator)

    # Pre-allocate frame buffer (stays on GPU) and fill it with a flat colour.
    frame = cp.zeros((height, width, 3), dtype=cp.uint8)
    frame[:, :, 0] = 128  # R
    frame[:, :, 1] = 100  # G
    frame[:, :, 2] = 200  # B

    # Static effect chain. The ripple is centred on the frame, which is
    # (960, 540) at the default 1920x1080 size.
    effects = [
        {'op': 'rotate', 'angle': 0},
        {'op': 'hue_shift', 'degrees': 30},
        {
            'op': 'ripple',
            'amplitude': 15,
            'frequency': 10,
            'decay': 2,
            'phase': 0,
            'center_x': width // 2,
            'center_y': height // 2,
        },
        {'op': 'brightness', 'factor': 1.0},
    ]

    # Time-dependent parameters, written as C-style float expressions in `t`.
    # NOTE(review): presumably these are evaluated on the GPU by the compiled
    # pipeline and override the static values above - confirm against
    # compile_autonomous_pipeline.
    dynamic_expressions = {
        'rotate_angle': 't * 30.0f',
        'ripple_phase': 't * 2.0f',
        'brightness_factor': '0.8f + 0.4f * sinf(t * 2.0f)',
    }

    print("Compiling autonomous pipeline...")
    pipeline = compile_autonomous_pipeline(
        effects, width, height, dynamic_expressions
    )

    # Warmup call, synchronized so none of its work is still pending when
    # the timed region starts.
    pipeline(frame, 0, fps)
    cp.cuda.Stream.null.synchronize()

    # Benchmark. perf_counter() is monotonic and high-resolution, so the
    # measurement cannot be skewed by wall-clock adjustments. The stream is
    # synchronized once after the loop so the timed region ends only when
    # all submitted GPU work has finished.
    print(f"Running {n_frames} frames...")
    start = time.perf_counter()
    for frame_index in range(n_frames):
        pipeline(frame, frame_index, fps)
    cp.cuda.Stream.null.synchronize()
    elapsed = time.perf_counter() - start

    ms_per_frame = elapsed / n_frames * 1000
    actual_fps = n_frames / elapsed

    print(separator)
    print(f"Time: {ms_per_frame:.2f}ms per frame")
    print(f"FPS: {actual_fps:.0f}")
    print(f"Real-time: {actual_fps / fps:.1f}x (at {fps:g}fps target)")
    print(separator)

    # Compare with original baseline
    print("\nOriginal Python sexp: ~150ms = 6 fps")
    print(f"Autonomous GPU: {ms_per_frame:.2f}ms = {actual_fps:.0f} fps")
    print(f"Speedup: {BASELINE_MS_PER_FRAME / ms_per_frame:.0f}x faster!")


if __name__ == '__main__':
    test_autonomous_prealloc()