Add autonomous prealloc benchmark

This commit is contained in:
giles
2026-02-04 10:03:24 +00:00
parent e4349ba501
commit 1bd171b892

View File

@@ -0,0 +1,75 @@
#!/usr/bin/env python3
"""
Test autonomous pipeline with pre-allocated buffer.
This eliminates ALL Python from the hot path.
"""
import time
import sys
sys.path.insert(0, '/app')
import cupy as cp
from streaming.sexp_to_cuda import compile_autonomous_pipeline
def test_autonomous_prealloc():
    """Benchmark the compiled autonomous pipeline on one pre-allocated frame.

    Builds a constant-colour 1920x1080 ``uint8`` CuPy array, compiles a
    four-effect pipeline with ``compile_autonomous_pipeline``, performs one
    warm-up call, then times ``n_frames`` consecutive calls on the same
    buffer. Per-frame time, throughput, the ratio to the target frame rate
    and the speedup over a quoted ~150ms baseline are printed to stdout.

    Takes no arguments and returns ``None``.
    """
    width, height = 1920, 1080
    n_frames = 300
    fps = 30.0
    # Figure quoted by the original author for the earlier Python sexp path;
    # it is a fixed reference, not something measured here.
    baseline_ms = 150.0

    print(f"Testing {n_frames} frames at {width}x{height}")
    print("=" * 60)

    # Pre-allocate frame buffer (stays on GPU) and reuse it for every call.
    frame = cp.zeros((height, width, 3), dtype=cp.uint8)
    frame[:, :, 0] = 128  # R
    frame[:, :, 1] = 100  # G
    frame[:, :, 2] = 200  # B

    # Define effects. The ripple centre is derived from the frame size so it
    # stays in the middle if width/height above are edited.
    effects = [
        {'op': 'rotate', 'angle': 0},
        {'op': 'hue_shift', 'degrees': 30},
        {
            'op': 'ripple',
            'amplitude': 15,
            'frequency': 10,
            'decay': 2,
            'phase': 0,
            'center_x': width // 2,
            'center_y': height // 2,
        },
        {'op': 'brightness', 'factor': 1.0},
    ]

    # Dynamic expressions (computed on GPU, per the original author).
    # NOTE(review): keys look like "<op>_<param>" overrides and ``t`` like a
    # time value derived from frame index and fps - confirm against
    # streaming.sexp_to_cuda.
    dynamic_expressions = {
        'rotate_angle': 't * 30.0f',
        'ripple_phase': 't * 2.0f',
        'brightness_factor': '0.8f + 0.4f * sinf(t * 2.0f)',
    }

    # Compile autonomous pipeline
    print("Compiling autonomous pipeline...")
    pipeline = compile_autonomous_pipeline(
        effects, width, height, dynamic_expressions
    )

    # Warmup: one untimed call, then wait for the null stream to drain so
    # nothing from it leaks into the measured interval.
    pipeline(frame, 0, fps)
    cp.cuda.Stream.null.synchronize()

    # Benchmark. perf_counter() is monotonic and high-resolution, unlike
    # time.time(), which can jump if the system clock is adjusted.
    print(f"Running {n_frames} frames...")
    start = time.perf_counter()
    for i in range(n_frames):
        pipeline(frame, i, fps)
    # Synchronize before reading the clock so queued GPU work on the null
    # stream is included in the elapsed time.
    cp.cuda.Stream.null.synchronize()
    elapsed = time.perf_counter() - start

    ms_per_frame = elapsed / n_frames * 1000
    actual_fps = n_frames / elapsed

    print("=" * 60)
    print(f"Time: {ms_per_frame:.2f}ms per frame")
    print(f"FPS: {actual_fps:.0f}")
    # Compare against the configured target rate rather than a literal 30.
    print(f"Real-time: {actual_fps / fps:.1f}x (at {fps:.0f}fps target)")
    print("=" * 60)

    # Compare with original baseline
    print("\nOriginal Python sexp: ~150ms = 6 fps")
    print(f"Autonomous GPU: {ms_per_frame:.2f}ms = {actual_fps:.0f} fps")
    print(f"Speedup: {baseline_ms / ms_per_frame:.0f}x faster!")
# Script entry point: run the benchmark only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    test_autonomous_prealloc()