#!/usr/bin/env python3
"""
Benchmark the autonomous pipeline with a pre-allocated frame buffer.

The input frame is allocated once on the GPU and reused for every call, so
the timed loop performs no per-frame allocation or upload of its own; the
only per-frame Python work is the pipeline call itself.

Provenance: reconstructed from the patch "Add autonomous prealloc benchmark"
(commit 1bd171b8922643983bfdfea4fb9f6a94a981f313), which adds this file as
test_autonomous_prealloc.py.
"""

import sys
import time

# The project package is imported from /app; this must run before the
# `streaming` import below.
sys.path.insert(0, '/app')

import cupy as cp
from streaming.sexp_to_cuda import compile_autonomous_pipeline


def test_autonomous_prealloc():
    """Time the compiled pipeline over a fixed number of frames and print a report.

    Builds a constant-colour frame on the GPU, compiles a four-effect
    pipeline together with three dynamic expressions, performs one untimed
    warmup call, then times ``n_frames`` consecutive calls.  The report
    (ms per frame, FPS, ratio to the target FPS and speedup over a fixed
    150 ms baseline) is written to stdout; nothing is returned.
    """
    width, height = 1920, 1080
    n_frames = 300
    fps = 30.0
    # Reference cost of the original Python sexp path, used only for the
    # comparison printed at the end.
    baseline_ms = 150

    print(f"Testing {n_frames} frames at {width}x{height}")
    print("=" * 60)

    # Pre-allocate frame buffer (stays on GPU)
    frame = cp.zeros((height, width, 3), dtype=cp.uint8)
    frame[:, :, 0] = 128  # R
    frame[:, :, 1] = 100  # G
    frame[:, :, 2] = 200  # B

    # Define effects.  The ripple is centred on the frame, so derive the
    # centre from the frame size instead of repeating literal coordinates.
    effects = [
        {'op': 'rotate', 'angle': 0},
        {'op': 'hue_shift', 'degrees': 30},
        {
            'op': 'ripple',
            'amplitude': 15,
            'frequency': 10,
            'decay': 2,
            'phase': 0,
            'center_x': width // 2,
            'center_y': height // 2,
        },
        {'op': 'brightness', 'factor': 1.0},
    ]

    # Dynamic expressions (computed on GPU)
    dynamic_expressions = {
        'rotate_angle': 't * 30.0f',
        'ripple_phase': 't * 2.0f',
        'brightness_factor': '0.8f + 0.4f * sinf(t * 2.0f)',
    }

    # Compile autonomous pipeline
    print("Compiling autonomous pipeline...")
    pipeline = compile_autonomous_pipeline(effects, width, height, dynamic_expressions)

    # Warmup: one untimed call so one-off startup cost stays out of the
    # measurement.
    pipeline(frame, 0, fps)
    cp.cuda.Stream.null.synchronize()

    # Timed loop: one pipeline call per frame, with a single synchronize on
    # the null stream afterwards so the clock stops only once that stream has
    # drained.  NOTE(review): assumes the pipeline launches its work on the
    # default stream - confirm against compile_autonomous_pipeline.
    print(f"Running {n_frames} frames...")
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system time is adjusted mid-run.
    start = time.perf_counter()
    for i in range(n_frames):
        pipeline(frame, i, fps)
    cp.cuda.Stream.null.synchronize()
    elapsed = time.perf_counter() - start

    ms_per_frame = elapsed / n_frames * 1000
    actual_fps = n_frames / elapsed

    print("=" * 60)
    print(f"Time: {ms_per_frame:.2f}ms per frame")
    print(f"FPS: {actual_fps:.0f}")
    # Compare against the same fps value that was handed to the pipeline.
    print(f"Real-time: {actual_fps / fps:.1f}x (at {fps:.0f}fps target)")
    print("=" * 60)

    # Compare with original baseline
    print(f"\nOriginal Python sexp: ~{baseline_ms}ms = {1000 // baseline_ms} fps")
    print(f"Autonomous GPU: {ms_per_frame:.2f}ms = {actual_fps:.0f} fps")
    print(f"Speedup: {baseline_ms / ms_per_frame:.0f}x faster!")


if __name__ == '__main__':
    test_autonomous_prealloc()