# mono/artdag/l1/test_autonomous_prealloc.py

#!/usr/bin/env python3
"""
Test autonomous pipeline with pre-allocated buffer.
This eliminates ALL Python from the hot path.
"""

import time
import sys
sys.path.insert(0, '/app')

import cupy as cp
from streaming.sexp_to_cuda import compile_autonomous_pipeline

def test_autonomous_prealloc():
    """Benchmark the compiled autonomous pipeline on a pre-allocated frame.

    Builds a constant-colour 1920x1080 three-channel ``uint8`` frame with
    CuPy, compiles a four-effect pipeline (rotate, hue shift, ripple,
    brightness) through ``compile_autonomous_pipeline``, runs one warmup
    call, then times 300 back-to-back calls on the same frame. It prints
    milliseconds per frame, frames per second, the real-time factor relative
    to the target ``fps``, and the speedup over a fixed 150 ms reference.

    Takes no arguments and returns nothing; results are only printed.
    """
    width, height = 1920, 1080
    n_frames = 300
    fps = 30.0
    # Reference figure quoted for the original Python sexp path (~150 ms/frame).
    baseline_ms = 150

    print(f"Testing {n_frames} frames at {width}x{height}")
    print("=" * 60)

    # Pre-allocate frame buffer (stays on GPU); it is reused for every call.
    frame = cp.zeros((height, width, 3), dtype=cp.uint8)
    frame[:, :, 0] = 128  # R
    frame[:, :, 1] = 100  # G
    frame[:, :, 2] = 200  # B

    # Define effects. The ripple is centred on the frame, so the centre is
    # derived from the frame size instead of being repeated as literals.
    effects = [
        {'op': 'rotate', 'angle': 0},
        {'op': 'hue_shift', 'degrees': 30},
        {
            'op': 'ripple',
            'amplitude': 15,
            'frequency': 10,
            'decay': 2,
            'phase': 0,
            'center_x': width // 2,
            'center_y': height // 2,
        },
        {'op': 'brightness', 'factor': 1.0},
    ]

    # Dynamic expressions (computed on GPU). The values are C-style float
    # expressions in ``t``; presumably ``t`` is the frame time derived from
    # the frame index and fps passed to the pipeline -- TODO confirm.
    dynamic_expressions = {
        'rotate_angle': 't * 30.0f',
        'ripple_phase': 't * 2.0f',
        'brightness_factor': '0.8f + 0.4f * sinf(t * 2.0f)',
    }

    # Compile autonomous pipeline
    print("Compiling autonomous pipeline...")
    pipeline = compile_autonomous_pipeline(effects, width, height, dynamic_expressions)

    # Warmup: one untimed call, so any first-call setup cost is excluded
    # from the measurement. Synchronize so it has finished before timing.
    output = pipeline(frame, 0, fps)
    cp.cuda.Stream.null.synchronize()

    # Benchmark. The only Python work per frame is the pipeline call itself.
    # perf_counter() is monotonic and high-resolution, so the measurement
    # cannot be skewed by wall-clock adjustments the way time.time() can.
    print(f"Running {n_frames} frames...")
    start = time.perf_counter()
    for i in range(n_frames):
        # The result is bound only so the last output stays referenced until
        # the synchronize below; it is not inspected.
        output = pipeline(frame, i, fps)
    # Wait for all work queued on the default stream before stopping the
    # clock; otherwise only the time to issue the calls would be measured.
    cp.cuda.Stream.null.synchronize()
    elapsed = time.perf_counter() - start

    ms_per_frame = elapsed / n_frames * 1000
    actual_fps = n_frames / elapsed

    print("=" * 60)
    print(f"Time:      {ms_per_frame:.2f}ms per frame")
    print(f"FPS:       {actual_fps:.0f}")
    # Real-time factor is measured against the same fps handed to the pipeline.
    print(f"Real-time: {actual_fps / fps:.1f}x (at {fps:.0f}fps target)")
    print("=" * 60)

    # Compare with original baseline (the literal below mirrors baseline_ms).
    print("\nOriginal Python sexp: ~150ms = 6 fps")
    print(f"Autonomous GPU:       {ms_per_frame:.2f}ms = {actual_fps:.0f} fps")
    print(f"Speedup:              {baseline_ms / ms_per_frame:.0f}x faster!")


# Script entry point: run the benchmark when this file is executed directly
# rather than imported.
if __name__ == "__main__":
    test_autonomous_prealloc()