Add profiling to stream interpreter to find bottleneck
Some checks are pending
GPU Worker CI/CD / test (push) Waiting to run
GPU Worker CI/CD / deploy (push) Blocked by required conditions

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
giles
2026-02-04 09:32:53 +00:00
parent ad1d7893f8
commit 4d95ec5a32

View File

@@ -949,6 +949,11 @@ class StreamInterpreter:
try:
frame_times = []
profile_interval = 30 # Profile every N frames
scan_times = []
eval_times = []
write_times = []
for frame_num in range(n_frames):
if not out.is_open:
break
@@ -969,23 +974,32 @@ class StreamInterpreter:
}
# Step scans
t0 = time.time()
self._step_scans(ctx, frame_env)
scan_times.append(time.time() - t0)
# Evaluate pipeline
t1 = time.time()
result = self._eval(self.frame_pipeline, frame_env)
eval_times.append(time.time() - t1)
t2 = time.time()
if result is not None and hasattr(result, 'shape'):
out.write(result, ctx.t)
write_times.append(time.time() - t2)
frame_elapsed = time.time() - frame_start
frame_times.append(frame_elapsed)
# Progress with timing
if frame_num % 30 == 0:
# Progress with timing and profile breakdown
if frame_num % profile_interval == 0 and frame_num > 0:
pct = 100 * frame_num / n_frames
avg_ms = 1000 * sum(frame_times[-30:]) / max(1, len(frame_times[-30:]))
avg_ms = 1000 * sum(frame_times[-profile_interval:]) / max(1, len(frame_times[-profile_interval:]))
avg_scan = 1000 * sum(scan_times[-profile_interval:]) / max(1, len(scan_times[-profile_interval:]))
avg_eval = 1000 * sum(eval_times[-profile_interval:]) / max(1, len(eval_times[-profile_interval:]))
avg_write = 1000 * sum(write_times[-profile_interval:]) / max(1, len(write_times[-profile_interval:]))
target_ms = 1000 * frame_time
print(f"\r{pct:5.1f}% [{avg_ms:.0f}ms/frame, target {target_ms:.0f}ms]", end="", file=sys.stderr, flush=True)
print(f"\r{pct:5.1f}% [{avg_ms:.0f}ms/frame, target {target_ms:.0f}ms] scan={avg_scan:.0f}ms eval={avg_eval:.0f}ms write={avg_write:.0f}ms", end="", file=sys.stderr, flush=True)
finally:
out.close()