#!/usr/bin/env python3
"""
Comprehensive Performance Benchmarking Suite for PixelToVoxelProjector

This suite provides end-to-end performance testing including:
- Pipeline benchmarking
- Component-level performance tests
- GPU utilization monitoring
- Memory bandwidth measurements
- Latency profiling
- Performance regression detection
"""

import os
import sys
import time
import json
import psutil
import numpy as np
import matplotlib.pyplot as plt
from dataclasses import dataclass, asdict
from typing import Dict, List, Optional, Tuple
from pathlib import Path
import subprocess
import threading
from datetime import datetime

try:
    import pynvml
    HAS_NVML = True
except ImportError:
    HAS_NVML = False
    print("Warning: pynvml not available. GPU monitoring disabled.")

try:
    import cv2
    HAS_CV2 = True
except ImportError:
    HAS_CV2 = False
    print("Warning: OpenCV not available. Some benchmarks will be skipped.")


@dataclass
class BenchmarkResult:
    """Container for benchmark results."""
    name: str
    duration_ms: float
    throughput_fps: float
    memory_mb: float
    gpu_utilization_percent: float
    gpu_memory_mb: float
    cpu_utilization_percent: float
    latency_p50_ms: float
    latency_p95_ms: float
    latency_p99_ms: float
    timestamp: str
    metadata: Dict


@dataclass
class PerformanceBaseline:
    """Performance baseline for regression detection."""
    name: str
    min_throughput_fps: float
    max_latency_p99_ms: float
    max_memory_mb: float
    max_gpu_memory_mb: float


class GPUMonitor:
    """Monitor GPU utilization and memory usage."""

    def __init__(self):
        self.monitoring = False
        self.samples = []
        self.thread = None
        if HAS_NVML:
            try:
                pynvml.nvmlInit()
                self.handle = pynvml.nvmlDeviceGetHandleByIndex(0)
                self.available = True
            except Exception as e:
                print(f"Warning: Could not initialize NVML: {e}")
                self.available = False
        else:
            self.available = False

    def start(self):
        """Start monitoring in a background thread."""
        if not self.available:
            return
        self.monitoring = True
        self.samples = []
        self.thread = threading.Thread(target=self._monitor_loop)
        self.thread.daemon = True
        self.thread.start()

    def stop(self) -> Dict:
        """Stop monitoring and return statistics."""
        if not self.available:
            return {"utilization": 0, "memory_mb": 0}
        self.monitoring = False
        if self.thread:
            self.thread.join(timeout=1.0)
        if not self.samples:
            return {"utilization": 0, "memory_mb": 0}
        utils = [s['util'] for s in self.samples]
        mems = [s['mem'] for s in self.samples]
        return {
            "utilization": np.mean(utils),
            "memory_mb": np.mean(mems),
            "max_utilization": np.max(utils),
            "max_memory_mb": np.max(mems),
        }

    def _monitor_loop(self):
        """Background monitoring loop."""
        while self.monitoring:
            try:
                util = pynvml.nvmlDeviceGetUtilizationRates(self.handle)
                mem = pynvml.nvmlDeviceGetMemoryInfo(self.handle)
                self.samples.append({
                    'util': util.gpu,
                    'mem': mem.used / (1024 ** 2),  # Convert to MB
                })
            except Exception:
                pass
            time.sleep(0.1)  # Sample every 100 ms

    def __del__(self):
        if HAS_NVML and self.available:
            try:
                pynvml.nvmlShutdown()
            except Exception:
                pass


class CPUMonitor:
    """Monitor CPU utilization and memory."""

    def __init__(self):
        self.process = psutil.Process()
        self.samples = []
        self.monitoring = False
        self.thread = None

    def start(self):
        """Start monitoring."""
        self.monitoring = True
        self.samples = []
        self.thread = threading.Thread(target=self._monitor_loop)
        self.thread.daemon = True
        self.thread.start()

    def stop(self) -> Dict:
        """Stop monitoring and return stats."""
        self.monitoring = False
        if self.thread:
            self.thread.join(timeout=1.0)
        if not self.samples:
            return {"cpu_percent": 0, "memory_mb": 0}
        cpu_vals = [s['cpu'] for s in self.samples]
        mem_vals = [s['mem'] for s in self.samples]
        return {
            "cpu_percent": np.mean(cpu_vals),
            "memory_mb": np.mean(mem_vals),
            "max_cpu_percent": np.max(cpu_vals),
            "max_memory_mb": np.max(mem_vals),
        }

    def _monitor_loop(self):
        """Background monitoring loop."""
        while self.monitoring:
            try:
                cpu = self.process.cpu_percent(interval=0.1)
                mem = self.process.memory_info().rss / (1024 ** 2)  # MB
                self.samples.append({
                    'cpu': cpu,
                    'mem': mem,
                })
            except Exception:
                pass
            time.sleep(0.1)


class BenchmarkSuite:
    """Main benchmark suite orchestrator."""

    def __init__(self, output_dir: str = "benchmark_results"):
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.results: List[BenchmarkResult] = []
        self.gpu_monitor = GPUMonitor()
        self.cpu_monitor = CPUMonitor()
        # Performance baselines for regression detection
        self.baselines = self._load_baselines()

    def _load_baselines(self) -> Dict[str, PerformanceBaseline]:
        """Load performance baselines from file."""
        baseline_file = self.output_dir / "baselines.json"
        if not baseline_file.exists():
            return {}
        try:
            with open(baseline_file, 'r') as f:
                data = json.load(f)
            return {k: PerformanceBaseline(**v) for k, v in data.items()}
        except Exception as e:
            print(f"Warning: Could not load baselines: {e}")
            return {}

    def save_baselines(self):
        """Save current results as baselines."""
        baselines = {}
        for result in self.results:
            baselines[result.name] = PerformanceBaseline(
                name=result.name,
                min_throughput_fps=result.throughput_fps * 0.9,  # 10% tolerance
                max_latency_p99_ms=result.latency_p99_ms * 1.1,
                max_memory_mb=result.memory_mb * 1.1,
                max_gpu_memory_mb=result.gpu_memory_mb * 1.1,
            )
        baseline_file = self.output_dir / "baselines.json"
        with open(baseline_file, 'w') as f:
            json.dump({k: asdict(v) for k, v in baselines.items()}, f, indent=2)
        print(f"Saved {len(baselines)} baselines to {baseline_file}")

    def check_regression(self, result: BenchmarkResult) -> List[str]:
        """Check for performance regressions against the stored baseline."""
        if result.name not in self.baselines:
            return []
        baseline = self.baselines[result.name]
        regressions = []
        if result.throughput_fps < baseline.min_throughput_fps:
            regressions.append(
                f"Throughput regression: {result.throughput_fps:.2f} < "
                f"{baseline.min_throughput_fps:.2f} FPS"
            )
        if result.latency_p99_ms > baseline.max_latency_p99_ms:
            regressions.append(
                f"Latency regression: {result.latency_p99_ms:.2f} > "
                f"{baseline.max_latency_p99_ms:.2f} ms"
            )
        if result.memory_mb > baseline.max_memory_mb:
            regressions.append(
                f"Memory regression: {result.memory_mb:.2f} > "
                f"{baseline.max_memory_mb:.2f} MB"
            )
        if result.gpu_memory_mb > baseline.max_gpu_memory_mb:
            regressions.append(
                f"GPU memory regression: {result.gpu_memory_mb:.2f} > "
                f"{baseline.max_gpu_memory_mb:.2f} MB"
            )
        return regressions

    def run_benchmark(self, name: str, benchmark_fn, iterations: int = 100,
                      warmup: int = 10, **kwargs) -> BenchmarkResult:
        """Run a single benchmark with monitoring."""
        print(f"\n{'=' * 60}")
        print(f"Running benchmark: {name}")
        print(f"{'=' * 60}")

        # Warmup
        print(f"Warmup ({warmup} iterations)...")
        for _ in range(warmup):
            benchmark_fn(**kwargs)

        # Start monitoring
        self.gpu_monitor.start()
        self.cpu_monitor.start()

        # Run benchmark
        print(f"Running ({iterations} iterations)...")
        latencies = []
        start_time = time.time()
        for i in range(iterations):
            iter_start = time.time()
            benchmark_fn(**kwargs)
            iter_end = time.time()
            latencies.append((iter_end - iter_start) * 1000)  # ms
            if (i + 1) % 10 == 0:
                print(f"  Progress: {i + 1}/{iterations}")
        end_time = time.time()
        total_duration = (end_time - start_time) * 1000  # ms

        # Stop monitoring
        gpu_stats = self.gpu_monitor.stop()
        cpu_stats = self.cpu_monitor.stop()

        # Calculate statistics
        latencies_np = np.array(latencies)
        result = BenchmarkResult(
            name=name,
            duration_ms=total_duration,
            throughput_fps=iterations / (total_duration / 1000),
            memory_mb=cpu_stats.get('memory_mb', 0),
            gpu_utilization_percent=gpu_stats.get('utilization', 0),
            gpu_memory_mb=gpu_stats.get('memory_mb', 0),
            cpu_utilization_percent=cpu_stats.get('cpu_percent', 0),
            latency_p50_ms=np.percentile(latencies_np, 50),
            latency_p95_ms=np.percentile(latencies_np, 95),
            latency_p99_ms=np.percentile(latencies_np, 99),
            timestamp=datetime.now().isoformat(),
            metadata={
                'iterations': iterations,
                'warmup': warmup,
                'max_gpu_util': gpu_stats.get('max_utilization', 0),
                'max_gpu_mem_mb': gpu_stats.get('max_memory_mb', 0),
                'max_cpu_percent': cpu_stats.get('max_cpu_percent', 0),
                'max_memory_mb': cpu_stats.get('max_memory_mb', 0),
                **kwargs,
            },
        )

        # Print results
        print(f"\n{'=' * 60}")
        print(f"Results for: {name}")
        print(f"{'=' * 60}")
        print(f"Duration: {result.duration_ms:.2f} ms")
        print(f"Throughput: {result.throughput_fps:.2f} FPS")
        print(f"Latency (p50): {result.latency_p50_ms:.2f} ms")
        print(f"Latency (p95): {result.latency_p95_ms:.2f} ms")
        print(f"Latency (p99): {result.latency_p99_ms:.2f} ms")
        print(f"CPU Util: {result.cpu_utilization_percent:.1f}%")
        print(f"Memory: {result.memory_mb:.2f} MB")
        print(f"GPU Util: {result.gpu_utilization_percent:.1f}%")
        print(f"GPU Memory: {result.gpu_memory_mb:.2f} MB")

        # Check for regressions
        regressions = self.check_regression(result)
        if regressions:
            print("\nWARNING: Performance regressions detected:")
            for reg in regressions:
                print(f"  - {reg}")
        else:
            print("\nNo performance regressions detected.")

        self.results.append(result)
        return result

    def save_results(self):
        """Save all results to JSON and CSV files."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Save JSON
        json_file = self.output_dir / f"results_{timestamp}.json"
        with open(json_file, 'w') as f:
            json.dump([asdict(r) for r in self.results], f, indent=2)
        print(f"\nSaved results to {json_file}")

        # Save CSV
        csv_file = self.output_dir / f"results_{timestamp}.csv"
        with open(csv_file, 'w') as f:
            if self.results:
                # Header
                f.write("name,duration_ms,throughput_fps,latency_p50_ms,latency_p95_ms,latency_p99_ms,")
                f.write("cpu_percent,memory_mb,gpu_percent,gpu_memory_mb,timestamp\n")
                # Data
                for r in self.results:
                    f.write(f"{r.name},{r.duration_ms:.2f},{r.throughput_fps:.2f},")
                    f.write(f"{r.latency_p50_ms:.2f},{r.latency_p95_ms:.2f},{r.latency_p99_ms:.2f},")
                    f.write(f"{r.cpu_utilization_percent:.1f},{r.memory_mb:.2f},")
                    f.write(f"{r.gpu_utilization_percent:.1f},{r.gpu_memory_mb:.2f},{r.timestamp}\n")
        print(f"Saved CSV to {csv_file}")

    def generate_report(self):
        """Generate an HTML performance report with graphs."""
        if not self.results:
            print("No results to report")
            return
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Create comparison plots
        self._plot_throughput_comparison()
        self._plot_latency_distribution()
        self._plot_resource_utilization()

        # Generate HTML report
        html_file = self.output_dir / f"report_{timestamp}.html"
        with open(html_file, 'w') as f:
            f.write(self._generate_html_report())
        print(f"\nGenerated report: {html_file}")

    def _plot_throughput_comparison(self):
        """Plot throughput comparison across benchmarks."""
        if not self.results:
            return
        names = [r.name for r in self.results]
        throughputs = [r.throughput_fps for r in self.results]

        plt.figure(figsize=(12, 6))
        bars = plt.bar(range(len(names)), throughputs, color='steelblue')
        plt.xlabel('Benchmark')
        plt.ylabel('Throughput (FPS)')
        plt.title('Throughput Comparison')
        plt.xticks(range(len(names)), names, rotation=45, ha='right')
        plt.grid(axis='y', alpha=0.3)
        plt.tight_layout()

        # Add value labels on bars
        for i, bar in enumerate(bars):
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width() / 2., height,
                     f'{throughputs[i]:.1f}',
                     ha='center', va='bottom', fontsize=9)

        plt.savefig(self.output_dir / 'throughput_comparison.png', dpi=150)
        plt.close()

    def _plot_latency_distribution(self):
        """Plot latency percentiles."""
        if not self.results:
            return
        names = [r.name for r in self.results]
        p50 = [r.latency_p50_ms for r in self.results]
        p95 = [r.latency_p95_ms for r in self.results]
        p99 = [r.latency_p99_ms for r in self.results]

        x = np.arange(len(names))
        width = 0.25

        plt.figure(figsize=(14, 6))
        plt.bar(x - width, p50, width, label='p50', color='lightgreen')
        plt.bar(x, p95, width, label='p95', color='orange')
        plt.bar(x + width, p99, width, label='p99', color='red')
        plt.xlabel('Benchmark')
        plt.ylabel('Latency (ms)')
        plt.title('Latency Distribution (Percentiles)')
        plt.xticks(x, names, rotation=45, ha='right')
        plt.legend()
        plt.grid(axis='y', alpha=0.3)
        plt.tight_layout()
        plt.savefig(self.output_dir / 'latency_distribution.png', dpi=150)
        plt.close()

    def _plot_resource_utilization(self):
        """Plot CPU/GPU utilization and memory usage."""
        if not self.results:
            return
        names = [r.name for r in self.results]
        cpu_util = [r.cpu_utilization_percent for r in self.results]
        gpu_util = [r.gpu_utilization_percent for r in self.results]

        x = np.arange(len(names))
        width = 0.35

        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))

        # CPU/GPU utilization
        ax1.bar(x - width / 2, cpu_util, width, label='CPU %', color='cornflowerblue')
        ax1.bar(x + width / 2, gpu_util, width, label='GPU %', color='orange')
        ax1.set_ylabel('Utilization (%)')
        ax1.set_title('CPU vs GPU Utilization')
        ax1.set_xticks(x)
        ax1.set_xticklabels(names, rotation=45, ha='right')
        ax1.legend()
        ax1.grid(axis='y', alpha=0.3)

        # Memory usage
        cpu_mem = [r.memory_mb for r in self.results]
        gpu_mem = [r.gpu_memory_mb for r in self.results]
        ax2.bar(x - width / 2, cpu_mem, width, label='CPU Memory (MB)', color='steelblue')
        ax2.bar(x + width / 2, gpu_mem, width, label='GPU Memory (MB)', color='darkorange')
        ax2.set_ylabel('Memory (MB)')
        ax2.set_title('Memory Usage')
        ax2.set_xticks(x)
        ax2.set_xticklabels(names, rotation=45, ha='right')
        ax2.legend()
        ax2.grid(axis='y', alpha=0.3)

        plt.tight_layout()
        plt.savefig(self.output_dir / 'resource_utilization.png', dpi=150)
        plt.close()

    def _generate_html_report(self) -> str:
        """Generate HTML report content."""
        html = """<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>PixelToVoxel Benchmark Report</title>
</head>
<body>
    <h1>PixelToVoxel Performance Benchmark Report</h1>
    <p>Generated: """ + datetime.now().strftime("%Y-%m-%d %H:%M:%S") + """</p>

    <h2>Summary</h2>
"""
        if self.results:
            avg_throughput = np.mean([r.throughput_fps for r in self.results])
            avg_latency = np.mean([r.latency_p50_ms for r in self.results])
            avg_cpu = np.mean([r.cpu_utilization_percent for r in self.results])
            avg_gpu = np.mean([r.gpu_utilization_percent for r in self.results])
            html += f"""
    <table>
        <tr><td>{avg_throughput:.1f}</td><td>Avg Throughput (FPS)</td></tr>
        <tr><td>{avg_latency:.1f}</td><td>Avg Latency (ms)</td></tr>
        <tr><td>{avg_cpu:.0f}%</td><td>Avg CPU Usage</td></tr>
        <tr><td>{avg_gpu:.0f}%</td><td>Avg GPU Usage</td></tr>
    </table>
"""
        html += """
    <h2>Performance Charts</h2>

    <h3>Throughput Comparison</h3>
    <img src="throughput_comparison.png" alt="Throughput Comparison">

    <h3>Latency Distribution</h3>
    <img src="latency_distribution.png" alt="Latency Distribution">

    <h3>Resource Utilization</h3>
    <img src="resource_utilization.png" alt="Resource Utilization">

    <h2>Detailed Results</h2>
    <table>
        <tr>
            <th>Benchmark</th><th>Throughput (FPS)</th><th>p50 (ms)</th><th>p95 (ms)</th>
            <th>p99 (ms)</th><th>CPU %</th><th>GPU %</th><th>Memory (MB)</th><th>Status</th>
        </tr>
"""
        for result in self.results:
            regressions = self.check_regression(result)
            status = 'REGRESSION' if regressions else 'PASS'
            html += f"""
        <tr>
            <td>{result.name}</td>
            <td>{result.throughput_fps:.2f}</td>
            <td>{result.latency_p50_ms:.2f}</td>
            <td>{result.latency_p95_ms:.2f}</td>
            <td>{result.latency_p99_ms:.2f}</td>
            <td>{result.cpu_utilization_percent:.1f}</td>
            <td>{result.gpu_utilization_percent:.1f}</td>
            <td>{result.memory_mb:.1f}</td>
            <td>{status}</td>
        </tr>
"""
""" return html # Example benchmark functions def benchmark_voxel_ray_casting(grid_size=500, num_rays=1000): """Benchmark voxel ray casting performance""" grid = np.zeros((grid_size, grid_size, grid_size), dtype=np.float32) # Simulate ray casting for _ in range(num_rays): # Random ray origin and direction origin = np.random.rand(3) * grid_size direction = np.random.randn(3) direction /= np.linalg.norm(direction) # Simple DDA-like traversal t = 0 step = 1.0 max_t = grid_size * 1.414 # Diagonal while t < max_t: pos = origin + direction * t idx = np.clip(pos.astype(int), 0, grid_size - 1) if np.all(idx >= 0) and np.all(idx < grid_size): grid[idx[0], idx[1], idx[2]] += 1.0 t += step def benchmark_motion_detection(width=7680, height=4320): """Benchmark motion detection on 8K frames""" if not HAS_CV2: return # Generate synthetic frames frame1 = np.random.randint(0, 256, (height, width), dtype=np.uint8) frame2 = np.random.randint(0, 256, (height, width), dtype=np.uint8) # Motion detection diff = cv2.absdiff(frame1, frame2) _, thresh = cv2.threshold(diff, 25, 255, cv2.THRESH_BINARY) def benchmark_voxel_update(grid_size=500, num_updates=10000): """Benchmark voxel grid update performance""" grid = np.zeros((grid_size, grid_size, grid_size), dtype=np.float32) # Random updates indices = np.random.randint(0, grid_size, (num_updates, 3)) values = np.random.rand(num_updates).astype(np.float32) for idx, val in zip(indices, values): grid[idx[0], idx[1], idx[2]] += val def main(): """Run the complete benchmark suite""" suite = BenchmarkSuite(output_dir="benchmark_results") print("="*60) print("PixelToVoxel Performance Benchmark Suite") print("="*60) # Run benchmarks suite.run_benchmark( "Voxel Ray Casting (500^3)", benchmark_voxel_ray_casting, iterations=50, warmup=5, grid_size=500, num_rays=1000 ) if HAS_CV2: suite.run_benchmark( "Motion Detection (8K)", benchmark_motion_detection, iterations=50, warmup=5, width=7680, height=4320 ) suite.run_benchmark( "Voxel Grid Updates", benchmark_voxel_update, iterations=100, warmup=10, grid_size=500, num_updates=10000 ) # Save results and generate report suite.save_results() suite.generate_report() # Ask if user wants to save as baseline print("\n" + "="*60) response = input("Save these results as performance baseline? (y/n): ") if response.lower() == 'y': suite.save_baselines() print("\n" + "="*60) print("Benchmark suite completed!") print("="*60) if __name__ == "__main__": main()