# Agent 13: Performance Benchmark Suite

## Overview

A comprehensive Criterion-based benchmark suite for measuring and tracking performance across all latent-space operations, attention mechanisms, and search algorithms.

## 1. Criterion Benchmarks

### 1.1 Latency Benchmarks

Complete benchmark code for measuring operation latency across various dimensions and neighbor counts.

```rust
// benches/latency_benchmarks.rs
use criterion::{
    black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
use ruvector::metrics::DistanceMetric;
use rand::Rng;

/// Generate a random embedding of the specified dimension
fn random_embedding(dim: usize) -> Vec<f32> {
    let mut rng = rand::thread_rng();
    (0..dim).map(|_| rng.gen_range(-1.0..1.0)).collect()
}

/// Generate a dataset of random embeddings
fn generate_dataset(num_vectors: usize, dim: usize) -> Vec<Vec<f32>> {
    (0..num_vectors)
        .map(|_| random_embedding(dim))
        .collect()
}

/// Benchmark latent space creation
fn bench_latent_space_creation(c: &mut Criterion) {
    let mut group = c.benchmark_group("latent_space_creation");

    for dim in [64, 128, 256, 512, 1024].iter() {
        group.bench_with_input(
            BenchmarkId::from_parameter(dim),
            dim,
            |b, &dim| {
                b.iter(|| {
                    let config = LatentConfig {
                        dimension: dim,
                        attention_type: AttentionType::Standard,
                        num_heads: 8,
                        distance_metric: DistanceMetric::Euclidean,
                        ..Default::default()
                    };
                    black_box(LatentSpace::new(config))
                });
            },
        );
    }
    group.finish();
}

/// Benchmark embedding addition
fn bench_add_embedding(c: &mut Criterion) {
    let mut group = c.benchmark_group("add_embedding");

    for dim in [64, 128, 256, 512, 1024].iter() {
        let config = LatentConfig {
            dimension: *dim,
            attention_type: AttentionType::Standard,
            num_heads: 8,
            distance_metric: DistanceMetric::Euclidean,
            ..Default::default()
        };
        let mut space = LatentSpace::new(config).unwrap();
        let embedding = random_embedding(*dim);

        group.bench_with_input(
            BenchmarkId::from_parameter(dim),
            &embedding,
            |b, emb| {
                b.iter(|| {
                    black_box(space.add_embedding(emb.clone(), None))
                });
            },
        );
    }
    group.finish();
}

/// Benchmark KNN search with varying neighbor counts
fn bench_knn_search(c: &mut Criterion) {
    let mut group = c.benchmark_group("knn_search");

    let dimensions = [128, 256, 512];
    let neighbor_counts = [10, 50, 100, 500, 1000];
    let dataset_size = 10000;

    for &dim in &dimensions {
        for &k in &neighbor_counts {
            let config = LatentConfig {
                dimension: dim,
                attention_type: AttentionType::Standard,
                num_heads: 8,
                distance_metric: DistanceMetric::Euclidean,
                ..Default::default()
            };
            let mut space = LatentSpace::new(config).unwrap();
            let dataset = generate_dataset(dataset_size, dim);

            // Populate space
            for emb in dataset.iter() {
                space.add_embedding(emb.clone(), None).unwrap();
            }

            let query = random_embedding(dim);

            group.throughput(Throughput::Elements(k as u64));
            group.bench_with_input(
                BenchmarkId::new(format!("dim_{}", dim), k),
                &k,
                |b, &neighbors| {
                    b.iter(|| {
                        black_box(space.knn_search(&query, neighbors))
                    });
                },
            );
        }
    }
    group.finish();
}

/// Benchmark attention computation
fn bench_attention_computation(c: &mut Criterion) {
    let mut group = c.benchmark_group("attention_computation");

    let dimensions = [64, 128, 256, 512];
    let attention_types = [
        AttentionType::Standard,
        AttentionType::Flash,
        AttentionType::MultiHead { num_heads: 8 },
        AttentionType::MoE { num_experts: 4 },
    ];

    for &dim in &dimensions {
        for attention_type in &attention_types {
            let config = LatentConfig {
                dimension: dim,
                attention_type: attention_type.clone(),
                num_heads: 8,
                distance_metric: DistanceMetric::Euclidean,
                ..Default::default()
            };
            let mut space = LatentSpace::new(config).unwrap();
            let embeddings = generate_dataset(100, dim);
            for emb in embeddings.iter() {
                space.add_embedding(emb.clone(), None).unwrap();
            }

            let query = random_embedding(dim);

            group.bench_with_input(
                BenchmarkId::new(format!("dim_{}", dim), format!("{:?}", attention_type)),
                &query,
                |b, q| {
                    b.iter(|| {
                        black_box(space.compute_attention(q))
                    });
                },
            );
        }
    }
    group.finish();
}

criterion_group!(
    latency_benches,
    bench_latent_space_creation,
    bench_add_embedding,
    bench_knn_search,
    bench_attention_computation
);
criterion_main!(latency_benches);
```
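If run-to-run noise makes latency numbers hard to compare, Criterion's group configuration can be tuned. A minimal sketch, using the alternate form of `criterion_group!` (it would replace the plain invocation above; the specific durations and sample size are illustrative, not required by the suite):

```rust
use std::time::Duration;

criterion_group! {
    name = latency_benches;
    config = Criterion::default()
        .warm_up_time(Duration::from_secs(3))       // let caches and branch predictors settle
        .measurement_time(Duration::from_secs(10))  // longer window for stable tail latencies
        .sample_size(200);                          // more samples for tighter confidence intervals
    targets = bench_latent_space_creation, bench_add_embedding,
              bench_knn_search, bench_attention_computation
}
```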
### 1.2 Throughput Benchmarks

Benchmark batch processing and parallel operations.

```rust
// benches/throughput_benchmarks.rs
use criterion::{
    black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
use ruvector::metrics::DistanceMetric;
use rand::Rng;

// Repeated from latency_benchmarks.rs: every [[bench]] target compiles as
// its own crate. A shared-module sketch that removes this duplication
// follows this listing.
fn random_embedding(dim: usize) -> Vec<f32> {
    let mut rng = rand::thread_rng();
    (0..dim).map(|_| rng.gen_range(-1.0..1.0)).collect()
}

fn generate_dataset(num_vectors: usize, dim: usize) -> Vec<Vec<f32>> {
    (0..num_vectors).map(|_| random_embedding(dim)).collect()
}

/// Benchmark batch embedding addition
fn bench_batch_add_embeddings(c: &mut Criterion) {
    let mut group = c.benchmark_group("batch_add_embeddings");

    let batch_sizes = [1, 8, 32, 128, 512];
    let dim = 256;

    for &batch_size in &batch_sizes {
        let config = LatentConfig {
            dimension: dim,
            attention_type: AttentionType::Standard,
            num_heads: 8,
            distance_metric: DistanceMetric::Euclidean,
            ..Default::default()
        };
        let mut space = LatentSpace::new(config).unwrap();
        let embeddings = generate_dataset(batch_size, dim);

        group.throughput(Throughput::Elements(batch_size as u64));
        group.bench_with_input(
            BenchmarkId::from_parameter(batch_size),
            &embeddings,
            |b, embs| {
                b.iter(|| {
                    for emb in embs {
                        black_box(space.add_embedding(emb.clone(), None));
                    }
                });
            },
        );
    }
    group.finish();
}

/// Benchmark parallel KNN search
fn bench_parallel_knn_search(c: &mut Criterion) {
    use rayon::prelude::*;

    let mut group = c.benchmark_group("parallel_knn_search");

    let query_counts = [1, 8, 32, 128];
    let dim = 256;
    let k = 100;
    let dataset_size = 10000;

    for &num_queries in &query_counts {
        let config = LatentConfig {
            dimension: dim,
            attention_type: AttentionType::Standard,
            num_heads: 8,
            distance_metric: DistanceMetric::Euclidean,
            ..Default::default()
        };
        let mut space = LatentSpace::new(config).unwrap();
        let dataset = generate_dataset(dataset_size, dim);
        for emb in dataset.iter() {
            space.add_embedding(emb.clone(), None).unwrap();
        }

        let queries: Vec<Vec<f32>> = (0..num_queries)
            .map(|_| random_embedding(dim))
            .collect();

        group.throughput(Throughput::Elements(num_queries as u64));
        group.bench_with_input(
            BenchmarkId::from_parameter(num_queries),
            &queries,
            |b, qs| {
                b.iter(|| {
                    let results: Vec<_> = qs.par_iter()
                        .map(|q| space.knn_search(q, k))
                        .collect();
                    black_box(results)
                });
            },
        );
    }
    group.finish();
}

/// Benchmark batch attention computation
fn bench_batch_attention(c: &mut Criterion) {
    let mut group = c.benchmark_group("batch_attention");

    let batch_sizes = [8, 32, 128];
    let dim = 256;

    for &batch_size in &batch_sizes {
        let config = LatentConfig {
            dimension: dim,
            attention_type: AttentionType::Flash,
            num_heads: 8,
            distance_metric: DistanceMetric::Euclidean,
            ..Default::default()
        };
        let mut space = LatentSpace::new(config).unwrap();
        let embeddings = generate_dataset(1000, dim);
        for emb in embeddings.iter() {
            space.add_embedding(emb.clone(), None).unwrap();
        }

        let queries = generate_dataset(batch_size, dim);

        group.throughput(Throughput::Elements(batch_size as u64));
        group.bench_with_input(
            BenchmarkId::from_parameter(batch_size),
            &queries,
            |b, qs| {
                b.iter(|| {
                    for q in qs {
                        black_box(space.compute_attention(q));
                    }
                });
            },
        );
    }
    group.finish();
}

criterion_group!(
    throughput_benches,
    bench_batch_add_embeddings,
    bench_parallel_knn_search,
    bench_batch_attention
);
criterion_main!(throughput_benches);
```
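Because each `[[bench]]` target is compiled as a separate crate, the data-generation helpers above have to be repeated in every file. A shared module avoids the duplication; a minimal sketch, assuming a `benches/common.rs` file (the file name and layout are illustrative):

```rust
// benches/common.rs: hypothetical shared helpers, pulled into each bench
// target with `mod common;` at the top of the file.
use rand::Rng;

/// Random embedding with components drawn uniformly from (-1, 1).
pub fn random_embedding(dim: usize) -> Vec<f32> {
    let mut rng = rand::thread_rng();
    (0..dim).map(|_| rng.gen_range(-1.0..1.0)).collect()
}

/// Dataset of `num_vectors` random embeddings of dimension `dim`.
pub fn generate_dataset(num_vectors: usize, dim: usize) -> Vec<Vec<f32>> {
    (0..num_vectors).map(|_| random_embedding(dim)).collect()
}
```

Each bench file would then start with `mod common; use common::{generate_dataset, random_embedding};`. The remaining bench files in this document assume this layout.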
### 1.3 Memory Benchmarks

Track peak memory usage and allocation patterns.

```rust
// benches/memory_benchmarks.rs
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
use ruvector::metrics::DistanceMetric;
use std::alloc::{GlobalAlloc, Layout, System};
use std::sync::atomic::{AtomicUsize, Ordering};

// Shared helper (`random_embedding`): see the `benches/common.rs` sketch
// at the end of section 1.2.
mod common;
use common::random_embedding;

/// Custom allocator to track memory usage
struct TrackingAllocator;

static ALLOCATED: AtomicUsize = AtomicUsize::new(0);
static PEAK_ALLOCATED: AtomicUsize = AtomicUsize::new(0);

unsafe impl GlobalAlloc for TrackingAllocator {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        let size = layout.size();
        let current = ALLOCATED.fetch_add(size, Ordering::SeqCst) + size;

        // Update peak if necessary (CAS loop handles concurrent allocators)
        let mut peak = PEAK_ALLOCATED.load(Ordering::SeqCst);
        while current > peak {
            match PEAK_ALLOCATED.compare_exchange(
                peak,
                current,
                Ordering::SeqCst,
                Ordering::SeqCst,
            ) {
                Ok(_) => break,
                Err(p) => peak = p,
            }
        }

        System.alloc(layout)
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        ALLOCATED.fetch_sub(layout.size(), Ordering::SeqCst);
        System.dealloc(ptr, layout)
    }
}

#[global_allocator]
static GLOBAL: TrackingAllocator = TrackingAllocator;

fn reset_memory_tracking() {
    ALLOCATED.store(0, Ordering::SeqCst);
    PEAK_ALLOCATED.store(0, Ordering::SeqCst);
}

fn get_peak_memory() -> usize {
    PEAK_ALLOCATED.load(Ordering::SeqCst)
}

/// Benchmark memory usage for different dataset sizes
fn bench_memory_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_scaling");

    let dataset_sizes = [1000, 5000, 10000, 50000, 100000];
    let dim = 256;

    for &size in &dataset_sizes {
        group.bench_with_input(
            BenchmarkId::from_parameter(size),
            &size,
            |b, &num_vectors| {
                b.iter_custom(|iters| {
                    let mut total_duration = std::time::Duration::ZERO;

                    for _ in 0..iters {
                        reset_memory_tracking();
                        let start = std::time::Instant::now();

                        let config = LatentConfig {
                            dimension: dim,
                            attention_type: AttentionType::Standard,
                            num_heads: 8,
                            distance_metric: DistanceMetric::Euclidean,
                            ..Default::default()
                        };
                        let mut space = LatentSpace::new(config).unwrap();

                        for i in 0..num_vectors {
                            let emb = random_embedding(dim);
                            space.add_embedding(emb, Some(i as u64)).unwrap();
                        }

                        let elapsed = start.elapsed();
                        total_duration += elapsed;

                        let peak = get_peak_memory();
                        println!(
                            "Dataset size: {}, Peak memory: {} MB",
                            num_vectors,
                            peak / 1_000_000
                        );

                        black_box(space);
                    }

                    total_duration
                });
            },
        );
    }
    group.finish();
}

/// Benchmark memory usage by dimension
fn bench_memory_by_dimension(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_by_dimension");

    let dimensions = [64, 128, 256, 512, 1024];
    let num_vectors = 10000;

    for &dim in &dimensions {
        group.bench_with_input(
            BenchmarkId::from_parameter(dim),
            &dim,
            |b, &dimension| {
                b.iter_custom(|iters| {
                    let mut total_duration = std::time::Duration::ZERO;

                    for _ in 0..iters {
                        reset_memory_tracking();
                        let start = std::time::Instant::now();

                        let config = LatentConfig {
                            dimension,
                            attention_type: AttentionType::Standard,
                            num_heads: 8,
                            distance_metric: DistanceMetric::Euclidean,
                            ..Default::default()
                        };
                        let mut space = LatentSpace::new(config).unwrap();

                        for i in 0..num_vectors {
                            let emb = random_embedding(dimension);
                            space.add_embedding(emb, Some(i as u64)).unwrap();
                        }

                        let elapsed = start.elapsed();
                        total_duration += elapsed;

                        let peak = get_peak_memory();
                        println!(
                            "Dimension: {}, Peak memory: {} MB",
                            dimension,
                            peak / 1_000_000
                        );

                        black_box(space);
                    }

                    total_duration
                });
            },
        );
    }
    group.finish();
}

criterion_group!(
    memory_benches,
    bench_memory_scaling,
    bench_memory_by_dimension
);
criterion_main!(memory_benches);
```
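The tracking allocator is global, so its counters include every allocation in the process, Criterion's own bookkeeping included; the numbers are best read as relative comparisons rather than exact footprints. A small convenience wrapper over the hooks above, useful for one-off measurements outside the Criterion harness (a sketch; `build_space` in the usage comment is hypothetical):

```rust
/// Run a closure and report its peak heap usage as seen by the
/// tracking allocator defined above.
fn measure_peak<R>(f: impl FnOnce() -> R) -> (R, usize) {
    reset_memory_tracking();
    let result = f();
    (result, get_peak_memory())
}

// Usage (hypothetical):
// let (_space, peak) = measure_peak(|| build_space(10_000, 256));
// println!("peak = {} MB", peak / 1_000_000);
```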
## 2. Benchmark Matrix

### 2.1 Complete Test Matrix Configuration

```rust
// benches/benchmark_matrix.rs
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
use ruvector::metrics::DistanceMetric;

// Shared helper (`generate_dataset`): see the `benches/common.rs` sketch
// at the end of section 1.2.
mod common;
use common::generate_dataset;

/// Comprehensive benchmark matrix
struct BenchmarkMatrix {
    dimensions: Vec<usize>,
    neighbors: Vec<usize>,
    batch_sizes: Vec<usize>,
    dataset_sizes: Vec<usize>,
}

impl Default for BenchmarkMatrix {
    fn default() -> Self {
        Self {
            dimensions: vec![64, 128, 256, 512, 1024],
            neighbors: vec![10, 50, 100, 500, 1000, 10000],
            batch_sizes: vec![1, 8, 32, 128],
            dataset_sizes: vec![1000, 5000, 10000, 50000],
        }
    }
}

impl BenchmarkMatrix {
    /// Run the complete benchmark matrix
    fn run_complete_matrix(&self, c: &mut Criterion) {
        for &dim in &self.dimensions {
            for &k in &self.neighbors {
                for &batch_size in &self.batch_sizes {
                    for &dataset_size in &self.dataset_sizes {
                        self.bench_configuration(c, dim, k, batch_size, dataset_size);
                    }
                }
            }
        }
    }

    /// Benchmark a specific configuration
    fn bench_configuration(
        &self,
        c: &mut Criterion,
        dim: usize,
        k: usize,
        batch_size: usize,
        dataset_size: usize,
    ) {
        let group_name = format!(
            "matrix/dim_{}/k_{}/batch_{}/dataset_{}",
            dim, k, batch_size, dataset_size
        );
        let mut group = c.benchmark_group(&group_name);

        let config = LatentConfig {
            dimension: dim,
            attention_type: AttentionType::Standard,
            num_heads: 8,
            distance_metric: DistanceMetric::Euclidean,
            ..Default::default()
        };
        let mut space = LatentSpace::new(config).unwrap();
        let dataset = generate_dataset(dataset_size, dim);
        for emb in dataset.iter() {
            space.add_embedding(emb.clone(), None).unwrap();
        }
        let queries = generate_dataset(batch_size, dim);

        group.bench_function("knn_search", |b| {
            b.iter(|| {
                for query in &queries {
                    black_box(space.knn_search(query, k));
                }
            });
        });
        group.finish();
    }
}

/// Run critical-path benchmarks only. The full default matrix spans
/// 5 x 6 x 4 x 4 = 480 configurations; this reduced matrix covers
/// 3 x 3 x 2 x 1 = 18.
fn bench_critical_path(c: &mut Criterion) {
    let matrix = BenchmarkMatrix {
        dimensions: vec![128, 256, 512],
        neighbors: vec![10, 100, 1000],
        batch_sizes: vec![1, 32],
        dataset_sizes: vec![10000],
    };
    matrix.run_complete_matrix(c);
}

criterion_group!(matrix_benches, bench_critical_path);
criterion_main!(matrix_benches);
```

### 2.2 Benchmark Matrix Results Format

Expected output format for tracking:

```toml
# benchmark_results.toml
[latency.dim_128.k_10]
mean = "45.2 µs"
std_dev = "2.1 µs"
median = "44.8 µs"
mad = "1.4 µs"

[latency.dim_256.k_100]
mean = "124.5 µs"
std_dev = "5.3 µs"
median = "123.1 µs"
mad = "3.2 µs"

[throughput.batch_32.dim_256]
throughput = "2.34 GiB/s"
ops_per_sec = "8542"

[memory.dataset_10000.dim_256]
peak_mb = "245"
average_mb = "198"
allocations = "12543"
```
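Since the results file is plain TOML, it can be read back for tracking with serde. A sketch, assuming the `toml` crate is added as a dev-dependency (the struct names and the `latency` table layout mirror the format above; everything else is illustrative):

```rust
use std::collections::BTreeMap;
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct LatencyEntry {
    mean: String,
    std_dev: String,
    median: String,
    mad: String,
}

/// Load the `[latency.dim_X.k_Y]` tables from benchmark_results.toml
/// as a nested map: "dim_128" -> "k_10" -> entry.
fn load_latency_results(
    path: &str,
) -> Result<BTreeMap<String, BTreeMap<String, LatencyEntry>>, Box<dyn std::error::Error>> {
    #[derive(Deserialize)]
    struct Root {
        latency: BTreeMap<String, BTreeMap<String, LatencyEntry>>,
    }

    let text = std::fs::read_to_string(path)?;
    let root: Root = toml::from_str(&text)?;
    Ok(root.latency)
}
```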
## 3. Comparative Benchmarks

### 3.1 Attention Mechanism Comparison

```rust
// benches/attention_comparison.rs
use criterion::{
    black_box, criterion_group, criterion_main, BenchmarkId, Criterion, PlotConfiguration,
};
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
use ruvector::metrics::DistanceMetric;

// Shared helpers: see the `benches/common.rs` sketch at the end of section 1.2.
mod common;
use common::{generate_dataset, random_embedding};

fn compare_attention_mechanisms(c: &mut Criterion) {
    let mut group = c.benchmark_group("attention_comparison");
    group.plot_config(
        PlotConfiguration::default().summary_scale(criterion::AxisScale::Logarithmic),
    );

    let dimensions = [128, 256, 512];
    let sequence_lengths = [100, 500, 1000];
    let attention_types = vec![
        ("Standard", AttentionType::Standard),
        ("Flash", AttentionType::Flash),
        ("MultiHead_4", AttentionType::MultiHead { num_heads: 4 }),
        ("MultiHead_8", AttentionType::MultiHead { num_heads: 8 }),
        ("MoE_2", AttentionType::MoE { num_experts: 2 }),
        ("MoE_4", AttentionType::MoE { num_experts: 4 }),
    ];

    for &dim in &dimensions {
        for &seq_len in &sequence_lengths {
            for (name, attn_type) in &attention_types {
                let config = LatentConfig {
                    dimension: dim,
                    attention_type: attn_type.clone(),
                    num_heads: 8,
                    distance_metric: DistanceMetric::Euclidean,
                    ..Default::default()
                };
                let mut space = LatentSpace::new(config).unwrap();
                let embeddings = generate_dataset(seq_len, dim);
                for emb in embeddings.iter() {
                    space.add_embedding(emb.clone(), None).unwrap();
                }

                let query = random_embedding(dim);

                group.bench_with_input(
                    BenchmarkId::new(format!("{}@dim_{}", name, dim), seq_len),
                    &query,
                    |b, q| {
                        b.iter(|| {
                            black_box(space.compute_attention(q))
                        });
                    },
                );
            }
        }
    }
    group.finish();
}

/// Generate a comparison report. Not registered with Criterion; call it
/// manually. The timings below are illustrative placeholders.
fn generate_comparison_report() {
    use std::collections::HashMap;

    // Illustrative mean latencies (µs) at 128/256/512 dimensions
    let results = HashMap::from([
        ("Standard", vec![45.2, 124.5, 456.7]),
        ("Flash", vec![32.1, 89.3, 301.2]),
        ("MultiHead_8", vec![52.3, 142.1, 512.4]),
        ("MoE_4", vec![48.9, 128.7, 445.3]),
    ]);

    println!("\n=== Attention Mechanism Comparison ===\n");
    println!("{:<15} {:>12} {:>12} {:>12}", "Type", "128-dim", "256-dim", "512-dim");
    println!("{:-<55}", "");

    for (name, times) in results.iter() {
        println!(
            "{:<15} {:>10.1} µs {:>10.1} µs {:>10.1} µs",
            name, times[0], times[1], times[2]
        );
    }

    // Calculate speedup vs Standard
    if let Some(baseline) = results.get("Standard") {
        println!("\n=== Speedup vs Standard ===\n");
        for (name, times) in results.iter() {
            if name != &"Standard" {
                let speedups: Vec<f64> = times
                    .iter()
                    .zip(baseline.iter())
                    .map(|(t, b)| b / t)
                    .collect();
                println!(
                    "{:<15} {:>10.2}x {:>10.2}x {:>10.2}x",
                    name, speedups[0], speedups[1], speedups[2]
                );
            }
        }
    }
}

criterion_group!(attention_benches, compare_attention_mechanisms);
criterion_main!(attention_benches);
```
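When collapsing the per-dimension speedups into a single headline number, the geometric mean is the conventional aggregate for ratios, because it is symmetric under inversion (a 2x gain and a 0.5x loss cancel out, which the arithmetic mean gets wrong). A minimal helper:

```rust
/// Geometric mean of a slice of speedup ratios.
fn geometric_mean(ratios: &[f64]) -> f64 {
    let log_sum: f64 = ratios.iter().map(|r| r.ln()).sum();
    (log_sum / ratios.len() as f64).exp()
}

// e.g. geometric_mean(&[1.41, 1.39, 1.52]) is roughly 1.44
```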
### 3.2 Distance Metric Comparison

```rust
// benches/distance_comparison.rs
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
use ruvector::metrics::DistanceMetric;

// Shared helpers: see the `benches/common.rs` sketch at the end of section 1.2.
mod common;
use common::{generate_dataset, random_embedding};

fn compare_distance_metrics(c: &mut Criterion) {
    let mut group = c.benchmark_group("distance_comparison");

    let dimensions = [128, 256, 512];
    let metrics = vec![
        ("Euclidean", DistanceMetric::Euclidean),
        ("Cosine", DistanceMetric::Cosine),
        ("Manhattan", DistanceMetric::Manhattan),
        ("Hyperbolic", DistanceMetric::Hyperbolic { curvature: -1.0 }),
    ];

    for &dim in &dimensions {
        for (name, metric) in &metrics {
            let config = LatentConfig {
                dimension: dim,
                attention_type: AttentionType::Standard,
                num_heads: 8,
                distance_metric: metric.clone(),
                ..Default::default()
            };
            let mut space = LatentSpace::new(config).unwrap();
            let dataset = generate_dataset(10000, dim);
            for emb in dataset.iter() {
                space.add_embedding(emb.clone(), None).unwrap();
            }

            let query = random_embedding(dim);

            group.bench_with_input(
                BenchmarkId::new(format!("{}@dim_{}", name, dim), "knn_100"),
                &query,
                |b, q| {
                    b.iter(|| {
                        black_box(space.knn_search(q, 100))
                    });
                },
            );
        }
    }
    group.finish();
}

criterion_group!(distance_benches, compare_distance_metrics);
criterion_main!(distance_benches);
```
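One caveat for the Hyperbolic case: assuming ruvector's Hyperbolic metric uses the Poincaré ball model (an assumption, not confirmed by this document), points must lie strictly inside the unit ball, and vectors drawn uniformly from (-1, 1)^d will usually fall outside it once d > 1. Rescaling before insertion keeps the benchmark well-posed; a sketch:

```rust
/// Project a vector into the open unit ball (Poincaré model), leaving a
/// small margin `eps` to avoid numerical blow-up near the boundary.
/// Assumption: the Hyperbolic metric expects points inside the ball.
fn project_into_ball(v: &mut [f32], eps: f32) {
    let norm = v.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm >= 1.0 - eps {
        let scale = (1.0 - eps) / norm;
        for x in v.iter_mut() {
            *x *= scale;
        }
    }
}
```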
### 3.3 Standard vs Flash Attention

Detailed comparison focusing on Flash attention optimizations:

```rust
// benches/flash_attention_benchmark.rs
use criterion::{
    black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
use ruvector::metrics::DistanceMetric;

// Shared helpers: see the `benches/common.rs` sketch at the end of section 1.2.
// `reset_memory_tracking` / `get_peak_memory` and the tracking allocator are
// as defined in memory_benchmarks.rs; in practice they would also move into
// a shared module.
mod common;
use common::{generate_dataset, random_embedding};

fn bench_flash_vs_standard(c: &mut Criterion) {
    let mut group = c.benchmark_group("flash_vs_standard");

    // Test at various sequence lengths where Flash shines
    let sequence_lengths = [256, 512, 1024, 2048, 4096];
    let dim = 256;
    let num_heads = 8;

    for &seq_len in &sequence_lengths {
        // Standard attention
        {
            let config = LatentConfig {
                dimension: dim,
                attention_type: AttentionType::Standard,
                num_heads,
                distance_metric: DistanceMetric::Euclidean,
                ..Default::default()
            };
            let mut space = LatentSpace::new(config).unwrap();
            let embeddings = generate_dataset(seq_len, dim);
            for emb in embeddings.iter() {
                space.add_embedding(emb.clone(), None).unwrap();
            }
            let query = random_embedding(dim);

            group.throughput(Throughput::Elements(seq_len as u64));
            group.bench_with_input(
                BenchmarkId::new("Standard", seq_len),
                &query,
                |b, q| {
                    b.iter(|| {
                        black_box(space.compute_attention(q))
                    });
                },
            );
        }

        // Flash attention
        {
            let config = LatentConfig {
                dimension: dim,
                attention_type: AttentionType::Flash,
                num_heads,
                distance_metric: DistanceMetric::Euclidean,
                ..Default::default()
            };
            let mut space = LatentSpace::new(config).unwrap();
            let embeddings = generate_dataset(seq_len, dim);
            for emb in embeddings.iter() {
                space.add_embedding(emb.clone(), None).unwrap();
            }
            let query = random_embedding(dim);

            group.throughput(Throughput::Elements(seq_len as u64));
            group.bench_with_input(
                BenchmarkId::new("Flash", seq_len),
                &query,
                |b, q| {
                    b.iter(|| {
                        black_box(space.compute_attention(q))
                    });
                },
            );
        }
    }
    group.finish();
}

/// Memory comparison between Flash and Standard
fn bench_memory_flash_vs_standard(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_flash_vs_standard");

    let sequence_lengths = [512, 1024, 2048];
    let dim = 256;

    for &seq_len in &sequence_lengths {
        group.bench_with_input(
            BenchmarkId::new("Standard", seq_len),
            &seq_len,
            |b, &len| {
                b.iter_custom(|iters| {
                    let mut total_duration = std::time::Duration::ZERO;
                    for _ in 0..iters {
                        reset_memory_tracking();
                        let start = std::time::Instant::now();

                        let config = LatentConfig {
                            dimension: dim,
                            attention_type: AttentionType::Standard,
                            num_heads: 8,
                            distance_metric: DistanceMetric::Euclidean,
                            ..Default::default()
                        };
                        let mut space = LatentSpace::new(config).unwrap();
                        let embeddings = generate_dataset(len, dim);
                        for emb in embeddings.iter() {
                            space.add_embedding(emb.clone(), None).unwrap();
                        }
                        let query = random_embedding(dim);
                        black_box(space.compute_attention(&query));

                        total_duration += start.elapsed();
                        println!(
                            "Standard@{}: Peak {} MB",
                            len,
                            get_peak_memory() / 1_000_000
                        );
                    }
                    total_duration
                });
            },
        );

        group.bench_with_input(
            BenchmarkId::new("Flash", seq_len),
            &seq_len,
            |b, &len| {
                b.iter_custom(|iters| {
                    let mut total_duration = std::time::Duration::ZERO;
                    for _ in 0..iters {
                        reset_memory_tracking();
                        let start = std::time::Instant::now();

                        let config = LatentConfig {
                            dimension: dim,
                            attention_type: AttentionType::Flash,
                            num_heads: 8,
                            distance_metric: DistanceMetric::Euclidean,
                            ..Default::default()
                        };
                        let mut space = LatentSpace::new(config).unwrap();
                        let embeddings = generate_dataset(len, dim);
                        for emb in embeddings.iter() {
                            space.add_embedding(emb.clone(), None).unwrap();
                        }
                        let query = random_embedding(dim);
                        black_box(space.compute_attention(&query));

                        total_duration += start.elapsed();
                        println!(
                            "Flash@{}: Peak {} MB",
                            len,
                            get_peak_memory() / 1_000_000
                        );
                    }
                    total_duration
                });
            },
        );
    }
    group.finish();
}

criterion_group!(
    flash_benches,
    bench_flash_vs_standard,
    bench_memory_flash_vs_standard
);
criterion_main!(flash_benches);
```
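The expected gap in the memory benchmark has a simple back-of-envelope form: standard attention materializes a full n x n score matrix, so its peak grows quadratically with sequence length, while Flash-style tiling keeps the working set roughly linear. A helper for sanity-checking the Standard variant's peak (the f32 score matrix is an assumption about the internal representation):

```rust
/// Bytes needed to materialize a dense f32 attention score matrix.
/// At seq_len = 4096 this is 4096 * 4096 * 4 bytes, about 67 MB,
/// which the Standard variant's peak should roughly track.
fn standard_score_matrix_bytes(seq_len: usize) -> usize {
    seq_len * seq_len * std::mem::size_of::<f32>()
}
```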
## 4. Regression Detection

### 4.1 Baseline Storage System

```rust
// benches/regression_detection.rs
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::path::Path;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkBaseline {
    pub name: String,
    pub dimension: usize,
    pub mean: f64,
    pub std_dev: f64,
    pub median: f64,
    pub throughput: Option<f64>,
    pub memory_peak_mb: Option<f64>,
    pub timestamp: String,
    pub git_commit: String,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct BaselineCollection {
    pub baselines: HashMap<String, BenchmarkBaseline>,
    pub created_at: String,
    pub updated_at: String,
}

impl BaselineCollection {
    /// Load baselines from file
    pub fn load<P: AsRef<Path>>(path: P) -> Result<Self, Box<dyn std::error::Error>> {
        let content = fs::read_to_string(path)?;
        let collection: BaselineCollection = serde_json::from_str(&content)?;
        Ok(collection)
    }

    /// Save baselines to file
    pub fn save<P: AsRef<Path>>(&self, path: P) -> Result<(), Box<dyn std::error::Error>> {
        let content = serde_json::to_string_pretty(self)?;
        fs::write(path, content)?;
        Ok(())
    }

    /// Add or update a baseline
    pub fn update_baseline(&mut self, key: String, baseline: BenchmarkBaseline) {
        self.baselines.insert(key, baseline);
        self.updated_at = chrono::Utc::now().to_rfc3339();
    }

    /// Check for a regression against the stored baseline
    pub fn check_regression(
        &self,
        key: &str,
        current: &BenchmarkResult,
        threshold_percent: f64,
    ) -> RegressionStatus {
        if let Some(baseline) = self.baselines.get(key) {
            let change_percent = ((current.mean - baseline.mean) / baseline.mean) * 100.0;

            if change_percent > threshold_percent {
                RegressionStatus::Regression {
                    baseline: baseline.mean,
                    current: current.mean,
                    change_percent,
                }
            } else if change_percent < -threshold_percent / 2.0 {
                RegressionStatus::Improvement {
                    baseline: baseline.mean,
                    current: current.mean,
                    change_percent: -change_percent,
                }
            } else {
                RegressionStatus::NoChange
            }
        } else {
            RegressionStatus::NewBenchmark
        }
    }
}

#[derive(Debug)]
pub enum RegressionStatus {
    Regression {
        baseline: f64,
        current: f64,
        change_percent: f64,
    },
    Improvement {
        baseline: f64,
        current: f64,
        change_percent: f64,
    },
    NoChange,
    NewBenchmark,
}

#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    pub name: String,
    pub mean: f64,
    pub std_dev: f64,
    pub median: f64,
}

/// Get the current git commit
fn get_git_commit() -> String {
    use std::process::Command;

    let output = Command::new("git")
        .args(&["rev-parse", "HEAD"])
        .output()
        .expect("Failed to get git commit");

    String::from_utf8_lossy(&output.stdout).trim().to_string()
}

/// Create a baseline collection from the current run
pub fn create_baseline(results: Vec<BenchmarkResult>) -> BaselineCollection {
    let mut baselines = HashMap::new();
    let commit = get_git_commit();
    let timestamp = chrono::Utc::now().to_rfc3339();

    for result in results {
        let baseline = BenchmarkBaseline {
            name: result.name.clone(),
            dimension: 256, // Extract from name or pass explicitly
            mean: result.mean,
            std_dev: result.std_dev,
            median: result.median,
            throughput: None,
            memory_peak_mb: None,
            timestamp: timestamp.clone(),
            git_commit: commit.clone(),
        };
        baselines.insert(result.name.clone(), baseline);
    }

    BaselineCollection {
        baselines,
        created_at: timestamp.clone(),
        updated_at: timestamp,
    }
}
```

### 4.2 CI Integration Script

```bash
#!/bin/bash
# scripts/benchmark_ci.sh

set -e

BASELINE_FILE="benches/baselines.json"
RESULTS_FILE="target/criterion/results.json"
THRESHOLD=10.0  # 10% regression threshold

echo "Running benchmarks..."
cargo bench --bench latency_benchmarks -- --save-baseline current

echo "Comparing with baseline..."
if [ -f "$BASELINE_FILE" ]; then
    # Under `set -e` a non-zero exit would abort the script,
    # so capture the comparison status explicitly.
    REGRESSION_STATUS=0
    cargo run --bin compare_benchmarks -- \
        --baseline "$BASELINE_FILE" \
        --current "$RESULTS_FILE" \
        --threshold "$THRESHOLD" || REGRESSION_STATUS=$?

    if [ $REGRESSION_STATUS -eq 1 ]; then
        echo "❌ Performance regression detected!"
        exit 1
    elif [ $REGRESSION_STATUS -eq 2 ]; then
        echo "✅ Performance improvement detected!"
    else
        echo "✅ No significant performance change"
    fi
else
    echo "No baseline found, creating new baseline..."
    cp "$RESULTS_FILE" "$BASELINE_FILE"
fi

echo "Generating benchmark report..."
cargo run --bin benchmark_report -- \
    --baseline "$BASELINE_FILE" \
    --output "target/benchmark_report.md"

echo "Done!"
```

### 4.3 Threshold Configuration

```toml
# benches/regression_config.toml

[thresholds]
# Global default threshold (%)
default = 10.0

# Per-benchmark thresholds
[thresholds.latency]
knn_search = 5.0
add_embedding = 8.0
attention = 12.0

[thresholds.throughput]
batch_operations = 15.0
parallel_search = 10.0

[thresholds.memory]
peak_usage = 20.0
allocation_count = 25.0

[regression_actions]
# What to do on regression
fail_ci = true
create_issue = true
notify_team = true

[regression_actions.notifications]
slack_webhook = "${SLACK_WEBHOOK_URL}"
email = "dev-team@example.com"

[baseline_management]
# Auto-update baseline on main branch
auto_update_main = true
# Require manual approval for baseline updates
require_approval = false
# Keep history of baselines
keep_history = true
max_history_count = 50
```

### 4.4 Benchmark Comparison Tool

```rust
// src/bin/compare_benchmarks.rs
use clap::Parser;
use ruvector_benches::regression::{BaselineCollection, BenchmarkResult, RegressionStatus};
use std::path::PathBuf;

#[derive(Parser)]
struct Args {
    #[arg(long)]
    baseline: PathBuf,
    #[arg(long)]
    current: PathBuf,
    #[arg(long, default_value = "10.0")]
    threshold: f64,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args = Args::parse();

    let baselines = BaselineCollection::load(&args.baseline)?;
    let current_results = load_current_results(&args.current)?;

    let mut has_regression = false;
    let mut has_improvement = false;

    println!("\n=== Benchmark Comparison Report ===\n");

    for result in current_results {
        let status = baselines.check_regression(&result.name, &result, args.threshold);

        match status {
            RegressionStatus::Regression { baseline, current, change_percent } => {
                has_regression = true;
                println!(
                    "❌ REGRESSION: {}\n   Baseline: {:.2} µs\n   Current:  {:.2} µs\n   Change:   +{:.1}%\n",
                    result.name, baseline, current, change_percent
                );
            }
            RegressionStatus::Improvement { baseline, current, change_percent } => {
                has_improvement = true;
                println!(
                    "✅ IMPROVEMENT: {}\n   Baseline: {:.2} µs\n   Current:  {:.2} µs\n   Change:   -{:.1}%\n",
                    result.name, baseline, current, change_percent
                );
            }
            RegressionStatus::NoChange => {
                println!("➡️ NO CHANGE: {}\n", result.name);
            }
            RegressionStatus::NewBenchmark => {
                println!("🆕 NEW: {}\n", result.name);
            }
        }
    }

    if has_regression {
        std::process::exit(1);
    } else if has_improvement {
        std::process::exit(2);
    } else {
        std::process::exit(0);
    }
}

fn load_current_results(path: &PathBuf) -> Result<Vec<BenchmarkResult>, Box<dyn std::error::Error>> {
    // Parse Criterion JSON output. Left as a stub here; a sketch of one
    // possible implementation follows this listing.
    Ok(vec![])
}
```
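For the stub above, one option is to read the per-benchmark statistics that Criterion writes under `target/criterion/<benchmark>/new/estimates.json`. That JSON layout is an internal detail and can change between Criterion versions, so the following is a hedged sketch (verify the field names against the version in use; point estimates are in nanoseconds, converted here to microseconds to match the µs baselines used elsewhere in this document):

```rust
use serde_json::Value;
use std::path::Path;

/// Walk target/criterion/*/new/estimates.json and collect point estimates.
fn load_current_results(dir: &Path) -> Result<Vec<BenchmarkResult>, Box<dyn std::error::Error>> {
    let mut results = Vec::new();
    for entry in std::fs::read_dir(dir)? {
        let bench_dir = entry?.path();
        let estimates = bench_dir.join("new").join("estimates.json");
        if !estimates.exists() {
            continue; // not a benchmark directory
        }
        let v: Value = serde_json::from_str(&std::fs::read_to_string(&estimates)?)?;
        // Point estimates are reported in nanoseconds
        let point = |key: &str| v[key]["point_estimate"].as_f64().unwrap_or(f64::NAN);
        results.push(BenchmarkResult {
            name: bench_dir
                .file_name()
                .map(|n| n.to_string_lossy().into_owned())
                .unwrap_or_default(),
            mean: point("mean") / 1_000.0,       // ns -> µs
            std_dev: point("std_dev") / 1_000.0,
            median: point("median") / 1_000.0,
        });
    }
    Ok(results)
}
```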
### 4.5 GitHub Actions Workflow

```yaml
# .github/workflows/benchmarks.yml
name: Performance Benchmarks

on:
  pull_request:
    branches: [ main ]
  push:
    branches: [ main ]

jobs:
  benchmark:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0  # Fetch all history for baseline comparison

      - name: Setup Rust
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          override: true

      - name: Cache cargo registry
        uses: actions/cache@v3
        with:
          path: ~/.cargo/registry
          key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}

      - name: Cache criterion results
        uses: actions/cache@v3
        with:
          path: target/criterion
          key: ${{ runner.os }}-criterion-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-criterion-

      - name: Restore baseline
        run: |
          if [ -f benches/baselines.json ]; then
            echo "Baseline found"
          else
            echo "No baseline, will create new one"
          fi

      - name: Run benchmarks
        run: |
          cargo bench --bench latency_benchmarks
          cargo bench --bench throughput_benchmarks
          cargo bench --bench memory_benchmarks
          cargo bench --bench attention_comparison

      - name: Compare with baseline
        id: compare
        run: |
          chmod +x scripts/benchmark_ci.sh
          ./scripts/benchmark_ci.sh
        continue-on-error: true

      - name: Generate report
        run: |
          cargo run --bin benchmark_report -- \
            --baseline benches/baselines.json \
            --output target/benchmark_report.md

      - name: Comment PR
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v6
        with:
          script: |
            const fs = require('fs');
            const report = fs.readFileSync('target/benchmark_report.md', 'utf8');
            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: report
            });

      - name: Update baseline on main
        if: github.ref == 'refs/heads/main' && steps.compare.outcome == 'success'
        run: |
          cp target/criterion/results.json benches/baselines.json
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add benches/baselines.json
          git commit -m "chore: update performance baselines [skip ci]"
          git push

      - name: Fail on regression
        if: steps.compare.outcome == 'failure'
        run: exit 1

      - name: Upload benchmark results
        uses: actions/upload-artifact@v3
        with:
          name: benchmark-results
          path: |
            target/criterion/
            target/benchmark_report.md
```
## 5. Benchmark Organization

### 5.1 Directory Structure

```
benches/
├── latency_benchmarks.rs         # Latency measurements
├── throughput_benchmarks.rs      # Throughput measurements
├── memory_benchmarks.rs          # Memory usage tracking
├── attention_comparison.rs       # Attention mechanism comparison
├── distance_comparison.rs        # Distance metric comparison
├── flash_attention_benchmark.rs  # Flash vs Standard detailed
├── benchmark_matrix.rs           # Complete test matrix
├── regression_detection.rs       # Baseline & regression tools
├── baselines.json                # Stored baselines
└── regression_config.toml        # Threshold configuration

scripts/
├── benchmark_ci.sh               # CI integration script
├── generate_report.sh            # Report generation
└── update_baseline.sh            # Baseline management

src/bin/
├── compare_benchmarks.rs         # Comparison tool
└── benchmark_report.rs           # Report generator
```

### 5.2 Cargo.toml Configuration

```toml
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
rand = "0.8"
rayon = "1.7"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
chrono = "0.4"

# Note: `clap` (with the `derive` feature) is used by the src/bin tools and
# belongs in [dependencies], since binaries do not see dev-dependencies.

[[bench]]
name = "latency_benchmarks"
harness = false

[[bench]]
name = "throughput_benchmarks"
harness = false

[[bench]]
name = "memory_benchmarks"
harness = false

[[bench]]
name = "attention_comparison"
harness = false

[[bench]]
name = "distance_comparison"
harness = false

[[bench]]
name = "flash_attention_benchmark"
harness = false

[[bench]]
name = "benchmark_matrix"
harness = false

[profile.bench]
opt-level = 3
lto = true
codegen-units = 1
```

## Summary

This benchmark suite provides:

1. **Comprehensive Coverage**:
   - Latency benchmarks across all dimensions
   - Throughput measurements for batch operations
   - Memory usage tracking and profiling
   - Full test matrix spanning a 4D parameter space

2. **Comparative Analysis**:
   - Attention mechanism comparison (Standard, Flash, Multi-Head, MoE)
   - Distance metric comparison (Euclidean, Cosine, Manhattan, Hyperbolic)
   - Detailed Flash vs Standard analysis

3. **Regression Detection**:
   - Baseline storage and versioning
   - Automated comparison with configurable thresholds
   - CI/CD integration with GitHub Actions
   - Automatic PR comments with benchmark results

4. **Production Ready**:
   - Complete Criterion integration
   - Structured result storage
   - Automated reporting
   - Performance tracking over time

Run benchmarks with:

```bash
# Run all benchmarks
cargo bench

# Run a specific benchmark suite
cargo bench --bench latency_benchmarks

# Compare with baseline
./scripts/benchmark_ci.sh

# Generate report
cargo run --bin benchmark_report
```