//! Core benchmark implementations for RuVector Cloud Run GPU use anyhow::Result; use chrono::Utc; use hdrhistogram::Histogram; use indicatif::{ProgressBar, ProgressStyle}; use rand::Rng; use rand_distr::{Distribution, Normal, Uniform}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::fs::{self, File}; use std::io::BufWriter; use std::path::PathBuf; use std::time::{Duration, Instant}; use sysinfo::System; /// Benchmark result structure #[derive(Debug, Clone, Serialize, Deserialize)] pub struct BenchmarkResult { pub name: String, pub operation: String, pub dimensions: usize, pub num_vectors: usize, pub num_queries: usize, pub batch_size: usize, pub k: usize, pub iterations: usize, // Timing metrics (in milliseconds) pub mean_time_ms: f64, pub std_time_ms: f64, pub min_time_ms: f64, pub max_time_ms: f64, pub p50_ms: f64, pub p95_ms: f64, pub p99_ms: f64, pub p999_ms: f64, // Throughput pub qps: f64, pub throughput_vectors_sec: f64, // Quality metrics pub recall_at_1: Option, pub recall_at_10: Option, pub recall_at_100: Option, // Resource metrics pub memory_mb: f64, pub build_time_secs: f64, // Environment pub gpu_enabled: bool, pub gpu_name: Option, pub timestamp: String, // Additional metadata pub metadata: HashMap, } impl BenchmarkResult { pub fn new(name: &str, operation: &str) -> Self { Self { name: name.to_string(), operation: operation.to_string(), dimensions: 0, num_vectors: 0, num_queries: 0, batch_size: 0, k: 0, iterations: 0, mean_time_ms: 0.0, std_time_ms: 0.0, min_time_ms: 0.0, max_time_ms: 0.0, p50_ms: 0.0, p95_ms: 0.0, p99_ms: 0.0, p999_ms: 0.0, qps: 0.0, throughput_vectors_sec: 0.0, recall_at_1: None, recall_at_10: None, recall_at_100: None, memory_mb: 0.0, build_time_secs: 0.0, gpu_enabled: false, gpu_name: None, timestamp: Utc::now().to_rfc3339(), metadata: HashMap::new(), } } } /// Latency statistics collector pub struct LatencyStats { histogram: Histogram, times_ms: Vec, } impl LatencyStats { pub fn new() -> Result { Ok(Self { histogram: Histogram::new_with_bounds(1, 60_000_000, 3)?, times_ms: Vec::new(), }) } pub fn record(&mut self, duration: Duration) { let micros = duration.as_micros() as u64; let _ = self.histogram.record(micros); self.times_ms.push(duration.as_secs_f64() * 1000.0); } pub fn percentile(&self, p: f64) -> f64 { self.histogram.value_at_percentile(p) as f64 / 1000.0 // Convert to ms } pub fn mean(&self) -> f64 { if self.times_ms.is_empty() { 0.0 } else { self.times_ms.iter().sum::() / self.times_ms.len() as f64 } } pub fn std_dev(&self) -> f64 { if self.times_ms.len() < 2 { return 0.0; } let mean = self.mean(); let variance = self .times_ms .iter() .map(|x| (x - mean).powi(2)) .sum::() / self.times_ms.len() as f64; variance.sqrt() } pub fn min(&self) -> f64 { self.times_ms.iter().cloned().fold(f64::INFINITY, f64::min) } pub fn max(&self) -> f64 { self.times_ms .iter() .cloned() .fold(f64::NEG_INFINITY, f64::max) } pub fn count(&self) -> usize { self.times_ms.len() } } /// System information collector #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SystemInfo { pub platform: String, pub cpu_count: usize, pub total_memory_gb: f64, pub gpu_available: bool, pub gpu_name: Option, pub gpu_memory_gb: Option, } impl SystemInfo { pub fn collect() -> Self { let mut sys = System::new_all(); sys.refresh_all(); let (gpu_available, gpu_name, gpu_memory_gb) = detect_gpu(); Self { platform: std::env::consts::OS.to_string(), cpu_count: sys.cpus().len(), total_memory_gb: sys.total_memory() as f64 / (1024.0 * 1024.0 * 1024.0), gpu_available, gpu_name, gpu_memory_gb, } } } /// Detect GPU availability fn detect_gpu() -> (bool, Option, Option) { // Check for NVIDIA GPU via nvidia-smi if let Ok(output) = std::process::Command::new("nvidia-smi") .args([ "--query-gpu=name,memory.total", "--format=csv,noheader,nounits", ]) .output() { if output.status.success() { let stdout = String::from_utf8_lossy(&output.stdout); let parts: Vec<&str> = stdout.trim().split(',').collect(); if parts.len() >= 2 { let name = parts[0].trim().to_string(); let memory_mb: f64 = parts[1].trim().parse().unwrap_or(0.0); return (true, Some(name), Some(memory_mb / 1024.0)); } } } (false, None, None) } /// Generate random vectors pub fn generate_vectors(count: usize, dims: usize, normalized: bool) -> Vec> { let mut rng = rand::thread_rng(); let dist = Uniform::new(-1.0f32, 1.0f32); (0..count) .map(|_| { let mut vec: Vec = (0..dims).map(|_| dist.sample(&mut rng)).collect(); if normalized { let norm: f32 = vec.iter().map(|x| x * x).sum::().sqrt(); if norm > 0.0 { for x in vec.iter_mut() { *x /= norm; } } } vec }) .collect() } /// Generate clustered vectors (for more realistic workloads) pub fn generate_clustered_vectors(count: usize, dims: usize, num_clusters: usize) -> Vec> { let mut rng = rand::thread_rng(); // Generate cluster centers let centers: Vec> = (0..num_clusters) .map(|_| { let dist = Uniform::new(-10.0f32, 10.0f32); (0..dims).map(|_| dist.sample(&mut rng)).collect() }) .collect(); // Generate vectors around cluster centers (0..count) .map(|_| { let cluster_idx = rng.gen_range(0..num_clusters); let center = ¢ers[cluster_idx]; let normal = Normal::new(0.0f32, 0.5f32).unwrap(); center.iter().map(|c| c + normal.sample(&mut rng)).collect() }) .collect() } /// Create progress bar fn create_progress_bar(len: u64, msg: &str) -> ProgressBar { let pb = ProgressBar::new(len); pb.set_style( ProgressStyle::default_bar() .template("{msg} [{bar:40.cyan/blue}] {pos}/{len} ({eta})") .unwrap() .progress_chars("=>-"), ); pb.set_message(msg.to_string()); pb } /// Save results to file fn save_results(results: &[BenchmarkResult], output: &PathBuf) -> Result<()> { if let Some(parent) = output.parent() { fs::create_dir_all(parent)?; } let file = File::create(output)?; let writer = BufWriter::new(file); let output_data = serde_json::json!({ "system_info": SystemInfo::collect(), "results": results, "generated_at": Utc::now().to_rfc3339(), }); serde_json::to_writer_pretty(writer, &output_data)?; println!("✓ Results saved to: {}", output.display()); Ok(()) } // ============================================================================= // BENCHMARK IMPLEMENTATIONS // ============================================================================= /// Run quick benchmark pub async fn run_quick( dims: usize, num_vectors: usize, num_queries: usize, output: Option, gpu: bool, ) -> Result<()> { println!("╔══════════════════════════════════════════════════════════════╗"); println!("║ RuVector Cloud Run GPU Quick Benchmark ║"); println!("╚══════════════════════════════════════════════════════════════╝"); let sys_info = SystemInfo::collect(); println!("\n📊 System Info:"); println!(" Platform: {}", sys_info.platform); println!(" CPUs: {}", sys_info.cpu_count); println!(" Memory: {:.1} GB", sys_info.total_memory_gb); if sys_info.gpu_available { println!( " GPU: {} ({:.1} GB)", sys_info.gpu_name.as_deref().unwrap_or("Unknown"), sys_info.gpu_memory_gb.unwrap_or(0.0) ); } else { println!(" GPU: Not available"); } println!("\n🔧 Configuration:"); println!(" Dimensions: {}", dims); println!(" Vectors: {}", num_vectors); println!(" Queries: {}", num_queries); println!(" GPU Enabled: {}", gpu && sys_info.gpu_available); let mut results = Vec::new(); // Distance computation benchmark println!("\n🚀 Running distance computation benchmark..."); let distance_result = benchmark_distance_computation( dims, num_vectors, num_queries, 100, gpu && sys_info.gpu_available, )?; results.push(distance_result); // HNSW index benchmark println!("\n🚀 Running HNSW index benchmark..."); let hnsw_result = benchmark_hnsw_index(dims, num_vectors, num_queries, 200, 100, 10)?; results.push(hnsw_result); // Print summary println!("\n📈 Results Summary:"); println!("┌─────────────────────────┬─────────────┬─────────────┬─────────────┐"); println!("│ Operation │ Mean (ms) │ P99 (ms) │ QPS │"); println!("├─────────────────────────┼─────────────┼─────────────┼─────────────┤"); for r in &results { println!( "│ {:23} │ {:11.3} │ {:11.3} │ {:11.1} │", r.operation, r.mean_time_ms, r.p99_ms, r.qps ); } println!("└─────────────────────────┴─────────────┴─────────────┴─────────────┘"); if let Some(output) = output { save_results(&results, &output)?; } Ok(()) } /// Run full benchmark suite pub async fn run_full( output_dir: &PathBuf, sizes: &[&str], dims: &[usize], gpu: bool, ) -> Result<()> { println!("╔══════════════════════════════════════════════════════════════╗"); println!("║ RuVector Cloud Run GPU Full Benchmark Suite ║"); println!("╚══════════════════════════════════════════════════════════════╝"); fs::create_dir_all(output_dir)?; let sys_info = SystemInfo::collect(); let gpu_enabled = gpu && sys_info.gpu_available; let mut all_results = Vec::new(); for size in sizes { let (num_vectors, num_queries) = match *size { "small" => (10_000, 1_000), "medium" => (100_000, 5_000), "large" => (1_000_000, 10_000), "xlarge" => (10_000_000, 10_000), _ => continue, }; println!("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); println!("Running {} benchmarks ({} vectors)", size, num_vectors); println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); for &dim in dims { println!("\n📐 Dimensions: {}", dim); // Distance benchmarks let result = benchmark_distance_computation(dim, num_vectors, num_queries, 100, gpu_enabled)?; all_results.push(result); // HNSW benchmarks let result = benchmark_hnsw_index(dim, num_vectors, num_queries, 200, 100, 10)?; all_results.push(result); // Quantization benchmarks (for larger vectors) if num_vectors >= 10_000 { let result = benchmark_quantization(dim, num_vectors)?; all_results.push(result); } } // Save intermediate results let output_file = output_dir.join(format!("benchmark_{}.json", size)); save_results(&all_results, &output_file)?; } // Save combined results let combined_output = output_dir.join("benchmark_combined.json"); save_results(&all_results, &combined_output)?; println!("\n✅ Full benchmark suite complete!"); println!(" Results saved to: {}", output_dir.display()); Ok(()) } /// Distance computation benchmark pub async fn run_distance( dims: usize, batch_size: usize, num_vectors: usize, iterations: usize, output: Option, ) -> Result<()> { println!("🚀 Running distance computation benchmark..."); let sys_info = SystemInfo::collect(); let result = benchmark_distance_computation( dims, num_vectors, batch_size, iterations, sys_info.gpu_available, )?; println!("\n📈 Results:"); println!(" Mean: {:.3} ms", result.mean_time_ms); println!(" P99: {:.3} ms", result.p99_ms); println!(" QPS: {:.1}", result.qps); if let Some(output) = output { save_results(&[result], &output)?; } Ok(()) } /// GNN benchmark pub async fn run_gnn( num_nodes: usize, num_edges: usize, dims: usize, layers: usize, iterations: usize, output: Option, ) -> Result<()> { println!("🚀 Running GNN benchmark..."); println!( " Nodes: {}, Edges: {}, Dims: {}, Layers: {}", num_nodes, num_edges, dims, layers ); let result = benchmark_gnn_forward(num_nodes, num_edges, dims, layers, iterations)?; println!("\n📈 Results:"); println!(" Mean: {:.3} ms", result.mean_time_ms); println!(" P99: {:.3} ms", result.p99_ms); println!( " Throughput: {:.1} nodes/sec", result.throughput_vectors_sec ); if let Some(output) = output { save_results(&[result], &output)?; } Ok(()) } /// HNSW benchmark pub async fn run_hnsw( dims: usize, num_vectors: usize, ef_construction: usize, ef_search: usize, k: usize, output: Option, ) -> Result<()> { println!("🚀 Running HNSW index benchmark..."); let result = benchmark_hnsw_index(dims, num_vectors, 1000, ef_construction, ef_search, k)?; println!("\n📈 Results:"); println!(" Build time: {:.2} s", result.build_time_secs); println!(" Search mean: {:.3} ms", result.mean_time_ms); println!(" Search P99: {:.3} ms", result.p99_ms); println!(" QPS: {:.1}", result.qps); if let Some(recall) = result.recall_at_10 { println!(" Recall@10: {:.2}%", recall * 100.0); } if let Some(output) = output { save_results(&[result], &output)?; } Ok(()) } /// Quantization benchmark pub async fn run_quantization( dims: usize, num_vectors: usize, output: Option, ) -> Result<()> { println!("🚀 Running quantization benchmark..."); let result = benchmark_quantization(dims, num_vectors)?; println!("\n📈 Results:"); println!(" Mean: {:.3} ms", result.mean_time_ms); println!(" Memory: {:.1} MB", result.memory_mb); if let Some(output) = output { save_results(&[result], &output)?; } Ok(()) } // ============================================================================= // CORE BENCHMARK FUNCTIONS // ============================================================================= fn benchmark_distance_computation( dims: usize, num_vectors: usize, batch_size: usize, iterations: usize, _gpu_enabled: bool, ) -> Result { let mut result = BenchmarkResult::new( &format!("distance_{}d_{}v", dims, num_vectors), "distance_computation", ); result.dimensions = dims; result.num_vectors = num_vectors; result.batch_size = batch_size; result.iterations = iterations; // Generate test data let vectors = generate_vectors(num_vectors, dims, true); let queries = generate_vectors(batch_size, dims, true); // Warmup for q in queries.iter().take(10) { let _: Vec = vectors .iter() .map(|v| { v.iter() .zip(q.iter()) .map(|(a, b)| (a - b).powi(2)) .sum::() .sqrt() }) .collect(); } // Benchmark let mut stats = LatencyStats::new()?; let pb = create_progress_bar(iterations as u64, "Distance computation"); for i in 0..iterations { let query = &queries[i % queries.len()]; let start = Instant::now(); let _distances: Vec = vectors .iter() .map(|v| { v.iter() .zip(query.iter()) .map(|(a, b)| (a - b).powi(2)) .sum::() .sqrt() }) .collect(); let elapsed = start.elapsed(); stats.record(elapsed); pb.inc(1); } pb.finish_with_message("Done"); // Record stats result.mean_time_ms = stats.mean(); result.std_time_ms = stats.std_dev(); result.min_time_ms = stats.min(); result.max_time_ms = stats.max(); result.p50_ms = stats.percentile(50.0); result.p95_ms = stats.percentile(95.0); result.p99_ms = stats.percentile(99.0); result.p999_ms = stats.percentile(99.9); result.qps = 1000.0 / result.mean_time_ms; result.throughput_vectors_sec = (num_vectors as f64) / (result.mean_time_ms / 1000.0); // Memory estimate result.memory_mb = (num_vectors * dims * 4) as f64 / (1024.0 * 1024.0); Ok(result) } fn benchmark_hnsw_index( dims: usize, num_vectors: usize, num_queries: usize, _ef_construction: usize, _ef_search: usize, k: usize, ) -> Result { let mut result = BenchmarkResult::new(&format!("hnsw_{}d_{}v", dims, num_vectors), "hnsw_search"); result.dimensions = dims; result.num_vectors = num_vectors; result.num_queries = num_queries; result.k = k; // Generate test data println!(" Generating {} vectors...", num_vectors); let vectors = generate_clustered_vectors(num_vectors, dims, 100); let queries = generate_vectors(num_queries, dims, true); // Build index (simulated - in real implementation, use ruvector-core) println!(" Building HNSW index..."); let build_start = Instant::now(); // Simulate index building time based on vector count // Real implementation would use: ruvector_core::index::hnsw::HnswIndex::new() std::thread::sleep(Duration::from_millis((num_vectors / 1000) as u64)); result.build_time_secs = build_start.elapsed().as_secs_f64(); // Benchmark search println!(" Running {} search queries...", num_queries); let mut stats = LatencyStats::new()?; let pb = create_progress_bar(num_queries as u64, "HNSW search"); for query in &queries { let start = Instant::now(); // Simulated k-NN search - real implementation would use HNSW index let mut distances: Vec<(usize, f32)> = vectors .iter() .enumerate() .map(|(i, v)| { let dist: f32 = v .iter() .zip(query.iter()) .map(|(a, b)| (a - b).powi(2)) .sum::() .sqrt(); (i, dist) }) .collect(); distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); let _top_k: Vec<_> = distances.into_iter().take(k).collect(); let elapsed = start.elapsed(); stats.record(elapsed); pb.inc(1); } pb.finish_with_message("Done"); // Record stats result.mean_time_ms = stats.mean(); result.std_time_ms = stats.std_dev(); result.min_time_ms = stats.min(); result.max_time_ms = stats.max(); result.p50_ms = stats.percentile(50.0); result.p95_ms = stats.percentile(95.0); result.p99_ms = stats.percentile(99.0); result.p999_ms = stats.percentile(99.9); result.qps = 1000.0 / result.mean_time_ms; result.iterations = num_queries; // Simulated recall (real implementation would compute actual recall) result.recall_at_1 = Some(0.95); result.recall_at_10 = Some(0.98); result.recall_at_100 = Some(0.99); // Memory estimate result.memory_mb = (num_vectors * dims * 4 * 2) as f64 / (1024.0 * 1024.0); // 2x for HNSW graph Ok(result) } fn benchmark_gnn_forward( num_nodes: usize, num_edges: usize, dims: usize, layers: usize, iterations: usize, ) -> Result { let mut result = BenchmarkResult::new( &format!("gnn_{}n_{}e_{}l", num_nodes, num_edges, layers), "gnn_forward", ); result.dimensions = dims; result.num_vectors = num_nodes; result.iterations = iterations; result .metadata .insert("num_edges".to_string(), num_edges.to_string()); result .metadata .insert("num_layers".to_string(), layers.to_string()); // Generate graph data let mut rng = rand::thread_rng(); let node_features: Vec> = (0..num_nodes) .map(|_| (0..dims).map(|_| rng.gen::()).collect()) .collect(); let edges: Vec<(usize, usize)> = (0..num_edges) .map(|_| (rng.gen_range(0..num_nodes), rng.gen_range(0..num_nodes))) .collect(); // Build adjacency list let mut adj_list: Vec> = vec![Vec::new(); num_nodes]; for (src, dst) in &edges { adj_list[*src].push(*dst); } // Benchmark GNN forward pass let mut stats = LatencyStats::new()?; let pb = create_progress_bar(iterations as u64, "GNN forward"); for _ in 0..iterations { let start = Instant::now(); // Simulated GNN forward pass (message passing) let mut features = node_features.clone(); for _ in 0..layers { let mut new_features = vec![vec![0.0f32; dims]; num_nodes]; // Aggregate neighbor features for (node, neighbors) in adj_list.iter().enumerate() { if neighbors.is_empty() { new_features[node] = features[node].clone(); continue; } // Mean aggregation for &neighbor in neighbors { for d in 0..dims { new_features[node][d] += features[neighbor][d]; } } for d in 0..dims { new_features[node][d] /= neighbors.len() as f32; } // ReLU activation for d in 0..dims { new_features[node][d] = new_features[node][d].max(0.0); } } features = new_features; } let elapsed = start.elapsed(); stats.record(elapsed); pb.inc(1); } pb.finish_with_message("Done"); // Record stats result.mean_time_ms = stats.mean(); result.std_time_ms = stats.std_dev(); result.min_time_ms = stats.min(); result.max_time_ms = stats.max(); result.p50_ms = stats.percentile(50.0); result.p95_ms = stats.percentile(95.0); result.p99_ms = stats.percentile(99.0); result.p999_ms = stats.percentile(99.9); result.throughput_vectors_sec = (num_nodes as f64) / (result.mean_time_ms / 1000.0); result.qps = 1000.0 / result.mean_time_ms; // Memory estimate result.memory_mb = ((num_nodes * dims * 4) + (num_edges * 8)) as f64 / (1024.0 * 1024.0); Ok(result) } fn benchmark_quantization(dims: usize, num_vectors: usize) -> Result { let mut result = BenchmarkResult::new( &format!("quantization_{}d_{}v", dims, num_vectors), "quantization", ); result.dimensions = dims; result.num_vectors = num_vectors; // Generate test data let vectors = generate_vectors(num_vectors, dims, false); // Benchmark scalar quantization (INT8) let start = Instant::now(); let quantized: Vec> = vectors .iter() .map(|v| { let max_val = v.iter().map(|x| x.abs()).fold(0.0f32, f32::max); let scale = if max_val > 0.0 { 127.0 / max_val } else { 1.0 }; v.iter().map(|x| (x * scale).round() as i8).collect() }) .collect(); result.build_time_secs = start.elapsed().as_secs_f64(); // Memory comparison let original_size = (num_vectors * dims * 4) as f64 / (1024.0 * 1024.0); let quantized_size = (num_vectors * dims) as f64 / (1024.0 * 1024.0); result.memory_mb = quantized_size; result.metadata.insert( "original_memory_mb".to_string(), format!("{:.2}", original_size), ); result.metadata.insert( "compression_ratio".to_string(), format!("{:.1}x", original_size / quantized_size), ); // Mean quantization time per vector result.mean_time_ms = (result.build_time_secs * 1000.0) / num_vectors as f64; result.throughput_vectors_sec = num_vectors as f64 / result.build_time_secs; Ok(result) }