git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
44 KiB
44 KiB
Agent 13: Performance Benchmark Suite
Overview
Comprehensive Criterion-based benchmark suite for measuring and tracking performance across all latent space operations, attention mechanisms, and search algorithms.
1. Criterion Benchmarks
1.1 Latency Benchmarks
Complete benchmark code for measuring operation latency across various dimensions and neighbor counts.
// benches/latency_benchmarks.rs
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
use ruvector::metrics::DistanceMetric;
use rand::Rng;
/// Build a vector of `dim` pseudo-random `f32` values, each drawn uniformly
/// from the half-open interval `[-1.0, 1.0)` via the thread-local RNG.
fn random_embedding(dim: usize) -> Vec<f32> {
    let mut rng = rand::thread_rng();
    let mut out = Vec::with_capacity(dim);
    for _ in 0..dim {
        out.push(rng.gen_range(-1.0..1.0));
    }
    out
}
/// Produce `num_vectors` independent random embeddings, each of length `dim`.
fn generate_dataset(num_vectors: usize, dim: usize) -> Vec<Vec<f32>> {
    let mut dataset = Vec::with_capacity(num_vectors);
    for _ in 0..num_vectors {
        dataset.push(random_embedding(dim));
    }
    dataset
}
/// Benchmark `LatentSpace` construction across a range of dimensions.
///
/// The `LatentConfig` is rebuilt inside the timed closure, so config
/// construction cost is included in every sample.
fn bench_latent_space_creation(c: &mut Criterion) {
    let mut group = c.benchmark_group("latent_space_creation");
    // One benchmark per dimension; the BenchmarkId encodes the dimension.
    for dim in [64, 128, 256, 512, 1024].iter() {
        group.bench_with_input(
            BenchmarkId::from_parameter(dim),
            dim,
            |b, &dim| {
                b.iter(|| {
                    let config = LatentConfig {
                        dimension: dim,
                        attention_type: AttentionType::Standard,
                        num_heads: 8,
                        distance_metric: DistanceMetric::Euclidean,
                        ..Default::default()
                    };
                    // black_box keeps the optimizer from eliding the build.
                    black_box(LatentSpace::new(config))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark inserting a single embedding, per dimension.
///
/// NOTE(review): the same `space` is reused across all iterations, so it
/// grows by one embedding per sample — later iterations measure insertion
/// into a progressively larger structure. Confirm this bias is acceptable.
fn bench_add_embedding(c: &mut Criterion) {
    let mut group = c.benchmark_group("add_embedding");
    for dim in [64, 128, 256, 512, 1024].iter() {
        let config = LatentConfig {
            dimension: *dim,
            attention_type: AttentionType::Standard,
            num_heads: 8,
            distance_metric: DistanceMetric::Euclidean,
            ..Default::default()
        };
        let mut space = LatentSpace::new(config).unwrap();
        let embedding = random_embedding(*dim);
        group.bench_with_input(
            BenchmarkId::from_parameter(dim),
            &embedding,
            |b, emb| {
                b.iter(|| {
                    // The clone happens inside the timed loop, so its cost is
                    // part of every sample.
                    black_box(space.add_embedding(emb.clone(), None))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark KNN search over a 10k-vector space for several dimensions and
/// neighbor counts `k`. Throughput is reported as returned neighbors (`k`)
/// per second.
fn bench_knn_search(c: &mut Criterion) {
    let mut group = c.benchmark_group("knn_search");
    let dimensions = [128, 256, 512];
    let neighbor_counts = [10, 50, 100, 500, 1000];
    let dataset_size = 10000;
    for &dim in &dimensions {
        for &k in &neighbor_counts {
            let config = LatentConfig {
                dimension: dim,
                attention_type: AttentionType::Standard,
                num_heads: 8,
                distance_metric: DistanceMetric::Euclidean,
                ..Default::default()
            };
            let mut space = LatentSpace::new(config).unwrap();
            let dataset = generate_dataset(dataset_size, dim);
            // Populate space (untimed setup).
            for emb in dataset.iter() {
                space.add_embedding(emb.clone(), None).unwrap();
            }
            let query = random_embedding(dim);
            group.throughput(Throughput::Elements(k as u64));
            group.bench_with_input(
                // Id tree: knn_search/dim_<d>/<k>.
                BenchmarkId::new(format!("dim_{}", dim), k),
                &k,
                |b, &neighbors| {
                    b.iter(|| {
                        black_box(space.knn_search(&query, neighbors))
                    });
                },
            );
        }
    }
    group.finish();
}
/// Benchmark one attention pass for each attention variant at several
/// dimensions, over a space pre-populated with 100 embeddings.
fn bench_attention_computation(c: &mut Criterion) {
    let mut group = c.benchmark_group("attention_computation");
    let dimensions = [64, 128, 256, 512];
    let attention_types = [
        AttentionType::Standard,
        AttentionType::Flash,
        AttentionType::MultiHead { num_heads: 8 },
        AttentionType::MoE { num_experts: 4 },
    ];
    for &dim in &dimensions {
        for attention_type in &attention_types {
            let config = LatentConfig {
                dimension: dim,
                attention_type: attention_type.clone(),
                // NOTE(review): num_heads stays 8 even when the variant
                // carries its own head/expert count — confirm the config
                // field is ignored in that case.
                num_heads: 8,
                distance_metric: DistanceMetric::Euclidean,
                ..Default::default()
            };
            let mut space = LatentSpace::new(config).unwrap();
            let embeddings = generate_dataset(100, dim);
            for emb in embeddings.iter() {
                space.add_embedding(emb.clone(), None).unwrap();
            }
            let query = random_embedding(dim);
            group.bench_with_input(
                // Id: "dim_<d>/<AttentionType Debug repr>".
                BenchmarkId::new(format!("dim_{}", dim), format!("{:?}", attention_type)),
                &query,
                |b, q| {
                    b.iter(|| {
                        black_box(space.compute_attention(q))
                    });
                },
            );
        }
    }
    group.finish();
}
criterion_group!(
latency_benches,
bench_latent_space_creation,
bench_add_embedding,
bench_knn_search,
bench_attention_computation
);
criterion_main!(latency_benches);
1.2 Throughput Benchmarks
Benchmark batch processing and parallel operations.
// benches/throughput_benchmarks.rs
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
use ruvector::metrics::DistanceMetric;
use rand::Rng;
/// Benchmark inserting a whole batch of embeddings per timed iteration,
/// reporting throughput as embeddings/sec.
///
/// NOTE(review): `generate_dataset` is not defined in this file (it lives in
/// latency_benchmarks.rs) — move it to a shared benches module or this
/// target will not compile.
fn bench_batch_add_embeddings(c: &mut Criterion) {
    let mut group = c.benchmark_group("batch_add_embeddings");
    let batch_sizes = [1, 8, 32, 128, 512];
    let dim = 256;
    for &batch_size in &batch_sizes {
        let config = LatentConfig {
            dimension: dim,
            attention_type: AttentionType::Standard,
            num_heads: 8,
            distance_metric: DistanceMetric::Euclidean,
            ..Default::default()
        };
        let mut space = LatentSpace::new(config).unwrap();
        let embeddings = generate_dataset(batch_size, dim);
        group.throughput(Throughput::Elements(batch_size as u64));
        group.bench_with_input(
            BenchmarkId::from_parameter(batch_size),
            &embeddings,
            |b, embs| {
                b.iter(|| {
                    for emb in embs {
                        // Result intentionally ignored inside the hot loop;
                        // the space keeps growing across iterations.
                        black_box(space.add_embedding(emb.clone(), None));
                    }
                });
            },
        );
    }
    group.finish();
}
/// Fan a batch of KNN queries across threads with rayon and measure total
/// query throughput (queries/sec).
///
/// NOTE(review): sharing `space` across `par_iter` workers requires
/// `knn_search(&self, ...)` and `LatentSpace: Sync` — confirm. Also
/// `generate_dataset`/`random_embedding` are not defined in this file.
fn bench_parallel_knn_search(c: &mut Criterion) {
    use rayon::prelude::*;
    let mut group = c.benchmark_group("parallel_knn_search");
    let query_counts = [1, 8, 32, 128];
    let dim = 256;
    let k = 100;
    let dataset_size = 10000;
    for &num_queries in &query_counts {
        let config = LatentConfig {
            dimension: dim,
            attention_type: AttentionType::Standard,
            num_heads: 8,
            distance_metric: DistanceMetric::Euclidean,
            ..Default::default()
        };
        let mut space = LatentSpace::new(config).unwrap();
        let dataset = generate_dataset(dataset_size, dim);
        for emb in dataset.iter() {
            space.add_embedding(emb.clone(), None).unwrap();
        }
        let queries: Vec<Vec<f32>> = (0..num_queries)
            .map(|_| random_embedding(dim))
            .collect();
        group.throughput(Throughput::Elements(num_queries as u64));
        group.bench_with_input(
            BenchmarkId::from_parameter(num_queries),
            &queries,
            |b, qs| {
                b.iter(|| {
                    // Collect so every parallel search completes inside the
                    // timed region.
                    let results: Vec<_> = qs.par_iter()
                        .map(|q| space.knn_search(q, k))
                        .collect();
                    black_box(results)
                });
            },
        );
    }
    group.finish();
}
/// Benchmark sequential attention passes over a batch of queries against a
/// 1000-embedding space using Flash attention. Throughput = queries/sec.
///
/// NOTE(review): `generate_dataset` is not defined in this file — share the
/// helper from latency_benchmarks.rs via a common module.
fn bench_batch_attention(c: &mut Criterion) {
    let mut group = c.benchmark_group("batch_attention");
    let batch_sizes = [8, 32, 128];
    let dim = 256;
    for &batch_size in &batch_sizes {
        let config = LatentConfig {
            dimension: dim,
            attention_type: AttentionType::Flash,
            num_heads: 8,
            distance_metric: DistanceMetric::Euclidean,
            ..Default::default()
        };
        let mut space = LatentSpace::new(config).unwrap();
        let embeddings = generate_dataset(1000, dim);
        for emb in embeddings.iter() {
            space.add_embedding(emb.clone(), None).unwrap();
        }
        let queries = generate_dataset(batch_size, dim);
        group.throughput(Throughput::Elements(batch_size as u64));
        group.bench_with_input(
            BenchmarkId::from_parameter(batch_size),
            &queries,
            |b, qs| {
                b.iter(|| {
                    for q in qs {
                        black_box(space.compute_attention(q));
                    }
                });
            },
        );
    }
    group.finish();
}
criterion_group!(
throughput_benches,
bench_batch_add_embeddings,
bench_parallel_knn_search,
bench_batch_attention
);
criterion_main!(throughput_benches);
1.3 Memory Benchmarks
Track peak memory usage and allocation patterns.
// benches/memory_benchmarks.rs
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
use ruvector::metrics::DistanceMetric;
use std::alloc::{GlobalAlloc, Layout, System};
use std::sync::atomic::{AtomicUsize, Ordering};
/// Byte-counting allocator that wraps the system allocator so benchmarks can
/// report live and peak heap usage.
///
/// Counters are updated with independent atomic operations, so peaks observed
/// under heavy multithreaded allocation are approximate — good enough for
/// benchmark reporting.
struct TrackingAllocator;

/// Bytes currently live (allocated and not yet freed).
static ALLOCATED: AtomicUsize = AtomicUsize::new(0);
/// High-water mark of `ALLOCATED` since the last reset.
static PEAK_ALLOCATED: AtomicUsize = AtomicUsize::new(0);

unsafe impl GlobalAlloc for TrackingAllocator {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        let size = layout.size();
        let current = ALLOCATED.fetch_add(size, Ordering::SeqCst) + size;
        // fetch_max replaces the previous manual compare-exchange loop: it
        // atomically raises the peak only when `current` exceeds it.
        PEAK_ALLOCATED.fetch_max(current, Ordering::SeqCst);
        System.alloc(layout)
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        ALLOCATED.fetch_sub(layout.size(), Ordering::SeqCst);
        System.dealloc(ptr, layout)
    }
}

#[global_allocator]
static GLOBAL: TrackingAllocator = TrackingAllocator;

/// Start a fresh peak-tracking window.
///
/// Fix: the previous version also zeroed `ALLOCATED`, which made later
/// `dealloc` calls for still-live allocations underflow the counter (wrapping
/// to huge values) and corrupt every subsequent peak reading. The live-byte
/// counter must never be reset; only the high-water mark is rebased to the
/// current live size.
fn reset_memory_tracking() {
    PEAK_ALLOCATED.store(ALLOCATED.load(Ordering::SeqCst), Ordering::SeqCst);
}

/// Peak live heap bytes observed since the last `reset_memory_tracking`.
fn get_peak_memory() -> usize {
    PEAK_ALLOCATED.load(Ordering::SeqCst)
}
/// Measure wall-clock build time (and print peak heap) for increasingly
/// large datasets at a fixed dimension of 256.
///
/// Peak memory is emitted via `println!` as a side channel; Criterion itself
/// records only the duration returned from `iter_custom`.
///
/// NOTE(review): `random_embedding` is not defined in this file — it lives
/// in latency_benchmarks.rs; move it to a shared module.
fn bench_memory_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_scaling");
    let dataset_sizes = [1000, 5000, 10000, 50000, 100000];
    let dim = 256;
    for &size in &dataset_sizes {
        group.bench_with_input(
            BenchmarkId::from_parameter(size),
            &size,
            |b, &num_vectors| {
                // iter_custom lets us time exactly the build+insert phase and
                // keep the memory bookkeeping outside the measurement.
                b.iter_custom(|iters| {
                    let mut total_duration = std::time::Duration::ZERO;
                    for _ in 0..iters {
                        reset_memory_tracking();
                        let start = std::time::Instant::now();
                        let config = LatentConfig {
                            dimension: dim,
                            attention_type: AttentionType::Standard,
                            num_heads: 8,
                            distance_metric: DistanceMetric::Euclidean,
                            ..Default::default()
                        };
                        let mut space = LatentSpace::new(config).unwrap();
                        for i in 0..num_vectors {
                            let emb = random_embedding(dim);
                            space.add_embedding(emb, Some(i as u64)).unwrap();
                        }
                        let elapsed = start.elapsed();
                        total_duration += elapsed;
                        let peak = get_peak_memory();
                        println!(
                            "Dataset size: {}, Peak memory: {} MB",
                            num_vectors,
                            peak / 1_000_000
                        );
                        // Keep the space alive until after the peak readout.
                        black_box(space);
                    }
                    total_duration
                });
            },
        );
    }
    group.finish();
}
/// Measure build time while printing peak heap across dimensions at a fixed
/// dataset size (10k vectors).
///
/// NOTE(review): `random_embedding` is not defined in this file — see
/// latency_benchmarks.rs.
fn bench_memory_by_dimension(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_by_dimension");
    let dimensions = [64, 128, 256, 512, 1024];
    let num_vectors = 10000;
    for &dim in &dimensions {
        group.bench_with_input(
            BenchmarkId::from_parameter(dim),
            &dim,
            |b, &dimension| {
                b.iter_custom(|iters| {
                    let mut total_duration = std::time::Duration::ZERO;
                    for _ in 0..iters {
                        // Rebase the peak tracker so each iteration reports
                        // its own high-water mark.
                        reset_memory_tracking();
                        let start = std::time::Instant::now();
                        let config = LatentConfig {
                            dimension,
                            attention_type: AttentionType::Standard,
                            num_heads: 8,
                            distance_metric: DistanceMetric::Euclidean,
                            ..Default::default()
                        };
                        let mut space = LatentSpace::new(config).unwrap();
                        for i in 0..num_vectors {
                            let emb = random_embedding(dimension);
                            space.add_embedding(emb, Some(i as u64)).unwrap();
                        }
                        let elapsed = start.elapsed();
                        total_duration += elapsed;
                        let peak = get_peak_memory();
                        println!(
                            "Dimension: {}, Peak memory: {} MB",
                            dimension,
                            peak / 1_000_000
                        );
                        black_box(space);
                    }
                    total_duration
                });
            },
        );
    }
    group.finish();
}
criterion_group!(
memory_benches,
bench_memory_scaling,
bench_memory_by_dimension
);
criterion_main!(memory_benches);
2. Benchmark Matrix
2.1 Complete Test Matrix Configuration
// benches/benchmark_matrix.rs
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
use ruvector::metrics::DistanceMetric;
/// Comprehensive benchmark matrix: the 4-D parameter space
/// (dimension x neighbors x batch size x dataset size) swept by the
/// matrix benchmarks.
struct BenchmarkMatrix {
    dimensions: Vec<usize>,
    neighbors: Vec<usize>,
    batch_sizes: Vec<usize>,
    dataset_sizes: Vec<usize>,
}

impl Default for BenchmarkMatrix {
    /// The full sweep used by a complete matrix run.
    fn default() -> Self {
        BenchmarkMatrix {
            dimensions: [64, 128, 256, 512, 1024].to_vec(),
            neighbors: [10, 50, 100, 500, 1000, 10000].to_vec(),
            batch_sizes: [1, 8, 32, 128].to_vec(),
            dataset_sizes: [1000, 5000, 10000, 50000].to_vec(),
        }
    }
}
impl BenchmarkMatrix {
    /// Run every combination in the 4-D matrix. With the default matrix this
    /// is 5*6*4*4 = 480 configurations, each building and populating its own
    /// dataset — expect a very long wall-clock time.
    fn run_complete_matrix(&self, c: &mut Criterion) {
        for &dim in &self.dimensions {
            for &k in &self.neighbors {
                for &batch_size in &self.batch_sizes {
                    for &dataset_size in &self.dataset_sizes {
                        self.bench_configuration(
                            c,
                            dim,
                            k,
                            batch_size,
                            dataset_size,
                        );
                    }
                }
            }
        }
    }
    /// Benchmark one (dim, k, batch, dataset) cell: build a space, insert
    /// `dataset_size` vectors, then time `batch_size` sequential KNN queries.
    ///
    /// NOTE(review): `generate_dataset` is not defined in this file, and
    /// `black_box` must be added to the `use criterion::...` line at the top
    /// of the file — both are required before this target builds.
    fn bench_configuration(
        &self,
        c: &mut Criterion,
        dim: usize,
        k: usize,
        batch_size: usize,
        dataset_size: usize,
    ) {
        // The group name encodes the full cell coordinates, producing a
        // readable tree in the Criterion report.
        let group_name = format!(
            "matrix/dim_{}/k_{}/batch_{}/dataset_{}",
            dim, k, batch_size, dataset_size
        );
        let mut group = c.benchmark_group(&group_name);
        let config = LatentConfig {
            dimension: dim,
            attention_type: AttentionType::Standard,
            num_heads: 8,
            distance_metric: DistanceMetric::Euclidean,
            ..Default::default()
        };
        let mut space = LatentSpace::new(config).unwrap();
        let dataset = generate_dataset(dataset_size, dim);
        for emb in dataset.iter() {
            space.add_embedding(emb.clone(), None).unwrap();
        }
        let queries = generate_dataset(batch_size, dim);
        group.bench_function("knn_search", |b| {
            b.iter(|| {
                for query in &queries {
                    black_box(space.knn_search(query, k));
                }
            });
        });
        group.finish();
    }
}
/// Run critical path benchmarks only (reduced matrix)
fn bench_critical_path(c: &mut Criterion) {
let matrix = BenchmarkMatrix {
dimensions: vec![128, 256, 512],
neighbors: vec![10, 100, 1000],
batch_sizes: vec![1, 32],
dataset_sizes: vec![10000],
};
matrix.run_complete_matrix(c);
}
criterion_group!(matrix_benches, bench_critical_path);
criterion_main!(matrix_benches);
2.2 Benchmark Matrix Results Format
Expected output format for tracking:
# benchmark_results.toml
[latency.dim_128.k_10]
mean = "45.2 µs"
std_dev = "2.1 µs"
median = "44.8 µs"
mad = "1.4 µs"
[latency.dim_256.k_100]
mean = "124.5 µs"
std_dev = "5.3 µs"
median = "123.1 µs"
mad = "3.2 µs"
[throughput.batch_32.dim_256]
throughput = "2.34 GiB/s"
ops_per_sec = "8542"
[memory.dataset_10000.dim_256]
peak_mb = "245"
average_mb = "198"
allocations = "12543"
3. Comparative Benchmarks
3.1 Attention Mechanism Comparison
// benches/attention_comparison.rs
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, PlotConfiguration,
};
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
use ruvector::metrics::DistanceMetric;
/// Sweep every attention variant across dimension x stored-sequence length,
/// plotting Criterion summaries on a logarithmic scale.
///
/// NOTE(review): `generate_dataset`/`random_embedding` are not defined in
/// this file — move them to a shared benches module.
fn compare_attention_mechanisms(c: &mut Criterion) {
    let mut group = c.benchmark_group("attention_comparison");
    // Log scale keeps the 100..1000 sequence-length range readable.
    group.plot_config(PlotConfiguration::default().summary_scale(criterion::AxisScale::Logarithmic));
    let dimensions = [128, 256, 512];
    let sequence_lengths = [100, 500, 1000];
    let attention_types = vec![
        ("Standard", AttentionType::Standard),
        ("Flash", AttentionType::Flash),
        ("MultiHead_4", AttentionType::MultiHead { num_heads: 4 }),
        ("MultiHead_8", AttentionType::MultiHead { num_heads: 8 }),
        ("MoE_2", AttentionType::MoE { num_experts: 2 }),
        ("MoE_4", AttentionType::MoE { num_experts: 4 }),
    ];
    for &dim in &dimensions {
        for &seq_len in &sequence_lengths {
            for (name, attn_type) in &attention_types {
                let config = LatentConfig {
                    dimension: dim,
                    attention_type: attn_type.clone(),
                    num_heads: 8,
                    distance_metric: DistanceMetric::Euclidean,
                    ..Default::default()
                };
                let mut space = LatentSpace::new(config).unwrap();
                let embeddings = generate_dataset(seq_len, dim);
                for emb in embeddings.iter() {
                    space.add_embedding(emb.clone(), None).unwrap();
                }
                let query = random_embedding(dim);
                group.bench_with_input(
                    // Id: "<mechanism>@dim_<d>" parameterized by seq_len.
                    BenchmarkId::new(
                        format!("{}@dim_{}", name, dim),
                        seq_len,
                    ),
                    &query,
                    |b, q| {
                        b.iter(|| {
                            black_box(space.compute_attention(q))
                        });
                    },
                );
            }
        }
    }
    group.finish();
}
/// Print a formatted latency comparison table for the attention mechanisms
/// plus a per-mechanism speedup table relative to the `Standard` baseline.
///
/// The figures are illustrative placeholders (µs at 128/256/512 dims).
///
/// Fix: the original used a `HashMap`, whose iteration order is
/// nondeterministic — report rows shuffled between runs, making CI-log diffs
/// useless. A `BTreeMap` yields a stable, alphabetically sorted report.
fn generate_comparison_report() {
    use std::collections::BTreeMap;
    let results = BTreeMap::from([
        ("Standard", vec![45.2, 124.5, 456.7]),
        ("Flash", vec![32.1, 89.3, 301.2]),
        ("MultiHead_8", vec![52.3, 142.1, 512.4]),
        ("MoE_4", vec![48.9, 128.7, 445.3]),
    ]);
    println!("\n=== Attention Mechanism Comparison ===\n");
    println!("{:<15} {:>12} {:>12} {:>12}", "Type", "128-dim", "256-dim", "512-dim");
    println!("{:-<55}", "");
    for (name, times) in results.iter() {
        println!(
            "{:<15} {:>10.1} µs {:>10.1} µs {:>10.1} µs",
            name, times[0], times[1], times[2]
        );
    }
    // Speedup vs the Standard baseline: baseline_time / mechanism_time,
    // so values > 1.0 mean the mechanism is faster than Standard.
    if let Some(baseline) = results.get("Standard") {
        println!("\n=== Speedup vs Standard ===\n");
        for (name, times) in results.iter() {
            if name != &"Standard" {
                let speedups: Vec<f64> = times
                    .iter()
                    .zip(baseline.iter())
                    .map(|(t, b)| b / t)
                    .collect();
                println!(
                    "{:<15} {:>10.2}x {:>10.2}x {:>10.2}x",
                    name, speedups[0], speedups[1], speedups[2]
                );
            }
        }
    }
}
criterion_group!(attention_benches, compare_attention_mechanisms);
criterion_main!(attention_benches);
3.2 Distance Metric Comparison
// benches/distance_comparison.rs
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion,
};
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
use ruvector::metrics::DistanceMetric;
/// Benchmark a fixed k=100 KNN search under each distance metric, per
/// dimension, over a 10k-vector dataset.
///
/// NOTE(review): `generate_dataset`/`random_embedding` are not defined in
/// this file — share them from a common benches module.
fn compare_distance_metrics(c: &mut Criterion) {
    let mut group = c.benchmark_group("distance_comparison");
    let dimensions = [128, 256, 512];
    let metrics = vec![
        ("Euclidean", DistanceMetric::Euclidean),
        ("Cosine", DistanceMetric::Cosine),
        ("Manhattan", DistanceMetric::Manhattan),
        // Presumably negative curvature selects a hyperbolic embedding space
        // — TODO confirm the curvature sign convention with ruvector's docs.
        ("Hyperbolic", DistanceMetric::Hyperbolic { curvature: -1.0 }),
    ];
    for &dim in &dimensions {
        for (name, metric) in &metrics {
            let config = LatentConfig {
                dimension: dim,
                attention_type: AttentionType::Standard,
                num_heads: 8,
                distance_metric: metric.clone(),
                ..Default::default()
            };
            let mut space = LatentSpace::new(config).unwrap();
            let dataset = generate_dataset(10000, dim);
            for emb in dataset.iter() {
                space.add_embedding(emb.clone(), None).unwrap();
            }
            let query = random_embedding(dim);
            group.bench_with_input(
                BenchmarkId::new(format!("{}@dim_{}", name, dim), "knn_100"),
                &query,
                |b, q| {
                    b.iter(|| {
                        black_box(space.knn_search(q, 100))
                    });
                },
            );
        }
    }
    group.finish();
}
criterion_group!(distance_benches, compare_distance_metrics);
criterion_main!(distance_benches);
3.3 Standard vs Flash Attention
Detailed comparison focusing on Flash attention optimizations:
// benches/flash_attention_benchmark.rs
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
use ruvector::metrics::DistanceMetric;
/// Head-to-head latency of Standard vs Flash attention at sequence lengths
/// where Flash-style tiling is expected to pay off. Throughput is reported
/// as stored sequence elements per second.
///
/// NOTE(review): `generate_dataset`/`random_embedding` are not defined in
/// this file — share them from a common benches module.
fn bench_flash_vs_standard(c: &mut Criterion) {
    let mut group = c.benchmark_group("flash_vs_standard");
    // Test at various sequence lengths where Flash shines
    let sequence_lengths = [256, 512, 1024, 2048, 4096];
    let dim = 256;
    let num_heads = 8;
    for &seq_len in &sequence_lengths {
        // Standard attention
        {
            let config = LatentConfig {
                dimension: dim,
                attention_type: AttentionType::Standard,
                num_heads,
                distance_metric: DistanceMetric::Euclidean,
                ..Default::default()
            };
            let mut space = LatentSpace::new(config).unwrap();
            let embeddings = generate_dataset(seq_len, dim);
            for emb in embeddings.iter() {
                space.add_embedding(emb.clone(), None).unwrap();
            }
            let query = random_embedding(dim);
            group.throughput(Throughput::Elements(seq_len as u64));
            group.bench_with_input(
                BenchmarkId::new("Standard", seq_len),
                &query,
                |b, q| {
                    b.iter(|| {
                        black_box(space.compute_attention(q))
                    });
                },
            );
        }
        // Flash attention — identical setup; only the attention type differs,
        // so the two series are directly comparable.
        {
            let config = LatentConfig {
                dimension: dim,
                attention_type: AttentionType::Flash,
                num_heads,
                distance_metric: DistanceMetric::Euclidean,
                ..Default::default()
            };
            let mut space = LatentSpace::new(config).unwrap();
            let embeddings = generate_dataset(seq_len, dim);
            for emb in embeddings.iter() {
                space.add_embedding(emb.clone(), None).unwrap();
            }
            let query = random_embedding(dim);
            group.throughput(Throughput::Elements(seq_len as u64));
            group.bench_with_input(
                BenchmarkId::new("Flash", seq_len),
                &query,
                |b, q| {
                    b.iter(|| {
                        black_box(space.compute_attention(q))
                    });
                },
            );
        }
    }
    group.finish();
}
/// Peak-heap comparison of a single attention pass, Standard vs Flash, at
/// several sequence lengths. Peaks are printed as a side channel; Criterion
/// records only the returned durations.
///
/// NOTE(review): `reset_memory_tracking`/`get_peak_memory` (and the tracking
/// global allocator) are defined in memory_benchmarks.rs, and the data
/// helpers are also undefined here — this bench target will not compile
/// until those are shared via a common module.
fn bench_memory_flash_vs_standard(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_flash_vs_standard");
    let sequence_lengths = [512, 1024, 2048];
    let dim = 256;
    for &seq_len in &sequence_lengths {
        group.bench_with_input(
            BenchmarkId::new("Standard", seq_len),
            &seq_len,
            |b, &len| {
                b.iter_custom(|iters| {
                    let mut total_duration = std::time::Duration::ZERO;
                    for _ in 0..iters {
                        reset_memory_tracking();
                        let start = std::time::Instant::now();
                        let config = LatentConfig {
                            dimension: dim,
                            attention_type: AttentionType::Standard,
                            num_heads: 8,
                            distance_metric: DistanceMetric::Euclidean,
                            ..Default::default()
                        };
                        let mut space = LatentSpace::new(config).unwrap();
                        let embeddings = generate_dataset(len, dim);
                        for emb in embeddings.iter() {
                            space.add_embedding(emb.clone(), None).unwrap();
                        }
                        let query = random_embedding(dim);
                        black_box(space.compute_attention(&query));
                        // Timed region covers population + one attention pass.
                        total_duration += start.elapsed();
                        println!(
                            "Standard@{}: Peak {} MB",
                            len,
                            get_peak_memory() / 1_000_000
                        );
                    }
                    total_duration
                });
            },
        );
        group.bench_with_input(
            BenchmarkId::new("Flash", seq_len),
            &seq_len,
            |b, &len| {
                b.iter_custom(|iters| {
                    let mut total_duration = std::time::Duration::ZERO;
                    for _ in 0..iters {
                        reset_memory_tracking();
                        let start = std::time::Instant::now();
                        let config = LatentConfig {
                            dimension: dim,
                            attention_type: AttentionType::Flash,
                            num_heads: 8,
                            distance_metric: DistanceMetric::Euclidean,
                            ..Default::default()
                        };
                        let mut space = LatentSpace::new(config).unwrap();
                        let embeddings = generate_dataset(len, dim);
                        for emb in embeddings.iter() {
                            space.add_embedding(emb.clone(), None).unwrap();
                        }
                        let query = random_embedding(dim);
                        black_box(space.compute_attention(&query));
                        total_duration += start.elapsed();
                        println!(
                            "Flash@{}: Peak {} MB",
                            len,
                            get_peak_memory() / 1_000_000
                        );
                    }
                    total_duration
                });
            },
        );
    }
    group.finish();
}
criterion_group!(
flash_benches,
bench_flash_vs_standard,
bench_memory_flash_vs_standard
);
criterion_main!(flash_benches);
4. Regression Detection
4.1 Baseline Storage System
// benches/regression_detection.rs
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::path::Path;
/// A single stored benchmark measurement used as the comparison reference.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkBaseline {
    /// Benchmark name (also the lookup key in `BaselineCollection`).
    pub name: String,
    /// Vector dimension the measurement was taken at.
    pub dimension: usize,
    /// Mean latency. Units are whatever the producer recorded — µs by this
    /// suite's convention; TODO confirm and document the unit explicitly.
    pub mean: f64,
    /// Standard deviation of the samples (same unit as `mean`).
    pub std_dev: f64,
    /// Median of the samples (same unit as `mean`).
    pub median: f64,
    /// Optional throughput figure, if the benchmark reported one.
    pub throughput: Option<f64>,
    /// Optional peak heap in MB, from the memory benchmarks.
    pub memory_peak_mb: Option<usize>,
    /// RFC 3339 creation time.
    pub timestamp: String,
    /// Full git SHA the baseline was recorded at.
    pub git_commit: String,
}
/// The complete set of baselines keyed by benchmark name, as persisted to
/// JSON by `save`/`load`.
#[derive(Debug, Serialize, Deserialize)]
pub struct BaselineCollection {
    pub baselines: HashMap<String, BenchmarkBaseline>,
    pub created_at: String,
    pub updated_at: String,
}
impl BaselineCollection {
    /// Load baselines from a JSON file previously written by [`Self::save`].
    pub fn load<P: AsRef<Path>>(path: P) -> Result<Self, Box<dyn std::error::Error>> {
        let content = fs::read_to_string(path)?;
        let collection: BaselineCollection = serde_json::from_str(&content)?;
        Ok(collection)
    }
    /// Persist the collection as pretty-printed JSON, overwriting `path`.
    pub fn save<P: AsRef<Path>>(&self, path: P) -> Result<(), Box<dyn std::error::Error>> {
        let content = serde_json::to_string_pretty(self)?;
        fs::write(path, content)?;
        Ok(())
    }
    /// Insert or replace a baseline and bump `updated_at` to now (UTC).
    pub fn update_baseline(&mut self, key: String, baseline: BenchmarkBaseline) {
        self.baselines.insert(key, baseline);
        self.updated_at = chrono::Utc::now().to_rfc3339();
    }
    /// Classify `current` against the stored baseline for `key`.
    ///
    /// A positive change (slower) beyond `threshold_percent` is a regression.
    /// Note the asymmetry: an improvement is only reported past *half* the
    /// threshold, so measurement noise is not celebrated as a win.
    pub fn check_regression(
        &self,
        key: &str,
        current: &BenchmarkResult,
        threshold_percent: f64,
    ) -> RegressionStatus {
        if let Some(baseline) = self.baselines.get(key) {
            // Signed percent change relative to the baseline mean.
            let change_percent = ((current.mean - baseline.mean) / baseline.mean) * 100.0;
            if change_percent > threshold_percent {
                RegressionStatus::Regression {
                    baseline: baseline.mean,
                    current: current.mean,
                    change_percent,
                }
            } else if change_percent < -threshold_percent / 2.0 {
                RegressionStatus::Improvement {
                    baseline: baseline.mean,
                    current: current.mean,
                    // Reported as a positive magnitude.
                    change_percent: -change_percent,
                }
            } else {
                RegressionStatus::NoChange
            }
        } else {
            RegressionStatus::NewBenchmark
        }
    }
}
/// Outcome of comparing a current measurement against a stored baseline.
#[derive(Debug)]
pub enum RegressionStatus {
    /// Slower than the baseline by more than the configured threshold.
    Regression {
        baseline: f64,
        current: f64,
        change_percent: f64,
    },
    /// Faster than the baseline; `change_percent` is the positive magnitude.
    Improvement {
        baseline: f64,
        current: f64,
        change_percent: f64,
    },
    /// Within the tolerance band — neither regression nor improvement.
    NoChange,
    /// No baseline stored under this key yet.
    NewBenchmark,
}
/// One benchmark measurement from the current run; units must match the
/// stored baselines for the comparison in `check_regression` to be valid.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    pub name: String,
    pub mean: f64,
    pub std_dev: f64,
    pub median: f64,
}
/// Best-effort lookup of the current git commit hash (full SHA).
///
/// Returns `"unknown"` instead of panicking when `git` is not installed,
/// the command fails, or we are not inside a repository, so baseline
/// creation still works in minimal CI/container environments (the original
/// `expect` aborted the whole tool in those cases).
fn get_git_commit() -> String {
    use std::process::Command;
    let sha = Command::new("git")
        .args(["rev-parse", "HEAD"])
        .output()
        .ok()
        .filter(|out| out.status.success())
        .map(|out| String::from_utf8_lossy(&out.stdout).trim().to_string())
        .unwrap_or_default();
    if sha.is_empty() {
        "unknown".to_string()
    } else {
        sha
    }
}
/// Best-effort extraction of the digits following a `dim_` marker in a
/// benchmark name, e.g. `"knn/dim_512/k_10"` -> `Some(512)`.
fn parse_dimension(name: &str) -> Option<usize> {
    let start = name.find("dim_")? + "dim_".len();
    let digits: String = name[start..]
        .chars()
        .take_while(|c| c.is_ascii_digit())
        .collect();
    digits.parse().ok()
}

/// Create baseline from current run.
///
/// Every result is stamped with the current git commit and a shared RFC 3339
/// timestamp. The dimension is now parsed from the benchmark name when it
/// contains a `dim_<N>` segment (the id convention used throughout this
/// suite) — implementing the inline TODO of the previous version — and falls
/// back to the historical default of 256 otherwise.
pub fn create_baseline(results: Vec<BenchmarkResult>) -> BaselineCollection {
    let mut baselines = HashMap::new();
    let commit = get_git_commit();
    let timestamp = chrono::Utc::now().to_rfc3339();
    for result in results {
        let baseline = BenchmarkBaseline {
            name: result.name.clone(),
            dimension: parse_dimension(&result.name).unwrap_or(256),
            mean: result.mean,
            std_dev: result.std_dev,
            median: result.median,
            throughput: None,
            memory_peak_mb: None,
            timestamp: timestamp.clone(),
            git_commit: commit.clone(),
        };
        baselines.insert(result.name.clone(), baseline);
    }
    BaselineCollection {
        baselines,
        created_at: timestamp.clone(),
        updated_at: timestamp,
    }
}
4.2 CI Integration Script
#!/bin/bash
# scripts/benchmark_ci.sh
#
# Run the benchmark suite, compare against the stored baseline, and emit a
# markdown report. compare_benchmarks exit codes:
#   0 = no significant change, 1 = regression, 2 = improvement.
set -e

BASELINE_FILE="benches/baselines.json"
RESULTS_FILE="target/criterion/results.json"
THRESHOLD=10.0 # 10% regression threshold

echo "Running benchmarks..."
cargo bench --bench latency_benchmarks -- --save-baseline current

echo "Comparing with baseline..."
if [ -f "$BASELINE_FILE" ]; then
    # Fix: under `set -e`, a bare command that exits nonzero aborts the
    # script BEFORE `REGRESSION_STATUS=$?` can run, so regressions (exit 1)
    # killed the script instead of being reported. Capturing the status with
    # `|| REGRESSION_STATUS=$?` keeps -e from triggering.
    REGRESSION_STATUS=0
    cargo run --bin compare_benchmarks -- \
        --baseline "$BASELINE_FILE" \
        --current "$RESULTS_FILE" \
        --threshold "$THRESHOLD" || REGRESSION_STATUS=$?
    if [ "$REGRESSION_STATUS" -eq 1 ]; then
        echo "❌ Performance regression detected!"
        exit 1
    elif [ "$REGRESSION_STATUS" -eq 2 ]; then
        echo "✅ Performance improvement detected!"
    else
        echo "✅ No significant performance change"
    fi
else
    echo "No baseline found, creating new baseline..."
    cp "$RESULTS_FILE" "$BASELINE_FILE"
fi

echo "Generating benchmark report..."
cargo run --bin benchmark_report -- \
    --baseline "$BASELINE_FILE" \
    --output "target/benchmark_report.md"

echo "Done!"
4.3 Threshold Configuration
# benches/regression_config.toml
[thresholds]
# Global default threshold (%)
default = 10.0
# Per-benchmark thresholds
[thresholds.latency]
knn_search = 5.0
add_embedding = 8.0
attention = 12.0
[thresholds.throughput]
batch_operations = 15.0
parallel_search = 10.0
[thresholds.memory]
peak_usage = 20.0
allocation_count = 25.0
[regression_actions]
# What to do on regression
fail_ci = true
create_issue = true
notify_team = true
[regression_actions.notifications]
slack_webhook = "${SLACK_WEBHOOK_URL}"
email = "dev-team@example.com"
[baseline_management]
# Auto-update baseline on main branch
auto_update_main = true
# Require manual approval for baseline updates
require_approval = false
# Keep history of baselines
keep_history = true
max_history_count = 50
4.4 Benchmark Comparison Tool
// src/bin/compare_benchmarks.rs
use clap::Parser;
use ruvector_benches::regression::{BaselineCollection, BenchmarkResult, RegressionStatus};
use std::path::PathBuf;
// CLI arguments for the baseline comparison tool. Plain `//` comments are
// used deliberately here: clap turns `///` doc comments into --help text,
// and this documentation pass must not change runtime-visible output.
#[derive(Parser)]
struct Args {
    // Path to the stored baseline JSON (a serialized BaselineCollection).
    #[arg(long)]
    baseline: PathBuf,
    // Path to the current Criterion results JSON.
    #[arg(long)]
    current: PathBuf,
    // Regression threshold in percent.
    #[arg(long, default_value = "10.0")]
    threshold: f64,
}
/// Compare current benchmark results with stored baselines and exit with a
/// status code CI can branch on: 0 = no change, 1 = regression detected,
/// 2 = improvement detected (consumed by scripts/benchmark_ci.sh).
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args = Args::parse();
    let baselines = BaselineCollection::load(&args.baseline)?;
    let current_results = load_current_results(&args.current)?;
    let mut has_regression = false;
    let mut has_improvement = false;
    println!("\n=== Benchmark Comparison Report ===\n");
    for result in current_results {
        let status = baselines.check_regression(
            &result.name,
            &result,
            args.threshold,
        );
        match status {
            RegressionStatus::Regression {
                baseline,
                current,
                change_percent,
            } => {
                has_regression = true;
                println!(
                    "❌ REGRESSION: {}\n  Baseline: {:.2} µs\n  Current: {:.2} µs\n  Change: +{:.1}%\n",
                    result.name, baseline, current, change_percent
                );
            }
            RegressionStatus::Improvement {
                baseline,
                current,
                change_percent,
            } => {
                has_improvement = true;
                println!(
                    "✅ IMPROVEMENT: {}\n  Baseline: {:.2} µs\n  Current: {:.2} µs\n  Change: -{:.1}%\n",
                    result.name, baseline, current, change_percent
                );
            }
            RegressionStatus::NoChange => {
                println!("➡️ NO CHANGE: {}\n", result.name);
            }
            RegressionStatus::NewBenchmark => {
                println!("🆕 NEW: {}\n", result.name);
            }
        }
    }
    // Regression dominates: one regression fails the run even when other
    // benchmarks improved.
    if has_regression {
        std::process::exit(1);
    } else if has_improvement {
        std::process::exit(2);
    } else {
        std::process::exit(0);
    }
}
/// Parse the current Criterion output into `BenchmarkResult`s.
///
/// STUB: always returns an empty list, so the comparison above silently
/// reports nothing — a real implementation must walk Criterion's
/// per-benchmark output under `target/criterion/` (TODO before this tool
/// produces useful reports).
fn load_current_results(path: &PathBuf) -> Result<Vec<BenchmarkResult>, Box<dyn std::error::Error>> {
    // Parse Criterion JSON output
    // Implementation depends on Criterion output format
    Ok(vec![])
}
4.5 GitHub Actions Workflow
# .github/workflows/benchmarks.yml
name: Performance Benchmarks
on:
pull_request:
branches: [ main ]
push:
branches: [ main ]
jobs:
benchmark:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0 # Fetch all history for baseline comparison
- name: Setup Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Cache cargo registry
uses: actions/cache@v3
with:
path: ~/.cargo/registry
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
- name: Cache criterion results
uses: actions/cache@v3
with:
path: target/criterion
key: ${{ runner.os }}-criterion-${{ github.sha }}
restore-keys: |
${{ runner.os }}-criterion-
- name: Restore baseline
run: |
if [ -f benches/baselines.json ]; then
echo "Baseline found"
else
echo "No baseline, will create new one"
fi
- name: Run benchmarks
run: |
cargo bench --bench latency_benchmarks
cargo bench --bench throughput_benchmarks
cargo bench --bench memory_benchmarks
cargo bench --bench attention_comparison
- name: Compare with baseline
id: compare
run: |
chmod +x scripts/benchmark_ci.sh
./scripts/benchmark_ci.sh
continue-on-error: true
- name: Generate report
run: |
cargo run --bin benchmark_report -- \
--baseline benches/baselines.json \
--output target/benchmark_report.md
- name: Comment PR
if: github.event_name == 'pull_request'
uses: actions/github-script@v6
with:
script: |
const fs = require('fs');
const report = fs.readFileSync('target/benchmark_report.md', 'utf8');
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: report
});
- name: Update baseline on main
if: github.ref == 'refs/heads/main' && steps.compare.outcome == 'success'
run: |
cp target/criterion/results.json benches/baselines.json
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git add benches/baselines.json
git commit -m "chore: update performance baselines [skip ci]"
git push
- name: Fail on regression
if: steps.compare.outcome == 'failure'
run: exit 1
- name: Upload benchmark results
uses: actions/upload-artifact@v3
with:
name: benchmark-results
path: |
target/criterion/
target/benchmark_report.md
5. Benchmark Organization
5.1 Directory Structure
benches/
├── latency_benchmarks.rs # Latency measurements
├── throughput_benchmarks.rs # Throughput measurements
├── memory_benchmarks.rs # Memory usage tracking
├── attention_comparison.rs # Attention mechanism comparison
├── distance_comparison.rs # Distance metric comparison
├── flash_attention_benchmark.rs # Flash vs Standard detailed
├── benchmark_matrix.rs # Complete test matrix
├── regression_detection.rs # Baseline & regression tools
├── baselines.json # Stored baselines
└── regression_config.toml # Threshold configuration
scripts/
├── benchmark_ci.sh # CI integration script
├── generate_report.sh # Report generation
└── update_baseline.sh # Baseline management
src/bin/
├── compare_benchmarks.rs # Comparison tool
└── benchmark_report.rs # Report generator
5.2 Cargo.toml Configuration
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
rayon = "1.7"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
chrono = "0.4"
[[bench]]
name = "latency_benchmarks"
harness = false
[[bench]]
name = "throughput_benchmarks"
harness = false
[[bench]]
name = "memory_benchmarks"
harness = false
[[bench]]
name = "attention_comparison"
harness = false
[[bench]]
name = "distance_comparison"
harness = false
[[bench]]
name = "flash_attention_benchmark"
harness = false
[[bench]]
name = "benchmark_matrix"
harness = false
[profile.bench]
opt-level = 3
lto = true
codegen-units = 1
Summary
This benchmark suite provides:
1. **Comprehensive Coverage:**
- Latency benchmarks across all dimensions
- Throughput measurements for batch operations
- Memory usage tracking and profiling
- Full test matrix with 4D parameter space
2. **Comparative Analysis:**
- Attention mechanism comparison (Standard, Flash, Multi-Head, MoE)
- Distance metric comparison (Euclidean, Cosine, Manhattan, Hyperbolic)
- Detailed Flash vs Standard analysis
3. **Regression Detection:**
- Baseline storage and versioning
- Automated comparison with configurable thresholds
- CI/CD integration with GitHub Actions
- Automatic PR comments with benchmark results
4. **Production Ready:**
- Complete Criterion integration
- Structured result storage
- Automated reporting
- Performance tracking over time
Run benchmarks with:
# Run all benchmarks
cargo bench
# Run specific benchmark suite
cargo bench --bench latency_benchmarks
# Compare with baseline
./scripts/benchmark_ci.sh
# Generate report
cargo run --bin benchmark_report