git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
1570 lines
44 KiB
Markdown
1570 lines
44 KiB
Markdown
# Agent 13: Performance Benchmark Suite
|
|
|
|
## Overview
|
|
|
|
Comprehensive Criterion-based benchmark suite for measuring and tracking performance across all latent space operations, attention mechanisms, and search algorithms.
|
|
|
|
## 1. Criterion Benchmarks
|
|
|
|
### 1.1 Latency Benchmarks
|
|
|
|
Complete benchmark code for measuring operation latency across various dimensions and neighbor counts.
|
|
|
|
```rust
|
|
// benches/latency_benchmarks.rs
|
|
use criterion::{
|
|
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
|
|
};
|
|
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
|
|
use ruvector::metrics::DistanceMetric;
|
|
use rand::Rng;
|
|
|
|
/// Generate random embedding of specified dimension
|
|
fn random_embedding(dim: usize) -> Vec<f32> {
|
|
let mut rng = rand::thread_rng();
|
|
(0..dim).map(|_| rng.gen_range(-1.0..1.0)).collect()
|
|
}
|
|
|
|
/// Generate dataset of random embeddings
|
|
fn generate_dataset(num_vectors: usize, dim: usize) -> Vec<Vec<f32>> {
|
|
(0..num_vectors)
|
|
.map(|_| random_embedding(dim))
|
|
.collect()
|
|
}
|
|
|
|
/// Benchmark latent space creation
|
|
fn bench_latent_space_creation(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("latent_space_creation");
|
|
|
|
for dim in [64, 128, 256, 512, 1024].iter() {
|
|
group.bench_with_input(
|
|
BenchmarkId::from_parameter(dim),
|
|
dim,
|
|
|b, &dim| {
|
|
b.iter(|| {
|
|
let config = LatentConfig {
|
|
dimension: dim,
|
|
attention_type: AttentionType::Standard,
|
|
num_heads: 8,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
black_box(LatentSpace::new(config))
|
|
});
|
|
},
|
|
);
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark embedding addition
|
|
fn bench_add_embedding(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("add_embedding");
|
|
|
|
for dim in [64, 128, 256, 512, 1024].iter() {
|
|
let config = LatentConfig {
|
|
dimension: *dim,
|
|
attention_type: AttentionType::Standard,
|
|
num_heads: 8,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
let embedding = random_embedding(*dim);
|
|
|
|
group.bench_with_input(
|
|
BenchmarkId::from_parameter(dim),
|
|
&embedding,
|
|
|b, emb| {
|
|
b.iter(|| {
|
|
black_box(space.add_embedding(emb.clone(), None))
|
|
});
|
|
},
|
|
);
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark KNN search with varying neighbor counts
|
|
fn bench_knn_search(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("knn_search");
|
|
|
|
let dimensions = [128, 256, 512];
|
|
let neighbor_counts = [10, 50, 100, 500, 1000];
|
|
let dataset_size = 10000;
|
|
|
|
for &dim in &dimensions {
|
|
for &k in &neighbor_counts {
|
|
let config = LatentConfig {
|
|
dimension: dim,
|
|
attention_type: AttentionType::Standard,
|
|
num_heads: 8,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
let dataset = generate_dataset(dataset_size, dim);
|
|
|
|
// Populate space
|
|
for emb in dataset.iter() {
|
|
space.add_embedding(emb.clone(), None).unwrap();
|
|
}
|
|
|
|
let query = random_embedding(dim);
|
|
|
|
group.throughput(Throughput::Elements(k as u64));
|
|
group.bench_with_input(
|
|
BenchmarkId::new(format!("dim_{}", dim), k),
|
|
&k,
|
|
|b, &neighbors| {
|
|
b.iter(|| {
|
|
black_box(space.knn_search(&query, neighbors))
|
|
});
|
|
},
|
|
);
|
|
}
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark attention computation
|
|
fn bench_attention_computation(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("attention_computation");
|
|
|
|
let dimensions = [64, 128, 256, 512];
|
|
let attention_types = [
|
|
AttentionType::Standard,
|
|
AttentionType::Flash,
|
|
AttentionType::MultiHead { num_heads: 8 },
|
|
AttentionType::MoE { num_experts: 4 },
|
|
];
|
|
|
|
for &dim in &dimensions {
|
|
for attention_type in &attention_types {
|
|
let config = LatentConfig {
|
|
dimension: dim,
|
|
attention_type: attention_type.clone(),
|
|
num_heads: 8,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
let embeddings = generate_dataset(100, dim);
|
|
|
|
for emb in embeddings.iter() {
|
|
space.add_embedding(emb.clone(), None).unwrap();
|
|
}
|
|
|
|
let query = random_embedding(dim);
|
|
|
|
group.bench_with_input(
|
|
BenchmarkId::new(format!("dim_{}", dim), format!("{:?}", attention_type)),
|
|
&query,
|
|
|b, q| {
|
|
b.iter(|| {
|
|
black_box(space.compute_attention(q))
|
|
});
|
|
},
|
|
);
|
|
}
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
criterion_group!(
|
|
latency_benches,
|
|
bench_latent_space_creation,
|
|
bench_add_embedding,
|
|
bench_knn_search,
|
|
bench_attention_computation
|
|
);
|
|
criterion_main!(latency_benches);
|
|
```
|
|
|
|
### 1.2 Throughput Benchmarks
|
|
|
|
Benchmark batch processing and parallel operations.
|
|
|
|
```rust
|
|
// benches/throughput_benchmarks.rs
|
|
use criterion::{
|
|
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
|
|
};
|
|
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
|
|
use ruvector::metrics::DistanceMetric;
|
|
use rand::Rng;
|
|
|
|
/// Benchmark batch embedding addition
|
|
fn bench_batch_add_embeddings(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("batch_add_embeddings");
|
|
|
|
let batch_sizes = [1, 8, 32, 128, 512];
|
|
let dim = 256;
|
|
|
|
for &batch_size in &batch_sizes {
|
|
let config = LatentConfig {
|
|
dimension: dim,
|
|
attention_type: AttentionType::Standard,
|
|
num_heads: 8,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
let embeddings = generate_dataset(batch_size, dim);
|
|
|
|
group.throughput(Throughput::Elements(batch_size as u64));
|
|
group.bench_with_input(
|
|
BenchmarkId::from_parameter(batch_size),
|
|
&embeddings,
|
|
|b, embs| {
|
|
b.iter(|| {
|
|
for emb in embs {
|
|
black_box(space.add_embedding(emb.clone(), None));
|
|
}
|
|
});
|
|
},
|
|
);
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark parallel KNN search
|
|
fn bench_parallel_knn_search(c: &mut Criterion) {
|
|
use rayon::prelude::*;
|
|
|
|
let mut group = c.benchmark_group("parallel_knn_search");
|
|
|
|
let query_counts = [1, 8, 32, 128];
|
|
let dim = 256;
|
|
let k = 100;
|
|
let dataset_size = 10000;
|
|
|
|
for &num_queries in &query_counts {
|
|
let config = LatentConfig {
|
|
dimension: dim,
|
|
attention_type: AttentionType::Standard,
|
|
num_heads: 8,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
let dataset = generate_dataset(dataset_size, dim);
|
|
|
|
for emb in dataset.iter() {
|
|
space.add_embedding(emb.clone(), None).unwrap();
|
|
}
|
|
|
|
let queries: Vec<Vec<f32>> = (0..num_queries)
|
|
.map(|_| random_embedding(dim))
|
|
.collect();
|
|
|
|
group.throughput(Throughput::Elements(num_queries as u64));
|
|
group.bench_with_input(
|
|
BenchmarkId::from_parameter(num_queries),
|
|
&queries,
|
|
|b, qs| {
|
|
b.iter(|| {
|
|
let results: Vec<_> = qs.par_iter()
|
|
.map(|q| space.knn_search(q, k))
|
|
.collect();
|
|
black_box(results)
|
|
});
|
|
},
|
|
);
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark batch attention computation
|
|
fn bench_batch_attention(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("batch_attention");
|
|
|
|
let batch_sizes = [8, 32, 128];
|
|
let dim = 256;
|
|
|
|
for &batch_size in &batch_sizes {
|
|
let config = LatentConfig {
|
|
dimension: dim,
|
|
attention_type: AttentionType::Flash,
|
|
num_heads: 8,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
let embeddings = generate_dataset(1000, dim);
|
|
|
|
for emb in embeddings.iter() {
|
|
space.add_embedding(emb.clone(), None).unwrap();
|
|
}
|
|
|
|
let queries = generate_dataset(batch_size, dim);
|
|
|
|
group.throughput(Throughput::Elements(batch_size as u64));
|
|
group.bench_with_input(
|
|
BenchmarkId::from_parameter(batch_size),
|
|
&queries,
|
|
|b, qs| {
|
|
b.iter(|| {
|
|
for q in qs {
|
|
black_box(space.compute_attention(q));
|
|
}
|
|
});
|
|
},
|
|
);
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
criterion_group!(
|
|
throughput_benches,
|
|
bench_batch_add_embeddings,
|
|
bench_parallel_knn_search,
|
|
bench_batch_attention
|
|
);
|
|
criterion_main!(throughput_benches);
|
|
```
|
|
|
|
### 1.3 Memory Benchmarks
|
|
|
|
Track peak memory usage and allocation patterns.
|
|
|
|
```rust
|
|
// benches/memory_benchmarks.rs
|
|
use criterion::{
|
|
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
|
|
};
|
|
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
|
|
use ruvector::metrics::DistanceMetric;
|
|
use std::alloc::{GlobalAlloc, Layout, System};
|
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
|
|
|
/// Custom allocator to track memory usage
|
|
struct TrackingAllocator;
|
|
|
|
static ALLOCATED: AtomicUsize = AtomicUsize::new(0);
|
|
static PEAK_ALLOCATED: AtomicUsize = AtomicUsize::new(0);
|
|
|
|
unsafe impl GlobalAlloc for TrackingAllocator {
|
|
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
|
|
let size = layout.size();
|
|
let current = ALLOCATED.fetch_add(size, Ordering::SeqCst) + size;
|
|
|
|
// Update peak if necessary
|
|
let mut peak = PEAK_ALLOCATED.load(Ordering::SeqCst);
|
|
while current > peak {
|
|
match PEAK_ALLOCATED.compare_exchange(
|
|
peak,
|
|
current,
|
|
Ordering::SeqCst,
|
|
Ordering::SeqCst,
|
|
) {
|
|
Ok(_) => break,
|
|
Err(p) => peak = p,
|
|
}
|
|
}
|
|
|
|
System.alloc(layout)
|
|
}
|
|
|
|
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
|
|
ALLOCATED.fetch_sub(layout.size(), Ordering::SeqCst);
|
|
System.dealloc(ptr, layout)
|
|
}
|
|
}
|
|
|
|
#[global_allocator]
|
|
static GLOBAL: TrackingAllocator = TrackingAllocator;
|
|
|
|
fn reset_memory_tracking() {
|
|
ALLOCATED.store(0, Ordering::SeqCst);
|
|
PEAK_ALLOCATED.store(0, Ordering::SeqCst);
|
|
}
|
|
|
|
fn get_peak_memory() -> usize {
|
|
PEAK_ALLOCATED.load(Ordering::SeqCst)
|
|
}
|
|
|
|
/// Benchmark memory usage for different dataset sizes
|
|
fn bench_memory_scaling(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("memory_scaling");
|
|
|
|
let dataset_sizes = [1000, 5000, 10000, 50000, 100000];
|
|
let dim = 256;
|
|
|
|
for &size in &dataset_sizes {
|
|
group.bench_with_input(
|
|
BenchmarkId::from_parameter(size),
|
|
&size,
|
|
|b, &num_vectors| {
|
|
b.iter_custom(|iters| {
|
|
let mut total_duration = std::time::Duration::ZERO;
|
|
|
|
for _ in 0..iters {
|
|
reset_memory_tracking();
|
|
|
|
let start = std::time::Instant::now();
|
|
|
|
let config = LatentConfig {
|
|
dimension: dim,
|
|
attention_type: AttentionType::Standard,
|
|
num_heads: 8,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
|
|
for i in 0..num_vectors {
|
|
let emb = random_embedding(dim);
|
|
space.add_embedding(emb, Some(i as u64)).unwrap();
|
|
}
|
|
|
|
let elapsed = start.elapsed();
|
|
total_duration += elapsed;
|
|
|
|
let peak = get_peak_memory();
|
|
println!(
|
|
"Dataset size: {}, Peak memory: {} MB",
|
|
num_vectors,
|
|
peak / 1_000_000
|
|
);
|
|
|
|
black_box(space);
|
|
}
|
|
|
|
total_duration
|
|
});
|
|
},
|
|
);
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark memory usage by dimension
|
|
fn bench_memory_by_dimension(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("memory_by_dimension");
|
|
|
|
let dimensions = [64, 128, 256, 512, 1024];
|
|
let num_vectors = 10000;
|
|
|
|
for &dim in &dimensions {
|
|
group.bench_with_input(
|
|
BenchmarkId::from_parameter(dim),
|
|
&dim,
|
|
|b, &dimension| {
|
|
b.iter_custom(|iters| {
|
|
let mut total_duration = std::time::Duration::ZERO;
|
|
|
|
for _ in 0..iters {
|
|
reset_memory_tracking();
|
|
|
|
let start = std::time::Instant::now();
|
|
|
|
let config = LatentConfig {
|
|
dimension,
|
|
attention_type: AttentionType::Standard,
|
|
num_heads: 8,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
|
|
for i in 0..num_vectors {
|
|
let emb = random_embedding(dimension);
|
|
space.add_embedding(emb, Some(i as u64)).unwrap();
|
|
}
|
|
|
|
let elapsed = start.elapsed();
|
|
total_duration += elapsed;
|
|
|
|
let peak = get_peak_memory();
|
|
println!(
|
|
"Dimension: {}, Peak memory: {} MB",
|
|
dimension,
|
|
peak / 1_000_000
|
|
);
|
|
|
|
black_box(space);
|
|
}
|
|
|
|
total_duration
|
|
});
|
|
},
|
|
);
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
criterion_group!(
|
|
memory_benches,
|
|
bench_memory_scaling,
|
|
bench_memory_by_dimension
|
|
);
|
|
criterion_main!(memory_benches);
|
|
```
|
|
|
|
## 2. Benchmark Matrix
|
|
|
|
### 2.1 Complete Test Matrix Configuration
|
|
|
|
```rust
|
|
// benches/benchmark_matrix.rs
|
|
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
|
|
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
|
|
use ruvector::metrics::DistanceMetric;
|
|
|
|
/// Comprehensive benchmark matrix
|
|
struct BenchmarkMatrix {
|
|
dimensions: Vec<usize>,
|
|
neighbors: Vec<usize>,
|
|
batch_sizes: Vec<usize>,
|
|
dataset_sizes: Vec<usize>,
|
|
}
|
|
|
|
impl Default for BenchmarkMatrix {
|
|
fn default() -> Self {
|
|
Self {
|
|
dimensions: vec![64, 128, 256, 512, 1024],
|
|
neighbors: vec![10, 50, 100, 500, 1000, 10000],
|
|
batch_sizes: vec![1, 8, 32, 128],
|
|
dataset_sizes: vec![1000, 5000, 10000, 50000],
|
|
}
|
|
}
|
|
}
|
|
|
|
impl BenchmarkMatrix {
|
|
/// Run complete benchmark matrix
|
|
fn run_complete_matrix(&self, c: &mut Criterion) {
|
|
for &dim in &self.dimensions {
|
|
for &k in &self.neighbors {
|
|
for &batch_size in &self.batch_sizes {
|
|
for &dataset_size in &self.dataset_sizes {
|
|
self.bench_configuration(
|
|
c,
|
|
dim,
|
|
k,
|
|
batch_size,
|
|
dataset_size,
|
|
);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Benchmark specific configuration
|
|
fn bench_configuration(
|
|
&self,
|
|
c: &mut Criterion,
|
|
dim: usize,
|
|
k: usize,
|
|
batch_size: usize,
|
|
dataset_size: usize,
|
|
) {
|
|
let group_name = format!(
|
|
"matrix/dim_{}/k_{}/batch_{}/dataset_{}",
|
|
dim, k, batch_size, dataset_size
|
|
);
|
|
|
|
let mut group = c.benchmark_group(&group_name);
|
|
|
|
let config = LatentConfig {
|
|
dimension: dim,
|
|
attention_type: AttentionType::Standard,
|
|
num_heads: 8,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
let dataset = generate_dataset(dataset_size, dim);
|
|
|
|
for emb in dataset.iter() {
|
|
space.add_embedding(emb.clone(), None).unwrap();
|
|
}
|
|
|
|
let queries = generate_dataset(batch_size, dim);
|
|
|
|
group.bench_function("knn_search", |b| {
|
|
b.iter(|| {
|
|
for query in &queries {
|
|
black_box(space.knn_search(query, k));
|
|
}
|
|
});
|
|
});
|
|
|
|
group.finish();
|
|
}
|
|
}
|
|
|
|
/// Run critical path benchmarks only (reduced matrix)
|
|
fn bench_critical_path(c: &mut Criterion) {
|
|
let matrix = BenchmarkMatrix {
|
|
dimensions: vec![128, 256, 512],
|
|
neighbors: vec![10, 100, 1000],
|
|
batch_sizes: vec![1, 32],
|
|
dataset_sizes: vec![10000],
|
|
};
|
|
|
|
matrix.run_complete_matrix(c);
|
|
}
|
|
|
|
criterion_group!(matrix_benches, bench_critical_path);
|
|
criterion_main!(matrix_benches);
|
|
```
|
|
|
|
### 2.2 Benchmark Matrix Results Format
|
|
|
|
Expected output format for tracking:
|
|
|
|
```toml
|
|
# benchmark_results.toml
|
|
|
|
[latency.dim_128.k_10]
|
|
mean = "45.2 µs"
|
|
std_dev = "2.1 µs"
|
|
median = "44.8 µs"
|
|
mad = "1.4 µs"
|
|
|
|
[latency.dim_256.k_100]
|
|
mean = "124.5 µs"
|
|
std_dev = "5.3 µs"
|
|
median = "123.1 µs"
|
|
mad = "3.2 µs"
|
|
|
|
[throughput.batch_32.dim_256]
|
|
throughput = "2.34 GiB/s"
|
|
ops_per_sec = "8542"
|
|
|
|
[memory.dataset_10000.dim_256]
|
|
peak_mb = "245"
|
|
average_mb = "198"
|
|
allocations = "12543"
|
|
```
|
|
|
|
## 3. Comparative Benchmarks
|
|
|
|
### 3.1 Attention Mechanism Comparison
|
|
|
|
```rust
|
|
// benches/attention_comparison.rs
|
|
use criterion::{
|
|
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, PlotConfiguration,
|
|
};
|
|
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
|
|
use ruvector::metrics::DistanceMetric;
|
|
|
|
fn compare_attention_mechanisms(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("attention_comparison");
|
|
group.plot_config(PlotConfiguration::default().summary_scale(criterion::AxisScale::Logarithmic));
|
|
|
|
let dimensions = [128, 256, 512];
|
|
let sequence_lengths = [100, 500, 1000];
|
|
|
|
let attention_types = vec![
|
|
("Standard", AttentionType::Standard),
|
|
("Flash", AttentionType::Flash),
|
|
("MultiHead_4", AttentionType::MultiHead { num_heads: 4 }),
|
|
("MultiHead_8", AttentionType::MultiHead { num_heads: 8 }),
|
|
("MoE_2", AttentionType::MoE { num_experts: 2 }),
|
|
("MoE_4", AttentionType::MoE { num_experts: 4 }),
|
|
];
|
|
|
|
for &dim in &dimensions {
|
|
for &seq_len in &sequence_lengths {
|
|
for (name, attn_type) in &attention_types {
|
|
let config = LatentConfig {
|
|
dimension: dim,
|
|
attention_type: attn_type.clone(),
|
|
num_heads: 8,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
let embeddings = generate_dataset(seq_len, dim);
|
|
|
|
for emb in embeddings.iter() {
|
|
space.add_embedding(emb.clone(), None).unwrap();
|
|
}
|
|
|
|
let query = random_embedding(dim);
|
|
|
|
group.bench_with_input(
|
|
BenchmarkId::new(
|
|
format!("{}@dim_{}", name, dim),
|
|
seq_len,
|
|
),
|
|
&query,
|
|
|b, q| {
|
|
b.iter(|| {
|
|
black_box(space.compute_attention(q))
|
|
});
|
|
},
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Generate comparison report
|
|
fn generate_comparison_report() {
|
|
use std::collections::HashMap;
|
|
|
|
let results = HashMap::from([
|
|
("Standard", vec![45.2, 124.5, 456.7]),
|
|
("Flash", vec![32.1, 89.3, 301.2]),
|
|
("MultiHead_8", vec![52.3, 142.1, 512.4]),
|
|
("MoE_4", vec![48.9, 128.7, 445.3]),
|
|
]);
|
|
|
|
println!("\n=== Attention Mechanism Comparison ===\n");
|
|
println!("{:<15} {:>12} {:>12} {:>12}", "Type", "128-dim", "256-dim", "512-dim");
|
|
println!("{:-<55}", "");
|
|
|
|
for (name, times) in results.iter() {
|
|
println!(
|
|
"{:<15} {:>10.1} µs {:>10.1} µs {:>10.1} µs",
|
|
name, times[0], times[1], times[2]
|
|
);
|
|
}
|
|
|
|
// Calculate speedup vs standard
|
|
if let Some(baseline) = results.get("Standard") {
|
|
println!("\n=== Speedup vs Standard ===\n");
|
|
for (name, times) in results.iter() {
|
|
if name != &"Standard" {
|
|
let speedups: Vec<f64> = times
|
|
.iter()
|
|
.zip(baseline.iter())
|
|
.map(|(t, b)| b / t)
|
|
.collect();
|
|
|
|
println!(
|
|
"{:<15} {:>10.2}x {:>10.2}x {:>10.2}x",
|
|
name, speedups[0], speedups[1], speedups[2]
|
|
);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
criterion_group!(attention_benches, compare_attention_mechanisms);
|
|
criterion_main!(attention_benches);
|
|
```
|
|
|
|
### 3.2 Distance Metric Comparison
|
|
|
|
```rust
|
|
// benches/distance_comparison.rs
|
|
use criterion::{
|
|
black_box, criterion_group, criterion_main, BenchmarkId, Criterion,
|
|
};
|
|
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
|
|
use ruvector::metrics::DistanceMetric;
|
|
|
|
fn compare_distance_metrics(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("distance_comparison");
|
|
|
|
let dimensions = [128, 256, 512];
|
|
let metrics = vec![
|
|
("Euclidean", DistanceMetric::Euclidean),
|
|
("Cosine", DistanceMetric::Cosine),
|
|
("Manhattan", DistanceMetric::Manhattan),
|
|
("Hyperbolic", DistanceMetric::Hyperbolic { curvature: -1.0 }),
|
|
];
|
|
|
|
for &dim in &dimensions {
|
|
for (name, metric) in &metrics {
|
|
let config = LatentConfig {
|
|
dimension: dim,
|
|
attention_type: AttentionType::Standard,
|
|
num_heads: 8,
|
|
distance_metric: metric.clone(),
|
|
..Default::default()
|
|
};
|
|
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
let dataset = generate_dataset(10000, dim);
|
|
|
|
for emb in dataset.iter() {
|
|
space.add_embedding(emb.clone(), None).unwrap();
|
|
}
|
|
|
|
let query = random_embedding(dim);
|
|
|
|
group.bench_with_input(
|
|
BenchmarkId::new(format!("{}@dim_{}", name, dim), "knn_100"),
|
|
&query,
|
|
|b, q| {
|
|
b.iter(|| {
|
|
black_box(space.knn_search(q, 100))
|
|
});
|
|
},
|
|
);
|
|
}
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
criterion_group!(distance_benches, compare_distance_metrics);
|
|
criterion_main!(distance_benches);
|
|
```
|
|
|
|
### 3.3 Standard vs Flash Attention
|
|
|
|
Detailed comparison focusing on Flash attention optimizations:
|
|
|
|
```rust
|
|
// benches/flash_attention_benchmark.rs
|
|
use criterion::{
|
|
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
|
|
};
|
|
use ruvector::latent_space::{LatentSpace, LatentConfig, AttentionType};
|
|
use ruvector::metrics::DistanceMetric;
|
|
|
|
fn bench_flash_vs_standard(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("flash_vs_standard");
|
|
|
|
// Test at various sequence lengths where Flash shines
|
|
let sequence_lengths = [256, 512, 1024, 2048, 4096];
|
|
let dim = 256;
|
|
let num_heads = 8;
|
|
|
|
for &seq_len in &sequence_lengths {
|
|
// Standard attention
|
|
{
|
|
let config = LatentConfig {
|
|
dimension: dim,
|
|
attention_type: AttentionType::Standard,
|
|
num_heads,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
let embeddings = generate_dataset(seq_len, dim);
|
|
|
|
for emb in embeddings.iter() {
|
|
space.add_embedding(emb.clone(), None).unwrap();
|
|
}
|
|
|
|
let query = random_embedding(dim);
|
|
|
|
group.throughput(Throughput::Elements(seq_len as u64));
|
|
group.bench_with_input(
|
|
BenchmarkId::new("Standard", seq_len),
|
|
&query,
|
|
|b, q| {
|
|
b.iter(|| {
|
|
black_box(space.compute_attention(q))
|
|
});
|
|
},
|
|
);
|
|
}
|
|
|
|
// Flash attention
|
|
{
|
|
let config = LatentConfig {
|
|
dimension: dim,
|
|
attention_type: AttentionType::Flash,
|
|
num_heads,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
let embeddings = generate_dataset(seq_len, dim);
|
|
|
|
for emb in embeddings.iter() {
|
|
space.add_embedding(emb.clone(), None).unwrap();
|
|
}
|
|
|
|
let query = random_embedding(dim);
|
|
|
|
group.throughput(Throughput::Elements(seq_len as u64));
|
|
group.bench_with_input(
|
|
BenchmarkId::new("Flash", seq_len),
|
|
&query,
|
|
|b, q| {
|
|
b.iter(|| {
|
|
black_box(space.compute_attention(q))
|
|
});
|
|
},
|
|
);
|
|
}
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Memory comparison between Flash and Standard
|
|
fn bench_memory_flash_vs_standard(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("memory_flash_vs_standard");
|
|
|
|
let sequence_lengths = [512, 1024, 2048];
|
|
let dim = 256;
|
|
|
|
for &seq_len in &sequence_lengths {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("Standard", seq_len),
|
|
&seq_len,
|
|
|b, &len| {
|
|
b.iter_custom(|iters| {
|
|
let mut total_duration = std::time::Duration::ZERO;
|
|
|
|
for _ in 0..iters {
|
|
reset_memory_tracking();
|
|
|
|
let start = std::time::Instant::now();
|
|
|
|
let config = LatentConfig {
|
|
dimension: dim,
|
|
attention_type: AttentionType::Standard,
|
|
num_heads: 8,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
let embeddings = generate_dataset(len, dim);
|
|
|
|
for emb in embeddings.iter() {
|
|
space.add_embedding(emb.clone(), None).unwrap();
|
|
}
|
|
|
|
let query = random_embedding(dim);
|
|
black_box(space.compute_attention(&query));
|
|
|
|
total_duration += start.elapsed();
|
|
|
|
println!(
|
|
"Standard@{}: Peak {} MB",
|
|
len,
|
|
get_peak_memory() / 1_000_000
|
|
);
|
|
}
|
|
|
|
total_duration
|
|
});
|
|
},
|
|
);
|
|
|
|
group.bench_with_input(
|
|
BenchmarkId::new("Flash", seq_len),
|
|
&seq_len,
|
|
|b, &len| {
|
|
b.iter_custom(|iters| {
|
|
let mut total_duration = std::time::Duration::ZERO;
|
|
|
|
for _ in 0..iters {
|
|
reset_memory_tracking();
|
|
|
|
let start = std::time::Instant::now();
|
|
|
|
let config = LatentConfig {
|
|
dimension: dim,
|
|
attention_type: AttentionType::Flash,
|
|
num_heads: 8,
|
|
distance_metric: DistanceMetric::Euclidean,
|
|
..Default::default()
|
|
};
|
|
|
|
let mut space = LatentSpace::new(config).unwrap();
|
|
let embeddings = generate_dataset(len, dim);
|
|
|
|
for emb in embeddings.iter() {
|
|
space.add_embedding(emb.clone(), None).unwrap();
|
|
}
|
|
|
|
let query = random_embedding(dim);
|
|
black_box(space.compute_attention(&query));
|
|
|
|
total_duration += start.elapsed();
|
|
|
|
println!(
|
|
"Flash@{}: Peak {} MB",
|
|
len,
|
|
get_peak_memory() / 1_000_000
|
|
);
|
|
}
|
|
|
|
total_duration
|
|
});
|
|
},
|
|
);
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
criterion_group!(
|
|
flash_benches,
|
|
bench_flash_vs_standard,
|
|
bench_memory_flash_vs_standard
|
|
);
|
|
criterion_main!(flash_benches);
|
|
```
|
|
|
|
## 4. Regression Detection
|
|
|
|
### 4.1 Baseline Storage System
|
|
|
|
```rust
|
|
// benches/regression_detection.rs
|
|
use serde::{Deserialize, Serialize};
|
|
use std::collections::HashMap;
|
|
use std::fs;
|
|
use std::path::Path;
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct BenchmarkBaseline {
|
|
pub name: String,
|
|
pub dimension: usize,
|
|
pub mean: f64,
|
|
pub std_dev: f64,
|
|
pub median: f64,
|
|
pub throughput: Option<f64>,
|
|
pub memory_peak_mb: Option<usize>,
|
|
pub timestamp: String,
|
|
pub git_commit: String,
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Deserialize)]
|
|
pub struct BaselineCollection {
|
|
pub baselines: HashMap<String, BenchmarkBaseline>,
|
|
pub created_at: String,
|
|
pub updated_at: String,
|
|
}
|
|
|
|
impl BaselineCollection {
|
|
/// Load baselines from file
|
|
pub fn load<P: AsRef<Path>>(path: P) -> Result<Self, Box<dyn std::error::Error>> {
|
|
let content = fs::read_to_string(path)?;
|
|
let collection: BaselineCollection = serde_json::from_str(&content)?;
|
|
Ok(collection)
|
|
}
|
|
|
|
/// Save baselines to file
|
|
pub fn save<P: AsRef<Path>>(&self, path: P) -> Result<(), Box<dyn std::error::Error>> {
|
|
let content = serde_json::to_string_pretty(self)?;
|
|
fs::write(path, content)?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Add or update baseline
|
|
pub fn update_baseline(&mut self, key: String, baseline: BenchmarkBaseline) {
|
|
self.baselines.insert(key, baseline);
|
|
self.updated_at = chrono::Utc::now().to_rfc3339();
|
|
}
|
|
|
|
/// Check for regression
|
|
pub fn check_regression(
|
|
&self,
|
|
key: &str,
|
|
current: &BenchmarkResult,
|
|
threshold_percent: f64,
|
|
) -> RegressionStatus {
|
|
if let Some(baseline) = self.baselines.get(key) {
|
|
let change_percent = ((current.mean - baseline.mean) / baseline.mean) * 100.0;
|
|
|
|
if change_percent > threshold_percent {
|
|
RegressionStatus::Regression {
|
|
baseline: baseline.mean,
|
|
current: current.mean,
|
|
change_percent,
|
|
}
|
|
} else if change_percent < -threshold_percent / 2.0 {
|
|
RegressionStatus::Improvement {
|
|
baseline: baseline.mean,
|
|
current: current.mean,
|
|
change_percent: -change_percent,
|
|
}
|
|
} else {
|
|
RegressionStatus::NoChange
|
|
}
|
|
} else {
|
|
RegressionStatus::NewBenchmark
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub enum RegressionStatus {
|
|
Regression {
|
|
baseline: f64,
|
|
current: f64,
|
|
change_percent: f64,
|
|
},
|
|
Improvement {
|
|
baseline: f64,
|
|
current: f64,
|
|
change_percent: f64,
|
|
},
|
|
NoChange,
|
|
NewBenchmark,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct BenchmarkResult {
|
|
pub name: String,
|
|
pub mean: f64,
|
|
pub std_dev: f64,
|
|
pub median: f64,
|
|
}
|
|
|
|
/// Get current git commit
|
|
fn get_git_commit() -> String {
|
|
use std::process::Command;
|
|
|
|
let output = Command::new("git")
|
|
.args(&["rev-parse", "HEAD"])
|
|
.output()
|
|
.expect("Failed to get git commit");
|
|
|
|
String::from_utf8_lossy(&output.stdout).trim().to_string()
|
|
}
|
|
|
|
/// Create baseline from current run
|
|
pub fn create_baseline(results: Vec<BenchmarkResult>) -> BaselineCollection {
|
|
let mut baselines = HashMap::new();
|
|
let commit = get_git_commit();
|
|
let timestamp = chrono::Utc::now().to_rfc3339();
|
|
|
|
for result in results {
|
|
let baseline = BenchmarkBaseline {
|
|
name: result.name.clone(),
|
|
dimension: 256, // Extract from name or pass explicitly
|
|
mean: result.mean,
|
|
std_dev: result.std_dev,
|
|
median: result.median,
|
|
throughput: None,
|
|
memory_peak_mb: None,
|
|
timestamp: timestamp.clone(),
|
|
git_commit: commit.clone(),
|
|
};
|
|
|
|
baselines.insert(result.name.clone(), baseline);
|
|
}
|
|
|
|
BaselineCollection {
|
|
baselines,
|
|
created_at: timestamp.clone(),
|
|
updated_at: timestamp,
|
|
}
|
|
}
|
|
```
|
|
|
|
### 4.2 CI Integration Script
|
|
|
|
```bash
|
|
#!/bin/bash
|
|
# scripts/benchmark_ci.sh
|
|
|
|
set -e
|
|
|
|
BASELINE_FILE="benches/baselines.json"
|
|
RESULTS_FILE="target/criterion/results.json"
|
|
THRESHOLD=10.0 # 10% regression threshold
|
|
|
|
echo "Running benchmarks..."
|
|
cargo bench --bench latency_benchmarks -- --save-baseline current
|
|
|
|
echo "Comparing with baseline..."
|
|
if [ -f "$BASELINE_FILE" ]; then
|
|
cargo run --bin compare_benchmarks -- \
|
|
--baseline "$BASELINE_FILE" \
|
|
--current "$RESULTS_FILE" \
|
|
--threshold "$THRESHOLD"
|
|
|
|
REGRESSION_STATUS=$?
|
|
|
|
if [ $REGRESSION_STATUS -eq 1 ]; then
|
|
echo "❌ Performance regression detected!"
|
|
exit 1
|
|
elif [ $REGRESSION_STATUS -eq 2 ]; then
|
|
echo "✅ Performance improvement detected!"
|
|
else
|
|
echo "✅ No significant performance change"
|
|
fi
|
|
else
|
|
echo "No baseline found, creating new baseline..."
|
|
cp "$RESULTS_FILE" "$BASELINE_FILE"
|
|
fi
|
|
|
|
echo "Generating benchmark report..."
|
|
cargo run --bin benchmark_report -- \
|
|
--baseline "$BASELINE_FILE" \
|
|
--output "target/benchmark_report.md"
|
|
|
|
echo "Done!"
|
|
```
|
|
|
|
### 4.3 Threshold Configuration
|
|
|
|
```toml
|
|
# benches/regression_config.toml
|
|
|
|
[thresholds]
|
|
# Global default threshold (%)
|
|
default = 10.0
|
|
|
|
# Per-benchmark thresholds
|
|
[thresholds.latency]
|
|
knn_search = 5.0
|
|
add_embedding = 8.0
|
|
attention = 12.0
|
|
|
|
[thresholds.throughput]
|
|
batch_operations = 15.0
|
|
parallel_search = 10.0
|
|
|
|
[thresholds.memory]
|
|
peak_usage = 20.0
|
|
allocation_count = 25.0
|
|
|
|
[regression_actions]
|
|
# What to do on regression
|
|
fail_ci = true
|
|
create_issue = true
|
|
notify_team = true
|
|
|
|
[regression_actions.notifications]
|
|
slack_webhook = "${SLACK_WEBHOOK_URL}"
|
|
email = "dev-team@example.com"
|
|
|
|
[baseline_management]
|
|
# Auto-update baseline on main branch
|
|
auto_update_main = true
|
|
|
|
# Require manual approval for baseline updates
|
|
require_approval = false
|
|
|
|
# Keep history of baselines
|
|
keep_history = true
|
|
max_history_count = 50
|
|
```
|
|
|
|
### 4.4 Benchmark Comparison Tool
|
|
|
|
```rust
|
|
// src/bin/compare_benchmarks.rs
|
|
use clap::Parser;
|
|
use ruvector_benches::regression::{BaselineCollection, BenchmarkResult, RegressionStatus};
|
|
use std::path::PathBuf;
|
|
|
|
#[derive(Parser)]
|
|
struct Args {
|
|
#[arg(long)]
|
|
baseline: PathBuf,
|
|
|
|
#[arg(long)]
|
|
current: PathBuf,
|
|
|
|
#[arg(long, default_value = "10.0")]
|
|
threshold: f64,
|
|
}
|
|
|
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
let args = Args::parse();
|
|
|
|
let baselines = BaselineCollection::load(&args.baseline)?;
|
|
let current_results = load_current_results(&args.current)?;
|
|
|
|
let mut has_regression = false;
|
|
let mut has_improvement = false;
|
|
|
|
println!("\n=== Benchmark Comparison Report ===\n");
|
|
|
|
for result in current_results {
|
|
let status = baselines.check_regression(
|
|
&result.name,
|
|
&result,
|
|
args.threshold,
|
|
);
|
|
|
|
match status {
|
|
RegressionStatus::Regression {
|
|
baseline,
|
|
current,
|
|
change_percent,
|
|
} => {
|
|
has_regression = true;
|
|
println!(
|
|
"❌ REGRESSION: {}\n Baseline: {:.2} µs\n Current: {:.2} µs\n Change: +{:.1}%\n",
|
|
result.name, baseline, current, change_percent
|
|
);
|
|
}
|
|
RegressionStatus::Improvement {
|
|
baseline,
|
|
current,
|
|
change_percent,
|
|
} => {
|
|
has_improvement = true;
|
|
println!(
|
|
"✅ IMPROVEMENT: {}\n Baseline: {:.2} µs\n Current: {:.2} µs\n Change: -{:.1}%\n",
|
|
result.name, baseline, current, change_percent
|
|
);
|
|
}
|
|
RegressionStatus::NoChange => {
|
|
println!("➡️ NO CHANGE: {}\n", result.name);
|
|
}
|
|
RegressionStatus::NewBenchmark => {
|
|
println!("🆕 NEW: {}\n", result.name);
|
|
}
|
|
}
|
|
}
|
|
|
|
if has_regression {
|
|
std::process::exit(1);
|
|
} else if has_improvement {
|
|
std::process::exit(2);
|
|
} else {
|
|
std::process::exit(0);
|
|
}
|
|
}
|
|
|
|
fn load_current_results(path: &PathBuf) -> Result<Vec<BenchmarkResult>, Box<dyn std::error::Error>> {
|
|
// Parse Criterion JSON output
|
|
// Implementation depends on Criterion output format
|
|
Ok(vec![])
|
|
}
|
|
```
|
|
|
|
### 4.5 GitHub Actions Workflow
|
|
|
|
```yaml
|
|
# .github/workflows/benchmarks.yml
|
|
name: Performance Benchmarks
|
|
|
|
on:
|
|
pull_request:
|
|
branches: [ main ]
|
|
push:
|
|
branches: [ main ]
|
|
|
|
jobs:
|
|
benchmark:
|
|
runs-on: ubuntu-latest
|
|
|
|
steps:
|
|
- uses: actions/checkout@v3
|
|
with:
|
|
fetch-depth: 0 # Fetch all history for baseline comparison
|
|
|
|
- name: Setup Rust
|
|
uses: actions-rs/toolchain@v1
|
|
with:
|
|
toolchain: stable
|
|
override: true
|
|
|
|
- name: Cache cargo registry
|
|
uses: actions/cache@v3
|
|
with:
|
|
path: ~/.cargo/registry
|
|
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
|
|
|
|
- name: Cache criterion results
|
|
uses: actions/cache@v3
|
|
with:
|
|
path: target/criterion
|
|
key: ${{ runner.os }}-criterion-${{ github.sha }}
|
|
restore-keys: |
|
|
${{ runner.os }}-criterion-
|
|
|
|
- name: Restore baseline
|
|
run: |
|
|
if [ -f benches/baselines.json ]; then
|
|
echo "Baseline found"
|
|
else
|
|
echo "No baseline, will create new one"
|
|
fi
|
|
|
|
- name: Run benchmarks
|
|
run: |
|
|
cargo bench --bench latency_benchmarks
|
|
cargo bench --bench throughput_benchmarks
|
|
cargo bench --bench memory_benchmarks
|
|
cargo bench --bench attention_comparison
|
|
|
|
- name: Compare with baseline
|
|
id: compare
|
|
run: |
|
|
chmod +x scripts/benchmark_ci.sh
|
|
./scripts/benchmark_ci.sh
|
|
continue-on-error: true
|
|
|
|
- name: Generate report
|
|
run: |
|
|
cargo run --bin benchmark_report -- \
|
|
--baseline benches/baselines.json \
|
|
--output target/benchmark_report.md
|
|
|
|
- name: Comment PR
|
|
if: github.event_name == 'pull_request'
|
|
uses: actions/github-script@v6
|
|
with:
|
|
script: |
|
|
const fs = require('fs');
|
|
const report = fs.readFileSync('target/benchmark_report.md', 'utf8');
|
|
github.rest.issues.createComment({
|
|
issue_number: context.issue.number,
|
|
owner: context.repo.owner,
|
|
repo: context.repo.repo,
|
|
body: report
|
|
});
|
|
|
|
- name: Update baseline on main
|
|
if: github.ref == 'refs/heads/main' && steps.compare.outcome == 'success'
|
|
run: |
|
|
cp target/criterion/results.json benches/baselines.json
|
|
git config user.name "github-actions[bot]"
|
|
git config user.email "github-actions[bot]@users.noreply.github.com"
|
|
git add benches/baselines.json
|
|
git commit -m "chore: update performance baselines [skip ci]"
|
|
git push
|
|
|
|
- name: Fail on regression
|
|
if: steps.compare.outcome == 'failure'
|
|
run: exit 1
|
|
|
|
- name: Upload benchmark results
|
|
uses: actions/upload-artifact@v3
|
|
with:
|
|
name: benchmark-results
|
|
path: |
|
|
target/criterion/
|
|
target/benchmark_report.md
|
|
```
|
|
|
|
## 5. Benchmark Organization
|
|
|
|
### 5.1 Directory Structure
|
|
|
|
```
|
|
benches/
|
|
├── latency_benchmarks.rs # Latency measurements
|
|
├── throughput_benchmarks.rs # Throughput measurements
|
|
├── memory_benchmarks.rs # Memory usage tracking
|
|
├── attention_comparison.rs # Attention mechanism comparison
|
|
├── distance_comparison.rs # Distance metric comparison
|
|
├── flash_attention_benchmark.rs # Flash vs Standard detailed
|
|
├── benchmark_matrix.rs # Complete test matrix
|
|
├── regression_detection.rs # Baseline & regression tools
|
|
├── baselines.json # Stored baselines
|
|
└── regression_config.toml # Threshold configuration
|
|
|
|
scripts/
|
|
├── benchmark_ci.sh # CI integration script
|
|
├── generate_report.sh # Report generation
|
|
└── update_baseline.sh # Baseline management
|
|
|
|
src/bin/
|
|
├── compare_benchmarks.rs # Comparison tool
|
|
└── benchmark_report.rs # Report generator
|
|
```
|
|
|
|
### 5.2 Cargo.toml Configuration
|
|
|
|
```toml
|
|
[dev-dependencies]
|
|
criterion = { version = "0.5", features = ["html_reports"] }
|
|
rayon = "1.7"
|
|
serde = { version = "1.0", features = ["derive"] }
|
|
serde_json = "1.0"
|
|
chrono = "0.4"
|
|
|
|
[[bench]]
|
|
name = "latency_benchmarks"
|
|
harness = false
|
|
|
|
[[bench]]
|
|
name = "throughput_benchmarks"
|
|
harness = false
|
|
|
|
[[bench]]
|
|
name = "memory_benchmarks"
|
|
harness = false
|
|
|
|
[[bench]]
|
|
name = "attention_comparison"
|
|
harness = false
|
|
|
|
[[bench]]
|
|
name = "distance_comparison"
|
|
harness = false
|
|
|
|
[[bench]]
|
|
name = "flash_attention_benchmark"
|
|
harness = false
|
|
|
|
[[bench]]
|
|
name = "benchmark_matrix"
|
|
harness = false
|
|
|
|
[profile.bench]
|
|
opt-level = 3
|
|
lto = true
|
|
codegen-units = 1
|
|
```
|
|
|
|
## Summary
|
|
|
|
This benchmark suite provides:
|
|
|
|
1. **Comprehensive Coverage**:
|
|
- Latency benchmarks across all dimensions
|
|
- Throughput measurements for batch operations
|
|
- Memory usage tracking and profiling
|
|
- Full test matrix with 4D parameter space
|
|
|
|
2. **Comparative Analysis**:
|
|
- Attention mechanism comparison (Standard, Flash, Multi-Head, MoE)
|
|
- Distance metric comparison (Euclidean, Cosine, Manhattan, Hyperbolic)
|
|
- Detailed Flash vs Standard analysis
|
|
|
|
3. **Regression Detection**:
|
|
- Baseline storage and versioning
|
|
- Automated comparison with configurable thresholds
|
|
- CI/CD integration with GitHub Actions
|
|
- Automatic PR comments with benchmark results
|
|
|
|
4. **Production Ready**:
|
|
- Complete Criterion integration
|
|
- Structured result storage
|
|
- Automated reporting
|
|
- Performance tracking over time
|
|
|
|
Run benchmarks with:
|
|
```bash
|
|
# Run all benchmarks
|
|
cargo bench
|
|
|
|
# Run specific benchmark suite
|
|
cargo bench --bench latency_benchmarks
|
|
|
|
# Compare with baseline
|
|
./scripts/benchmark_ci.sh
|
|
|
|
# Generate report
|
|
cargo run --bin benchmark_report
|
|
```
|