Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,538 @@
//! AgenticDB compatibility benchmark
//!
//! Tests AgenticDB-specific workloads:
//! - Reflexion episode storage and retrieval
//! - Skill library search
//! - Causal graph queries
//! - Learning session throughput
use anyhow::Result;
use clap::Parser;
use rand::Rng;
use ruvector_bench::{
create_progress_bar, BenchmarkResult, DatasetGenerator, LatencyStats, MemoryProfiler,
ResultWriter, VectorDistribution,
};
use ruvector_core::{
types::{DbOptions, HnswConfig, QuantizationConfig},
DistanceMetric, SearchQuery, VectorDB, VectorEntry,
};
use serde_json::json;
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Instant;
// CLI arguments for the AgenticDB workload benchmark.
// NOTE: the `///` field comments double as clap-generated --help text,
// so they are deliberately left byte-for-byte untouched; review notes
// use `//` comments, which clap ignores.
#[derive(Parser)]
#[command(name = "agenticdb-benchmark")]
#[command(about = "AgenticDB workload testing")]
struct Args {
    /// Number of episodes
    // Also sizes the causal-graph test (episodes / 10 nodes) and the
    // learning-session test (episodes operations).
    #[arg(long, default_value = "10000")]
    episodes: usize,
    /// Number of skills
    #[arg(long, default_value = "1000")]
    skills: usize,
    /// Number of queries
    #[arg(short, long, default_value = "500")]
    queries: usize,
    /// Output directory
    #[arg(short, long, default_value = "bench_results")]
    output: PathBuf,
}
fn main() -> Result<()> {
let args = Args::parse();
println!("╔════════════════════════════════════════╗");
println!("║ Ruvector AgenticDB Benchmark ║");
println!("╚════════════════════════════════════════╝\n");
let mut all_results = Vec::new();
// Test 1: Reflexion episode storage/retrieval
println!("\n{}", "=".repeat(60));
println!("Test 1: Reflexion Episode Storage & Retrieval");
println!("{}\n", "=".repeat(60));
let result = bench_reflexion_episodes(&args)?;
all_results.push(result);
// Test 2: Skill library search
println!("\n{}", "=".repeat(60));
println!("Test 2: Skill Library Search");
println!("{}\n", "=".repeat(60));
let result = bench_skill_library(&args)?;
all_results.push(result);
// Test 3: Causal graph queries
println!("\n{}", "=".repeat(60));
println!("Test 3: Causal Graph Queries");
println!("{}\n", "=".repeat(60));
let result = bench_causal_graph(&args)?;
all_results.push(result);
// Test 4: Learning session throughput
println!("\n{}", "=".repeat(60));
println!("Test 4: Learning Session Throughput");
println!("{}\n", "=".repeat(60));
let result = bench_learning_session(&args)?;
all_results.push(result);
// Write results
let writer = ResultWriter::new(&args.output)?;
writer.write_json("agenticdb_benchmark", &all_results)?;
writer.write_csv("agenticdb_benchmark", &all_results)?;
writer.write_markdown_report("agenticdb_benchmark", &all_results)?;
print_summary(&all_results);
println!(
"\n✓ AgenticDB benchmark complete! Results saved to: {}",
args.output.display()
);
Ok(())
}
fn bench_reflexion_episodes(args: &Args) -> Result<BenchmarkResult> {
println!("Simulating {} Reflexion episodes...", args.episodes);
// Reflexion episodes use 384D embeddings (typical for sentence-transformers)
let dimensions = 384;
let temp_dir = tempfile::tempdir()?;
let db_path = temp_dir.path().join("episodes.db");
let options = DbOptions {
dimensions,
distance_metric: DistanceMetric::Cosine,
storage_path: db_path.to_str().unwrap().to_string(),
hnsw_config: Some(HnswConfig::default()),
quantization: Some(QuantizationConfig::Scalar),
};
let mem_profiler = MemoryProfiler::new();
let build_start = Instant::now();
let db = VectorDB::new(options)?;
// Generate episode data
let gen = DatasetGenerator::new(
dimensions,
VectorDistribution::Normal {
mean: 0.0,
std_dev: 1.0,
},
);
println!("Storing episodes...");
let pb = create_progress_bar(args.episodes as u64, "Storing episodes");
for i in 0..args.episodes {
let entry = VectorEntry {
id: Some(format!("episode_{}", i)),
vector: gen.generate(1).into_iter().next().unwrap(),
metadata: Some(
vec![
("trajectory".to_string(), json!(format!("traj_{}", i))),
("reward".to_string(), json!(rand::thread_rng().gen::<f32>())),
(
"success".to_string(),
json!(rand::thread_rng().gen_bool(0.7)),
),
(
"step_count".to_string(),
json!(rand::thread_rng().gen_range(10..100)),
),
]
.into_iter()
.collect(),
),
};
db.insert(entry)?;
pb.inc(1);
}
pb.finish_with_message("✓ Episodes stored");
let build_time = build_start.elapsed();
let memory_mb = mem_profiler.current_usage_mb();
// Query similar episodes
println!("Querying similar episodes...");
let mut latency_stats = LatencyStats::new()?;
let query_vectors = gen.generate(args.queries);
let search_start = Instant::now();
let pb = create_progress_bar(args.queries as u64, "Searching");
for query in query_vectors {
let query_start = Instant::now();
db.search(SearchQuery {
vector: query,
k: 10,
filter: None,
ef_search: None,
})?;
latency_stats.record(query_start.elapsed())?;
pb.inc(1);
}
pb.finish_with_message("✓ Search complete");
let total_search_time = search_start.elapsed();
let qps = args.queries as f64 / total_search_time.as_secs_f64();
Ok(BenchmarkResult {
name: "reflexion_episodes".to_string(),
dataset: "reflexion".to_string(),
dimensions,
num_vectors: args.episodes,
num_queries: args.queries,
k: 10,
qps,
latency_p50: latency_stats.percentile(0.50).as_secs_f64() * 1000.0,
latency_p95: latency_stats.percentile(0.95).as_secs_f64() * 1000.0,
latency_p99: latency_stats.percentile(0.99).as_secs_f64() * 1000.0,
latency_p999: latency_stats.percentile(0.999).as_secs_f64() * 1000.0,
recall_at_1: 1.0, // No ground truth for synthetic
recall_at_10: 1.0,
recall_at_100: 1.0,
memory_mb,
build_time_secs: build_time.as_secs_f64(),
metadata: HashMap::new(),
})
}
fn bench_skill_library(args: &Args) -> Result<BenchmarkResult> {
println!("Simulating {} skills in library...", args.skills);
let dimensions = 768; // Larger embeddings for code/skill descriptions
let temp_dir = tempfile::tempdir()?;
let db_path = temp_dir.path().join("skills.db");
let options = DbOptions {
dimensions,
distance_metric: DistanceMetric::Cosine,
storage_path: db_path.to_str().unwrap().to_string(),
hnsw_config: Some(HnswConfig::default()),
quantization: Some(QuantizationConfig::Scalar),
};
let mem_profiler = MemoryProfiler::new();
let build_start = Instant::now();
let db = VectorDB::new(options)?;
let gen = DatasetGenerator::new(
dimensions,
VectorDistribution::Clustered {
num_clusters: 20, // Skills grouped by categories
},
);
println!("Storing skills...");
let pb = create_progress_bar(args.skills as u64, "Storing skills");
for i in 0..args.skills {
let entry = VectorEntry {
id: Some(format!("skill_{}", i)),
vector: gen.generate(1).into_iter().next().unwrap(),
metadata: Some(
vec![
("name".to_string(), json!(format!("skill_{}", i))),
("category".to_string(), json!(format!("cat_{}", i % 20))),
(
"success_rate".to_string(),
json!(rand::thread_rng().gen::<f32>()),
),
(
"usage_count".to_string(),
json!(rand::thread_rng().gen_range(0..1000)),
),
]
.into_iter()
.collect(),
),
};
db.insert(entry)?;
pb.inc(1);
}
pb.finish_with_message("✓ Skills stored");
let build_time = build_start.elapsed();
let memory_mb = mem_profiler.current_usage_mb();
// Search for relevant skills
println!("Searching for relevant skills...");
let mut latency_stats = LatencyStats::new()?;
let query_vectors = gen.generate(args.queries);
let search_start = Instant::now();
let pb = create_progress_bar(args.queries as u64, "Searching");
for query in query_vectors {
let query_start = Instant::now();
db.search(SearchQuery {
vector: query,
k: 5,
filter: None,
ef_search: None,
})?;
latency_stats.record(query_start.elapsed())?;
pb.inc(1);
}
pb.finish_with_message("✓ Search complete");
let total_search_time = search_start.elapsed();
let qps = args.queries as f64 / total_search_time.as_secs_f64();
Ok(BenchmarkResult {
name: "skill_library".to_string(),
dataset: "skills".to_string(),
dimensions,
num_vectors: args.skills,
num_queries: args.queries,
k: 5,
qps,
latency_p50: latency_stats.percentile(0.50).as_secs_f64() * 1000.0,
latency_p95: latency_stats.percentile(0.95).as_secs_f64() * 1000.0,
latency_p99: latency_stats.percentile(0.99).as_secs_f64() * 1000.0,
latency_p999: latency_stats.percentile(0.999).as_secs_f64() * 1000.0,
recall_at_1: 1.0,
recall_at_10: 1.0,
recall_at_100: 1.0,
memory_mb,
build_time_secs: build_time.as_secs_f64(),
metadata: HashMap::new(),
})
}
fn bench_causal_graph(args: &Args) -> Result<BenchmarkResult> {
println!(
"Simulating causal graph with {} nodes...",
args.episodes / 10
);
let dimensions = 256;
let num_nodes = args.episodes / 10;
let temp_dir = tempfile::tempdir()?;
let db_path = temp_dir.path().join("causal.db");
let options = DbOptions {
dimensions,
distance_metric: DistanceMetric::Cosine,
storage_path: db_path.to_str().unwrap().to_string(),
hnsw_config: Some(HnswConfig::default()),
quantization: Some(QuantizationConfig::Scalar),
};
let mem_profiler = MemoryProfiler::new();
let build_start = Instant::now();
let db = VectorDB::new(options)?;
let gen = DatasetGenerator::new(
dimensions,
VectorDistribution::Normal {
mean: 0.0,
std_dev: 1.0,
},
);
println!("Building causal graph...");
let pb = create_progress_bar(num_nodes as u64, "Storing nodes");
for i in 0..num_nodes {
let entry = VectorEntry {
id: Some(format!("node_{}", i)),
vector: gen.generate(1).into_iter().next().unwrap(),
metadata: Some(
vec![
("state".to_string(), json!(format!("state_{}", i))),
("action".to_string(), json!(format!("action_{}", i % 50))),
(
"causal_strength".to_string(),
json!(rand::thread_rng().gen::<f32>()),
),
]
.into_iter()
.collect(),
),
};
db.insert(entry)?;
pb.inc(1);
}
pb.finish_with_message("✓ Graph built");
let build_time = build_start.elapsed();
let memory_mb = mem_profiler.current_usage_mb();
// Query causal relationships
println!("Querying causal relationships...");
let mut latency_stats = LatencyStats::new()?;
let query_vectors = gen.generate(args.queries / 2);
let search_start = Instant::now();
let pb = create_progress_bar((args.queries / 2) as u64, "Searching");
for query in query_vectors {
let query_start = Instant::now();
db.search(SearchQuery {
vector: query,
k: 20,
filter: None,
ef_search: None,
})?;
latency_stats.record(query_start.elapsed())?;
pb.inc(1);
}
pb.finish_with_message("✓ Search complete");
let total_search_time = search_start.elapsed();
let qps = (args.queries / 2) as f64 / total_search_time.as_secs_f64();
Ok(BenchmarkResult {
name: "causal_graph".to_string(),
dataset: "causal".to_string(),
dimensions,
num_vectors: num_nodes,
num_queries: args.queries / 2,
k: 20,
qps,
latency_p50: latency_stats.percentile(0.50).as_secs_f64() * 1000.0,
latency_p95: latency_stats.percentile(0.95).as_secs_f64() * 1000.0,
latency_p99: latency_stats.percentile(0.99).as_secs_f64() * 1000.0,
latency_p999: latency_stats.percentile(0.999).as_secs_f64() * 1000.0,
recall_at_1: 1.0,
recall_at_10: 1.0,
recall_at_100: 1.0,
memory_mb,
build_time_secs: build_time.as_secs_f64(),
metadata: HashMap::new(),
})
}
/// Mixed read/write "learning session" benchmark: ~70% inserts, ~30%
/// top-10 searches over 512-dimensional synthetic vectors.
///
/// `qps` here is overall operation throughput (reads + writes together),
/// and `build_time_secs` covers the whole session; latency percentiles
/// are computed from the read side only.
fn bench_learning_session(args: &Args) -> Result<BenchmarkResult> {
    println!("Simulating mixed-workload learning session...");
    let dimensions = 512;
    let num_items = args.episodes;
    let temp_dir = tempfile::tempdir()?;
    let db_path = temp_dir.path().join("learning.db");
    let options = DbOptions {
        dimensions,
        distance_metric: DistanceMetric::Cosine,
        storage_path: db_path.to_str().unwrap().to_string(),
        hnsw_config: Some(HnswConfig::default()),
        quantization: Some(QuantizationConfig::Scalar),
    };
    let mem_profiler = MemoryProfiler::new();
    let build_start = Instant::now();
    let db = VectorDB::new(options)?;
    let gen = DatasetGenerator::new(
        dimensions,
        VectorDistribution::Normal {
            mean: 0.0,
            std_dev: 1.0,
        },
    );
    println!("Running learning session with mixed read/write...");
    let mut latency_stats = LatencyStats::new()?;
    let pb = create_progress_bar(num_items as u64, "Processing");
    let mut write_count = 0;
    let mut read_count = 0;
    // Reuse a single thread-local RNG handle instead of re-acquiring it
    // on every iteration of the hot loop.
    let mut rng = rand::thread_rng();
    for i in 0..num_items {
        // 70% writes, 30% reads (typical learning scenario)
        if rng.gen_bool(0.7) {
            let entry = VectorEntry {
                id: Some(format!("item_{}", i)),
                vector: gen.generate(1).into_iter().next().unwrap(),
                metadata: Some(
                    vec![("timestamp".to_string(), json!(i))]
                        .into_iter()
                        .collect(),
                ),
            };
            db.insert(entry)?;
            write_count += 1;
        } else {
            let query = gen.generate(1).into_iter().next().unwrap();
            let query_start = Instant::now();
            db.search(SearchQuery {
                vector: query,
                k: 10,
                filter: None,
                ef_search: None,
            })?;
            latency_stats.record(query_start.elapsed())?;
            read_count += 1;
        }
        pb.inc(1);
    }
    pb.finish_with_message("✓ Learning session complete");
    let build_time = build_start.elapsed();
    let memory_mb = mem_profiler.current_usage_mb();
    let throughput = num_items as f64 / build_time.as_secs_f64();
    Ok(BenchmarkResult {
        name: "learning_session".to_string(),
        dataset: "mixed_workload".to_string(),
        dimensions,
        num_vectors: write_count,
        num_queries: read_count,
        k: 10,
        qps: throughput,
        latency_p50: latency_stats.percentile(0.50).as_secs_f64() * 1000.0,
        latency_p95: latency_stats.percentile(0.95).as_secs_f64() * 1000.0,
        latency_p99: latency_stats.percentile(0.99).as_secs_f64() * 1000.0,
        latency_p999: latency_stats.percentile(0.999).as_secs_f64() * 1000.0,
        recall_at_1: 1.0,
        recall_at_10: 1.0,
        recall_at_100: 1.0,
        memory_mb,
        build_time_secs: build_time.as_secs_f64(),
        metadata: vec![
            ("writes".to_string(), write_count.to_string()),
            ("reads".to_string(), read_count.to_string()),
        ]
        .into_iter()
        .collect(),
    })
}
/// Render the collected workload results as an ASCII table on stdout.
fn print_summary(results: &[BenchmarkResult]) {
    use tabled::{Table, Tabled};
    // One pre-formatted display row per workload result.
    #[derive(Tabled)]
    struct Row {
        #[tabled(rename = "Workload")]
        name: String,
        #[tabled(rename = "Vectors")]
        vectors: String,
        #[tabled(rename = "Throughput")]
        qps: String,
        #[tabled(rename = "p50 (ms)")]
        p50: String,
        #[tabled(rename = "p99 (ms)")]
        p99: String,
        #[tabled(rename = "Memory (MB)")]
        memory: String,
    }
    let mut rows = Vec::with_capacity(results.len());
    for r in results {
        rows.push(Row {
            name: r.name.clone(),
            vectors: r.num_vectors.to_string(),
            qps: format!("{:.0} ops/s", r.qps),
            p50: format!("{:.2}", r.latency_p50),
            p99: format!("{:.2}", r.latency_p99),
            memory: format!("{:.1}", r.memory_mb),
        });
    }
    println!("\n\n{}", Table::new(rows));
}

View File

@@ -0,0 +1,400 @@
//! ANN-Benchmarks compatible benchmark suite
//!
//! Runs standard benchmarks on SIFT1M, GIST1M, and Deep1M datasets
//! compatible with http://ann-benchmarks.com format
use anyhow::{Context, Result};
use clap::Parser;
use ruvector_bench::{
calculate_recall, create_progress_bar, BenchmarkResult, DatasetGenerator, LatencyStats,
MemoryProfiler, ResultWriter, VectorDistribution,
};
use ruvector_core::{
types::{DbOptions, HnswConfig, QuantizationConfig},
DistanceMetric, SearchQuery, VectorDB, VectorEntry,
};
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Instant;
// CLI arguments for the ANN-Benchmarks suite. The `///` field comments
// double as clap-generated --help text.
#[derive(Parser)]
#[command(name = "ann-benchmark")]
#[command(about = "ANN-Benchmarks compatible testing")]
struct Args {
    /// Dataset to use: sift1m, gist1m, deep1m, or synthetic
    // No short flag here: the implicit `short` would assign `-d`, which
    // collides with the explicit `short = 'd'` on `dimensions` below and
    // makes clap panic (duplicate short option) when building the command.
    #[arg(long, default_value = "synthetic")]
    dataset: String,
    /// Number of vectors for synthetic dataset
    #[arg(short, long, default_value = "100000")]
    num_vectors: usize,
    /// Number of queries
    #[arg(short = 'q', long, default_value = "1000")]
    num_queries: usize,
    /// Vector dimensions (for synthetic)
    #[arg(short = 'd', long, default_value = "128")]
    dimensions: usize,
    /// K nearest neighbors to retrieve
    #[arg(short, long, default_value = "10")]
    k: usize,
    /// HNSW M parameter
    #[arg(short, long, default_value = "32")]
    m: usize,
    /// HNSW ef_construction
    #[arg(long, default_value = "200")]
    ef_construction: usize,
    /// HNSW ef_search values to test (comma-separated)
    #[arg(long, default_value = "50,100,200,400")]
    ef_search_values: String,
    /// Output directory for results
    #[arg(short, long, default_value = "bench_results")]
    output: PathBuf,
    /// Distance metric
    #[arg(long, default_value = "cosine")]
    metric: String,
    /// Quantization: none, scalar, binary
    #[arg(long, default_value = "scalar")]
    quantization: String,
}
/// Entry point: parse args, load (or synthesize) the dataset, sweep the
/// requested `ef_search` values, and persist JSON/CSV/Markdown reports.
fn main() -> Result<()> {
    let args = Args::parse();
    println!("╔════════════════════════════════════════╗");
    println!("║ Ruvector ANN-Benchmarks Suite ║");
    println!("╚════════════════════════════════════════╝\n");
    // Parse ef_search values; a malformed entry becomes a readable error
    // instead of a panic (the old `.parse().unwrap()` aborted on typos).
    let ef_search_values: Vec<usize> = args
        .ef_search_values
        .split(',')
        .map(|s| {
            s.trim()
                .parse::<usize>()
                .with_context(|| format!("invalid --ef-search-values entry: {:?}", s))
        })
        .collect::<Result<_>>()?;
    // Load or generate dataset
    let (vectors, queries, ground_truth) = load_dataset(&args)?;
    println!(
        "✓ Dataset loaded: {} vectors, {} queries",
        vectors.len(),
        queries.len()
    );
    let mut all_results = Vec::new();
    // Run benchmarks for each ef_search value
    for &ef_search in &ef_search_values {
        println!("\n{}", "=".repeat(60));
        println!("Testing with ef_search = {}", ef_search);
        println!("{}\n", "=".repeat(60));
        let result = run_benchmark(&args, &vectors, &queries, &ground_truth, ef_search)?;
        all_results.push(result);
    }
    // Write results
    let writer = ResultWriter::new(&args.output)?;
    writer.write_json("ann_benchmark", &all_results)?;
    writer.write_csv("ann_benchmark", &all_results)?;
    writer.write_markdown_report("ann_benchmark", &all_results)?;
    // Print summary table
    print_summary_table(&all_results);
    println!(
        "\n✓ Benchmark complete! Results saved to: {}",
        args.output.display()
    );
    Ok(())
}
/// Load the requested dataset, falling back to a generated synthetic one.
///
/// Returns `(base vectors, query vectors, ground-truth id lists)`.
fn load_dataset(args: &Args) -> Result<(Vec<Vec<f32>>, Vec<Vec<f32>>, Vec<Vec<String>>)> {
    match args.dataset.as_str() {
        "sift1m" => load_sift1m(),
        "gist1m" => load_gist1m(),
        "deep1m" => load_deep1m(),
        // "synthetic" and any unrecognized name both get a generated
        // dataset; the old `"synthetic" | _` arm was redundant
        // (clippy::wildcard_in_or_patterns).
        other => {
            println!("Generating synthetic {} dataset...", other);
            let gen = DatasetGenerator::new(
                args.dimensions,
                VectorDistribution::Normal {
                    mean: 0.0,
                    std_dev: 1.0,
                },
            );
            let pb = create_progress_bar(args.num_vectors as u64, "Generating vectors");
            let vectors: Vec<Vec<f32>> = (0..args.num_vectors)
                .map(|_| {
                    pb.inc(1);
                    gen.generate(1).into_iter().next().unwrap()
                })
                .collect();
            pb.finish_with_message("✓ Vectors generated");
            let queries = gen.generate(args.num_queries);
            // Generate ground truth using brute force
            let ground_truth = compute_ground_truth(&vectors, &queries, args.k)?;
            Ok((vectors, queries, ground_truth))
        }
    }
}
/// Fallback loader for SIFT1M: real HDF5 loading is not implemented yet,
/// so a small synthetic 128-dimensional stand-in dataset is produced.
fn load_sift1m() -> Result<(Vec<Vec<f32>>, Vec<Vec<f32>>, Vec<Vec<String>>)> {
    // TODO: Implement HDF5 loading when dataset is available
    println!("⚠ SIFT1M dataset not found, using synthetic data");
    println!(" Download SIFT1M with: scripts/download_datasets.sh");
    let distribution = VectorDistribution::Normal {
        mean: 0.0,
        std_dev: 1.0,
    };
    let gen = DatasetGenerator::new(128, distribution);
    let vectors = gen.generate(10000);
    let queries = gen.generate(100);
    let truth = compute_ground_truth(&vectors, &queries, 10)?;
    Ok((vectors, queries, truth))
}
/// Fallback loader for GIST1M: produces a synthetic 960-dimensional
/// stand-in dataset until real loading is wired up.
fn load_gist1m() -> Result<(Vec<Vec<f32>>, Vec<Vec<f32>>, Vec<Vec<String>>)> {
    println!("⚠ GIST1M dataset not found, using synthetic data");
    let distribution = VectorDistribution::Normal {
        mean: 0.0,
        std_dev: 1.0,
    };
    let gen = DatasetGenerator::new(960, distribution);
    let vectors = gen.generate(10000);
    let queries = gen.generate(100);
    let truth = compute_ground_truth(&vectors, &queries, 10)?;
    Ok((vectors, queries, truth))
}
/// Fallback loader for Deep1M: produces a synthetic 96-dimensional
/// stand-in dataset until real loading is wired up.
fn load_deep1m() -> Result<(Vec<Vec<f32>>, Vec<Vec<f32>>, Vec<Vec<String>>)> {
    println!("⚠ Deep1M dataset not found, using synthetic data");
    let distribution = VectorDistribution::Normal {
        mean: 0.0,
        std_dev: 1.0,
    };
    let gen = DatasetGenerator::new(96, distribution);
    let vectors = gen.generate(10000);
    let queries = gen.generate(100);
    let truth = compute_ground_truth(&vectors, &queries, 10)?;
    Ok((vectors, queries, truth))
}
/// Brute-force exact k-NN ground truth for `queries` over `vectors`.
///
/// Returns, for each query, the ids (stringified vector indices) of its
/// `k` nearest vectors under cosine distance, nearest first. The ids
/// must match the insertion ids used in `run_benchmark`.
fn compute_ground_truth(
    vectors: &[Vec<f32>],
    queries: &[Vec<f32>],
    k: usize,
) -> Result<Vec<Vec<String>>> {
    println!("Computing ground truth with brute force...");
    let pb = create_progress_bar(queries.len() as u64, "Computing ground truth");
    let ground_truth: Vec<Vec<String>> = queries
        .iter()
        .map(|query| {
            pb.inc(1);
            let mut distances: Vec<(usize, f32)> = vectors
                .iter()
                .enumerate()
                .map(|(idx, vec)| (idx, cosine_distance(query, vec)))
                .collect();
            // total_cmp gives a total order over f32, so a NaN distance
            // (e.g. from a degenerate vector) sorts last instead of
            // panicking the way `partial_cmp().unwrap()` did.
            distances.sort_by(|a, b| a.1.total_cmp(&b.1));
            distances
                .iter()
                .take(k)
                .map(|(idx, _)| idx.to_string())
                .collect()
        })
        .collect();
    pb.finish_with_message("✓ Ground truth computed");
    Ok(ground_truth)
}
/// Cosine distance in [0, 2]: 0 = same direction, 1 = orthogonal,
/// 2 = opposite.
///
/// A zero-norm input would divide by zero and yield NaN, which poisons
/// downstream sorting; such degenerate pairs are mapped to 1.0
/// (maximally-dissimilar-but-finite) instead.
fn cosine_distance(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm_a == 0.0 || norm_b == 0.0 {
        return 1.0;
    }
    1.0 - (dot / (norm_a * norm_b))
}
/// Build a fresh HNSW index over `vectors`, then run every query at the
/// given `ef_search` and report throughput, latency percentiles, recall
/// against `ground_truth`, memory, and build time.
///
/// Each invocation uses its own temporary on-disk database, so repeated
/// `ef_search` sweeps do not share index state.
fn run_benchmark(
    args: &Args,
    vectors: &[Vec<f32>],
    queries: &[Vec<f32>],
    ground_truth: &[Vec<String>],
    ef_search: usize,
) -> Result<BenchmarkResult> {
    let temp_dir = tempfile::tempdir()?;
    let db_path = temp_dir.path().join("bench.db");
    // Parse distance metric (unrecognized names silently fall back to cosine)
    let distance_metric = match args.metric.as_str() {
        "cosine" => DistanceMetric::Cosine,
        "euclidean" => DistanceMetric::Euclidean,
        "dot" => DistanceMetric::DotProduct,
        _ => DistanceMetric::Cosine,
    };
    // Parse quantization (unrecognized names silently fall back to scalar)
    let quantization = match args.quantization.as_str() {
        "none" => QuantizationConfig::None,
        "scalar" => QuantizationConfig::Scalar,
        "binary" => QuantizationConfig::Binary,
        _ => QuantizationConfig::Scalar,
    };
    // NOTE(review): panics on an empty dataset — assumed non-empty upstream.
    let dimensions = vectors[0].len();
    let options = DbOptions {
        dimensions,
        distance_metric,
        storage_path: db_path.to_str().unwrap().to_string(),
        hnsw_config: Some(HnswConfig {
            m: args.m,
            ef_construction: args.ef_construction,
            ef_search,
            // 2x head-room over the dataset size for the index capacity
            max_elements: vectors.len() * 2,
        }),
        quantization: Some(quantization),
    };
    // Measure build time and memory
    let mem_profiler = MemoryProfiler::new();
    let build_start = Instant::now();
    let db = VectorDB::new(options)?;
    println!("Indexing {} vectors...", vectors.len());
    let pb = create_progress_bar(vectors.len() as u64, "Indexing");
    for (idx, vector) in vectors.iter().enumerate() {
        let entry = VectorEntry {
            // Ids are stringified indices so they line up with the
            // ground-truth ids from compute_ground_truth.
            id: Some(idx.to_string()),
            vector: vector.clone(),
            metadata: None,
        };
        db.insert(entry)?;
        pb.inc(1);
    }
    pb.finish_with_message("✓ Indexing complete");
    let build_time = build_start.elapsed();
    let memory_mb = mem_profiler.current_usage_mb();
    // Run search benchmark
    println!("Running {} queries...", queries.len());
    let mut latency_stats = LatencyStats::new()?;
    let mut search_results = Vec::new();
    let search_start = Instant::now();
    let pb = create_progress_bar(queries.len() as u64, "Searching");
    for query in queries {
        // Per-query latency feeds the percentile stats; the outer
        // `search_start` window feeds aggregate QPS below.
        let query_start = Instant::now();
        let results = db.search(SearchQuery {
            vector: query.clone(),
            k: args.k,
            filter: None,
            ef_search: Some(ef_search),
        })?;
        latency_stats.record(query_start.elapsed())?;
        let result_ids: Vec<String> = results.into_iter().map(|r| r.id).collect();
        search_results.push(result_ids);
        pb.inc(1);
    }
    pb.finish_with_message("✓ Search complete");
    let total_search_time = search_start.elapsed();
    let qps = queries.len() as f64 / total_search_time.as_secs_f64();
    // Calculate recall — cutoffs are capped at k, since only k results
    // were retrieved per query.
    let recall_1 = calculate_recall(&search_results, ground_truth, 1);
    let recall_10 = calculate_recall(&search_results, ground_truth, 10.min(args.k));
    let recall_100 = calculate_recall(&search_results, ground_truth, 100.min(args.k));
    let mut metadata = HashMap::new();
    metadata.insert("m".to_string(), args.m.to_string());
    metadata.insert(
        "ef_construction".to_string(),
        args.ef_construction.to_string(),
    );
    metadata.insert("ef_search".to_string(), ef_search.to_string());
    metadata.insert("metric".to_string(), args.metric.clone());
    metadata.insert("quantization".to_string(), args.quantization.clone());
    Ok(BenchmarkResult {
        name: format!("ruvector-ef{}", ef_search),
        dataset: args.dataset.clone(),
        dimensions,
        num_vectors: vectors.len(),
        num_queries: queries.len(),
        k: args.k,
        qps,
        latency_p50: latency_stats.percentile(0.50).as_secs_f64() * 1000.0,
        latency_p95: latency_stats.percentile(0.95).as_secs_f64() * 1000.0,
        latency_p99: latency_stats.percentile(0.99).as_secs_f64() * 1000.0,
        latency_p999: latency_stats.percentile(0.999).as_secs_f64() * 1000.0,
        recall_at_1: recall_1,
        recall_at_10: recall_10,
        recall_at_100: recall_100,
        memory_mb,
        build_time_secs: build_time.as_secs_f64(),
        metadata,
    })
}
/// Print a per-ef_search comparison table (QPS, latency, recall, memory).
fn print_summary_table(results: &[BenchmarkResult]) {
    use tabled::{Table, Tabled};
    // Pre-formatted display row; one per ef_search sweep.
    #[derive(Tabled)]
    struct Row {
        #[tabled(rename = "ef_search")]
        ef_search: String,
        #[tabled(rename = "QPS")]
        qps: String,
        #[tabled(rename = "p50 (ms)")]
        p50: String,
        #[tabled(rename = "p99 (ms)")]
        p99: String,
        #[tabled(rename = "Recall@10")]
        recall: String,
        #[tabled(rename = "Memory (MB)")]
        memory: String,
    }
    let mut rows = Vec::with_capacity(results.len());
    for r in results {
        rows.push(Row {
            // run_benchmark always records this key, hence the unwrap.
            ef_search: r.metadata.get("ef_search").unwrap().clone(),
            qps: format!("{:.0}", r.qps),
            p50: format!("{:.2}", r.latency_p50),
            p99: format!("{:.2}", r.latency_p99),
            recall: format!("{:.2}%", r.recall_at_10 * 100.0),
            memory: format!("{:.1}", r.memory_mb),
        });
    }
    println!("\n\n{}", Table::new(rows));
}

View File

@@ -0,0 +1,386 @@
//! Cross-system performance comparison benchmark
//!
//! Compares Ruvector against:
//! - Pure Python implementations (simulated)
//! - Other vector databases (placeholder for future integration)
//!
//! Documents performance improvements (target: 10-100x)
use anyhow::Result;
use clap::Parser;
use ruvector_bench::{
create_progress_bar, BenchmarkResult, DatasetGenerator, LatencyStats, MemoryProfiler,
ResultWriter, VectorDistribution,
};
use ruvector_core::types::{DbOptions, HnswConfig, QuantizationConfig};
use ruvector_core::{DistanceMetric, SearchQuery, VectorDB, VectorEntry};
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Instant;
// CLI arguments for the cross-system comparison benchmark.
// NOTE: the `///` field comments double as clap-generated --help text,
// so they are left byte-for-byte untouched.
#[derive(Parser)]
#[command(name = "comparison-benchmark")]
#[command(about = "Cross-system performance comparison")]
struct Args {
    /// Number of vectors
    #[arg(short, long, default_value = "50000")]
    num_vectors: usize,
    /// Number of queries
    #[arg(short, long, default_value = "1000")]
    queries: usize,
    /// Vector dimensions
    #[arg(short, long, default_value = "384")]
    dimensions: usize,
    /// Output directory
    #[arg(short, long, default_value = "bench_results")]
    output: PathBuf,
}
fn main() -> Result<()> {
let args = Args::parse();
println!("╔════════════════════════════════════════╗");
println!("║ Ruvector Comparison Benchmark ║");
println!("╚════════════════════════════════════════╝\n");
let mut all_results = Vec::new();
// Test 1: Ruvector (optimized)
println!("\n{}", "=".repeat(60));
println!("Test 1: Ruvector (SIMD + Quantization + HNSW)");
println!("{}\n", "=".repeat(60));
let result = bench_ruvector_optimized(&args)?;
all_results.push(result);
// Test 2: Ruvector (no quantization)
println!("\n{}", "=".repeat(60));
println!("Test 2: Ruvector (No Quantization)");
println!("{}\n", "=".repeat(60));
let result = bench_ruvector_no_quant(&args)?;
all_results.push(result);
// Test 3: Simulated Python baseline
println!("\n{}", "=".repeat(60));
println!("Test 3: Simulated Python Baseline");
println!("{}\n", "=".repeat(60));
let result = simulate_python_baseline(&args)?;
all_results.push(result);
// Test 4: Simulated naive brute-force
println!("\n{}", "=".repeat(60));
println!("Test 4: Simulated Brute-Force Search");
println!("{}\n", "=".repeat(60));
let result = simulate_brute_force(&args)?;
all_results.push(result);
// Write results
let writer = ResultWriter::new(&args.output)?;
writer.write_json("comparison_benchmark", &all_results)?;
writer.write_csv("comparison_benchmark", &all_results)?;
writer.write_markdown_report("comparison_benchmark", &all_results)?;
print_comparison_table(&all_results);
println!(
"\n✓ Comparison benchmark complete! Results saved to: {}",
args.output.display()
);
Ok(())
}
fn bench_ruvector_optimized(args: &Args) -> Result<BenchmarkResult> {
let (db, queries) = setup_ruvector(args, QuantizationConfig::Scalar)?;
println!("Running {} queries...", queries.len());
let mut latency_stats = LatencyStats::new()?;
let pb = create_progress_bar(queries.len() as u64, "Searching");
let search_start = Instant::now();
for query in &queries {
let query_start = Instant::now();
db.search(SearchQuery {
vector: query.clone(),
k: 10,
filter: None,
ef_search: None,
})?;
latency_stats.record(query_start.elapsed())?;
pb.inc(1);
}
pb.finish_with_message("✓ Search complete");
let total_time = search_start.elapsed();
let qps = queries.len() as f64 / total_time.as_secs_f64();
Ok(BenchmarkResult {
name: "ruvector_optimized".to_string(),
dataset: "synthetic".to_string(),
dimensions: args.dimensions,
num_vectors: args.num_vectors,
num_queries: queries.len(),
k: 10,
qps,
latency_p50: latency_stats.percentile(0.50).as_secs_f64() * 1000.0,
latency_p95: latency_stats.percentile(0.95).as_secs_f64() * 1000.0,
latency_p99: latency_stats.percentile(0.99).as_secs_f64() * 1000.0,
latency_p999: latency_stats.percentile(0.999).as_secs_f64() * 1000.0,
recall_at_1: 1.0,
recall_at_10: 1.0,
recall_at_100: 1.0,
memory_mb: 0.0,
build_time_secs: 0.0,
metadata: vec![("system".to_string(), "ruvector".to_string())]
.into_iter()
.collect(),
})
}
fn bench_ruvector_no_quant(args: &Args) -> Result<BenchmarkResult> {
let (db, queries) = setup_ruvector(args, QuantizationConfig::None)?;
println!("Running {} queries...", queries.len());
let mut latency_stats = LatencyStats::new()?;
let pb = create_progress_bar(queries.len() as u64, "Searching");
let search_start = Instant::now();
for query in &queries {
let query_start = Instant::now();
db.search(SearchQuery {
vector: query.clone(),
k: 10,
filter: None,
ef_search: None,
})?;
latency_stats.record(query_start.elapsed())?;
pb.inc(1);
}
pb.finish_with_message("✓ Search complete");
let total_time = search_start.elapsed();
let qps = queries.len() as f64 / total_time.as_secs_f64();
Ok(BenchmarkResult {
name: "ruvector_no_quant".to_string(),
dataset: "synthetic".to_string(),
dimensions: args.dimensions,
num_vectors: args.num_vectors,
num_queries: queries.len(),
k: 10,
qps,
latency_p50: latency_stats.percentile(0.50).as_secs_f64() * 1000.0,
latency_p95: latency_stats.percentile(0.95).as_secs_f64() * 1000.0,
latency_p99: latency_stats.percentile(0.99).as_secs_f64() * 1000.0,
latency_p999: latency_stats.percentile(0.999).as_secs_f64() * 1000.0,
recall_at_1: 1.0,
recall_at_10: 1.0,
recall_at_100: 1.0,
memory_mb: 0.0,
build_time_secs: 0.0,
metadata: vec![("system".to_string(), "ruvector_no_quant".to_string())]
.into_iter()
.collect(),
})
}
/// Estimate a Python/numpy baseline by running the real Ruvector search
/// and inflating every measured latency by a fixed 15x factor.
///
/// NOTE(review): the 15x factor is an assumption, not a measurement of
/// an actual Python implementation; results are tagged
/// `simulated: "true"` in the metadata so readers can tell.
fn simulate_python_baseline(args: &Args) -> Result<BenchmarkResult> {
    // Simulate Python numpy-based implementation
    // Estimated to be 10-20x slower based on typical Rust vs Python performance
    let (db, queries) = setup_ruvector(args, QuantizationConfig::Scalar)?;
    println!("Simulating Python baseline (estimated)...");
    let mut latency_stats = LatencyStats::new()?;
    let search_start = Instant::now();
    for query in &queries {
        let query_start = Instant::now();
        db.search(SearchQuery {
            vector: query.clone(),
            k: 10,
            filter: None,
            ef_search: None,
        })?;
        let rust_latency = query_start.elapsed();
        // Simulate Python being 15x slower
        let simulated_latency = rust_latency * 15;
        latency_stats.record(simulated_latency)?;
    }
    let total_time = search_start.elapsed() * 15; // Simulate slower execution
    let qps = queries.len() as f64 / total_time.as_secs_f64();
    println!(" (Estimated based on 15x slowdown factor)");
    Ok(BenchmarkResult {
        name: "python_baseline".to_string(),
        dataset: "synthetic".to_string(),
        dimensions: args.dimensions,
        num_vectors: args.num_vectors,
        num_queries: queries.len(),
        k: 10,
        qps,
        latency_p50: latency_stats.percentile(0.50).as_secs_f64() * 1000.0,
        latency_p95: latency_stats.percentile(0.95).as_secs_f64() * 1000.0,
        latency_p99: latency_stats.percentile(0.99).as_secs_f64() * 1000.0,
        latency_p999: latency_stats.percentile(0.999).as_secs_f64() * 1000.0,
        recall_at_1: 1.0,
        recall_at_10: 1.0,
        recall_at_100: 1.0,
        memory_mb: 0.0,
        build_time_secs: 0.0,
        metadata: vec![
            ("system".to_string(), "python_numpy".to_string()),
            ("simulated".to_string(), "true".to_string()),
        ]
        .into_iter()
        .collect(),
    })
}
/// Estimate naive O(n) brute-force search by running the real HNSW
/// search and scaling each measured latency by sqrt(num_vectors), a
/// rough stand-in for the O(log n) vs O(n) ratio.
///
/// Results are tagged `simulated: "true"` with the factor recorded in
/// the metadata.
fn simulate_brute_force(args: &Args) -> Result<BenchmarkResult> {
    // Simulate naive brute-force O(n) search
    // For HNSW with 50K vectors, brute force would be ~500x slower
    let (db, queries) = setup_ruvector(args, QuantizationConfig::Scalar)?;
    println!("Simulating brute-force search (estimated)...");
    let mut latency_stats = LatencyStats::new()?;
    // Rough O(log n) vs O(n) ratio; clamped to >= 1 so a tiny
    // --num-vectors value cannot truncate to 0, which would zero every
    // simulated latency and make QPS infinite.
    let slowdown_factor = ((args.num_vectors as f64).sqrt() as u32).max(1);
    let search_start = Instant::now();
    for query in &queries {
        let query_start = Instant::now();
        db.search(SearchQuery {
            vector: query.clone(),
            k: 10,
            filter: None,
            ef_search: None,
        })?;
        let hnsw_latency = query_start.elapsed();
        // Simulate brute force being much slower
        let simulated_latency = hnsw_latency * slowdown_factor;
        latency_stats.record(simulated_latency)?;
    }
    let total_time = search_start.elapsed() * slowdown_factor;
    let qps = queries.len() as f64 / total_time.as_secs_f64();
    println!(" (Estimated with {}x slowdown factor)", slowdown_factor);
    Ok(BenchmarkResult {
        name: "brute_force".to_string(),
        dataset: "synthetic".to_string(),
        dimensions: args.dimensions,
        num_vectors: args.num_vectors,
        num_queries: queries.len(),
        k: 10,
        qps,
        latency_p50: latency_stats.percentile(0.50).as_secs_f64() * 1000.0,
        latency_p95: latency_stats.percentile(0.95).as_secs_f64() * 1000.0,
        latency_p99: latency_stats.percentile(0.99).as_secs_f64() * 1000.0,
        latency_p999: latency_stats.percentile(0.999).as_secs_f64() * 1000.0,
        recall_at_1: 1.0,
        recall_at_10: 1.0,
        recall_at_100: 1.0,
        memory_mb: 0.0,
        build_time_secs: 0.0,
        metadata: vec![
            ("system".to_string(), "brute_force".to_string()),
            ("simulated".to_string(), "true".to_string()),
            ("slowdown_factor".to_string(), slowdown_factor.to_string()),
        ]
        .into_iter()
        .collect(),
    })
}
/// Build a temporary cosine-distance VectorDB with the requested
/// quantization, populate it with normally-distributed random vectors, and
/// return it together with a batch of query vectors drawn from the same
/// distribution.
fn setup_ruvector(
    args: &Args,
    quantization: QuantizationConfig,
) -> Result<(VectorDB, Vec<Vec<f32>>)> {
    // NOTE(review): temp_dir is dropped when this function returns, removing
    // the directory while the DB handle lives on — presumably the DB keeps
    // open handles or in-memory state; confirm.
    let temp_dir = tempfile::tempdir()?;
    let storage_path = temp_dir
        .path()
        .join("comparison.db")
        .to_str()
        .unwrap()
        .to_string();
    let db = VectorDB::new(DbOptions {
        dimensions: args.dimensions,
        distance_metric: DistanceMetric::Cosine,
        storage_path,
        hnsw_config: Some(HnswConfig::default()),
        quantization: Some(quantization),
    })?;
    let generator = DatasetGenerator::new(
        args.dimensions,
        VectorDistribution::Normal {
            mean: 0.0,
            std_dev: 1.0,
        },
    );
    println!("Indexing {} vectors...", args.num_vectors);
    let progress = create_progress_bar(args.num_vectors as u64, "Indexing");
    for id in 0..args.num_vectors {
        db.insert(VectorEntry {
            id: Some(id.to_string()),
            vector: generator.generate(1).into_iter().next().unwrap(),
            metadata: None,
        })?;
        progress.inc(1);
    }
    progress.finish_with_message("✓ Indexing complete");
    let queries = generator.generate(args.queries);
    Ok((db, queries))
}
/// Render the cross-system comparison as a table, normalising each system's
/// QPS against the simulated Python baseline to produce a speedup column.
fn print_comparison_table(results: &[BenchmarkResult]) {
    use tabled::{Table, Tabled};
    #[derive(Tabled)]
    struct ResultRow {
        #[tabled(rename = "System")]
        name: String,
        #[tabled(rename = "QPS")]
        qps: String,
        #[tabled(rename = "p50 (ms)")]
        p50: String,
        #[tabled(rename = "p99 (ms)")]
        p99: String,
        #[tabled(rename = "Speedup")]
        speedup: String,
    }
    // Fall back to 1.0 so the table still renders when no baseline row exists.
    let baseline_qps = results
        .iter()
        .find(|r| r.name == "python_baseline")
        .map_or(1.0, |r| r.qps);
    let mut rows = Vec::with_capacity(results.len());
    for result in results {
        rows.push(ResultRow {
            name: result.name.clone(),
            qps: format!("{:.0}", result.qps),
            p50: format!("{:.2}", result.latency_p50),
            p99: format!("{:.2}", result.latency_p99),
            speedup: format!("{:.1}x", result.qps / baseline_qps),
        });
    }
    println!("\n\n{}", Table::new(rows));
    println!("\nNote: Python and brute-force results are simulated estimates.");
}

View File

@@ -0,0 +1,411 @@
//! Latency profiling benchmark
//!
//! Measures p50, p95, p99, p99.9 latencies under various conditions:
//! - Single-threaded vs multi-threaded
//! - Effect of efSearch on latency
//! - Effect of quantization on latency/recall tradeoff
use anyhow::Result;
use clap::Parser;
use rayon::prelude::*;
use ruvector_bench::{
create_progress_bar, BenchmarkResult, DatasetGenerator, LatencyStats, MemoryProfiler,
ResultWriter, VectorDistribution,
};
use ruvector_core::{
types::{DbOptions, HnswConfig, QuantizationConfig},
DistanceMetric, SearchQuery, VectorDB, VectorEntry,
};
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Instant;
// CLI arguments for the latency benchmark binary. The field-level `///`
// comments double as clap help text (runtime-visible strings), so they are
// intentionally left untouched; use `//` comments for reviewer notes here.
#[derive(Parser)]
#[command(name = "latency-benchmark")]
#[command(about = "Latency profiling across different conditions")]
struct Args {
    /// Number of vectors
    #[arg(short, long, default_value = "50000")]
    num_vectors: usize,
    /// Number of queries
    #[arg(short, long, default_value = "1000")]
    queries: usize,
    /// Vector dimensions
    #[arg(short, long, default_value = "384")]
    dimensions: usize,
    // Parsed into Vec<usize> in main(); comma-separated, e.g. "1,4,8,16".
    /// Number of parallel threads to test
    #[arg(short, long, default_value = "1,4,8,16")]
    threads: String,
    /// Output directory
    #[arg(short, long, default_value = "bench_results")]
    output: PathBuf,
}
/// Entry point: runs the four latency test suites (single-threaded,
/// multi-threaded sweep, efSearch sweep, quantization sweep) and writes
/// JSON/CSV/Markdown reports plus a console summary table.
fn main() -> Result<()> {
    let args = Args::parse();
    println!("╔════════════════════════════════════════╗");
    println!("║      Ruvector Latency Profiling        ║");
    println!("╚════════════════════════════════════════╝\n");
    let mut all_results = Vec::new();
    // Test 1: Single-threaded latency
    println!("\n{}", "=".repeat(60));
    println!("Test 1: Single-threaded Latency");
    println!("{}\n", "=".repeat(60));
    let result = bench_single_threaded(&args)?;
    all_results.push(result);
    // Test 2: Multi-threaded latency
    // NOTE(review): parse().unwrap() panics on a malformed --threads value;
    // consider validating via clap instead of panicking here.
    let thread_counts: Vec<usize> = args
        .threads
        .split(',')
        .map(|s| s.trim().parse().unwrap())
        .collect();
    // NOTE(review): if bench_multi_threaded configures the *global* rayon
    // pool, only the first thread count in this loop takes effect — verify
    // it uses a per-run pool.
    for &num_threads in &thread_counts {
        println!("\n{}", "=".repeat(60));
        println!("Test 2: Multi-threaded Latency ({} threads)", num_threads);
        println!("{}\n", "=".repeat(60));
        let result = bench_multi_threaded(&args, num_threads)?;
        all_results.push(result);
    }
    // Test 3: Effect of efSearch
    println!("\n{}", "=".repeat(60));
    println!("Test 3: Effect of efSearch on Latency");
    println!("{}\n", "=".repeat(60));
    let result = bench_ef_search_latency(&args)?;
    all_results.extend(result);
    // Test 4: Effect of quantization
    println!("\n{}", "=".repeat(60));
    println!("Test 4: Effect of Quantization on Latency");
    println!("{}\n", "=".repeat(60));
    let result = bench_quantization_latency(&args)?;
    all_results.extend(result);
    // Write results
    let writer = ResultWriter::new(&args.output)?;
    writer.write_json("latency_benchmark", &all_results)?;
    writer.write_csv("latency_benchmark", &all_results)?;
    writer.write_markdown_report("latency_benchmark", &all_results)?;
    print_summary(&all_results);
    println!(
        "\n✓ Latency benchmark complete! Results saved to: {}",
        args.output.display()
    );
    Ok(())
}
/// Run all queries sequentially on one thread, recording per-query latency
/// in an HDR histogram. QPS is derived from total wall time, so it includes
/// progress-bar overhead between queries.
fn bench_single_threaded(args: &Args) -> Result<BenchmarkResult> {
    let (db, queries) = setup_database(args, QuantizationConfig::Scalar)?;
    println!("Running {} queries (single-threaded)...", queries.len());
    let mut latency_stats = LatencyStats::new()?;
    let pb = create_progress_bar(queries.len() as u64, "Searching");
    let search_start = Instant::now();
    for query in &queries {
        let query_start = Instant::now();
        db.search(SearchQuery {
            vector: query.clone(),
            k: 10,
            filter: None,
            ef_search: None,
        })?;
        latency_stats.record(query_start.elapsed())?;
        pb.inc(1);
    }
    pb.finish_with_message("✓ Search complete");
    let total_time = search_start.elapsed();
    let qps = queries.len() as f64 / total_time.as_secs_f64();
    Ok(BenchmarkResult {
        name: "single_threaded".to_string(),
        dataset: "synthetic".to_string(),
        dimensions: args.dimensions,
        num_vectors: args.num_vectors,
        num_queries: queries.len(),
        k: 10,
        qps,
        latency_p50: latency_stats.percentile(0.50).as_secs_f64() * 1000.0,
        latency_p95: latency_stats.percentile(0.95).as_secs_f64() * 1000.0,
        latency_p99: latency_stats.percentile(0.99).as_secs_f64() * 1000.0,
        latency_p999: latency_stats.percentile(0.999).as_secs_f64() * 1000.0,
        // Recall fields are placeholders — this benchmark does not compute
        // ground truth and never measures recall.
        recall_at_1: 1.0,
        recall_at_10: 1.0,
        recall_at_100: 1.0,
        memory_mb: 0.0,
        build_time_secs: 0.0,
        metadata: HashMap::new(),
    })
}
/// Measure search latency with exactly `num_threads` rayon worker threads.
///
/// Builds a dedicated thread pool per invocation instead of calling
/// `ThreadPoolBuilder::build_global()`: the global pool can only be
/// initialized once per process, so repeated calls with different thread
/// counts silently kept the first configuration (`.ok()` swallowed the
/// error) and every subsequent run measured the wrong parallelism.
/// Search errors inside workers are still ignored (`.ok()`), matching the
/// original behavior of timing the attempt rather than the result.
fn bench_multi_threaded(args: &Args, num_threads: usize) -> Result<BenchmarkResult> {
    let (db, queries) = setup_database(args, QuantizationConfig::Scalar)?;
    let db = Arc::new(db);
    println!(
        "Running {} queries ({} threads)...",
        queries.len(),
        num_threads
    );
    // A local pool guarantees the requested thread count actually applies.
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(num_threads)
        .build()?;
    let search_start = Instant::now();
    // Per-query latencies in milliseconds, gathered in parallel.
    let latencies: Vec<f64> = pool.install(|| {
        queries
            .par_iter()
            .map(|query| {
                let query_start = Instant::now();
                db.search(SearchQuery {
                    vector: query.clone(),
                    k: 10,
                    filter: None,
                    ef_search: None,
                })
                .ok();
                query_start.elapsed().as_secs_f64() * 1000.0
            })
            .collect()
    });
    let total_time = search_start.elapsed();
    let qps = queries.len() as f64 / total_time.as_secs_f64();
    // Percentiles from the sorted latency list. The index is clamped so
    // e.g. p99.9 cannot run past the end for small query counts.
    let mut sorted_latencies = latencies;
    sorted_latencies.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let pct = |p: f64| {
        let idx = ((sorted_latencies.len() as f64 * p) as usize)
            .min(sorted_latencies.len().saturating_sub(1));
        sorted_latencies[idx]
    };
    Ok(BenchmarkResult {
        name: format!("multi_threaded_{}", num_threads),
        dataset: "synthetic".to_string(),
        dimensions: args.dimensions,
        num_vectors: args.num_vectors,
        num_queries: queries.len(),
        k: 10,
        qps,
        latency_p50: pct(0.50),
        latency_p95: pct(0.95),
        latency_p99: pct(0.99),
        latency_p999: pct(0.999),
        // Recall fields are placeholders — recall is not measured here.
        recall_at_1: 1.0,
        recall_at_10: 1.0,
        recall_at_100: 1.0,
        memory_mb: 0.0,
        build_time_secs: 0.0,
        metadata: vec![("threads".to_string(), num_threads.to_string())]
        .into_iter()
        .collect(),
    })
}
/// Sweep `ef_search` values against a single shared index.
///
/// The database is built once and reused for every `ef_search` setting:
/// `ef_search` is a query-time parameter, so the previous per-setting
/// rebuild both wasted index-construction time and re-randomized the data,
/// making the latency curves incomparable across settings.
fn bench_ef_search_latency(args: &Args) -> Result<Vec<BenchmarkResult>> {
    let ef_values = vec![50, 100, 200, 400, 800];
    // One index, one query set, shared by all ef_search settings.
    let (db, queries) = setup_database(args, QuantizationConfig::Scalar)?;
    let mut results = Vec::new();
    for ef_search in ef_values {
        println!("Testing efSearch = {}...", ef_search);
        let mut latency_stats = LatencyStats::new()?;
        let pb = create_progress_bar(queries.len() as u64, &format!("ef={}", ef_search));
        let search_start = Instant::now();
        for query in &queries {
            let query_start = Instant::now();
            db.search(SearchQuery {
                vector: query.clone(),
                k: 10,
                filter: None,
                ef_search: Some(ef_search),
            })?;
            latency_stats.record(query_start.elapsed())?;
            pb.inc(1);
        }
        pb.finish_with_message(format!("✓ ef={} complete", ef_search));
        let total_time = search_start.elapsed();
        let qps = queries.len() as f64 / total_time.as_secs_f64();
        results.push(BenchmarkResult {
            name: format!("ef_search_{}", ef_search),
            dataset: "synthetic".to_string(),
            dimensions: args.dimensions,
            num_vectors: args.num_vectors,
            num_queries: queries.len(),
            k: 10,
            qps,
            latency_p50: latency_stats.percentile(0.50).as_secs_f64() * 1000.0,
            latency_p95: latency_stats.percentile(0.95).as_secs_f64() * 1000.0,
            latency_p99: latency_stats.percentile(0.99).as_secs_f64() * 1000.0,
            latency_p999: latency_stats.percentile(0.999).as_secs_f64() * 1000.0,
            // Recall fields are placeholders — recall is not measured here.
            recall_at_1: 1.0,
            recall_at_10: 1.0,
            recall_at_100: 1.0,
            memory_mb: 0.0,
            build_time_secs: 0.0,
            metadata: vec![("ef_search".to_string(), ef_search.to_string())]
            .into_iter()
            .collect(),
        });
    }
    Ok(results)
}
/// Compare search latency across quantization modes (none/scalar/binary).
///
/// Unlike `ef_search`, quantization is an index-build parameter, so a fresh
/// database is (correctly) rebuilt for every configuration.
fn bench_quantization_latency(args: &Args) -> Result<Vec<BenchmarkResult>> {
    let quantizations = vec![
        ("none", QuantizationConfig::None),
        ("scalar", QuantizationConfig::Scalar),
        ("binary", QuantizationConfig::Binary),
    ];
    let mut results = Vec::new();
    for (name, quant_config) in quantizations {
        println!("Testing quantization: {}...", name);
        // Rebuild per mode: quantization changes how vectors are stored.
        let (db, queries) = setup_database(args, quant_config)?;
        let mut latency_stats = LatencyStats::new()?;
        let pb = create_progress_bar(queries.len() as u64, &format!("quant={}", name));
        let search_start = Instant::now();
        for query in &queries {
            let query_start = Instant::now();
            db.search(SearchQuery {
                vector: query.clone(),
                k: 10,
                filter: None,
                ef_search: None,
            })?;
            latency_stats.record(query_start.elapsed())?;
            pb.inc(1);
        }
        pb.finish_with_message(format!("{} complete", name));
        let total_time = search_start.elapsed();
        let qps = queries.len() as f64 / total_time.as_secs_f64();
        results.push(BenchmarkResult {
            name: format!("quantization_{}", name),
            dataset: "synthetic".to_string(),
            dimensions: args.dimensions,
            num_vectors: args.num_vectors,
            num_queries: queries.len(),
            k: 10,
            qps,
            latency_p50: latency_stats.percentile(0.50).as_secs_f64() * 1000.0,
            latency_p95: latency_stats.percentile(0.95).as_secs_f64() * 1000.0,
            latency_p99: latency_stats.percentile(0.99).as_secs_f64() * 1000.0,
            latency_p999: latency_stats.percentile(0.999).as_secs_f64() * 1000.0,
            // Recall fields are placeholders — recall is not measured here,
            // even though quantization can actually affect it.
            recall_at_1: 1.0,
            recall_at_10: 1.0,
            recall_at_100: 1.0,
            memory_mb: 0.0,
            build_time_secs: 0.0,
            metadata: vec![("quantization".to_string(), name.to_string())]
            .into_iter()
            .collect(),
        });
    }
    Ok(results)
}
/// Build a temporary cosine-distance VectorDB with the given quantization,
/// fill it with normally-distributed random vectors, and return it together
/// with a batch of query vectors from the same distribution.
fn setup_database(
    args: &Args,
    quantization: QuantizationConfig,
) -> Result<(VectorDB, Vec<Vec<f32>>)> {
    // NOTE(review): temp_dir is dropped on return, deleting the directory
    // while the DB handle lives on — presumably safe via open handles or
    // in-memory state; confirm.
    let temp_dir = tempfile::tempdir()?;
    let storage_path = temp_dir
        .path()
        .join("latency.db")
        .to_str()
        .unwrap()
        .to_string();
    let db = VectorDB::new(DbOptions {
        dimensions: args.dimensions,
        distance_metric: DistanceMetric::Cosine,
        storage_path,
        hnsw_config: Some(HnswConfig::default()),
        quantization: Some(quantization),
    })?;
    let generator = DatasetGenerator::new(
        args.dimensions,
        VectorDistribution::Normal {
            mean: 0.0,
            std_dev: 1.0,
        },
    );
    println!("Indexing {} vectors...", args.num_vectors);
    let progress = create_progress_bar(args.num_vectors as u64, "Indexing");
    for id in 0..args.num_vectors {
        db.insert(VectorEntry {
            id: Some(id.to_string()),
            vector: generator.generate(1).into_iter().next().unwrap(),
            metadata: None,
        })?;
        progress.inc(1);
    }
    progress.finish_with_message("✓ Indexing complete");
    // Query vectors come from the same distribution as the indexed data.
    let queries = generator.generate(args.queries);
    Ok((db, queries))
}
/// Print a formatted table of every latency benchmark result.
fn print_summary(results: &[BenchmarkResult]) {
    use tabled::{Table, Tabled};
    #[derive(Tabled)]
    struct ResultRow {
        #[tabled(rename = "Configuration")]
        name: String,
        #[tabled(rename = "QPS")]
        qps: String,
        #[tabled(rename = "p50 (ms)")]
        p50: String,
        #[tabled(rename = "p95 (ms)")]
        p95: String,
        #[tabled(rename = "p99 (ms)")]
        p99: String,
        #[tabled(rename = "p99.9 (ms)")]
        p999: String,
    }
    let mut rows = Vec::with_capacity(results.len());
    for result in results {
        rows.push(ResultRow {
            name: result.name.clone(),
            qps: format!("{:.0}", result.qps),
            p50: format!("{:.2}", result.latency_p50),
            p95: format!("{:.2}", result.latency_p95),
            p99: format!("{:.2}", result.latency_p99),
            p999: format!("{:.2}", result.latency_p999),
        });
    }
    println!("\n\n{}", Table::new(rows));
}

View File

@@ -0,0 +1,432 @@
//! Memory usage profiling benchmark
//!
//! Measures memory consumption at various scales and configurations:
//! - Memory usage at 10K, 100K, 1M vectors
//! - Effect of quantization on memory
//! - Index overhead measurement
use anyhow::Result;
use clap::Parser;
use ruvector_bench::{
create_progress_bar, BenchmarkResult, DatasetGenerator, MemoryProfiler, ResultWriter,
VectorDistribution,
};
use ruvector_core::{
types::{DbOptions, HnswConfig, QuantizationConfig},
DistanceMetric, VectorDB, VectorEntry,
};
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Instant;
// CLI arguments for the memory benchmark binary. Field-level `///` comments
// double as clap help text (runtime-visible strings) and are left untouched.
#[derive(Parser)]
#[command(name = "memory-benchmark")]
#[command(about = "Memory usage profiling")]
struct Args {
    /// Vector dimensions
    #[arg(short, long, default_value = "384")]
    dimensions: usize,
    // Parsed into Vec<usize> in main(); each entry is one indexing scale.
    /// Scales to test (comma-separated)
    #[arg(short, long, default_value = "1000,10000,100000")]
    scales: String,
    /// Output directory
    #[arg(short, long, default_value = "bench_results")]
    output: PathBuf,
}
/// Entry point: measures memory usage at each requested scale, compares
/// quantization modes, analyses index overhead, then writes JSON/CSV/
/// Markdown reports plus a console summary table.
fn main() -> Result<()> {
    let args = Args::parse();
    println!("╔════════════════════════════════════════╗");
    println!("║      Ruvector Memory Profiling         ║");
    println!("╚════════════════════════════════════════╝\n");
    let mut all_results = Vec::new();
    // Parse scales
    // NOTE(review): parse().unwrap() panics on a malformed --scales value;
    // consider validating via clap instead of panicking here.
    let scales: Vec<usize> = args
        .scales
        .split(',')
        .map(|s| s.trim().parse().unwrap())
        .collect();
    // Test 1: Memory usage at different scales
    for &scale in &scales {
        println!("\n{}", "=".repeat(60));
        println!("Test: Memory at {} vectors", scale);
        println!("{}\n", "=".repeat(60));
        let result = bench_memory_scale(&args, scale)?;
        all_results.push(result);
    }
    // Test 2: Effect of quantization on memory
    println!("\n{}", "=".repeat(60));
    println!("Test: Effect of Quantization on Memory");
    println!("{}\n", "=".repeat(60));
    let results = bench_quantization_memory(&args)?;
    all_results.extend(results);
    // Test 3: Index overhead analysis
    println!("\n{}", "=".repeat(60));
    println!("Test: Index Overhead Analysis");
    println!("{}\n", "=".repeat(60));
    let result = bench_index_overhead(&args)?;
    all_results.push(result);
    // Write results
    let writer = ResultWriter::new(&args.output)?;
    writer.write_json("memory_benchmark", &all_results)?;
    writer.write_csv("memory_benchmark", &all_results)?;
    writer.write_markdown_report("memory_benchmark", &all_results)?;
    print_summary(&all_results);
    println!(
        "\n✓ Memory benchmark complete! Results saved to: {}",
        args.output.display()
    );
    Ok(())
}
/// Index `num_vectors` random vectors with scalar quantization and report
/// memory growth, per-vector cost, and overhead versus the raw f32 payload.
///
/// NOTE(review): `memory_per_vector_kb` uses the delta (final - initial)
/// while `overhead_ratio` divides the *absolute* final usage by the
/// theoretical minimum — the latter therefore includes the process baseline
/// (allocator, code, temp data) and overstates the index overhead. Worth
/// aligning both on the delta.
fn bench_memory_scale(args: &Args, num_vectors: usize) -> Result<BenchmarkResult> {
    let temp_dir = tempfile::tempdir()?;
    let db_path = temp_dir.path().join("memory_scale.db");
    let options = DbOptions {
        dimensions: args.dimensions,
        distance_metric: DistanceMetric::Cosine,
        storage_path: db_path.to_str().unwrap().to_string(),
        hnsw_config: Some(HnswConfig::default()),
        quantization: Some(QuantizationConfig::Scalar),
    };
    let mem_profiler = MemoryProfiler::new();
    // Baseline before the DB exists, so the delta isolates DB growth.
    let initial_mb = mem_profiler.current_usage_mb();
    println!("Initial memory: {:.2} MB", initial_mb);
    println!("Indexing {} vectors...", num_vectors);
    let build_start = Instant::now();
    let db = VectorDB::new(options)?;
    let gen = DatasetGenerator::new(
        args.dimensions,
        VectorDistribution::Normal {
            mean: 0.0,
            std_dev: 1.0,
        },
    );
    let pb = create_progress_bar(num_vectors as u64, "Indexing");
    for i in 0..num_vectors {
        let entry = VectorEntry {
            id: Some(i.to_string()),
            vector: gen.generate(1).into_iter().next().unwrap(),
            metadata: None,
        };
        db.insert(entry)?;
        // Sample memory every 10%
        // (.max(1) guards the modulus against num_vectors < 10)
        if i % (num_vectors / 10).max(1) == 0 {
            let current_mb = mem_profiler.current_usage_mb();
            println!(
                " Progress: {}%, Memory: {:.2} MB",
                (i * 100) / num_vectors,
                current_mb
            );
        }
        pb.inc(1);
    }
    pb.finish_with_message("✓ Indexing complete");
    let build_time = build_start.elapsed();
    let final_mb = mem_profiler.current_usage_mb();
    let memory_per_vector_kb = (final_mb - initial_mb) * 1024.0 / num_vectors as f64;
    println!("Final memory: {:.2} MB", final_mb);
    println!("Memory per vector: {:.2} KB", memory_per_vector_kb);
    // Calculate theoretical minimum
    let vector_size_bytes = args.dimensions * 4; // 4 bytes per f32
    let theoretical_mb = (num_vectors * vector_size_bytes) as f64 / 1_048_576.0;
    let overhead_ratio = final_mb / theoretical_mb;
    println!("Theoretical minimum: {:.2} MB", theoretical_mb);
    println!("Overhead ratio: {:.2}x", overhead_ratio);
    Ok(BenchmarkResult {
        name: format!("memory_scale_{}", num_vectors),
        dataset: "synthetic".to_string(),
        dimensions: args.dimensions,
        num_vectors,
        // Query/latency/recall fields are zeroed: this test measures memory
        // and build time only.
        num_queries: 0,
        k: 0,
        qps: 0.0,
        latency_p50: 0.0,
        latency_p95: 0.0,
        latency_p99: 0.0,
        latency_p999: 0.0,
        recall_at_1: 0.0,
        recall_at_10: 0.0,
        recall_at_100: 0.0,
        memory_mb: final_mb,
        build_time_secs: build_time.as_secs_f64(),
        metadata: vec![
            (
                "memory_per_vector_kb".to_string(),
                format!("{:.2}", memory_per_vector_kb),
            ),
            (
                "theoretical_mb".to_string(),
                format!("{:.2}", theoretical_mb),
            ),
            (
                "overhead_ratio".to_string(),
                format!("{:.2}", overhead_ratio),
            ),
        ]
        .into_iter()
        .collect(),
    })
}
/// Compare memory footprint across quantization modes at a fixed 50K-vector
/// scale.
///
/// NOTE(review): `compression_ratio` divides the theoretical raw-f32 size by
/// *absolute* process memory (which includes the process baseline), so the
/// reported ratios are understated; a before/after delta would be fairer.
fn bench_quantization_memory(args: &Args) -> Result<Vec<BenchmarkResult>> {
    let quantizations = vec![
        ("none", QuantizationConfig::None),
        ("scalar", QuantizationConfig::Scalar),
        ("binary", QuantizationConfig::Binary),
    ];
    let num_vectors = 50_000;
    let mut results = Vec::new();
    for (name, quant_config) in quantizations {
        println!("Testing quantization: {}...", name);
        let temp_dir = tempfile::tempdir()?;
        let db_path = temp_dir.path().join("quant_memory.db");
        let options = DbOptions {
            dimensions: args.dimensions,
            distance_metric: DistanceMetric::Cosine,
            storage_path: db_path.to_str().unwrap().to_string(),
            hnsw_config: Some(HnswConfig::default()),
            quantization: Some(quant_config),
        };
        let mem_profiler = MemoryProfiler::new();
        let build_start = Instant::now();
        let db = VectorDB::new(options)?;
        let gen = DatasetGenerator::new(
            args.dimensions,
            VectorDistribution::Normal {
                mean: 0.0,
                std_dev: 1.0,
            },
        );
        let pb = create_progress_bar(num_vectors as u64, &format!("quant={}", name));
        for i in 0..num_vectors {
            let entry = VectorEntry {
                id: Some(i.to_string()),
                vector: gen.generate(1).into_iter().next().unwrap(),
                metadata: None,
            };
            db.insert(entry)?;
            pb.inc(1);
        }
        pb.finish_with_message(format!("{} complete", name));
        let build_time = build_start.elapsed();
        let memory_mb = mem_profiler.current_usage_mb();
        let vector_size_bytes = args.dimensions * 4;
        let theoretical_mb = (num_vectors * vector_size_bytes) as f64 / 1_048_576.0;
        let compression_ratio = theoretical_mb / memory_mb;
        println!(
            " Memory: {:.2} MB, Compression: {:.2}x",
            memory_mb, compression_ratio
        );
        results.push(BenchmarkResult {
            name: format!("quantization_{}", name),
            dataset: "synthetic".to_string(),
            dimensions: args.dimensions,
            num_vectors,
            // Query/latency/recall fields are zeroed: memory-only test.
            num_queries: 0,
            k: 0,
            qps: 0.0,
            latency_p50: 0.0,
            latency_p95: 0.0,
            latency_p99: 0.0,
            latency_p999: 0.0,
            recall_at_1: 0.0,
            recall_at_10: 0.0,
            recall_at_100: 0.0,
            memory_mb,
            build_time_secs: build_time.as_secs_f64(),
            metadata: vec![
                ("quantization".to_string(), name.to_string()),
                (
                    "compression_ratio".to_string(),
                    format!("{:.2}", compression_ratio),
                ),
                (
                    "theoretical_mb".to_string(),
                    format!("{:.2}", theoretical_mb),
                ),
            ]
            .into_iter()
            .collect(),
        });
    }
    Ok(results)
}
/// Break total memory into raw vector data vs HNSW index overhead for a
/// 100K-vector unquantized index (m=32, efConstruction=200).
///
/// NOTE(review): `total_memory_mb` is absolute process usage including the
/// process baseline, so the "index overhead" component is inflated by
/// allocator/runtime memory; subtracting an initial reading would isolate
/// the index itself.
fn bench_index_overhead(args: &Args) -> Result<BenchmarkResult> {
    let num_vectors = 100_000;
    println!("Analyzing index overhead for {} vectors...", num_vectors);
    let temp_dir = tempfile::tempdir()?;
    let db_path = temp_dir.path().join("overhead.db");
    let options = DbOptions {
        dimensions: args.dimensions,
        distance_metric: DistanceMetric::Cosine,
        storage_path: db_path.to_str().unwrap().to_string(),
        // Denser graph than default (m=32) to make index overhead visible.
        hnsw_config: Some(HnswConfig {
            m: 32,
            ef_construction: 200,
            ef_search: 100,
            max_elements: num_vectors * 2,
        }),
        quantization: Some(QuantizationConfig::None), // No quantization for overhead analysis
    };
    let mem_profiler = MemoryProfiler::new();
    let build_start = Instant::now();
    let db = VectorDB::new(options)?;
    let gen = DatasetGenerator::new(
        args.dimensions,
        VectorDistribution::Normal {
            mean: 0.0,
            std_dev: 1.0,
        },
    );
    let pb = create_progress_bar(num_vectors as u64, "Building index");
    for i in 0..num_vectors {
        let entry = VectorEntry {
            id: Some(i.to_string()),
            vector: gen.generate(1).into_iter().next().unwrap(),
            metadata: None,
        };
        db.insert(entry)?;
        pb.inc(1);
    }
    pb.finish_with_message("✓ Index built");
    let build_time = build_start.elapsed();
    let total_memory_mb = mem_profiler.current_usage_mb();
    // Calculate components
    let vector_data_mb = (num_vectors * args.dimensions * 4) as f64 / 1_048_576.0;
    let index_overhead_mb = total_memory_mb - vector_data_mb;
    let overhead_percentage = (index_overhead_mb / vector_data_mb) * 100.0;
    println!("\nMemory Breakdown:");
    println!(" Vector data: {:.2} MB", vector_data_mb);
    println!(
        " Index overhead: {:.2} MB ({:.1}%)",
        index_overhead_mb, overhead_percentage
    );
    println!(" Total: {:.2} MB", total_memory_mb);
    Ok(BenchmarkResult {
        name: "index_overhead".to_string(),
        dataset: "synthetic".to_string(),
        dimensions: args.dimensions,
        num_vectors,
        // Query/latency/recall fields are zeroed: memory-only test.
        num_queries: 0,
        k: 0,
        qps: 0.0,
        latency_p50: 0.0,
        latency_p95: 0.0,
        latency_p99: 0.0,
        latency_p999: 0.0,
        recall_at_1: 0.0,
        recall_at_10: 0.0,
        recall_at_100: 0.0,
        memory_mb: total_memory_mb,
        build_time_secs: build_time.as_secs_f64(),
        metadata: vec![
            (
                "vector_data_mb".to_string(),
                format!("{:.2}", vector_data_mb),
            ),
            (
                "index_overhead_mb".to_string(),
                format!("{:.2}", index_overhead_mb),
            ),
            (
                "overhead_percentage".to_string(),
                format!("{:.1}", overhead_percentage),
            ),
        ]
        .into_iter()
        .collect(),
    })
}
/// Print a table of memory benchmark results, deriving a per-vector figure
/// where a vector count is available ("N/A" otherwise).
fn print_summary(results: &[BenchmarkResult]) {
    use tabled::{Table, Tabled};
    #[derive(Tabled)]
    struct ResultRow {
        #[tabled(rename = "Configuration")]
        name: String,
        #[tabled(rename = "Vectors")]
        vectors: String,
        #[tabled(rename = "Memory (MB)")]
        memory: String,
        #[tabled(rename = "Per Vector")]
        per_vector: String,
        #[tabled(rename = "Build Time (s)")]
        build_time: String,
    }
    let mut rows = Vec::with_capacity(results.len());
    for r in results {
        let has_vectors = r.num_vectors > 0;
        let vectors = if has_vectors {
            r.num_vectors.to_string()
        } else {
            "N/A".to_string()
        };
        let per_vector = if has_vectors {
            format!("{:.2} KB", (r.memory_mb * 1024.0) / r.num_vectors as f64)
        } else {
            "N/A".to_string()
        };
        rows.push(ResultRow {
            name: r.name.clone(),
            vectors,
            memory: format!("{:.2}", r.memory_mb),
            per_vector,
            build_time: format!("{:.2}", r.build_time_secs),
        });
    }
    println!("\n\n{}", Table::new(rows));
}

View File

@@ -0,0 +1,334 @@
//! Performance profiling benchmark with flamegraph support
//!
//! Generates:
//! - CPU flamegraphs
//! - Memory allocation profiles
//! - Lock contention analysis
//! - SIMD utilization measurement
use anyhow::Result;
use clap::Parser;
use ruvector_bench::{create_progress_bar, DatasetGenerator, MemoryProfiler, VectorDistribution};
use ruvector_core::{
types::{DbOptions, HnswConfig, QuantizationConfig},
DistanceMetric, SearchQuery, VectorDB, VectorEntry,
};
use std::path::PathBuf;
use std::time::Instant;
// CLI arguments for the profiling benchmark binary. Field-level `///`
// comments double as clap help text (runtime-visible strings) and are left
// untouched.
#[derive(Parser)]
#[command(name = "profiling-benchmark")]
#[command(about = "Performance profiling with flamegraph support")]
struct Args {
    /// Number of vectors
    #[arg(short, long, default_value = "100000")]
    num_vectors: usize,
    /// Number of queries
    #[arg(short, long, default_value = "10000")]
    queries: usize,
    /// Vector dimensions
    #[arg(short, long, default_value = "384")]
    dimensions: usize,
    // Only effective when compiled with the `profiling` cargo feature.
    /// Enable flamegraph generation
    #[arg(long)]
    flamegraph: bool,
    /// Output directory
    #[arg(short, long, default_value = "bench_results/profiling")]
    output: PathBuf,
}
/// Entry point: runs indexing, search, and mixed workloads under an optional
/// pprof CPU profiler. Flamegraph output requires the `profiling` cargo
/// feature; without it a hint is printed instead.
fn main() -> Result<()> {
    let args = Args::parse();
    println!("╔════════════════════════════════════════╗");
    println!("║    Ruvector Performance Profiling      ║");
    println!("╚════════════════════════════════════════╝\n");
    std::fs::create_dir_all(&args.output)?;
    // Start profiling if enabled
    // The guard must stay alive across all three workloads; samples are
    // collected until it is consumed by stop_profiling below.
    #[cfg(feature = "profiling")]
    let guard = if args.flamegraph {
        println!("Starting CPU profiling...");
        Some(start_profiling())
    } else {
        None
    };
    // Profile 1: Indexing performance
    println!("\n{}", "=".repeat(60));
    println!("Profiling: Index Construction");
    println!("{}\n", "=".repeat(60));
    profile_indexing(&args)?;
    // Profile 2: Search performance
    println!("\n{}", "=".repeat(60));
    println!("Profiling: Search Operations");
    println!("{}\n", "=".repeat(60));
    profile_search(&args)?;
    // Profile 3: Mixed workload
    println!("\n{}", "=".repeat(60));
    println!("Profiling: Mixed Read/Write Workload");
    println!("{}\n", "=".repeat(60));
    profile_mixed_workload(&args)?;
    // Stop profiling and generate flamegraph
    #[cfg(feature = "profiling")]
    if let Some(guard) = guard {
        println!("\nGenerating flamegraph...");
        stop_profiling(guard, &args.output)?;
    }
    #[cfg(not(feature = "profiling"))]
    if args.flamegraph {
        println!("\n⚠ Profiling feature not enabled. Rebuild with:");
        println!(" cargo build --release --features profiling");
    }
    println!(
        "\n✓ Profiling complete! Results saved to: {}",
        args.output.display()
    );
    Ok(())
}
/// Start pprof CPU sampling at 1 kHz, blocklisting common runtime frames
/// (libc, pthread, vdso) so the flamegraph focuses on application code.
/// Only compiled with the `profiling` feature; panics if the profiler
/// cannot be installed.
#[cfg(feature = "profiling")]
fn start_profiling() -> pprof::ProfilerGuard<'static> {
    pprof::ProfilerGuardBuilder::default()
        .frequency(1000)
        .blocklist(&["libc", "libgcc", "pthread", "vdso"])
        .build()
        .unwrap()
}
/// Consume the profiler guard and write a flamegraph SVG plus a text report
/// into `output_dir`.
///
/// NOTE(review): `if let Ok(report)` silently drops report-build failures —
/// the function returns Ok(()) with no output and no diagnostic; consider
/// at least logging the error.
#[cfg(feature = "profiling")]
fn stop_profiling(guard: pprof::ProfilerGuard<'static>, output_dir: &PathBuf) -> Result<()> {
    use std::fs::File;
    use std::io::Write;
    if let Ok(report) = guard.report().build() {
        let flamegraph_path = output_dir.join("flamegraph.svg");
        let mut file = File::create(&flamegraph_path)?;
        report.flamegraph(&mut file)?;
        println!("✓ Flamegraph saved to: {}", flamegraph_path.display());
        // Also generate a text report
        let profile_path = output_dir.join("profile.txt");
        let mut profile_file = File::create(&profile_path)?;
        writeln!(profile_file, "CPU Profile Report\n==================\n")?;
        writeln!(profile_file, "{:?}", report)?;
        println!("✓ Profile report saved to: {}", profile_path.display());
    }
    Ok(())
}
/// Build a fresh scalar-quantized index from scratch and report total build
/// time, insertion throughput, and memory usage. Intended to run while the
/// CPU profiler is active so index construction shows up in the flamegraph.
fn profile_indexing(args: &Args) -> Result<()> {
    let temp_dir = tempfile::tempdir()?;
    let db_path = temp_dir.path().join("profiling.db");
    let options = DbOptions {
        dimensions: args.dimensions,
        distance_metric: DistanceMetric::Cosine,
        storage_path: db_path.to_str().unwrap().to_string(),
        hnsw_config: Some(HnswConfig::default()),
        quantization: Some(QuantizationConfig::Scalar),
    };
    let mem_profiler = MemoryProfiler::new();
    let start = Instant::now();
    let db = VectorDB::new(options)?;
    let gen = DatasetGenerator::new(
        args.dimensions,
        VectorDistribution::Normal {
            mean: 0.0,
            std_dev: 1.0,
        },
    );
    println!("Indexing {} vectors for profiling...", args.num_vectors);
    let pb = create_progress_bar(args.num_vectors as u64, "Indexing");
    for i in 0..args.num_vectors {
        let entry = VectorEntry {
            id: Some(i.to_string()),
            vector: gen.generate(1).into_iter().next().unwrap(),
            metadata: None,
        };
        db.insert(entry)?;
        pb.inc(1);
    }
    pb.finish_with_message("✓ Indexing complete");
    let elapsed = start.elapsed();
    let memory_mb = mem_profiler.current_usage_mb();
    println!("\nIndexing Performance:");
    println!(" Total time: {:.2}s", elapsed.as_secs_f64());
    println!(
        " Throughput: {:.0} vectors/sec",
        args.num_vectors as f64 / elapsed.as_secs_f64()
    );
    println!(" Memory: {:.2} MB", memory_mb);
    Ok(())
}
/// Time a batch of k=10 searches against a pre-built database and report
/// aggregate throughput and average latency. Intended to run while the CPU
/// profiler is active so search hot paths show up in the flamegraph.
fn profile_search(args: &Args) -> Result<()> {
    let (db, queries) = setup_database(args)?;
    println!("Running {} search queries for profiling...", args.queries);
    let progress = create_progress_bar(args.queries as u64, "Searching");
    let started = Instant::now();
    for vector in queries.iter().cloned() {
        db.search(SearchQuery {
            vector,
            k: 10,
            filter: None,
            ef_search: None,
        })?;
        progress.inc(1);
    }
    progress.finish_with_message("✓ Search complete");
    let secs = started.elapsed().as_secs_f64();
    println!("\nSearch Performance:");
    println!(" Total time: {:.2}s", secs);
    println!(" QPS: {:.0}", args.queries as f64 / secs);
    println!(
        " Avg latency: {:.2}ms",
        secs * 1000.0 / args.queries as f64
    );
    Ok(())
}
/// Run an interleaved 70% insert / 30% search workload against a fresh
/// database and report per-operation and total throughput. The 70/30 split
/// is enforced deterministically by position within each run of 10 ops.
fn profile_mixed_workload(args: &Args) -> Result<()> {
    let temp_dir = tempfile::tempdir()?;
    let db_path = temp_dir.path().join("mixed.db");
    let options = DbOptions {
        dimensions: args.dimensions,
        distance_metric: DistanceMetric::Cosine,
        storage_path: db_path.to_str().unwrap().to_string(),
        hnsw_config: Some(HnswConfig::default()),
        quantization: Some(QuantizationConfig::Scalar),
    };
    let db = VectorDB::new(options)?;
    let gen = DatasetGenerator::new(
        args.dimensions,
        VectorDistribution::Normal {
            mean: 0.0,
            std_dev: 1.0,
        },
    );
    // Keep the mixed phase short relative to the pure workloads.
    let num_ops = args.num_vectors / 10;
    println!(
        "Running {} mixed operations (70% writes, 30% reads)...",
        num_ops
    );
    let pb = create_progress_bar(num_ops as u64, "Processing");
    let start = Instant::now();
    let mut write_count = 0;
    let mut read_count = 0;
    for i in 0..num_ops {
        // Positions 0-6 of every block of 10 write; positions 7-9 read.
        if i % 10 < 7 {
            // Write operation
            let entry = VectorEntry {
                id: Some(i.to_string()),
                vector: gen.generate(1).into_iter().next().unwrap(),
                metadata: None,
            };
            db.insert(entry)?;
            write_count += 1;
        } else {
            // Read operation
            let query = gen.generate(1).into_iter().next().unwrap();
            db.search(SearchQuery {
                vector: query,
                k: 10,
                filter: None,
                ef_search: None,
            })?;
            read_count += 1;
        }
        pb.inc(1);
    }
    pb.finish_with_message("✓ Mixed workload complete");
    let elapsed = start.elapsed();
    println!("\nMixed Workload Performance:");
    println!(" Total time: {:.2}s", elapsed.as_secs_f64());
    println!(
        " Writes: {} ({:.0} writes/sec)",
        write_count,
        write_count as f64 / elapsed.as_secs_f64()
    );
    println!(
        " Reads: {} ({:.0} reads/sec)",
        read_count,
        read_count as f64 / elapsed.as_secs_f64()
    );
    println!(
        " Total throughput: {:.0} ops/sec",
        num_ops as f64 / elapsed.as_secs_f64()
    );
    Ok(())
}
/// Build a scalar-quantized temporary database filled with random normal
/// vectors and generate matching query vectors for the profiling runs.
fn setup_database(args: &Args) -> Result<(VectorDB, Vec<Vec<f32>>)> {
    // NOTE(review): temp_dir is dropped on return, deleting the directory
    // while the DB handle is still in use — presumably safe via open
    // handles or in-memory state; confirm.
    let temp_dir = tempfile::tempdir()?;
    let storage_path = temp_dir
        .path()
        .join("search.db")
        .to_str()
        .unwrap()
        .to_string();
    let db = VectorDB::new(DbOptions {
        dimensions: args.dimensions,
        distance_metric: DistanceMetric::Cosine,
        storage_path,
        hnsw_config: Some(HnswConfig::default()),
        quantization: Some(QuantizationConfig::Scalar),
    })?;
    let generator = DatasetGenerator::new(
        args.dimensions,
        VectorDistribution::Normal {
            mean: 0.0,
            std_dev: 1.0,
        },
    );
    println!("Preparing database with {} vectors...", args.num_vectors);
    let progress = create_progress_bar(args.num_vectors as u64, "Preparing");
    for id in 0..args.num_vectors {
        db.insert(VectorEntry {
            id: Some(id.to_string()),
            vector: generator.generate(1).into_iter().next().unwrap(),
            metadata: None,
        })?;
        progress.inc(1);
    }
    progress.finish_with_message("✓ Database ready");
    Ok((db, generator.generate(args.queries)))
}

View File

@@ -0,0 +1,356 @@
//! Benchmarking utilities for Ruvector
//!
//! This module provides comprehensive benchmarking tools including:
//! - ANN-Benchmarks compatibility for standardized testing
//! - AgenticDB workload simulation
//! - Latency profiling (p50, p95, p99, p99.9)
//! - Memory usage analysis
//! - Cross-system performance comparison
//! - CPU and memory profiling with flamegraphs
use anyhow::{Context, Result};
use rand::Rng;
use rand_distr::{Distribution, Normal, Uniform};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs::{self, File};
use std::io::{BufReader, BufWriter, Write};
use std::path::{Path, PathBuf};
use std::time::{Duration, Instant};
/// Benchmark result for a single test.
///
/// Aggregates the throughput, latency-percentile, recall, and resource
/// figures produced by one benchmark run; serialized to JSON/CSV/Markdown
/// by `ResultWriter`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkResult {
    /// Human-readable name of the benchmark scenario.
    pub name: String,
    /// Identifier of the dataset the run used.
    pub dataset: String,
    /// Vector dimensionality.
    pub dimensions: usize,
    /// Number of vectors inserted into the index.
    pub num_vectors: usize,
    /// Number of search queries executed.
    pub num_queries: usize,
    /// Neighbors requested per query (top-k).
    pub k: usize,
    /// Queries per second achieved.
    pub qps: f64,
    /// Median query latency (milliseconds, per the Markdown report formatting).
    pub latency_p50: f64,
    /// 95th-percentile query latency.
    pub latency_p95: f64,
    /// 99th-percentile query latency.
    pub latency_p99: f64,
    /// 99.9th-percentile query latency.
    pub latency_p999: f64,
    /// Recall@1 as a fraction in [0.0, 1.0] (reports multiply by 100).
    pub recall_at_1: f64,
    /// Recall@10 as a fraction in [0.0, 1.0].
    pub recall_at_10: f64,
    /// Recall@100 as a fraction in [0.0, 1.0].
    pub recall_at_100: f64,
    /// Memory used by the run, in megabytes.
    pub memory_mb: f64,
    /// Time spent building the index, in seconds.
    pub build_time_secs: f64,
    /// Free-form extra key/value details about the run.
    pub metadata: HashMap<String, String>,
}
/// Statistics collector using HDR histogram.
///
/// Records individual request latencies at microsecond resolution and
/// exposes percentile/mean/count queries over the recorded samples.
pub struct LatencyStats {
    // Bounds: 1 µs .. 60 s, 3 significant digits of precision.
    histogram: hdrhistogram::Histogram<u64>,
}
impl LatencyStats {
    /// Create an empty collector.
    ///
    /// # Errors
    /// Returns an error if the histogram rejects the bounds (cannot happen
    /// with the constants used here).
    pub fn new() -> Result<Self> {
        let histogram = hdrhistogram::Histogram::new_with_bounds(1, 60_000_000, 3)?;
        Ok(Self { histogram })
    }
    /// Record one latency sample.
    ///
    /// Samples above the 60 s upper bound are clamped to the highest
    /// trackable value instead of failing: a single pathological outlier
    /// must not abort a whole benchmark run (the previous `record` call
    /// returned an error for any value past the bound).
    pub fn record(&mut self, duration: Duration) -> Result<()> {
        let micros = duration.as_micros() as u64;
        // `saturating_record` clamps out-of-range values rather than erroring.
        self.histogram.saturating_record(micros);
        Ok(())
    }
    /// Value at the given percentile (0.0..=100.0), e.g. 99.0 for p99.
    pub fn percentile(&self, percentile: f64) -> Duration {
        Duration::from_micros(self.histogram.value_at_percentile(percentile))
    }
    /// Mean of all recorded samples.
    pub fn mean(&self) -> Duration {
        Duration::from_micros(self.histogram.mean() as u64)
    }
    /// Number of samples recorded so far.
    pub fn count(&self) -> u64 {
        self.histogram.len()
    }
}
impl Default for LatencyStats {
    fn default() -> Self {
        // The bounds passed in `new` are statically valid, so this cannot fail.
        Self::new().expect("LatencyStats::new with fixed bounds cannot fail")
    }
}
/// Dataset generator for synthetic benchmarks.
pub struct DatasetGenerator {
    /// Number of components per generated vector.
    dimensions: usize,
    /// Sampling distribution for vector components.
    distribution: VectorDistribution,
}
/// Shape of the synthetic data.
#[derive(Debug, Clone, Copy)]
pub enum VectorDistribution {
    /// Components drawn uniformly from [-1, 1).
    Uniform,
    /// Components drawn from N(mean, std_dev).
    Normal { mean: f32, std_dev: f32 },
    /// Vectors grouped around `num_clusters` centers spaced along the diagonal.
    Clustered { num_clusters: usize },
}
impl DatasetGenerator {
    pub fn new(dimensions: usize, distribution: VectorDistribution) -> Self {
        Self {
            dimensions,
            distribution,
        }
    }
    /// Generate `count` vectors drawn from the configured distribution.
    pub fn generate(&self, count: usize) -> Vec<Vec<f32>> {
        let mut rng = rand::thread_rng();
        match self.distribution {
            VectorDistribution::Uniform => {
                // PERF: the distribution is loop-invariant — build it once
                // instead of once per vector as the previous version did.
                let uniform = Uniform::new(-1.0, 1.0);
                (0..count)
                    .map(|_| (0..self.dimensions).map(|_| uniform.sample(&mut rng)).collect())
                    .collect()
            }
            VectorDistribution::Normal { mean, std_dev } => {
                let normal =
                    Normal::new(mean, std_dev).expect("invalid normal distribution parameters");
                (0..count)
                    .map(|_| (0..self.dimensions).map(|_| normal.sample(&mut rng)).collect())
                    .collect()
            }
            VectorDistribution::Clustered { num_clusters } => (0..count)
                .map(|_| self.clustered_vector(&mut rng, num_clusters))
                .collect(),
        }
    }
    /// One vector centered on a randomly chosen cluster.
    fn clustered_vector<R: Rng>(&self, rng: &mut R, num_clusters: usize) -> Vec<f32> {
        // Guard `num_clusters == 0`, which would make `gen_range(0..0)` panic.
        let cluster_id = rng.gen_range(0..num_clusters.max(1));
        // Centers sit 10 std-devs apart so the clusters are well separated.
        let center_offset = cluster_id as f32 * 10.0;
        let normal = Normal::new(center_offset, 1.0).expect("constant std-dev is valid");
        (0..self.dimensions).map(|_| normal.sample(rng)).collect()
    }
    /// L2-normalize `vec` in place; an all-zero vector is left unchanged.
    pub fn normalize_vector(vec: &mut [f32]) {
        let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm > 0.0 {
            for x in vec.iter_mut() {
                *x /= norm;
            }
        }
    }
}
/// Result writer for benchmark outputs.
///
/// Persists `BenchmarkResult`s as pretty JSON, CSV, or a Markdown report
/// under a common output directory.
pub struct ResultWriter {
    // Directory all output files are written into; created on construction.
    output_dir: PathBuf,
}
impl ResultWriter {
    /// Create a writer rooted at `output_dir`, creating it (and any missing
    /// parents) if needed.
    pub fn new<P: AsRef<Path>>(output_dir: P) -> Result<Self> {
        let output_dir = output_dir.as_ref().to_path_buf();
        fs::create_dir_all(&output_dir)?;
        Ok(Self { output_dir })
    }
    /// Serialize `data` as pretty-printed JSON to `<output_dir>/<name>.json`.
    pub fn write_json<T: Serialize>(&self, name: &str, data: &T) -> Result<()> {
        let path = self.output_dir.join(format!("{}.json", name));
        let mut writer = BufWriter::new(File::create(&path)?);
        serde_json::to_writer_pretty(&mut writer, data)?;
        // Flush explicitly: BufWriter's Drop flushes too, but swallows errors.
        writer.flush()?;
        println!("✓ Written results to: {}", path.display());
        Ok(())
    }
    /// Write `results` as a CSV table to `<output_dir>/<name>.csv`.
    ///
    /// NOTE(review): fields are not CSV-escaped — a comma in `name` or
    /// `dataset` would break the row; acceptable for internal benchmark names.
    pub fn write_csv(&self, name: &str, results: &[BenchmarkResult]) -> Result<()> {
        let path = self.output_dir.join(format!("{}.csv", name));
        // Buffer the many small row writes instead of one syscall per row.
        let mut file = BufWriter::new(File::create(&path)?);
        // Write header
        writeln!(
            file,
            "name,dataset,dimensions,num_vectors,num_queries,k,qps,p50,p95,p99,p999,recall@1,recall@10,recall@100,memory_mb,build_time"
        )?;
        // Write data
        for result in results {
            writeln!(
                file,
                "{},{},{},{},{},{},{:.2},{:.2},{:.2},{:.2},{:.2},{:.4},{:.4},{:.4},{:.2},{:.2}",
                result.name,
                result.dataset,
                result.dimensions,
                result.num_vectors,
                result.num_queries,
                result.k,
                result.qps,
                result.latency_p50,
                result.latency_p95,
                result.latency_p99,
                result.latency_p999,
                result.recall_at_1,
                result.recall_at_10,
                result.recall_at_100,
                result.memory_mb,
                result.build_time_secs,
            )?;
        }
        file.flush()?;
        println!("✓ Written CSV to: {}", path.display());
        Ok(())
    }
    /// Write a human-readable Markdown report to `<output_dir>/<name>.md`.
    pub fn write_markdown_report(&self, name: &str, results: &[BenchmarkResult]) -> Result<()> {
        let path = self.output_dir.join(format!("{}.md", name));
        // Buffered: the report is built from many small writeln! calls.
        let mut file = BufWriter::new(File::create(&path)?);
        writeln!(file, "# Ruvector Benchmark Results\n")?;
        writeln!(
            file,
            "Generated: {}\n",
            chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
        )?;
        for result in results {
            writeln!(file, "## {}\n", result.name)?;
            writeln!(
                file,
                "**Dataset:** {} ({}D, {} vectors)\n",
                result.dataset, result.dimensions, result.num_vectors
            )?;
            writeln!(file, "### Performance")?;
            writeln!(file, "- **QPS:** {:.2}", result.qps)?;
            writeln!(file, "- **Latency (p50):** {:.2}ms", result.latency_p50)?;
            writeln!(file, "- **Latency (p95):** {:.2}ms", result.latency_p95)?;
            writeln!(file, "- **Latency (p99):** {:.2}ms", result.latency_p99)?;
            writeln!(file, "- **Latency (p99.9):** {:.2}ms", result.latency_p999)?;
            writeln!(file)?;
            writeln!(file, "### Recall")?;
            writeln!(file, "- **Recall@1:** {:.2}%", result.recall_at_1 * 100.0)?;
            writeln!(file, "- **Recall@10:** {:.2}%", result.recall_at_10 * 100.0)?;
            writeln!(
                file,
                "- **Recall@100:** {:.2}%",
                result.recall_at_100 * 100.0
            )?;
            writeln!(file)?;
            writeln!(file, "### Resources")?;
            writeln!(file, "- **Memory:** {:.2} MB", result.memory_mb)?;
            writeln!(file, "- **Build Time:** {:.2}s", result.build_time_secs)?;
            writeln!(file)?;
        }
        file.flush()?;
        println!("✓ Written markdown report to: {}", path.display());
        Ok(())
    }
}
/// Memory profiler.
///
/// With the `profiling` feature enabled this reads jemalloc allocation
/// statistics; otherwise it is a zero-cost stub that reports 0.
pub struct MemoryProfiler {
    // Bytes allocated when the profiler was created (jemalloc stats.allocated).
    #[cfg(feature = "profiling")]
    initial_allocated: usize,
    #[cfg(not(feature = "profiling"))]
    _phantom: (),
}
impl MemoryProfiler {
    /// Snapshot the current allocation level as the baseline.
    pub fn new() -> Self {
        #[cfg(feature = "profiling")]
        {
            use jemalloc_ctl::{epoch, stats};
            // Advance the jemalloc epoch so the stats read below are fresh.
            epoch::mib().unwrap().advance().unwrap();
            let allocated = stats::allocated::mib().unwrap().read().unwrap();
            Self {
                initial_allocated: allocated,
            }
        }
        #[cfg(not(feature = "profiling"))]
        {
            Self { _phantom: () }
        }
    }
    /// Megabytes allocated since this profiler was created.
    ///
    /// Returns 0.0 when the `profiling` feature is disabled. If total
    /// allocation has shrunk below the baseline, the delta is clamped to
    /// zero — the previous unchecked `usize` subtraction would panic in
    /// debug builds (and wrap in release) in that case.
    pub fn current_usage_mb(&self) -> f64 {
        #[cfg(feature = "profiling")]
        {
            use jemalloc_ctl::{epoch, stats};
            epoch::mib().unwrap().advance().unwrap();
            let allocated = stats::allocated::mib().unwrap().read().unwrap();
            allocated.saturating_sub(self.initial_allocated) as f64 / 1_048_576.0
        }
        #[cfg(not(feature = "profiling"))]
        {
            0.0
        }
    }
    /// Total and used system memory in bytes, as reported by the OS.
    pub fn system_memory_info() -> Result<(u64, u64)> {
        use sysinfo::System;
        let mut sys = System::new_all();
        sys.refresh_all();
        let total = sys.total_memory();
        let used = sys.used_memory();
        Ok((total, used))
    }
}
impl Default for MemoryProfiler {
    fn default() -> Self {
        Self::new()
    }
}
/// Calculate mean recall@k between search results and ground truth.
///
/// For each query, recall is |top-k(result) ∩ top-k(truth)| divided by
/// `min(k, truth.len())`; the per-query recalls are averaged. Returns 0.0
/// for an empty query set or `k == 0` (the previous version divided by
/// zero and produced NaN in those cases), and a query with an empty
/// ground-truth list contributes zero recall.
///
/// # Panics
/// Panics if `results` and `ground_truth` have different lengths.
pub fn calculate_recall(results: &[Vec<String>], ground_truth: &[Vec<String>], k: usize) -> f64 {
    assert_eq!(
        results.len(),
        ground_truth.len(),
        "results and ground truth must cover the same queries"
    );
    // Guard the degenerate cases that would otherwise divide by zero.
    if results.is_empty() || k == 0 {
        return 0.0;
    }
    let mut total_recall = 0.0;
    for (result, truth) in results.iter().zip(ground_truth.iter()) {
        let denom = k.min(truth.len());
        if denom == 0 {
            // No ground truth for this query: counts as zero recall.
            continue;
        }
        let result_set: std::collections::HashSet<_> = result.iter().take(k).collect();
        let truth_set: std::collections::HashSet<_> = truth.iter().take(k).collect();
        let intersection = result_set.intersection(&truth_set).count();
        total_recall += intersection as f64 / denom as f64;
    }
    total_recall / results.len() as f64
}
/// Progress bar helper: a `len`-step bar labeled `msg`, using the shared
/// benchmark style (cyan/blue bar, position counter, ETA).
pub fn create_progress_bar(len: u64, msg: &str) -> indicatif::ProgressBar {
    let style = indicatif::ProgressStyle::default_bar()
        .template("{msg} [{bar:40.cyan/blue}] {pos}/{len} ({eta})")
        .unwrap()
        .progress_chars("#>-");
    let bar = indicatif::ProgressBar::new(len);
    bar.set_style(style);
    bar.set_message(msg.to_string());
    bar
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Generated batches have the requested count and dimensionality, and
    /// uniform components stay inside the sampling range.
    #[test]
    fn test_dataset_generator() {
        let gen = DatasetGenerator::new(128, VectorDistribution::Uniform);
        let vectors = gen.generate(100);
        assert_eq!(vectors.len(), 100);
        // Every vector, not just the first, must have the requested dimension.
        assert!(vectors.iter().all(|v| v.len() == 128));
        // Uniform components are drawn from [-1, 1).
        assert!(vectors.iter().flatten().all(|&x| (-1.0..1.0).contains(&x)));
    }

    /// Recording 1000 samples yields the right count and a sane median.
    #[test]
    fn test_latency_stats() {
        let mut stats = LatencyStats::new().unwrap();
        for i in 0..1000 {
            stats.record(Duration::from_micros(i)).unwrap();
        }
        assert_eq!(stats.count(), 1000);
        // BUG FIX: `percentile` takes a percentage in 0..=100, so the old
        // `percentile(0.5)` asked for the 0.5th percentile, not the median.
        let median = stats.percentile(50.0).as_micros();
        // Samples are uniform over 0..1000 µs, so the median is ~500 µs
        // (wide band to allow for histogram bucketing precision).
        assert!((400..=600).contains(&median));
    }

    /// Each query matches 2 of its 3 ground-truth ids, so recall@3 = 2/3.
    #[test]
    fn test_recall_calculation() {
        let results = vec![
            vec!["1".to_string(), "2".to_string(), "3".to_string()],
            vec!["4".to_string(), "5".to_string(), "6".to_string()],
        ];
        let ground_truth = vec![
            vec!["1".to_string(), "2".to_string(), "7".to_string()],
            vec!["4".to_string(), "8".to_string(), "6".to_string()],
        ];
        let recall = calculate_recall(&results, &ground_truth, 3);
        assert!((recall - 2.0 / 3.0).abs() < 1e-9);
    }
}