Files
wifi-densepose/vendor/ruvector/examples/vibecast-7sense/benches/utils.rs

674 lines
19 KiB
Rust

//! Benchmark Utilities for 7sense Performance Testing
//!
//! This module provides common utilities for benchmarking:
//! - Random vector generation
//! - Test index setup
//! - Recall calculation
//! - Ground truth computation
//! - Performance metrics
use std::collections::HashSet;
use std::time::{Duration, Instant};
/// Embedding dimensions for Perch 2.0 model
pub const PERCH_EMBEDDING_DIM: usize = 1536;
// Default HNSW parameters from ADR-004
/// Default `m` value; passed as `m` by `SimpleHnswIndex::new_default`.
pub const DEFAULT_M: usize = 32;
/// Default `ef_construction` value; passed by `SimpleHnswIndex::new_default`.
pub const DEFAULT_EF_CONSTRUCTION: usize = 200;
/// Default `ef_search` value; passed by `SimpleHnswIndex::new_default`.
pub const DEFAULT_EF_SEARCH: usize = 128;
/// Larger `ef_search` value. Not referenced in the visible part of this file;
/// presumably handed to `SimpleHnswIndex::set_ef_search` by benchmarks that
/// favour recall over latency — confirm against the callers.
pub const HIGH_RECALL_EF_SEARCH: usize = 256;
/// Performance targets from ADR-004
///
/// Plain constants only; nothing in this module enforces them. Whether a
/// value is a floor or a ceiling is implied by its name (throughput/recall
/// vs. latency/build time/loss) — confirm against ADR-004 when in doubt.
pub mod targets {
use std::time::Duration;
// --- HNSW search targets ---
/// Target speed-up factor of HNSW search relative to brute-force search.
pub const HNSW_SPEEDUP_VS_BRUTE_FORCE: f64 = 150.0;
/// Target p50 query latency, in milliseconds.
pub const QUERY_LATENCY_P50_MS: u64 = 10;
/// Target p99 query latency, in milliseconds.
pub const QUERY_LATENCY_P99_MS: u64 = 50;
/// Target recall@10, as a fraction in `0.0..=1.0`.
pub const RECALL_AT_10: f64 = 0.95;
/// Target recall@100, as a fraction in `0.0..=1.0`.
pub const RECALL_AT_100: f64 = 0.98;
// --- Embedding inference targets ---
/// Target number of segments embedded per second.
pub const EMBEDDING_SEGMENTS_PER_SECOND: u64 = 100;
// --- Batch ingestion targets ---
/// Target number of vectors ingested per minute in batch mode.
pub const BATCH_VECTORS_PER_MINUTE: u64 = 1_000_000;
/// Target insert throughput per second (presumably counted in vectors — confirm).
pub const INSERT_THROUGHPUT_PER_SECOND: u64 = 10_000;
// --- Query latency targets ---
/// Target total query latency, in milliseconds.
pub const TOTAL_QUERY_LATENCY_MS: u64 = 100;
// --- Build time targets ---
/// Target build time for one million vectors: 30 minutes.
pub const BUILD_TIME_1M_VECTORS: Duration = Duration::from_secs(30 * 60);
// --- Quantization targets ---
/// Target upper bound on recall loss from int8 quantization, as a fraction.
pub const MAX_RECALL_LOSS_INT8: f64 = 0.03;
}
/// Generate deterministic pseudo-random f32 vectors for benchmarking
///
/// Each vector is produced by its own 64-bit linear congruential generator
/// seeded from the vector's position, so repeated calls with the same
/// arguments return identical data.
///
/// # Arguments
/// * `count` - Number of vectors to generate
/// * `dims` - Dimensionality of each vector
///
/// # Returns
/// `count` vectors of length `dims`. Components are drawn from `[-1.0, 1.0]`
/// and each vector is then scaled to unit length (an all-zero vector is left
/// unscaled).
pub fn generate_random_vectors(count: usize, dims: usize) -> Vec<Vec<f32>> {
    const LCG_MULTIPLIER: u64 = 6364136223846793005;
    const LCG_INCREMENT: u64 = 1;

    let mut vectors = Vec::with_capacity(count);
    for i in 0..count {
        // Per-vector seed keeps the output reproducible and independent of `count`.
        let mut seed = (i as u64)
            .wrapping_mul(LCG_MULTIPLIER)
            .wrapping_add(LCG_INCREMENT);

        let mut vec: Vec<f32> = (0..dims)
            .map(|_| {
                seed = seed
                    .wrapping_mul(LCG_MULTIPLIER)
                    .wrapping_add(LCG_INCREMENT);
                // Take the top 32 bits so the ratio against `u32::MAX` spans
                // [0, 1]. Shifting by 33 leaves only 31 bits, which caps the
                // ratio at 0.5 and makes every component non-positive.
                ((seed >> 32) as f32) / (u32::MAX as f32) * 2.0 - 1.0
            })
            .collect();

        // Normalize to unit length
        let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm > 0.0 {
            for x in vec.iter_mut() {
                *x /= norm;
            }
        }
        vectors.push(vec);
    }
    vectors
}
/// Generate clustered pseudo-random vectors for more realistic benchmarking
///
/// Cluster centers come from `generate_random_vectors`. Vector `i` is assigned
/// to cluster `i % num_clusters` and is the center plus a deterministic
/// per-component offset, re-normalized to unit length.
///
/// # Arguments
/// * `count` - Total number of vectors to generate
/// * `dims` - Dimensionality of each vector
/// * `num_clusters` - Number of clusters to create
/// * `cluster_spread` - Scale of the per-component offset: each offset is
///   drawn uniformly from `[-cluster_spread, cluster_spread]`
///
/// # Returns
/// `count` unit-length vectors organized around the cluster centers
///
/// # Panics
/// Panics if `num_clusters` is zero while `count` is non-zero.
pub fn generate_clustered_vectors(
    count: usize,
    dims: usize,
    num_clusters: usize,
    cluster_spread: f32,
) -> Vec<Vec<f32>> {
    const LCG_MULTIPLIER: u64 = 2862933555777941757;
    const LCG_INCREMENT: u64 = 3;

    // Fail with a readable message instead of a bare remainder-by-zero panic.
    assert!(
        count == 0 || num_clusters > 0,
        "num_clusters must be non-zero when count > 0"
    );

    // Generate cluster centers
    let centers = generate_random_vectors(num_clusters, dims);

    let mut vectors = Vec::with_capacity(count);
    for i in 0..count {
        // Round-robin assignment of vectors to clusters.
        let center = &centers[i % num_clusters];

        // Per-vector seed keeps the offsets reproducible.
        let mut seed = (i as u64)
            .wrapping_mul(LCG_MULTIPLIER)
            .wrapping_add(LCG_INCREMENT);

        let mut vec: Vec<f32> = center
            .iter()
            .map(|&component| {
                seed = seed
                    .wrapping_mul(LCG_MULTIPLIER)
                    .wrapping_add(LCG_INCREMENT);
                // Top 32 bits over `u32::MAX` gives noise in [-1, 1]; a 33-bit
                // shift would leave only 31 bits and bias all noise to [-1, 0].
                let noise = ((seed >> 32) as f32) / (u32::MAX as f32) * 2.0 - 1.0;
                component + noise * cluster_spread
            })
            .collect();

        // Normalize to unit length
        let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm > 0.0 {
            for x in vec.iter_mut() {
                *x /= norm;
            }
        }
        vectors.push(vec);
    }
    vectors
}
/// Euclidean (L2) distance between `a` and `b`.
///
/// The slices are expected to have equal length; this is only checked in
/// debug builds. Otherwise the extra elements of the longer slice are ignored.
#[inline]
pub fn l2_distance(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len());
    let squared_sum: f32 = a
        .iter()
        .zip(b)
        .map(|(&lhs, &rhs)| {
            let delta = lhs - rhs;
            delta * delta
        })
        .sum();
    squared_sum.sqrt()
}
/// Squared Euclidean distance between `a` and `b` (no square root taken).
///
/// The slices are expected to have equal length; this is only checked in
/// debug builds. Otherwise the extra elements of the longer slice are ignored.
#[inline]
pub fn l2_distance_squared(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len());
    a.iter()
        .zip(b)
        .map(|(&lhs, &rhs)| {
            let delta = lhs - rhs;
            delta * delta
        })
        .sum()
}
/// Cosine similarity between `a` and `b`.
///
/// Returns `0.0` unless both vectors have a norm strictly greater than zero.
/// Equal lengths are expected and checked in debug builds only.
#[inline]
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len());

    let magnitude = |v: &[f32]| v.iter().map(|c| c * c).sum::<f32>().sqrt();
    let (norm_a, norm_b) = (magnitude(a), magnitude(b));

    // Guard against dividing by a zero norm.
    let both_non_zero = norm_a > 0.0 && norm_b > 0.0;
    if !both_non_zero {
        return 0.0;
    }

    let dot: f32 = a.iter().zip(b).map(|(p, q)| p * q).sum();
    dot / (norm_a * norm_b)
}
/// Compute brute-force k-nearest neighbors (ground truth)
///
/// Computes the L2 distance from `query` to every vector in `dataset`, sorts
/// all of them, and keeps the first `k`.
///
/// # Arguments
/// * `query` - Query vector
/// * `dataset` - Dataset of vectors to search
/// * `k` - Number of neighbors to find
///
/// # Returns
/// At most `k` `(index, distance)` pairs sorted by ascending distance, where
/// `index` is the position in `dataset`. Equal distances keep dataset order;
/// NaN distances sort after every real distance.
pub fn brute_force_knn(query: &[f32], dataset: &[Vec<f32>], k: usize) -> Vec<(usize, f32)> {
    let mut distances: Vec<(usize, f32)> = dataset
        .iter()
        .enumerate()
        .map(|(i, vec)| (i, l2_distance(query, vec)))
        .collect();

    // `sort_by` requires a total order. Treating an incomparable pair as
    // `Equal` is not one once NaN is involved, so NaN is ordered explicitly as
    // the largest value (NaN vs NaN compares equal).
    distances.sort_by(|a, b| {
        a.1.partial_cmp(&b.1)
            .unwrap_or_else(|| a.1.is_nan().cmp(&b.1.is_nan()))
    });
    distances.truncate(k);
    distances
}
/// Measure recall@k for approximate nearest neighbor results
///
/// Recall is the share of the true top-`k` neighbors that appear among the
/// first `k` approximate results. The denominator is `k` clamped to the
/// ground-truth length only, so a search that returns fewer than `k` results
/// is penalized for the ones it missed rather than scored on a shorter list.
///
/// # Arguments
/// * `results` - Approximate results (index, distance) pairs
/// * `ground_truth` - Exact brute-force results (index, distance) pairs
/// * `k` - Number of top results to consider
///
/// # Returns
/// Recall value between 0.0 and 1.0; `0.0` when `k` is zero or
/// `ground_truth` is empty
pub fn measure_recall_at_k(
    results: &[(usize, f32)],
    ground_truth: &[(usize, f32)],
    k: usize,
) -> f32 {
    // Only the amount of available ground truth may shrink `k`.
    let k = k.min(ground_truth.len());
    if k == 0 {
        return 0.0;
    }

    let truth_set: HashSet<usize> = ground_truth.iter().take(k).map(|&(idx, _)| idx).collect();
    let result_set: HashSet<usize> = results.iter().take(k).map(|&(idx, _)| idx).collect();

    let hits = result_set.intersection(&truth_set).count();
    hits as f32 / k as f32
}
/// Pick the `p`-th percentile (0–100) from latencies sorted in ascending order.
///
/// Uses the nearest rank: the index is `(len - 1) * p / 100`, rounded and
/// clamped to the last element. Returns `Duration::ZERO` for an empty slice.
/// The slice is not sorted here; the caller must pass it already sorted.
pub fn percentile(sorted_latencies: &[Duration], p: f64) -> Duration {
    match sorted_latencies.len().checked_sub(1) {
        None => Duration::ZERO,
        Some(last) => {
            let span = sorted_latencies.len() as f64 - 1.0;
            let rank = (span * p / 100.0).round() as usize;
            sorted_latencies[rank.min(last)]
        }
    }
}
/// Performance statistics from benchmark runs
#[derive(Debug, Clone)]
pub struct PerformanceStats {
pub count: usize,
pub total_time: Duration,
pub min: Duration,
pub max: Duration,
pub mean: Duration,
pub p50: Duration,
pub p95: Duration,
pub p99: Duration,
pub p999: Duration,
pub throughput_per_sec: f64,
}
impl PerformanceStats {
/// Calculate statistics from a collection of latency measurements
pub fn from_latencies(mut latencies: Vec<Duration>) -> Self {
if latencies.is_empty() {
return Self {
count: 0,
total_time: Duration::ZERO,
min: Duration::ZERO,
max: Duration::ZERO,
mean: Duration::ZERO,
p50: Duration::ZERO,
p95: Duration::ZERO,
p99: Duration::ZERO,
p999: Duration::ZERO,
throughput_per_sec: 0.0,
};
}
latencies.sort();
let total_time: Duration = latencies.iter().sum();
let count = latencies.len();
let mean = total_time / count as u32;
Self {
count,
total_time,
min: latencies[0],
max: latencies[count - 1],
mean,
p50: percentile(&latencies, 50.0),
p95: percentile(&latencies, 95.0),
p99: percentile(&latencies, 99.0),
p999: percentile(&latencies, 99.9),
throughput_per_sec: count as f64 / total_time.as_secs_f64(),
}
}
/// Check if stats meet p99 latency target
pub fn meets_p99_target(&self, target_ms: u64) -> bool {
self.p99 <= Duration::from_millis(target_ms)
}
/// Check if stats meet throughput target
pub fn meets_throughput_target(&self, target_per_sec: u64) -> bool {
self.throughput_per_sec >= target_per_sec as f64
}
/// Format as a readable report
pub fn report(&self) -> String {
format!(
"Count: {}\n\
Total Time: {:?}\n\
Min: {:?}\n\
Max: {:?}\n\
Mean: {:?}\n\
P50: {:?}\n\
P95: {:?}\n\
P99: {:?}\n\
P99.9: {:?}\n\
Throughput: {:.2} ops/sec",
self.count,
self.total_time,
self.min,
self.max,
self.mean,
self.p50,
self.p95,
self.p99,
self.p999,
self.throughput_per_sec
)
}
}
/// Simple HNSW-like index for benchmarking
/// This is a simplified implementation for benchmark purposes
pub struct SimpleHnswIndex {
vectors: Vec<Vec<f32>>,
dims: usize,
m: usize,
ef_construction: usize,
ef_search: usize,
// Simplified graph structure: each vector has a list of neighbor indices
graph: Vec<Vec<usize>>,
}
impl SimpleHnswIndex {
/// Create a new empty index
pub fn new(dims: usize, m: usize, ef_construction: usize, ef_search: usize) -> Self {
Self {
vectors: Vec::new(),
dims,
m,
ef_construction,
ef_search,
graph: Vec::new(),
}
}
/// Create an index with default parameters for Perch embeddings
pub fn new_default() -> Self {
Self::new(
PERCH_EMBEDDING_DIM,
DEFAULT_M,
DEFAULT_EF_CONSTRUCTION,
DEFAULT_EF_SEARCH,
)
}
/// Get the number of vectors in the index
pub fn len(&self) -> usize {
self.vectors.len()
}
/// Check if the index is empty
pub fn is_empty(&self) -> bool {
self.vectors.is_empty()
}
/// Add a single vector to the index
pub fn add(&mut self, vector: Vec<f32>) -> usize {
assert_eq!(vector.len(), self.dims);
let id = self.vectors.len();
// Find neighbors for the new vector
let neighbors = if self.vectors.is_empty() {
Vec::new()
} else {
self.search_internal(&vector, self.m.min(self.vectors.len()))
.into_iter()
.map(|(idx, _)| idx)
.collect()
};
self.vectors.push(vector);
self.graph.push(neighbors.clone());
// Update bidirectional connections
for &neighbor_id in &neighbors {
if self.graph[neighbor_id].len() < self.m * 2 {
self.graph[neighbor_id].push(id);
}
}
id
}
/// Batch add vectors to the index
pub fn batch_add(&mut self, vectors: Vec<Vec<f32>>) -> Vec<usize> {
vectors.into_iter().map(|v| self.add(v)).collect()
}
/// Search for k nearest neighbors
pub fn search(&self, query: &[f32], k: usize) -> Vec<(usize, f32)> {
assert_eq!(query.len(), self.dims);
if self.vectors.is_empty() {
return Vec::new();
}
self.search_internal(query, k)
}
/// Internal search implementation with simplified HNSW-like traversal
fn search_internal(&self, query: &[f32], k: usize) -> Vec<(usize, f32)> {
use std::collections::{BinaryHeap, HashSet};
use std::cmp::Reverse;
let ef = self.ef_search.max(k);
// Start from a random entry point
let entry_point = 0;
let mut visited: HashSet<usize> = HashSet::new();
let mut candidates: BinaryHeap<Reverse<(ordered_float::OrderedFloat<f32>, usize)>> =
BinaryHeap::new();
let mut results: BinaryHeap<(ordered_float::OrderedFloat<f32>, usize)> = BinaryHeap::new();
let entry_dist = l2_distance(query, &self.vectors[entry_point]);
candidates.push(Reverse((ordered_float::OrderedFloat(entry_dist), entry_point)));
results.push((ordered_float::OrderedFloat(entry_dist), entry_point));
visited.insert(entry_point);
while let Some(Reverse((dist, current))) = candidates.pop() {
let worst_dist = if results.len() >= ef {
results.peek().map(|(d, _)| d.0).unwrap_or(f32::MAX)
} else {
f32::MAX
};
if dist.0 > worst_dist {
break;
}
// Explore neighbors
for &neighbor in &self.graph[current] {
if visited.insert(neighbor) {
let neighbor_dist = l2_distance(query, &self.vectors[neighbor]);
if results.len() < ef || neighbor_dist < worst_dist {
candidates.push(Reverse((
ordered_float::OrderedFloat(neighbor_dist),
neighbor,
)));
results.push((ordered_float::OrderedFloat(neighbor_dist), neighbor));
if results.len() > ef {
results.pop();
}
}
}
}
}
// Convert to output format and sort by distance
let mut output: Vec<(usize, f32)> =
results.into_iter().map(|(d, idx)| (idx, d.0)).collect();
output.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
output.truncate(k);
output
}
/// Set ef_search parameter for queries
pub fn set_ef_search(&mut self, ef: usize) {
self.ef_search = ef;
}
}
/// Build a default-parameter index holding `size` generated vectors of
/// `PERCH_EMBEDDING_DIM` dimensions, inserted one at a time.
pub fn setup_test_index(size: usize) -> SimpleHnswIndex {
    let mut index = SimpleHnswIndex::new_default();
    generate_random_vectors(size, PERCH_EMBEDDING_DIM)
        .into_iter()
        .for_each(|embedding| {
            index.add(embedding);
        });
    index
}
/// Per-dimension scalar quantizer mapping f32 components to 8-bit codes
/// (stored as `u8`, range 0..=255).
pub struct ScalarQuantizer {
    /// Smallest value seen per dimension during calibration.
    mins: Vec<f32>,
    /// Largest value seen per dimension during calibration.
    maxs: Vec<f32>,
    /// Per-dimension multiplier from value offset to code.
    scales: Vec<f32>,
    /// Number of dimensions this quantizer was created for.
    dims: usize,
}

impl ScalarQuantizer {
    /// Create an uncalibrated quantizer for `dims` dimensions.
    pub fn new(dims: usize) -> Self {
        let mins = vec![f32::MAX; dims];
        let maxs = vec![f32::MIN; dims];
        let scales = vec![1.0; dims];
        Self {
            mins,
            maxs,
            scales,
            dims,
        }
    }

    /// Learn per-dimension ranges from sample embeddings.
    ///
    /// Minima and maxima persist across calls, so calibrating again widens
    /// the existing ranges. A dimension with no positive range gets scale 1.0.
    pub fn calibrate(&mut self, embeddings: &[Vec<f32>]) {
        // Track the extremes of every dimension across all samples.
        for (d, &val) in embeddings.iter().flat_map(|e| e.iter().enumerate()) {
            let lowest = &mut self.mins[d];
            if val < *lowest {
                *lowest = val;
            }
            let highest = &mut self.maxs[d];
            if val > *highest {
                *highest = val;
            }
        }

        // Spread each observed range over the 0..=255 code space.
        for d in 0..self.dims {
            let span = self.maxs[d] - self.mins[d];
            self.scales[d] = if span > 0.0 { 255.0 / span } else { 1.0 };
        }
    }

    /// Quantize an f32 embedding to one 8-bit code per component.
    ///
    /// Values outside the calibrated range are clamped to 0 or 255.
    pub fn quantize(&self, embedding: &[f32]) -> Vec<u8> {
        let mut codes = Vec::with_capacity(embedding.len());
        for (d, &val) in embedding.iter().enumerate() {
            let scaled = (val - self.mins[d]) * self.scales[d];
            codes.push(scaled.round().clamp(0.0, 255.0) as u8);
        }
        codes
    }

    /// Map 8-bit codes back to approximate f32 values.
    pub fn dequantize(&self, quantized: &[u8]) -> Vec<f32> {
        let mut restored = Vec::with_capacity(quantized.len());
        for (d, &code) in quantized.iter().enumerate() {
            restored.push(f32::from(code) / self.scales[d] + self.mins[d]);
        }
        restored
    }
}
/// Stopwatch that records the instant it was started.
pub struct Timer {
    /// Instant captured by `Timer::start`.
    start: Instant,
}

impl Timer {
    /// Begin timing from now.
    pub fn start() -> Self {
        let start = Instant::now();
        Timer { start }
    }

    /// Time passed since the timer was started; the timer keeps running.
    pub fn elapsed(&self) -> Duration {
        self.start.elapsed()
    }

    /// Consume the timer and return the time passed since it was started.
    pub fn stop(self) -> Duration {
        self.elapsed()
    }
}
/// Run `f` once and return its result together with how long the call took.
pub fn measure_time<F, R>(f: F) -> (R, Duration)
where
    F: FnOnce() -> R,
{
    let clock = Instant::now();
    let value = f();
    // The tuple is built after `f` has returned, so only the call is timed.
    (value, clock.elapsed())
}
/// Call `f` `iterations` times, timing each call separately, and summarize
/// the per-call latencies as `PerformanceStats`.
pub fn measure_average<F>(iterations: usize, mut f: F) -> PerformanceStats
where
    F: FnMut(),
{
    let mut latencies = Vec::with_capacity(iterations);
    for _ in 0..iterations {
        let started = Instant::now();
        f();
        latencies.push(started.elapsed());
    }
    PerformanceStats::from_latencies(latencies)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Generated vectors have the requested shape and unit length.
    #[test]
    fn test_generate_random_vectors() {
        const COUNT: usize = 100;
        const DIMS: usize = 1536;

        let generated = generate_random_vectors(COUNT, DIMS);
        assert_eq!(generated.len(), COUNT);
        assert_eq!(generated[0].len(), DIMS);

        // Check normalization
        for v in &generated {
            let length = v.iter().map(|c| c * c).sum::<f32>().sqrt();
            assert!((length - 1.0).abs() < 1e-5);
        }
    }

    /// Two orthogonal unit vectors are sqrt(2) apart.
    #[test]
    fn test_l2_distance() {
        let x_axis = vec![1.0, 0.0, 0.0];
        let y_axis = vec![0.0, 1.0, 0.0];
        let gap = l2_distance(&x_axis, &y_axis) - std::f32::consts::SQRT_2;
        assert!(gap.abs() < 1e-5);
    }

    /// Four of the five true neighbors are retrieved.
    #[test]
    fn test_recall_at_k() {
        let approx: Vec<(usize, f32)> = vec![(0, 0.1), (1, 0.2), (2, 0.3), (3, 0.4), (5, 0.5)];
        let exact: Vec<(usize, f32)> = vec![(0, 0.1), (1, 0.2), (2, 0.3), (4, 0.4), (5, 0.5)];

        let recall = measure_recall_at_k(&approx, &exact, 5);
        assert!((recall - 0.8).abs() < 1e-5); // 4 out of 5 match
    }

    /// Quantize + dequantize stays close to the original values.
    #[test]
    fn test_scalar_quantizer() {
        let samples = generate_random_vectors(100, 128);
        let mut quantizer = ScalarQuantizer::new(128);
        quantizer.calibrate(&samples);

        for original in &samples {
            let restored = quantizer.dequantize(&quantizer.quantize(original));
            let total_abs_error: f32 = original
                .iter()
                .zip(restored.iter())
                .map(|(a, b)| (a - b).abs())
                .sum();
            let mean_abs_error = total_abs_error / original.len() as f32;
            assert!(mean_abs_error < 0.1); // Average error should be small
        }
    }

    /// Percentiles are ordered between min and max.
    #[test]
    fn test_performance_stats() {
        let samples: Vec<Duration> = (0..100)
            .map(|i| Duration::from_micros(100 + i * 10))
            .collect();

        let stats = PerformanceStats::from_latencies(samples);
        assert_eq!(stats.count, 100);
        assert!(stats.min <= stats.p50);
        assert!(stats.p50 <= stats.p95);
        assert!(stats.p95 <= stats.p99);
        assert!(stats.p99 <= stats.max);
    }
}