Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions
--- a/examples/vibecast-7sense/benches/utils.rs
+++ b/examples/vibecast-7sense/benches/utils.rs
@@ -0,0 +1,673 @@
+//! Benchmark Utilities for 7sense Performance Testing
+//!
+//! This module provides common utilities for benchmarking:
+//! - Random vector generation
+//! - Test index setup
+//! - Recall calculation
+//! - Ground truth computation
+//! - Performance metrics
+
+use std::collections::HashSet;
+use std::time::{Duration, Instant};
+
+/// Embedding dimensions for Perch 2.0 model
+pub const PERCH_EMBEDDING_DIM: usize = 1536;
+
+/// Default HNSW parameters from ADR-004
+pub const DEFAULT_M: usize = 32;
+pub const DEFAULT_EF_CONSTRUCTION: usize = 200;
+pub const DEFAULT_EF_SEARCH: usize = 128;
+pub const HIGH_RECALL_EF_SEARCH: usize = 256;
+
+/// Performance targets from ADR-004
+pub mod targets {
+    use std::time::Duration;
+
+    /// HNSW Search Targets
+    pub const HNSW_SPEEDUP_VS_BRUTE_FORCE: f64 = 150.0;
+    pub const QUERY_LATENCY_P50_MS: u64 = 10;
+    pub const QUERY_LATENCY_P99_MS: u64 = 50;
+    pub const RECALL_AT_10: f64 = 0.95;
+    pub const RECALL_AT_100: f64 = 0.98;
+
+    /// Embedding Inference Targets
+    pub const EMBEDDING_SEGMENTS_PER_SECOND: u64 = 100;
+
+    /// Batch Ingestion Targets
+    pub const BATCH_VECTORS_PER_MINUTE: u64 = 1_000_000;
+    pub const INSERT_THROUGHPUT_PER_SECOND: u64 = 10_000;
+
+    /// Query Latency Targets
+    pub const TOTAL_QUERY_LATENCY_MS: u64 = 100;
+
+    /// Build Time Targets
+    pub const BUILD_TIME_1M_VECTORS: Duration = Duration::from_secs(30 * 60);
+
+    /// Quantization Targets
+    pub const MAX_RECALL_LOSS_INT8: f64 = 0.03;
+}
+
+/// Generate random f32 vectors for benchmarking
+///
+/// # Arguments
+/// * `count` - Number of vectors to generate
+/// * `dims` - Dimensionality of each vector
+///
+/// # Returns
+/// A vector of random f32 vectors, normalized to unit length
+pub fn generate_random_vectors(count: usize, dims: usize) -> Vec<Vec<f32>> {
+    use std::f32::consts::PI;
+
+    let mut vectors = Vec::with_capacity(count);
+
+    for i in 0..count {
+        let mut vec = Vec::with_capacity(dims);
+
+        // Use a simple deterministic random generator for reproducibility
+        let mut seed = (i as u64).wrapping_mul(6364136223846793005).wrapping_add(1);
+
+        for _ in 0..dims {
+            seed = seed.wrapping_mul(6364136223846793005).wrapping_add(1);
+            let val = ((seed >> 33) as f32) / (u32::MAX as f32) * 2.0 - 1.0;
+            vec.push(val);
+        }
+
+        // Normalize to unit length
+        let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
+        if norm > 0.0 {
+            for x in vec.iter_mut() {
+                *x /= norm;
+            }
+        }
+
+        vectors.push(vec);
+    }
+
+    vectors
+}
+
+/// Generate clustered random vectors for more realistic benchmarking
+///
+/// # Arguments
+/// * `count` - Total number of vectors to generate
+/// * `dims` - Dimensionality of each vector
+/// * `num_clusters` - Number of clusters to create
+/// * `cluster_spread` - Standard deviation within clusters (0.0 to 1.0)
+///
+/// # Returns
+/// A vector of random f32 vectors organized around cluster centers
+pub fn generate_clustered_vectors(
+    count: usize,
+    dims: usize,
+    num_clusters: usize,
+    cluster_spread: f32,
+) -> Vec<Vec<f32>> {
+    let mut vectors = Vec::with_capacity(count);
+
+    // Generate cluster centers
+    let centers = generate_random_vectors(num_clusters, dims);
+
+    // Assign vectors to clusters
+    for i in 0..count {
+        let cluster_idx = i % num_clusters;
+        let center = &centers[cluster_idx];
+
+        let mut vec = Vec::with_capacity(dims);
+
+        // Use deterministic random for offset
+        let mut seed = (i as u64).wrapping_mul(2862933555777941757).wrapping_add(3);
+
+        for d in 0..dims {
+            seed = seed.wrapping_mul(2862933555777941757).wrapping_add(3);
+            let noise = ((seed >> 33) as f32) / (u32::MAX as f32) * 2.0 - 1.0;
+            let val = center[d] + noise * cluster_spread;
+            vec.push(val);
+        }
+
+        // Normalize to unit length
+        let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
+        if norm > 0.0 {
+            for x in vec.iter_mut() {
+                *x /= norm;
+            }
+        }
+
+        vectors.push(vec);
+    }
+
+    vectors
+}
+
+/// Compute L2 (Euclidean) distance between two vectors
+#[inline]
+pub fn l2_distance(a: &[f32], b: &[f32]) -> f32 {
+    debug_assert_eq!(a.len(), b.len());
+    a.iter()
+        .zip(b.iter())
+        .map(|(x, y)| (x - y) * (x - y))
+        .sum::<f32>()
+        .sqrt()
+}
+
+/// Compute L2 squared distance (faster, no sqrt)
+#[inline]
+pub fn l2_distance_squared(a: &[f32], b: &[f32]) -> f32 {
+    debug_assert_eq!(a.len(), b.len());
+    a.iter()
+        .zip(b.iter())
+        .map(|(x, y)| (x - y) * (x - y))
+        .sum()
+}
+
+/// Compute cosine similarity between two vectors
+#[inline]
+pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
+    debug_assert_eq!(a.len(), b.len());
+    let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
+    let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
+    let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
+    if norm_a > 0.0 && norm_b > 0.0 {
+        dot / (norm_a * norm_b)
+    } else {
+        0.0
+    }
+}
+
+/// Compute brute-force k-nearest neighbors (ground truth)
+///
+/// # Arguments
+/// * `query` - Query vector
+/// * `dataset` - Dataset of vectors to search
+/// * `k` - Number of neighbors to find
+///
+/// # Returns
+/// Vector of (index, distance) pairs sorted by distance
+pub fn brute_force_knn(query: &[f32], dataset: &[Vec<f32>], k: usize) -> Vec<(usize, f32)> {
+    let mut distances: Vec<(usize, f32)> = dataset
+        .iter()
+        .enumerate()
+        .map(|(i, vec)| (i, l2_distance(query, vec)))
+        .collect();
+
+    distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
+    distances.truncate(k);
+    distances
+}
+
+/// Measure recall@k for approximate nearest neighbor results
+///
+/// # Arguments
+/// * `results` - Approximate results (index, distance) pairs
+/// * `ground_truth` - Exact brute-force results (index, distance) pairs
+/// * `k` - Number of top results to consider
+///
+/// # Returns
+/// Recall value between 0.0 and 1.0
+pub fn measure_recall_at_k(
+    results: &[(usize, f32)],
+    ground_truth: &[(usize, f32)],
+    k: usize,
+) -> f32 {
+    let k = k.min(results.len()).min(ground_truth.len());
+    if k == 0 {
+        return 0.0;
+    }
+
+    let result_set: HashSet<usize> = results.iter().take(k).map(|(idx, _)| *idx).collect();
+    let truth_set: HashSet<usize> = ground_truth.iter().take(k).map(|(idx, _)| *idx).collect();
+
+    let intersection = result_set.intersection(&truth_set).count();
+    intersection as f32 / k as f32
+}
+
+/// Calculate percentile from a sorted slice of durations
+pub fn percentile(sorted_latencies: &[Duration], p: f64) -> Duration {
+    if sorted_latencies.is_empty() {
+        return Duration::ZERO;
+    }
+    let idx = ((sorted_latencies.len() as f64 - 1.0) * p / 100.0).round() as usize;
+    sorted_latencies[idx.min(sorted_latencies.len() - 1)]
+}
+
+/// Performance statistics from benchmark runs
+#[derive(Debug, Clone)]
+pub struct PerformanceStats {
+    pub count: usize,
+    pub total_time: Duration,
+    pub min: Duration,
+    pub max: Duration,
+    pub mean: Duration,
+    pub p50: Duration,
+    pub p95: Duration,
+    pub p99: Duration,
+    pub p999: Duration,
+    pub throughput_per_sec: f64,
+}
+
+impl PerformanceStats {
+    /// Calculate statistics from a collection of latency measurements
+    pub fn from_latencies(mut latencies: Vec<Duration>) -> Self {
+        if latencies.is_empty() {
+            return Self {
+                count: 0,
+                total_time: Duration::ZERO,
+                min: Duration::ZERO,
+                max: Duration::ZERO,
+                mean: Duration::ZERO,
+                p50: Duration::ZERO,
+                p95: Duration::ZERO,
+                p99: Duration::ZERO,
+                p999: Duration::ZERO,
+                throughput_per_sec: 0.0,
+            };
+        }
+
+        latencies.sort();
+
+        let total_time: Duration = latencies.iter().sum();
+        let count = latencies.len();
+        let mean = total_time / count as u32;
+
+        Self {
+            count,
+            total_time,
+            min: latencies[0],
+            max: latencies[count - 1],
+            mean,
+            p50: percentile(&latencies, 50.0),
+            p95: percentile(&latencies, 95.0),
+            p99: percentile(&latencies, 99.0),
+            p999: percentile(&latencies, 99.9),
+            throughput_per_sec: count as f64 / total_time.as_secs_f64(),
+        }
+    }
+
+    /// Check if stats meet p99 latency target
+    pub fn meets_p99_target(&self, target_ms: u64) -> bool {
+        self.p99 <= Duration::from_millis(target_ms)
+    }
+
+    /// Check if stats meet throughput target
+    pub fn meets_throughput_target(&self, target_per_sec: u64) -> bool {
+        self.throughput_per_sec >= target_per_sec as f64
+    }
+
+    /// Format as a readable report
+    pub fn report(&self) -> String {
+        format!(
+            "Count: {}\n\
+             Total Time: {:?}\n\
+             Min: {:?}\n\
+             Max: {:?}\n\
+             Mean: {:?}\n\
+             P50: {:?}\n\
+             P95: {:?}\n\
+             P99: {:?}\n\
+             P99.9: {:?}\n\
+             Throughput: {:.2} ops/sec",
+            self.count,
+            self.total_time,
+            self.min,
+            self.max,
+            self.mean,
+            self.p50,
+            self.p95,
+            self.p99,
+            self.p999,
+            self.throughput_per_sec
+        )
+    }
+}
+
+/// Simple HNSW-like index for benchmarking
+/// This is a simplified implementation for benchmark purposes
+pub struct SimpleHnswIndex {
+    vectors: Vec<Vec<f32>>,
+    dims: usize,
+    m: usize,
+    ef_construction: usize,
+    ef_search: usize,
+    // Simplified graph structure: each vector has a list of neighbor indices
+    graph: Vec<Vec<usize>>,
+}
+
+impl SimpleHnswIndex {
+    /// Create a new empty index
+    pub fn new(dims: usize, m: usize, ef_construction: usize, ef_search: usize) -> Self {
+        Self {
+            vectors: Vec::new(),
+            dims,
+            m,
+            ef_construction,
+            ef_search,
+            graph: Vec::new(),
+        }
+    }
+
+    /// Create an index with default parameters for Perch embeddings
+    pub fn new_default() -> Self {
+        Self::new(
+            PERCH_EMBEDDING_DIM,
+            DEFAULT_M,
+            DEFAULT_EF_CONSTRUCTION,
+            DEFAULT_EF_SEARCH,
+        )
+    }
+
+    /// Get the number of vectors in the index
+    pub fn len(&self) -> usize {
+        self.vectors.len()
+    }
+
+    /// Check if the index is empty
+    pub fn is_empty(&self) -> bool {
+        self.vectors.is_empty()
+    }
+
+    /// Add a single vector to the index
+    pub fn add(&mut self, vector: Vec<f32>) -> usize {
+        assert_eq!(vector.len(), self.dims);
+        let id = self.vectors.len();
+
+        // Find neighbors for the new vector
+        let neighbors = if self.vectors.is_empty() {
+            Vec::new()
+        } else {
+            self.search_internal(&vector, self.m.min(self.vectors.len()))
+                .into_iter()
+                .map(|(idx, _)| idx)
+                .collect()
+        };
+
+        self.vectors.push(vector);
+        self.graph.push(neighbors.clone());
+
+        // Update bidirectional connections
+        for &neighbor_id in &neighbors {
+            if self.graph[neighbor_id].len() < self.m * 2 {
+                self.graph[neighbor_id].push(id);
+            }
+        }
+
+        id
+    }
+
+    /// Batch add vectors to the index
+    pub fn batch_add(&mut self, vectors: Vec<Vec<f32>>) -> Vec<usize> {
+        vectors.into_iter().map(|v| self.add(v)).collect()
+    }
+
+    /// Search for k nearest neighbors
+    pub fn search(&self, query: &[f32], k: usize) -> Vec<(usize, f32)> {
+        assert_eq!(query.len(), self.dims);
+        if self.vectors.is_empty() {
+            return Vec::new();
+        }
+        self.search_internal(query, k)
+    }
+
+    /// Internal search implementation with simplified HNSW-like traversal
+    fn search_internal(&self, query: &[f32], k: usize) -> Vec<(usize, f32)> {
+        use std::collections::{BinaryHeap, HashSet};
+        use std::cmp::Reverse;
+
+        let ef = self.ef_search.max(k);
+
+        // Start from a random entry point
+        let entry_point = 0;
+
+        let mut visited: HashSet<usize> = HashSet::new();
+        let mut candidates: BinaryHeap<Reverse<(ordered_float::OrderedFloat<f32>, usize)>> =
+            BinaryHeap::new();
+        let mut results: BinaryHeap<(ordered_float::OrderedFloat<f32>, usize)> = BinaryHeap::new();
+
+        let entry_dist = l2_distance(query, &self.vectors[entry_point]);
+        candidates.push(Reverse((ordered_float::OrderedFloat(entry_dist), entry_point)));
+        results.push((ordered_float::OrderedFloat(entry_dist), entry_point));
+        visited.insert(entry_point);
+
+        while let Some(Reverse((dist, current))) = candidates.pop() {
+            let worst_dist = if results.len() >= ef {
+                results.peek().map(|(d, _)| d.0).unwrap_or(f32::MAX)
+            } else {
+                f32::MAX
+            };
+
+            if dist.0 > worst_dist {
+                break;
+            }
+
+            // Explore neighbors
+            for &neighbor in &self.graph[current] {
+                if visited.insert(neighbor) {
+                    let neighbor_dist = l2_distance(query, &self.vectors[neighbor]);
+
+                    if results.len() < ef || neighbor_dist < worst_dist {
+                        candidates.push(Reverse((
+                            ordered_float::OrderedFloat(neighbor_dist),
+                            neighbor,
+                        )));
+                        results.push((ordered_float::OrderedFloat(neighbor_dist), neighbor));
+
+                        if results.len() > ef {
+                            results.pop();
+                        }
+                    }
+                }
+            }
+        }
+
+        // Convert to output format and sort by distance
+        let mut output: Vec<(usize, f32)> =
+            results.into_iter().map(|(d, idx)| (idx, d.0)).collect();
+        output.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
+        output.truncate(k);
+        output
+    }
+
+    /// Set ef_search parameter for queries
+    pub fn set_ef_search(&mut self, ef: usize) {
+        self.ef_search = ef;
+    }
+}
+
+/// Setup a test index with the specified number of vectors
+pub fn setup_test_index(size: usize) -> SimpleHnswIndex {
+    let vectors = generate_random_vectors(size, PERCH_EMBEDDING_DIM);
+    let mut index = SimpleHnswIndex::new_default();
+
+    for vec in vectors {
+        index.add(vec);
+    }
+
+    index
+}
+
+/// Scalar quantizer for int8 compression
+pub struct ScalarQuantizer {
+    mins: Vec<f32>,
+    maxs: Vec<f32>,
+    scales: Vec<f32>,
+    dims: usize,
+}
+
+impl ScalarQuantizer {
+    /// Create a new quantizer for the specified dimensions
+    pub fn new(dims: usize) -> Self {
+        Self {
+            mins: vec![f32::MAX; dims],
+            maxs: vec![f32::MIN; dims],
+            scales: vec![1.0; dims],
+            dims,
+        }
+    }
+
+    /// Calibrate the quantizer from a sample of embeddings
+    pub fn calibrate(&mut self, embeddings: &[Vec<f32>]) {
+        // Find min/max per dimension
+        for embedding in embeddings {
+            for (d, &val) in embedding.iter().enumerate() {
+                if val < self.mins[d] {
+                    self.mins[d] = val;
+                }
+                if val > self.maxs[d] {
+                    self.maxs[d] = val;
+                }
+            }
+        }
+
+        // Compute scales
+        for d in 0..self.dims {
+            let range = self.maxs[d] - self.mins[d];
+            if range > 0.0 {
+                self.scales[d] = 255.0 / range;
+            } else {
+                self.scales[d] = 1.0;
+            }
+        }
+    }
+
+    /// Quantize a float32 embedding to int8
+    pub fn quantize(&self, embedding: &[f32]) -> Vec<u8> {
+        embedding
+            .iter()
+            .enumerate()
+            .map(|(d, &val)| {
+                let normalized = (val - self.mins[d]) * self.scales[d];
+                normalized.round().clamp(0.0, 255.0) as u8
+            })
+            .collect()
+    }
+
+    /// Dequantize an int8 embedding back to float32
+    pub fn dequantize(&self, quantized: &[u8]) -> Vec<f32> {
+        quantized
+            .iter()
+            .enumerate()
+            .map(|(d, &val)| (val as f32) / self.scales[d] + self.mins[d])
+            .collect()
+    }
+}
+
+/// Timer utility for measuring operations
+pub struct Timer {
+    start: Instant,
+}
+
+impl Timer {
+    /// Start a new timer
+    pub fn start() -> Self {
+        Self {
+            start: Instant::now(),
+        }
+    }
+
+    /// Get elapsed time
+    pub fn elapsed(&self) -> Duration {
+        self.start.elapsed()
+    }
+
+    /// Stop and return elapsed time
+    pub fn stop(self) -> Duration {
+        self.start.elapsed()
+    }
+}
+
+/// Measure execution time of a closure
+pub fn measure_time<F, R>(f: F) -> (R, Duration)
+where
+    F: FnOnce() -> R,
+{
+    let start = Instant::now();
+    let result = f();
+    let duration = start.elapsed();
+    (result, duration)
+}
+
+/// Measure average execution time over multiple iterations
+pub fn measure_average<F>(iterations: usize, mut f: F) -> PerformanceStats
+where
+    F: FnMut() -> (),
+{
+    let latencies: Vec<Duration> = (0..iterations)
+        .map(|_| {
+            let start = Instant::now();
+            f();
+            start.elapsed()
+        })
+        .collect();
+
+    PerformanceStats::from_latencies(latencies)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_generate_random_vectors() {
+        let vectors = generate_random_vectors(100, 1536);
+        assert_eq!(vectors.len(), 100);
+        assert_eq!(vectors[0].len(), 1536);
+
+        // Check normalization
+        for vec in &vectors {
+            let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
+            assert!((norm - 1.0).abs() < 1e-5);
+        }
+    }
+
+    #[test]
+    fn test_l2_distance() {
+        let a = vec![1.0, 0.0, 0.0];
+        let b = vec![0.0, 1.0, 0.0];
+        let dist = l2_distance(&a, &b);
+        assert!((dist - std::f32::consts::SQRT_2).abs() < 1e-5);
+    }
+
+    #[test]
+    fn test_recall_at_k() {
+        let results: Vec<(usize, f32)> = vec![(0, 0.1), (1, 0.2), (2, 0.3), (3, 0.4), (5, 0.5)];
+        let ground_truth: Vec<(usize, f32)> =
+            vec![(0, 0.1), (1, 0.2), (2, 0.3), (4, 0.4), (5, 0.5)];
+
+        let recall = measure_recall_at_k(&results, &ground_truth, 5);
+        assert!((recall - 0.8).abs() < 1e-5); // 4 out of 5 match
+    }
+
+    #[test]
+    fn test_scalar_quantizer() {
+        let vectors = generate_random_vectors(100, 128);
+        let mut quantizer = ScalarQuantizer::new(128);
+        quantizer.calibrate(&vectors);
+
+        for vec in &vectors {
+            let quantized = quantizer.quantize(vec);
+            let dequantized = quantizer.dequantize(&quantized);
+
+            // Check that dequantized is close to original
+            let error: f32 = vec
+                .iter()
+                .zip(dequantized.iter())
+                .map(|(a, b)| (a - b).abs())
+                .sum::<f32>()
+                / vec.len() as f32;
+
+            assert!(error < 0.1); // Average error should be small
+        }
+    }
+
+    #[test]
+    fn test_performance_stats() {
+        let latencies: Vec<Duration> = (0..100)
+            .map(|i| Duration::from_micros(100 + i * 10))
+            .collect();
+
+        let stats = PerformanceStats::from_latencies(latencies);
+        assert_eq!(stats.count, 100);
+        assert!(stats.min <= stats.p50);
+        assert!(stats.p50 <= stats.p95);
+        assert!(stats.p95 <= stats.p99);
+        assert!(stats.p99 <= stats.max);
+    }
+}