//! Benchmark Utilities for 7sense Performance Testing
//!
//! This module provides common utilities for benchmarking:
//! - Random vector generation
//! - Test index setup
//! - Recall calculation
//! - Ground truth computation
//! - Performance metrics
|
use std::collections::HashSet;
|
|
use std::time::{Duration, Instant};
|
|
|
|
/// Embedding dimensions for Perch 2.0 model
pub const PERCH_EMBEDDING_DIM: usize = 1536;

/// Default HNSW parameters from ADR-004
// Maximum number of bidirectional links per node in the graph.
pub const DEFAULT_M: usize = 32;
// Candidate-list size used while building the index (higher = better graph, slower build).
pub const DEFAULT_EF_CONSTRUCTION: usize = 200;
// Candidate-list size used at query time (speed/recall trade-off).
pub const DEFAULT_EF_SEARCH: usize = 128;
// Larger ef_search for benchmark runs that prioritize recall over latency.
pub const HIGH_RECALL_EF_SEARCH: usize = 256;
|
|
|
|
/// Performance targets from ADR-004
pub mod targets {
    use std::time::Duration;

    /// HNSW Search Targets
    // Minimum speedup HNSW must show over a brute-force linear scan.
    pub const HNSW_SPEEDUP_VS_BRUTE_FORCE: f64 = 150.0;
    // Median query latency budget, in milliseconds.
    pub const QUERY_LATENCY_P50_MS: u64 = 10;
    // Tail (99th percentile) query latency budget, in milliseconds.
    pub const QUERY_LATENCY_P99_MS: u64 = 50;
    // Minimum fraction of true top-10 neighbors the ANN search must return.
    pub const RECALL_AT_10: f64 = 0.95;
    // Minimum fraction of true top-100 neighbors the ANN search must return.
    pub const RECALL_AT_100: f64 = 0.98;

    /// Embedding Inference Targets
    // Minimum audio segments embedded per second.
    pub const EMBEDDING_SEGMENTS_PER_SECOND: u64 = 100;

    /// Batch Ingestion Targets
    // Bulk-load throughput target for index construction.
    pub const BATCH_VECTORS_PER_MINUTE: u64 = 1_000_000;
    // Steady-state single-insert throughput target.
    pub const INSERT_THROUGHPUT_PER_SECOND: u64 = 10_000;

    /// Query Latency Targets
    // End-to-end budget (network + search + post-processing), in milliseconds.
    pub const TOTAL_QUERY_LATENCY_MS: u64 = 100;

    /// Build Time Targets
    // Full index build for 1M vectors must finish within 30 minutes.
    pub const BUILD_TIME_1M_VECTORS: Duration = Duration::from_secs(30 * 60);

    /// Quantization Targets
    // Maximum acceptable recall drop when switching to int8 quantization.
    pub const MAX_RECALL_LOSS_INT8: f64 = 0.03;
}
|
|
|
|
/// Generate random f32 vectors for benchmarking
///
/// Vectors are produced by a deterministic LCG keyed on the vector index,
/// so repeated calls with the same arguments yield identical data
/// (important for reproducible benchmarks).
///
/// # Arguments
/// * `count` - Number of vectors to generate
/// * `dims` - Dimensionality of each vector
///
/// # Returns
/// A vector of random f32 vectors, normalized to unit length
pub fn generate_random_vectors(count: usize, dims: usize) -> Vec<Vec<f32>> {
    let mut vectors = Vec::with_capacity(count);

    for i in 0..count {
        let mut vec = Vec::with_capacity(dims);

        // Simple deterministic LCG (Knuth MMIX constants) for reproducibility.
        let mut seed = (i as u64).wrapping_mul(6364136223846793005).wrapping_add(1);

        for _ in 0..dims {
            seed = seed.wrapping_mul(6364136223846793005).wrapping_add(1);
            // Map the high 32 bits (the best-distributed LCG bits) to [-1, 1].
            // Shifting by 33 would keep only 31 bits, halving the [0, 1]
            // range and biasing every component negative.
            let val = ((seed >> 32) as u32 as f32) / (u32::MAX as f32) * 2.0 - 1.0;
            vec.push(val);
        }

        // Normalize to unit length (guard against a theoretical all-zero vector).
        let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm > 0.0 {
            for x in vec.iter_mut() {
                *x /= norm;
            }
        }

        vectors.push(vec);
    }

    vectors
}
|
|
|
|
/// Generate clustered random vectors for more realistic benchmarking
|
|
///
|
|
/// # Arguments
|
|
/// * `count` - Total number of vectors to generate
|
|
/// * `dims` - Dimensionality of each vector
|
|
/// * `num_clusters` - Number of clusters to create
|
|
/// * `cluster_spread` - Standard deviation within clusters (0.0 to 1.0)
|
|
///
|
|
/// # Returns
|
|
/// A vector of random f32 vectors organized around cluster centers
|
|
pub fn generate_clustered_vectors(
|
|
count: usize,
|
|
dims: usize,
|
|
num_clusters: usize,
|
|
cluster_spread: f32,
|
|
) -> Vec<Vec<f32>> {
|
|
let mut vectors = Vec::with_capacity(count);
|
|
|
|
// Generate cluster centers
|
|
let centers = generate_random_vectors(num_clusters, dims);
|
|
|
|
// Assign vectors to clusters
|
|
for i in 0..count {
|
|
let cluster_idx = i % num_clusters;
|
|
let center = ¢ers[cluster_idx];
|
|
|
|
let mut vec = Vec::with_capacity(dims);
|
|
|
|
// Use deterministic random for offset
|
|
let mut seed = (i as u64).wrapping_mul(2862933555777941757).wrapping_add(3);
|
|
|
|
for d in 0..dims {
|
|
seed = seed.wrapping_mul(2862933555777941757).wrapping_add(3);
|
|
let noise = ((seed >> 33) as f32) / (u32::MAX as f32) * 2.0 - 1.0;
|
|
let val = center[d] + noise * cluster_spread;
|
|
vec.push(val);
|
|
}
|
|
|
|
// Normalize to unit length
|
|
let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
|
|
if norm > 0.0 {
|
|
for x in vec.iter_mut() {
|
|
*x /= norm;
|
|
}
|
|
}
|
|
|
|
vectors.push(vec);
|
|
}
|
|
|
|
vectors
|
|
}
|
|
|
|
/// Compute L2 (Euclidean) distance between two vectors
///
/// Both slices must have the same length (checked in debug builds only).
#[inline]
pub fn l2_distance(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len());
    let mut sum_sq = 0.0f32;
    for (lhs, rhs) in a.iter().zip(b.iter()) {
        let diff = lhs - rhs;
        sum_sq += diff * diff;
    }
    sum_sq.sqrt()
}
|
|
|
|
/// Compute L2 squared distance (faster, no sqrt)
///
/// Useful when only the relative ordering of distances matters.
/// Both slices must have the same length (checked in debug builds only).
#[inline]
pub fn l2_distance_squared(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len());
    a.iter()
        .zip(b.iter())
        .fold(0.0, |acc, (lhs, rhs)| acc + (lhs - rhs) * (lhs - rhs))
}
|
|
|
|
/// Compute cosine similarity between two vectors
///
/// Returns 0.0 when either vector has zero norm (similarity is undefined
/// for the zero vector). Both slices must have the same length
/// (checked in debug builds only).
#[inline]
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len());
    // Accumulate dot product and both squared norms in a single pass.
    let mut dot = 0.0f32;
    let mut sq_a = 0.0f32;
    let mut sq_b = 0.0f32;
    for (x, y) in a.iter().zip(b.iter()) {
        dot += x * y;
        sq_a += x * x;
        sq_b += y * y;
    }
    // sq > 0.0 is equivalent to the norm (its sqrt) being > 0.0.
    if sq_a > 0.0 && sq_b > 0.0 {
        dot / (sq_a.sqrt() * sq_b.sqrt())
    } else {
        0.0
    }
}
|
|
|
|
/// Compute brute-force k-nearest neighbors (ground truth)
|
|
///
|
|
/// # Arguments
|
|
/// * `query` - Query vector
|
|
/// * `dataset` - Dataset of vectors to search
|
|
/// * `k` - Number of neighbors to find
|
|
///
|
|
/// # Returns
|
|
/// Vector of (index, distance) pairs sorted by distance
|
|
pub fn brute_force_knn(query: &[f32], dataset: &[Vec<f32>], k: usize) -> Vec<(usize, f32)> {
|
|
let mut distances: Vec<(usize, f32)> = dataset
|
|
.iter()
|
|
.enumerate()
|
|
.map(|(i, vec)| (i, l2_distance(query, vec)))
|
|
.collect();
|
|
|
|
distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
|
|
distances.truncate(k);
|
|
distances
|
|
}
|
|
|
|
/// Measure recall@k for approximate nearest neighbor results
///
/// Compares the sets of indices in the top-k of each list; distances are
/// ignored. `k` is clamped to the shorter of the two lists.
///
/// # Arguments
/// * `results` - Approximate results (index, distance) pairs
/// * `ground_truth` - Exact brute-force results (index, distance) pairs
/// * `k` - Number of top results to consider
///
/// # Returns
/// Recall value between 0.0 and 1.0
pub fn measure_recall_at_k(
    results: &[(usize, f32)],
    ground_truth: &[(usize, f32)],
    k: usize,
) -> f32 {
    // Clamp k so both slices can supply k entries; empty input → recall 0.
    let effective_k = k.min(results.len()).min(ground_truth.len());
    if effective_k == 0 {
        return 0.0;
    }

    let approx: HashSet<usize> = results[..effective_k].iter().map(|&(i, _)| i).collect();
    let exact: HashSet<usize> = ground_truth[..effective_k].iter().map(|&(i, _)| i).collect();

    let overlap = approx.intersection(&exact).count();
    overlap as f32 / effective_k as f32
}
|
|
|
|
/// Calculate percentile from a sorted slice of durations
///
/// `p` is expressed in percent (e.g. 99.9). Uses nearest-rank rounding on
/// the index `(len - 1) * p / 100`. Returns `Duration::ZERO` for an empty
/// slice; out-of-range `p` clamps to the last element.
pub fn percentile(sorted_latencies: &[Duration], p: f64) -> Duration {
    match sorted_latencies.last() {
        None => Duration::ZERO,
        Some(&largest) => {
            let rank = ((sorted_latencies.len() as f64 - 1.0) * p / 100.0).round() as usize;
            // A rank past the end (p > 100) falls back to the maximum.
            sorted_latencies.get(rank).copied().unwrap_or(largest)
        }
    }
}
|
|
|
|
/// Performance statistics from benchmark runs
#[derive(Debug, Clone)]
pub struct PerformanceStats {
    // Number of latency measurements the stats were computed from.
    pub count: usize,
    // Sum of all measured latencies.
    pub total_time: Duration,
    // Fastest single measurement.
    pub min: Duration,
    // Slowest single measurement.
    pub max: Duration,
    // Arithmetic mean (total_time / count).
    pub mean: Duration,
    // Median latency.
    pub p50: Duration,
    // 95th percentile latency.
    pub p95: Duration,
    // 99th percentile latency.
    pub p99: Duration,
    // 99.9th percentile latency.
    pub p999: Duration,
    // Operations per second: count / total_time in seconds.
    pub throughput_per_sec: f64,
}
|
|
|
|
impl PerformanceStats {
|
|
/// Calculate statistics from a collection of latency measurements
|
|
pub fn from_latencies(mut latencies: Vec<Duration>) -> Self {
|
|
if latencies.is_empty() {
|
|
return Self {
|
|
count: 0,
|
|
total_time: Duration::ZERO,
|
|
min: Duration::ZERO,
|
|
max: Duration::ZERO,
|
|
mean: Duration::ZERO,
|
|
p50: Duration::ZERO,
|
|
p95: Duration::ZERO,
|
|
p99: Duration::ZERO,
|
|
p999: Duration::ZERO,
|
|
throughput_per_sec: 0.0,
|
|
};
|
|
}
|
|
|
|
latencies.sort();
|
|
|
|
let total_time: Duration = latencies.iter().sum();
|
|
let count = latencies.len();
|
|
let mean = total_time / count as u32;
|
|
|
|
Self {
|
|
count,
|
|
total_time,
|
|
min: latencies[0],
|
|
max: latencies[count - 1],
|
|
mean,
|
|
p50: percentile(&latencies, 50.0),
|
|
p95: percentile(&latencies, 95.0),
|
|
p99: percentile(&latencies, 99.0),
|
|
p999: percentile(&latencies, 99.9),
|
|
throughput_per_sec: count as f64 / total_time.as_secs_f64(),
|
|
}
|
|
}
|
|
|
|
/// Check if stats meet p99 latency target
|
|
pub fn meets_p99_target(&self, target_ms: u64) -> bool {
|
|
self.p99 <= Duration::from_millis(target_ms)
|
|
}
|
|
|
|
/// Check if stats meet throughput target
|
|
pub fn meets_throughput_target(&self, target_per_sec: u64) -> bool {
|
|
self.throughput_per_sec >= target_per_sec as f64
|
|
}
|
|
|
|
/// Format as a readable report
|
|
pub fn report(&self) -> String {
|
|
format!(
|
|
"Count: {}\n\
|
|
Total Time: {:?}\n\
|
|
Min: {:?}\n\
|
|
Max: {:?}\n\
|
|
Mean: {:?}\n\
|
|
P50: {:?}\n\
|
|
P95: {:?}\n\
|
|
P99: {:?}\n\
|
|
P99.9: {:?}\n\
|
|
Throughput: {:.2} ops/sec",
|
|
self.count,
|
|
self.total_time,
|
|
self.min,
|
|
self.max,
|
|
self.mean,
|
|
self.p50,
|
|
self.p95,
|
|
self.p99,
|
|
self.p999,
|
|
self.throughput_per_sec
|
|
)
|
|
}
|
|
}
|
|
|
|
/// Simple HNSW-like index for benchmarking
/// This is a simplified implementation for benchmark purposes
pub struct SimpleHnswIndex {
    // Stored vectors; the position in this Vec is the id returned by `add`.
    vectors: Vec<Vec<f32>>,
    // Expected dimensionality of every vector (asserted on add/search).
    dims: usize,
    // Target number of links per node (HNSW "M" parameter).
    m: usize,
    // Candidate-list size used when inserting (HNSW "efConstruction").
    // NOTE(review): not referenced by the current insertion code — kept for
    // interface parity with a full HNSW implementation.
    ef_construction: usize,
    // Candidate-list size used when searching (HNSW "efSearch").
    ef_search: usize,
    // Simplified graph structure: each vector has a list of neighbor indices
    // (single layer — no hierarchical levels as in real HNSW).
    graph: Vec<Vec<usize>>,
}
|
|
|
|
impl SimpleHnswIndex {
    /// Create a new empty index
    pub fn new(dims: usize, m: usize, ef_construction: usize, ef_search: usize) -> Self {
        Self {
            vectors: Vec::new(),
            dims,
            m,
            ef_construction,
            ef_search,
            graph: Vec::new(),
        }
    }

    /// Create an index with default parameters for Perch embeddings
    pub fn new_default() -> Self {
        Self::new(
            PERCH_EMBEDDING_DIM,
            DEFAULT_M,
            DEFAULT_EF_CONSTRUCTION,
            DEFAULT_EF_SEARCH,
        )
    }

    /// Get the number of vectors in the index
    pub fn len(&self) -> usize {
        self.vectors.len()
    }

    /// Check if the index is empty
    pub fn is_empty(&self) -> bool {
        self.vectors.is_empty()
    }

    /// Add a single vector to the index
    ///
    /// Returns the id assigned to the vector (its position in insertion
    /// order). Panics if the vector's length differs from the index `dims`.
    pub fn add(&mut self, vector: Vec<f32>) -> usize {
        assert_eq!(vector.len(), self.dims);
        let id = self.vectors.len();

        // Find up to `m` nearest existing vectors to link the new one to;
        // the first insert has nothing to connect to.
        let neighbors = if self.vectors.is_empty() {
            Vec::new()
        } else {
            self.search_internal(&vector, self.m.min(self.vectors.len()))
                .into_iter()
                .map(|(idx, _)| idx)
                .collect()
        };

        self.vectors.push(vector);
        self.graph.push(neighbors.clone());

        // Update bidirectional connections, capping each node's degree at
        // 2*m (no pruning of worst links, unlike real HNSW).
        for &neighbor_id in &neighbors {
            if self.graph[neighbor_id].len() < self.m * 2 {
                self.graph[neighbor_id].push(id);
            }
        }

        id
    }

    /// Batch add vectors to the index
    ///
    /// Inserts sequentially via `add`; returns the assigned ids in order.
    pub fn batch_add(&mut self, vectors: Vec<Vec<f32>>) -> Vec<usize> {
        vectors.into_iter().map(|v| self.add(v)).collect()
    }

    /// Search for k nearest neighbors
    ///
    /// Returns up to `k` (index, L2-distance) pairs sorted by distance.
    /// Panics if the query length differs from the index `dims`.
    pub fn search(&self, query: &[f32], k: usize) -> Vec<(usize, f32)> {
        assert_eq!(query.len(), self.dims);
        if self.vectors.is_empty() {
            return Vec::new();
        }
        self.search_internal(query, k)
    }

    /// Internal search implementation with simplified HNSW-like traversal
    ///
    /// Greedy best-first graph walk:
    /// - `candidates` is a min-heap (via `Reverse`) of nodes still to expand.
    /// - `results` is a max-heap of the best `ef` nodes seen so far, so its
    ///   peek is the current worst kept distance.
    /// Callers must ensure the index is non-empty (entry point is vector 0).
    fn search_internal(&self, query: &[f32], k: usize) -> Vec<(usize, f32)> {
        use std::collections::{BinaryHeap, HashSet};
        use std::cmp::Reverse;

        // Never examine fewer candidates than requested results.
        let ef = self.ef_search.max(k);

        // Simplified: always enter at vector 0 (no hierarchical entry layers).
        let entry_point = 0;

        let mut visited: HashSet<usize> = HashSet::new();
        let mut candidates: BinaryHeap<Reverse<(ordered_float::OrderedFloat<f32>, usize)>> =
            BinaryHeap::new();
        let mut results: BinaryHeap<(ordered_float::OrderedFloat<f32>, usize)> = BinaryHeap::new();

        let entry_dist = l2_distance(query, &self.vectors[entry_point]);
        candidates.push(Reverse((ordered_float::OrderedFloat(entry_dist), entry_point)));
        results.push((ordered_float::OrderedFloat(entry_dist), entry_point));
        visited.insert(entry_point);

        while let Some(Reverse((dist, current))) = candidates.pop() {
            // Worst distance currently kept; only meaningful once the result
            // heap is full.
            let worst_dist = if results.len() >= ef {
                results.peek().map(|(d, _)| d.0).unwrap_or(f32::MAX)
            } else {
                f32::MAX
            };

            // The closest unexpanded candidate is already worse than every
            // kept result — the greedy search has converged.
            if dist.0 > worst_dist {
                break;
            }

            // Explore neighbors
            for &neighbor in &self.graph[current] {
                // `insert` returns true only on first visit.
                if visited.insert(neighbor) {
                    let neighbor_dist = l2_distance(query, &self.vectors[neighbor]);

                    // NOTE(review): `worst_dist` is captured before this loop
                    // and not refreshed as `results` changes within it — an
                    // approximation acceptable for this simplified index.
                    if results.len() < ef || neighbor_dist < worst_dist {
                        candidates.push(Reverse((
                            ordered_float::OrderedFloat(neighbor_dist),
                            neighbor,
                        )));
                        results.push((ordered_float::OrderedFloat(neighbor_dist), neighbor));

                        // Keep only the best ef entries (pop drops the worst).
                        if results.len() > ef {
                            results.pop();
                        }
                    }
                }
            }
        }

        // Convert to output format and sort by distance
        let mut output: Vec<(usize, f32)> =
            results.into_iter().map(|(d, idx)| (idx, d.0)).collect();
        output.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
        output.truncate(k);
        output
    }

    /// Set ef_search parameter for queries
    pub fn set_ef_search(&mut self, ef: usize) {
        self.ef_search = ef;
    }
}
|
|
|
|
/// Setup a test index with the specified number of vectors
|
|
pub fn setup_test_index(size: usize) -> SimpleHnswIndex {
|
|
let vectors = generate_random_vectors(size, PERCH_EMBEDDING_DIM);
|
|
let mut index = SimpleHnswIndex::new_default();
|
|
|
|
for vec in vectors {
|
|
index.add(vec);
|
|
}
|
|
|
|
index
|
|
}
|
|
|
|
/// Scalar quantizer for int8 compression
pub struct ScalarQuantizer {
    // Per-dimension minimum observed during calibration.
    mins: Vec<f32>,
    // Per-dimension maximum observed during calibration.
    maxs: Vec<f32>,
    // Per-dimension scale factor: 255 / (max - min), mapping the calibrated
    // range onto the full u8 range.
    scales: Vec<f32>,
    // Number of dimensions this quantizer was built for.
    dims: usize,
}
|
|
|
|
impl ScalarQuantizer {
|
|
/// Create a new quantizer for the specified dimensions
|
|
pub fn new(dims: usize) -> Self {
|
|
Self {
|
|
mins: vec![f32::MAX; dims],
|
|
maxs: vec![f32::MIN; dims],
|
|
scales: vec![1.0; dims],
|
|
dims,
|
|
}
|
|
}
|
|
|
|
/// Calibrate the quantizer from a sample of embeddings
|
|
pub fn calibrate(&mut self, embeddings: &[Vec<f32>]) {
|
|
// Find min/max per dimension
|
|
for embedding in embeddings {
|
|
for (d, &val) in embedding.iter().enumerate() {
|
|
if val < self.mins[d] {
|
|
self.mins[d] = val;
|
|
}
|
|
if val > self.maxs[d] {
|
|
self.maxs[d] = val;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Compute scales
|
|
for d in 0..self.dims {
|
|
let range = self.maxs[d] - self.mins[d];
|
|
if range > 0.0 {
|
|
self.scales[d] = 255.0 / range;
|
|
} else {
|
|
self.scales[d] = 1.0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Quantize a float32 embedding to int8
|
|
pub fn quantize(&self, embedding: &[f32]) -> Vec<u8> {
|
|
embedding
|
|
.iter()
|
|
.enumerate()
|
|
.map(|(d, &val)| {
|
|
let normalized = (val - self.mins[d]) * self.scales[d];
|
|
normalized.round().clamp(0.0, 255.0) as u8
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
/// Dequantize an int8 embedding back to float32
|
|
pub fn dequantize(&self, quantized: &[u8]) -> Vec<f32> {
|
|
quantized
|
|
.iter()
|
|
.enumerate()
|
|
.map(|(d, &val)| (val as f32) / self.scales[d] + self.mins[d])
|
|
.collect()
|
|
}
|
|
}
|
|
|
|
/// Timer utility for measuring operations
pub struct Timer {
    // Instant captured when the timer was started.
    start: Instant,
}
|
|
|
|
impl Timer {
|
|
/// Start a new timer
|
|
pub fn start() -> Self {
|
|
Self {
|
|
start: Instant::now(),
|
|
}
|
|
}
|
|
|
|
/// Get elapsed time
|
|
pub fn elapsed(&self) -> Duration {
|
|
self.start.elapsed()
|
|
}
|
|
|
|
/// Stop and return elapsed time
|
|
pub fn stop(self) -> Duration {
|
|
self.start.elapsed()
|
|
}
|
|
}
|
|
|
|
/// Measure execution time of a closure
///
/// Runs `f` once and returns its result together with the wall-clock
/// time it took.
pub fn measure_time<F, R>(f: F) -> (R, Duration)
where
    F: FnOnce() -> R,
{
    let begun = Instant::now();
    let value = f();
    (value, begun.elapsed())
}
|
|
|
|
/// Measure average execution time over multiple iterations
|
|
pub fn measure_average<F>(iterations: usize, mut f: F) -> PerformanceStats
|
|
where
|
|
F: FnMut() -> (),
|
|
{
|
|
let latencies: Vec<Duration> = (0..iterations)
|
|
.map(|_| {
|
|
let start = Instant::now();
|
|
f();
|
|
start.elapsed()
|
|
})
|
|
.collect();
|
|
|
|
PerformanceStats::from_latencies(latencies)
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    // Generated vectors must have the requested shape and unit norm.
    #[test]
    fn test_generate_random_vectors() {
        let vectors = generate_random_vectors(100, 1536);
        assert_eq!(vectors.len(), 100);
        assert_eq!(vectors[0].len(), 1536);

        // Check normalization
        for vec in &vectors {
            let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
            assert!((norm - 1.0).abs() < 1e-5);
        }
    }

    // Distance between orthogonal unit vectors is sqrt(2).
    #[test]
    fn test_l2_distance() {
        let a = vec![1.0, 0.0, 0.0];
        let b = vec![0.0, 1.0, 0.0];
        let dist = l2_distance(&a, &b);
        assert!((dist - std::f32::consts::SQRT_2).abs() < 1e-5);
    }

    // Recall counts index overlap only; distances are ignored.
    #[test]
    fn test_recall_at_k() {
        let results: Vec<(usize, f32)> = vec![(0, 0.1), (1, 0.2), (2, 0.3), (3, 0.4), (5, 0.5)];
        let ground_truth: Vec<(usize, f32)> =
            vec![(0, 0.1), (1, 0.2), (2, 0.3), (4, 0.4), (5, 0.5)];

        let recall = measure_recall_at_k(&results, &ground_truth, 5);
        assert!((recall - 0.8).abs() < 1e-5); // 4 out of 5 match
    }

    // Quantize/dequantize round trip should stay close to the original.
    #[test]
    fn test_scalar_quantizer() {
        let vectors = generate_random_vectors(100, 128);
        let mut quantizer = ScalarQuantizer::new(128);
        quantizer.calibrate(&vectors);

        for vec in &vectors {
            let quantized = quantizer.quantize(vec);
            let dequantized = quantizer.dequantize(&quantized);

            // Check that dequantized is close to original
            // (mean absolute error across dimensions).
            let error: f32 = vec
                .iter()
                .zip(dequantized.iter())
                .map(|(a, b)| (a - b).abs())
                .sum::<f32>()
                / vec.len() as f32;

            assert!(error < 0.1); // Average error should be small
        }
    }

    // Percentiles must be monotonically ordered: min <= p50 <= p95 <= p99 <= max.
    #[test]
    fn test_performance_stats() {
        let latencies: Vec<Duration> = (0..100)
            .map(|i| Duration::from_micros(100 + i * 10))
            .collect();

        let stats = PerformanceStats::from_latencies(latencies);
        assert_eq!(stats.count, 100);
        assert!(stats.min <= stats.p50);
        assert!(stats.p50 <= stats.p95);
        assert!(stats.p95 <= stats.p99);
        assert!(stats.p99 <= stats.max);
    }
}
|