Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,934 @@
//! Quantization techniques for memory compression
//!
//! This module provides tiered quantization strategies as specified in ADR-001:
//!
//! | Quantization | Compression | Use Case |
//! |--------------|-------------|----------|
//! | Scalar (u8) | 4x | Warm data (40-80% access) |
//! | Int4 | 8x | Cool data (10-40% access) |
//! | Product | 8-16x | Cold data (1-10% access) |
//! | Binary | 32x | Archive (<1% access) |
//!
//! ## Performance Optimizations v2
//!
//! - SIMD-accelerated distance calculations for scalar (int8) quantization
//! - SIMD popcnt for binary hamming distance
//! - 4x loop unrolling for better instruction-level parallelism
//! - Separate accumulator strategy to reduce data dependencies
use crate::error::Result;
use serde::{Deserialize, Serialize};
/// Trait for quantized vector representations
pub trait QuantizedVector: Send + Sync {
/// Quantize a full-precision vector
fn quantize(vector: &[f32]) -> Self;
/// Calculate distance to another quantized vector
fn distance(&self, other: &Self) -> f32;
/// Reconstruct approximate full-precision vector
fn reconstruct(&self) -> Vec<f32>;
}
/// Scalar quantization to int8 (4x compression)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalarQuantized {
/// Quantized values (int8)
pub data: Vec<u8>,
/// Minimum value for dequantization
pub min: f32,
/// Scale factor for dequantization
pub scale: f32,
}
impl QuantizedVector for ScalarQuantized {
fn quantize(vector: &[f32]) -> Self {
let min = vector.iter().copied().fold(f32::INFINITY, f32::min);
let max = vector.iter().copied().fold(f32::NEG_INFINITY, f32::max);
// Handle edge case where all values are the same (scale = 0)
let scale = if (max - min).abs() < f32::EPSILON {
1.0 // Arbitrary non-zero scale when all values are identical
} else {
(max - min) / 255.0
};
let data = vector
.iter()
.map(|&v| ((v - min) / scale).round().clamp(0.0, 255.0) as u8)
.collect();
Self { data, min, scale }
}
fn distance(&self, other: &Self) -> f32 {
// Fast int8 distance calculation with SIMD optimization
// Use i32 to avoid overflow: max diff is 255, and 255*255=65025 fits in i32
// Scale handling: We use the average of both scales for balanced comparison.
// Using max(scale) would bias toward the vector with larger range,
// while average provides a more symmetric distance metric.
// This ensures distance(a, b) ≈ distance(b, a) in the reconstructed space.
let avg_scale = (self.scale + other.scale) / 2.0;
// Use SIMD-optimized version for larger vectors
#[cfg(target_arch = "aarch64")]
{
if self.data.len() >= 16 {
return unsafe { scalar_distance_neon(&self.data, &other.data) }.sqrt() * avg_scale;
}
}
#[cfg(target_arch = "x86_64")]
{
if self.data.len() >= 32 && is_x86_feature_detected!("avx2") {
return unsafe { scalar_distance_avx2(&self.data, &other.data) }.sqrt() * avg_scale;
}
}
// Scalar fallback with 4x loop unrolling for better ILP
scalar_distance_scalar(&self.data, &other.data).sqrt() * avg_scale
}
fn reconstruct(&self) -> Vec<f32> {
self.data
.iter()
.map(|&v| self.min + (v as f32) * self.scale)
.collect()
}
}
/// Product quantization (8-16x compression)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProductQuantized {
/// Quantized codes (one per subspace)
pub codes: Vec<u8>,
/// Codebooks for each subspace
pub codebooks: Vec<Vec<Vec<f32>>>,
}
impl ProductQuantized {
/// Train product quantization on a set of vectors
pub fn train(
vectors: &[Vec<f32>],
num_subspaces: usize,
codebook_size: usize,
iterations: usize,
) -> Result<Self> {
if vectors.is_empty() {
return Err(crate::error::RuvectorError::InvalidInput(
"Cannot train on empty vector set".into(),
));
}
if vectors[0].is_empty() {
return Err(crate::error::RuvectorError::InvalidInput(
"Cannot train on vectors with zero dimensions".into(),
));
}
if codebook_size > 256 {
return Err(crate::error::RuvectorError::InvalidParameter(format!(
"Codebook size {} exceeds u8 maximum of 256",
codebook_size
)));
}
let dimensions = vectors[0].len();
let subspace_dim = dimensions / num_subspaces;
let mut codebooks = Vec::with_capacity(num_subspaces);
// Train codebook for each subspace using k-means
for subspace_idx in 0..num_subspaces {
let start = subspace_idx * subspace_dim;
let end = start + subspace_dim;
// Extract subspace vectors
let subspace_vectors: Vec<Vec<f32>> =
vectors.iter().map(|v| v[start..end].to_vec()).collect();
// Run k-means
let codebook = kmeans_clustering(&subspace_vectors, codebook_size, iterations);
codebooks.push(codebook);
}
Ok(Self {
codes: vec![],
codebooks,
})
}
/// Quantize a vector using trained codebooks
pub fn encode(&self, vector: &[f32]) -> Vec<u8> {
let num_subspaces = self.codebooks.len();
let subspace_dim = vector.len() / num_subspaces;
let mut codes = Vec::with_capacity(num_subspaces);
for (subspace_idx, codebook) in self.codebooks.iter().enumerate() {
let start = subspace_idx * subspace_dim;
let end = start + subspace_dim;
let subvector = &vector[start..end];
// Find nearest centroid
let code = codebook
.iter()
.enumerate()
.min_by(|(_, a), (_, b)| {
let dist_a = euclidean_squared(subvector, a);
let dist_b = euclidean_squared(subvector, b);
dist_a.partial_cmp(&dist_b).unwrap()
})
.map(|(idx, _)| idx as u8)
.unwrap_or(0);
codes.push(code);
}
codes
}
}
/// Int4 quantization (8x compression)
///
/// Quantizes f32 to 4-bit integers (0-15), packing 2 values per byte.
/// Provides 8x compression with better precision than binary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Int4Quantized {
/// Packed 4-bit values (2 per byte)
pub data: Vec<u8>,
/// Minimum value for dequantization
pub min: f32,
/// Scale factor for dequantization
pub scale: f32,
/// Number of dimensions
pub dimensions: usize,
}
impl Int4Quantized {
/// Quantize a vector to 4-bit representation
pub fn quantize(vector: &[f32]) -> Self {
let min = vector.iter().copied().fold(f32::INFINITY, f32::min);
let max = vector.iter().copied().fold(f32::NEG_INFINITY, f32::max);
// Handle edge case where all values are the same
let scale = if (max - min).abs() < f32::EPSILON {
1.0
} else {
(max - min) / 15.0 // 4-bit gives 0-15 range
};
let dimensions = vector.len();
let num_bytes = dimensions.div_ceil(2);
let mut data = vec![0u8; num_bytes];
for (i, &v) in vector.iter().enumerate() {
let quantized = ((v - min) / scale).round().clamp(0.0, 15.0) as u8;
let byte_idx = i / 2;
if i % 2 == 0 {
// Low nibble
data[byte_idx] |= quantized;
} else {
// High nibble
data[byte_idx] |= quantized << 4;
}
}
Self {
data,
min,
scale,
dimensions,
}
}
/// Calculate distance to another Int4 quantized vector
pub fn distance(&self, other: &Self) -> f32 {
assert_eq!(self.dimensions, other.dimensions);
// Use average scale for balanced comparison
let avg_scale = (self.scale + other.scale) / 2.0;
let _avg_min = (self.min + other.min) / 2.0;
let mut sum_sq = 0i32;
for i in 0..self.dimensions {
let byte_idx = i / 2;
let shift = if i % 2 == 0 { 0 } else { 4 };
let a = ((self.data[byte_idx] >> shift) & 0x0F) as i32;
let b = ((other.data[byte_idx] >> shift) & 0x0F) as i32;
let diff = a - b;
sum_sq += diff * diff;
}
(sum_sq as f32).sqrt() * avg_scale
}
/// Reconstruct approximate full-precision vector
pub fn reconstruct(&self) -> Vec<f32> {
let mut result = Vec::with_capacity(self.dimensions);
for i in 0..self.dimensions {
let byte_idx = i / 2;
let shift = if i % 2 == 0 { 0 } else { 4 };
let quantized = (self.data[byte_idx] >> shift) & 0x0F;
result.push(self.min + (quantized as f32) * self.scale);
}
result
}
/// Get compression ratio (8x for Int4)
pub fn compression_ratio() -> f32 {
8.0 // f32 (4 bytes) -> 4 bits (0.5 bytes)
}
}
/// Binary quantization (32x compression)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryQuantized {
/// Binary representation (1 bit per dimension, packed into bytes)
pub bits: Vec<u8>,
/// Number of dimensions
pub dimensions: usize,
}
impl QuantizedVector for BinaryQuantized {
fn quantize(vector: &[f32]) -> Self {
let dimensions = vector.len();
let num_bytes = dimensions.div_ceil(8);
let mut bits = vec![0u8; num_bytes];
for (i, &v) in vector.iter().enumerate() {
if v > 0.0 {
let byte_idx = i / 8;
let bit_idx = i % 8;
bits[byte_idx] |= 1 << bit_idx;
}
}
Self { bits, dimensions }
}
fn distance(&self, other: &Self) -> f32 {
// Hamming distance using SIMD-friendly operations
Self::hamming_distance_fast(&self.bits, &other.bits) as f32
}
fn reconstruct(&self) -> Vec<f32> {
let mut result = Vec::with_capacity(self.dimensions);
for i in 0..self.dimensions {
let byte_idx = i / 8;
let bit_idx = i % 8;
let bit = (self.bits[byte_idx] >> bit_idx) & 1;
result.push(if bit == 1 { 1.0 } else { -1.0 });
}
result
}
}
impl BinaryQuantized {
/// Fast hamming distance using SIMD-optimized operations
///
/// Uses hardware POPCNT on x86_64 or NEON vcnt on ARM64 for optimal performance.
/// Processes 16 bytes at a time on ARM64, 8 bytes at a time on x86_64.
/// Falls back to 64-bit operations for remainders.
pub fn hamming_distance_fast(a: &[u8], b: &[u8]) -> u32 {
// Use SIMD-optimized version based on architecture
#[cfg(target_arch = "aarch64")]
{
if a.len() >= 16 {
return unsafe { hamming_distance_neon(a, b) };
}
}
#[cfg(target_arch = "x86_64")]
{
if a.len() >= 8 && is_x86_feature_detected!("popcnt") {
return unsafe { hamming_distance_simd_x86(a, b) };
}
}
// Scalar fallback using 64-bit operations
let mut distance = 0u32;
// Process 8 bytes at a time using u64
let chunks_a = a.chunks_exact(8);
let chunks_b = b.chunks_exact(8);
let remainder_a = chunks_a.remainder();
let remainder_b = chunks_b.remainder();
for (chunk_a, chunk_b) in chunks_a.zip(chunks_b) {
let a_u64 = u64::from_le_bytes(chunk_a.try_into().unwrap());
let b_u64 = u64::from_le_bytes(chunk_b.try_into().unwrap());
distance += (a_u64 ^ b_u64).count_ones();
}
// Handle remainder bytes
for (&a_byte, &b_byte) in remainder_a.iter().zip(remainder_b) {
distance += (a_byte ^ b_byte).count_ones();
}
distance
}
/// Compute normalized hamming similarity (0.0 to 1.0)
pub fn similarity(&self, other: &Self) -> f32 {
let distance = self.distance(other);
1.0 - (distance / self.dimensions as f32)
}
/// Get compression ratio (32x for binary)
pub fn compression_ratio() -> f32 {
32.0 // f32 (4 bytes = 32 bits) -> 1 bit
}
/// Convert to bytes for storage
pub fn to_bytes(&self) -> &[u8] {
&self.bits
}
/// Create from bytes
pub fn from_bytes(bits: Vec<u8>, dimensions: usize) -> Self {
Self { bits, dimensions }
}
}
// ============================================================================
// Helper functions for scalar quantization distance
// ============================================================================
/// Scalar fallback for scalar quantization distance (sum of squared differences)
fn scalar_distance_scalar(a: &[u8], b: &[u8]) -> f32 {
let mut sum_sq = 0i32;
// 4x loop unrolling for better ILP
let chunks = a.len() / 4;
for i in 0..chunks {
let idx = i * 4;
let d0 = (a[idx] as i32) - (b[idx] as i32);
let d1 = (a[idx + 1] as i32) - (b[idx + 1] as i32);
let d2 = (a[idx + 2] as i32) - (b[idx + 2] as i32);
let d3 = (a[idx + 3] as i32) - (b[idx + 3] as i32);
sum_sq += d0 * d0 + d1 * d1 + d2 * d2 + d3 * d3;
}
// Handle remainder
for i in (chunks * 4)..a.len() {
let diff = (a[i] as i32) - (b[i] as i32);
sum_sq += diff * diff;
}
sum_sq as f32
}
/// NEON SIMD distance for scalar quantization
///
/// # Safety
/// Caller must ensure a.len() == b.len()
#[cfg(target_arch = "aarch64")]
#[inline(always)]
unsafe fn scalar_distance_neon(a: &[u8], b: &[u8]) -> f32 {
use std::arch::aarch64::*;
let len = a.len();
let a_ptr = a.as_ptr();
let b_ptr = b.as_ptr();
let mut sum = vdupq_n_s32(0);
// Process 8 bytes at a time
let chunks = len / 8;
let mut idx = 0usize;
for _ in 0..chunks {
// Load 8 u8 values
let va = vld1_u8(a_ptr.add(idx));
let vb = vld1_u8(b_ptr.add(idx));
// Zero-extend u8 to u16
let va_u16 = vmovl_u8(va);
let vb_u16 = vmovl_u8(vb);
// Convert to signed for subtraction
let va_s16 = vreinterpretq_s16_u16(va_u16);
let vb_s16 = vreinterpretq_s16_u16(vb_u16);
// Compute difference
let diff = vsubq_s16(va_s16, vb_s16);
// Square and accumulate
let prod_lo = vmull_s16(vget_low_s16(diff), vget_low_s16(diff));
let prod_hi = vmull_s16(vget_high_s16(diff), vget_high_s16(diff));
sum = vaddq_s32(sum, prod_lo);
sum = vaddq_s32(sum, prod_hi);
idx += 8;
}
let mut total = vaddvq_s32(sum);
// Handle remainder with bounds-check elimination
for i in (chunks * 8)..len {
let diff = (*a.get_unchecked(i) as i32) - (*b.get_unchecked(i) as i32);
total += diff * diff;
}
total as f32
}
/// AVX2 SIMD distance for scalar quantization
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
#[inline]
unsafe fn scalar_distance_avx2(a: &[u8], b: &[u8]) -> f32 {
use std::arch::x86_64::*;
let len = a.len();
let mut sum = _mm256_setzero_si256();
// Process 16 bytes at a time
let chunks = len / 16;
for i in 0..chunks {
let idx = i * 16;
// Load 16 u8 values
let va = _mm_loadu_si128(a.as_ptr().add(idx) as *const __m128i);
let vb = _mm_loadu_si128(b.as_ptr().add(idx) as *const __m128i);
// Zero-extend u8 to i16 (low and high halves)
let va_lo = _mm256_cvtepu8_epi16(va);
let vb_lo = _mm256_cvtepu8_epi16(vb);
// Compute difference
let diff = _mm256_sub_epi16(va_lo, vb_lo);
// Square (multiply i16 * i16 -> i32)
let prod = _mm256_madd_epi16(diff, diff);
// Accumulate
sum = _mm256_add_epi32(sum, prod);
}
// Horizontal sum
let sum_lo = _mm256_castsi256_si128(sum);
let sum_hi = _mm256_extracti128_si256(sum, 1);
let sum_128 = _mm_add_epi32(sum_lo, sum_hi);
let shuffle = _mm_shuffle_epi32(sum_128, 0b10_11_00_01);
let sum_64 = _mm_add_epi32(sum_128, shuffle);
let shuffle2 = _mm_shuffle_epi32(sum_64, 0b00_00_10_10);
let final_sum = _mm_add_epi32(sum_64, shuffle2);
let mut total = _mm_cvtsi128_si32(final_sum);
// Handle remainder
for i in (chunks * 16)..len {
let diff = (a[i] as i32) - (b[i] as i32);
total += diff * diff;
}
total as f32
}
// Helper functions
fn euclidean_squared(a: &[f32], b: &[f32]) -> f32 {
a.iter()
.zip(b)
.map(|(&x, &y)| {
let diff = x - y;
diff * diff
})
.sum()
}
fn kmeans_clustering(vectors: &[Vec<f32>], k: usize, iterations: usize) -> Vec<Vec<f32>> {
use rand::seq::SliceRandom;
use rand::thread_rng;
let mut rng = thread_rng();
// Initialize centroids randomly
let mut centroids: Vec<Vec<f32>> = vectors.choose_multiple(&mut rng, k).cloned().collect();
for _ in 0..iterations {
// Assign vectors to nearest centroid
let mut assignments = vec![Vec::new(); k];
for vector in vectors {
let nearest = centroids
.iter()
.enumerate()
.min_by(|(_, a), (_, b)| {
let dist_a = euclidean_squared(vector, a);
let dist_b = euclidean_squared(vector, b);
dist_a.partial_cmp(&dist_b).unwrap()
})
.map(|(idx, _)| idx)
.unwrap_or(0);
assignments[nearest].push(vector.clone());
}
// Update centroids
for (centroid, assigned) in centroids.iter_mut().zip(&assignments) {
if !assigned.is_empty() {
let dim = centroid.len();
*centroid = vec![0.0; dim];
for vector in assigned {
for (i, &v) in vector.iter().enumerate() {
centroid[i] += v;
}
}
let count = assigned.len() as f32;
for v in centroid.iter_mut() {
*v /= count;
}
}
}
}
centroids
}
// =============================================================================
// SIMD-Optimized Distance Calculations for Quantized Vectors
// =============================================================================
// NOTE: scalar_distance_scalar is already defined above (lines 404-425)
// NOTE: scalar_distance_neon is already defined above (lines 430-473)
// NOTE: scalar_distance_avx2 is already defined above (lines 479-540)
// This section uses the existing implementations for consistency
/// SIMD-optimized hamming distance using popcnt
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "popcnt")]
#[inline]
unsafe fn hamming_distance_simd_x86(a: &[u8], b: &[u8]) -> u32 {
use std::arch::x86_64::*;
let mut distance = 0u64;
// Process 8 bytes at a time using u64 with hardware popcnt
let chunks_a = a.chunks_exact(8);
let chunks_b = b.chunks_exact(8);
let remainder_a = chunks_a.remainder();
let remainder_b = chunks_b.remainder();
for (chunk_a, chunk_b) in chunks_a.zip(chunks_b) {
let a_u64 = u64::from_le_bytes(chunk_a.try_into().unwrap());
let b_u64 = u64::from_le_bytes(chunk_b.try_into().unwrap());
distance += _popcnt64((a_u64 ^ b_u64) as i64) as u64;
}
// Handle remainder
for (&a_byte, &b_byte) in remainder_a.iter().zip(remainder_b) {
distance += (a_byte ^ b_byte).count_ones() as u64;
}
distance as u32
}
/// NEON-optimized hamming distance for ARM64
///
/// # Safety
/// Caller must ensure a.len() == b.len()
#[cfg(target_arch = "aarch64")]
#[inline(always)]
unsafe fn hamming_distance_neon(a: &[u8], b: &[u8]) -> u32 {
use std::arch::aarch64::*;
let len = a.len();
let a_ptr = a.as_ptr();
let b_ptr = b.as_ptr();
let chunks = len / 16;
let mut idx = 0usize;
let mut sum = vdupq_n_u8(0);
for _ in 0..chunks {
// Load 16 bytes
let a_vec = vld1q_u8(a_ptr.add(idx));
let b_vec = vld1q_u8(b_ptr.add(idx));
// XOR and count bits using vcntq_u8 (population count)
let xor_result = veorq_u8(a_vec, b_vec);
let bits = vcntq_u8(xor_result);
// Accumulate
sum = vaddq_u8(sum, bits);
idx += 16;
}
// Horizontal sum
let sum_val = vaddvq_u8(sum) as u32;
// Handle remainder with bounds-check elimination
let mut remainder_sum = 0u32;
let start = chunks * 16;
for i in start..len {
remainder_sum += (*a.get_unchecked(i) ^ *b.get_unchecked(i)).count_ones();
}
sum_val + remainder_sum
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_scalar_quantization() {
let vector = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let quantized = ScalarQuantized::quantize(&vector);
let reconstructed = quantized.reconstruct();
// Check approximate reconstruction
for (orig, recon) in vector.iter().zip(&reconstructed) {
assert!((orig - recon).abs() < 0.1);
}
}
#[test]
fn test_binary_quantization() {
let vector = vec![1.0, -1.0, 2.0, -2.0, 0.5];
let quantized = BinaryQuantized::quantize(&vector);
assert_eq!(quantized.dimensions, 5);
assert_eq!(quantized.bits.len(), 1); // 5 bits fit in 1 byte
}
#[test]
fn test_binary_distance() {
let v1 = vec![1.0, 1.0, 1.0, 1.0];
let v2 = vec![1.0, 1.0, -1.0, -1.0];
let q1 = BinaryQuantized::quantize(&v1);
let q2 = BinaryQuantized::quantize(&v2);
let dist = q1.distance(&q2);
assert_eq!(dist, 2.0); // 2 bits differ
}
#[test]
fn test_scalar_quantization_roundtrip() {
// Test that quantize -> reconstruct produces values close to original
let test_vectors = vec![
vec![1.0, 2.0, 3.0, 4.0, 5.0],
vec![-10.0, -5.0, 0.0, 5.0, 10.0],
vec![0.1, 0.2, 0.3, 0.4, 0.5],
vec![100.0, 200.0, 300.0, 400.0, 500.0],
];
for vector in test_vectors {
let quantized = ScalarQuantized::quantize(&vector);
let reconstructed = quantized.reconstruct();
assert_eq!(vector.len(), reconstructed.len());
for (orig, recon) in vector.iter().zip(reconstructed.iter()) {
// With 8-bit quantization, max error is roughly (max-min)/255
let max = vector.iter().copied().fold(f32::NEG_INFINITY, f32::max);
let min = vector.iter().copied().fold(f32::INFINITY, f32::min);
let max_error = (max - min) / 255.0 * 2.0; // Allow 2x for rounding
assert!(
(orig - recon).abs() < max_error,
"Roundtrip error too large: orig={}, recon={}, error={}",
orig,
recon,
(orig - recon).abs()
);
}
}
}
#[test]
fn test_scalar_distance_symmetry() {
// Test that distance(a, b) == distance(b, a)
let v1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let v2 = vec![2.0, 3.0, 4.0, 5.0, 6.0];
let q1 = ScalarQuantized::quantize(&v1);
let q2 = ScalarQuantized::quantize(&v2);
let dist_ab = q1.distance(&q2);
let dist_ba = q2.distance(&q1);
// Distance should be symmetric (within floating point precision)
assert!(
(dist_ab - dist_ba).abs() < 0.01,
"Distance is not symmetric: d(a,b)={}, d(b,a)={}",
dist_ab,
dist_ba
);
}
#[test]
fn test_scalar_distance_different_scales() {
// Test distance calculation with vectors that have different scales
let v1 = vec![1.0, 2.0, 3.0, 4.0, 5.0]; // range: 4.0
let v2 = vec![10.0, 20.0, 30.0, 40.0, 50.0]; // range: 40.0
let q1 = ScalarQuantized::quantize(&v1);
let q2 = ScalarQuantized::quantize(&v2);
let dist_ab = q1.distance(&q2);
let dist_ba = q2.distance(&q1);
// With average scaling, symmetry should be maintained
assert!(
(dist_ab - dist_ba).abs() < 0.01,
"Distance with different scales not symmetric: d(a,b)={}, d(b,a)={}",
dist_ab,
dist_ba
);
}
#[test]
fn test_scalar_quantization_edge_cases() {
// Test with all same values
let same_values = vec![5.0, 5.0, 5.0, 5.0];
let quantized = ScalarQuantized::quantize(&same_values);
let reconstructed = quantized.reconstruct();
for (orig, recon) in same_values.iter().zip(reconstructed.iter()) {
assert!((orig - recon).abs() < 0.01);
}
// Test with extreme ranges
let extreme = vec![f32::MIN / 1e10, 0.0, f32::MAX / 1e10];
let quantized = ScalarQuantized::quantize(&extreme);
let reconstructed = quantized.reconstruct();
assert_eq!(extreme.len(), reconstructed.len());
}
#[test]
fn test_binary_distance_symmetry() {
// Test that binary distance is symmetric
let v1 = vec![1.0, -1.0, 1.0, -1.0];
let v2 = vec![1.0, 1.0, -1.0, -1.0];
let q1 = BinaryQuantized::quantize(&v1);
let q2 = BinaryQuantized::quantize(&v2);
let dist_ab = q1.distance(&q2);
let dist_ba = q2.distance(&q1);
assert_eq!(
dist_ab, dist_ba,
"Binary distance not symmetric: d(a,b)={}, d(b,a)={}",
dist_ab, dist_ba
);
}
#[test]
fn test_int4_quantization() {
let vector = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let quantized = Int4Quantized::quantize(&vector);
let reconstructed = quantized.reconstruct();
assert_eq!(quantized.dimensions, 5);
// 5 dimensions = 3 bytes (2 per byte, last byte has 1)
assert_eq!(quantized.data.len(), 3);
// Check approximate reconstruction
for (orig, recon) in vector.iter().zip(&reconstructed) {
// With 4-bit quantization, max error is roughly (max-min)/15
let max_error = (5.0 - 1.0) / 15.0 * 2.0;
assert!(
(orig - recon).abs() < max_error,
"Int4 roundtrip error too large: orig={}, recon={}",
orig,
recon
);
}
}
#[test]
fn test_int4_distance() {
// Use vectors with different quantized patterns
// v1 spans [0.0, 15.0] -> quantizes to [0, 1, 2, ..., 15] (linear mapping)
// v2 spans [0.0, 15.0] but with different distribution
let v1 = vec![0.0, 5.0, 10.0, 15.0];
let v2 = vec![0.0, 3.0, 12.0, 15.0]; // Different middle values
let q1 = Int4Quantized::quantize(&v1);
let q2 = Int4Quantized::quantize(&v2);
let dist = q1.distance(&q2);
// The quantized values differ in the middle, so distance should be positive
assert!(
dist > 0.0,
"Distance should be positive, got {}. q1.data={:?}, q2.data={:?}",
dist,
q1.data,
q2.data
);
}
#[test]
fn test_int4_distance_symmetry() {
let v1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let v2 = vec![2.0, 3.0, 4.0, 5.0, 6.0];
let q1 = Int4Quantized::quantize(&v1);
let q2 = Int4Quantized::quantize(&v2);
let dist_ab = q1.distance(&q2);
let dist_ba = q2.distance(&q1);
assert!(
(dist_ab - dist_ba).abs() < 0.01,
"Int4 distance not symmetric: d(a,b)={}, d(b,a)={}",
dist_ab,
dist_ba
);
}
#[test]
fn test_int4_compression_ratio() {
assert_eq!(Int4Quantized::compression_ratio(), 8.0);
}
#[test]
fn test_binary_fast_hamming() {
// Test fast hamming distance with various sizes
let a = vec![0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xAA];
let b = vec![0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x55];
let distance = BinaryQuantized::hamming_distance_fast(&a, &b);
// All bits differ: 9 bytes * 8 bits = 72 bits
assert_eq!(distance, 72);
}
#[test]
fn test_binary_similarity() {
let v1 = vec![1.0; 8]; // All positive
let v2 = vec![1.0; 8]; // Same
let q1 = BinaryQuantized::quantize(&v1);
let q2 = BinaryQuantized::quantize(&v2);
let sim = q1.similarity(&q2);
assert!(
(sim - 1.0).abs() < 0.001,
"Same vectors should have similarity 1.0"
);
}
#[test]
fn test_binary_compression_ratio() {
assert_eq!(BinaryQuantized::compression_ratio(), 32.0);
}
}