Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
371
vendor/ruvector/crates/ruvector-postgres/src/quantization/binary.rs
vendored
Normal file
371
vendor/ruvector/crates/ruvector-postgres/src/quantization/binary.rs
vendored
Normal file
@@ -0,0 +1,371 @@
|
||||
//! Binary Quantization
|
||||
//!
|
||||
//! Compresses vectors to 1 bit per dimension, achieving 32x memory reduction.
|
||||
//! Uses Hamming distance for fast comparison.
|
||||
|
||||
/// Quantize f32 vector to binary (1 bit per dimension)
///
/// Strictly positive values become 1; zero and negative values become 0.
/// Bit `i` of the input lands in byte `i / 8`, bit position `i % 8` (LSB
/// first); the output has `ceil(len / 8)` bytes.
pub fn quantize(vector: &[f32]) -> Vec<u8> {
    vector
        .chunks(8)
        .map(|chunk| {
            chunk.iter().enumerate().fold(0u8, |byte, (bit, &v)| {
                if v > 0.0 {
                    byte | (1 << bit)
                } else {
                    byte
                }
            })
        })
        .collect()
}
|
||||
|
||||
/// Quantize to 1 bit per dimension using `threshold` as the cut point.
///
/// Values strictly greater than `threshold` map to 1; all others map to 0.
/// Packing layout matches [`quantize`]: bit `i % 8` of byte `i / 8`.
pub fn quantize_with_threshold(vector: &[f32], threshold: f32) -> Vec<u8> {
    vector
        .chunks(8)
        .map(|chunk| {
            chunk.iter().enumerate().fold(0u8, |byte, (bit, &v)| {
                if v > threshold {
                    byte | (1 << bit)
                } else {
                    byte
                }
            })
        })
        .collect()
}
|
||||
|
||||
/// Calculate Hamming distance between binary vectors
///
/// Counts the differing bits between two equal-length packed bit vectors.
/// Length equality is only checked in debug builds.
pub fn hamming_distance(a: &[u8], b: &[u8]) -> u32 {
    debug_assert_eq!(a.len(), b.len());

    let mut total = 0u32;
    for (x, y) in a.iter().zip(b.iter()) {
        total += (x ^ y).count_ones();
    }
    total
}
|
||||
|
||||
/// SIMD-optimized Hamming distance using POPCNT
///
/// Processes the inputs 8 bytes at a time as `u64` words, then handles the
/// remaining tail bytes with scalar popcount.
///
/// # Safety
///
/// The caller must ensure the `popcnt` CPU feature is available at runtime
/// and that `a.len() == b.len()`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "popcnt")]
unsafe fn hamming_distance_popcnt(a: &[u8], b: &[u8]) -> u32 {
    use std::arch::x86_64::*;

    let n = a.len();
    let mut count = 0u32;

    // Process 8 bytes at a time
    let chunks = n / 8;
    for i in 0..chunks {
        let offset = i * 8;
        // SAFETY: offset + 8 <= n, so both 8-byte reads are in bounds.
        // `read_unaligned` is required: `&[u8]` carries no u64 alignment
        // guarantee, so the previous `*(ptr as *const u64)` dereference was
        // undefined behavior on unaligned data.
        let va = std::ptr::read_unaligned(a.as_ptr().add(offset) as *const u64);
        let vb = std::ptr::read_unaligned(b.as_ptr().add(offset) as *const u64);
        count += _popcnt64((va ^ vb) as i64) as u32;
    }

    // Handle remainder (0..8 tail bytes) with scalar popcount
    for i in (chunks * 8)..n {
        count += (a[i] ^ b[i]).count_ones();
    }

    count
}
|
||||
|
||||
/// AVX2-optimized Hamming distance using vpshufb popcount
///
/// Uses the SWAR (SIMD Within A Register) technique with lookup tables.
/// Processes 32 bytes per iteration, which is 4x faster than scalar POPCNT
/// for large vectors (1024+ dimensions).
///
/// # Safety
///
/// The caller must ensure the `avx2` CPU feature is available at runtime
/// and that `a.len() == b.len()`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn hamming_distance_avx2(a: &[u8], b: &[u8]) -> u32 {
    use std::arch::x86_64::*;

    let n = a.len();

    // Lookup table for popcount of 4-bit values: entry i holds the number
    // of set bits in nibble i, repeated across both 128-bit lanes so
    // vpshufb can index it per byte.
    let lookup = _mm256_setr_epi8(
        0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3,
        3, 4,
    );
    let low_mask = _mm256_set1_epi8(0x0F);

    let mut total = _mm256_setzero_si256();

    // Process 32 bytes at a time
    let chunks = n / 32;
    for i in 0..chunks {
        let offset = i * 32;
        // Unaligned loads are in bounds: offset + 32 <= n.
        let va = _mm256_loadu_si256(a.as_ptr().add(offset) as *const __m256i);
        let vb = _mm256_loadu_si256(b.as_ptr().add(offset) as *const __m256i);

        // XOR the vectors
        let xor = _mm256_xor_si256(va, vb);

        // Split into low and high nibbles
        let lo = _mm256_and_si256(xor, low_mask);
        let hi = _mm256_and_si256(_mm256_srli_epi16(xor, 4), low_mask);

        // Lookup popcount for each nibble
        let popcnt_lo = _mm256_shuffle_epi8(lookup, lo);
        let popcnt_hi = _mm256_shuffle_epi8(lookup, hi);

        // Sum nibble popcounts (per-byte max is 8, so i8 lanes cannot
        // overflow)
        let popcnt = _mm256_add_epi8(popcnt_lo, popcnt_hi);

        // Accumulate using sad (sum of absolute differences from zero),
        // which sums each group of 8 bytes into one 64-bit lane.
        let sad = _mm256_sad_epu8(popcnt, _mm256_setzero_si256());
        total = _mm256_add_epi64(total, sad);
    }

    // Horizontal sum of the 4 64-bit values
    let sum128_lo = _mm256_castsi256_si128(total);
    let sum128_hi = _mm256_extracti128_si256(total, 1);
    let sum128 = _mm_add_epi64(sum128_lo, sum128_hi);
    let sum64 = _mm_add_epi64(sum128, _mm_srli_si128(sum128, 8));
    let mut count = _mm_cvtsi128_si64(sum64) as u32;

    // Handle remainder with scalar POPCNT
    for i in (chunks * 32)..n {
        count += (a[i] ^ b[i]).count_ones();
    }

    count
}
|
||||
|
||||
/// Calculate Hamming distance with SIMD optimization
|
||||
///
|
||||
/// Automatically selects the best implementation:
|
||||
/// - AVX2 vpshufb for large vectors (>= 128 bytes / 1024 bits)
|
||||
/// - POPCNT for medium vectors (>= 8 bytes)
|
||||
/// - Scalar for small vectors
|
||||
pub fn hamming_distance_simd(a: &[u8], b: &[u8]) -> u32 {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
let n = a.len();
|
||||
|
||||
// For large vectors, AVX2 vpshufb is fastest
|
||||
if n >= 128 && is_x86_feature_detected!("avx2") {
|
||||
return unsafe { hamming_distance_avx2(a, b) };
|
||||
}
|
||||
|
||||
// For medium vectors, use POPCNT
|
||||
if is_x86_feature_detected!("popcnt") {
|
||||
return unsafe { hamming_distance_popcnt(a, b) };
|
||||
}
|
||||
}
|
||||
|
||||
hamming_distance(a, b)
|
||||
}
|
||||
|
||||
/// Normalize Hamming distance to [0, 1] range
|
||||
pub fn normalized_hamming_distance(a: &[u8], b: &[u8], dimensions: usize) -> f32 {
|
||||
let dist = hamming_distance_simd(a, b);
|
||||
dist as f32 / dimensions as f32
|
||||
}
|
||||
|
||||
/// Binary quantized vector
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct BinaryQuantizedVector {
|
||||
pub data: Vec<u8>,
|
||||
pub dimensions: usize,
|
||||
}
|
||||
|
||||
impl BinaryQuantizedVector {
|
||||
/// Create from f32 vector
|
||||
pub fn from_f32(vector: &[f32]) -> Self {
|
||||
Self {
|
||||
data: quantize(vector),
|
||||
dimensions: vector.len(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create from f32 vector with threshold
|
||||
pub fn from_f32_threshold(vector: &[f32], threshold: f32) -> Self {
|
||||
Self {
|
||||
data: quantize_with_threshold(vector, threshold),
|
||||
dimensions: vector.len(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate Hamming distance to another binary vector
|
||||
pub fn hamming_distance(&self, other: &Self) -> u32 {
|
||||
debug_assert_eq!(self.dimensions, other.dimensions);
|
||||
hamming_distance_simd(&self.data, &other.data)
|
||||
}
|
||||
|
||||
/// Calculate normalized distance [0, 1]
|
||||
pub fn normalized_distance(&self, other: &Self) -> f32 {
|
||||
self.hamming_distance(other) as f32 / self.dimensions as f32
|
||||
}
|
||||
|
||||
/// Memory size in bytes
|
||||
pub fn memory_size(&self) -> usize {
|
||||
std::mem::size_of::<Self>() + self.data.len()
|
||||
}
|
||||
|
||||
/// Compression ratio compared to f32
|
||||
pub fn compression_ratio(&self) -> f32 {
|
||||
32.0 // f32 (32 bits) -> 1 bit
|
||||
}
|
||||
|
||||
/// Get bit at position
|
||||
pub fn get_bit(&self, pos: usize) -> bool {
|
||||
debug_assert!(pos < self.dimensions);
|
||||
let byte_idx = pos / 8;
|
||||
let bit_idx = pos % 8;
|
||||
(self.data[byte_idx] >> bit_idx) & 1 == 1
|
||||
}
|
||||
|
||||
/// Count number of 1 bits
|
||||
pub fn popcount(&self) -> u32 {
|
||||
self.data.iter().map(|&b| b.count_ones()).sum()
|
||||
}
|
||||
}
|
||||
|
||||
/// Two-stage search with binary quantization
|
||||
///
|
||||
/// 1. Fast Hamming distance filtering using binary vectors
|
||||
/// 2. Rerank top candidates with full precision distance
|
||||
pub struct BinarySearcher {
|
||||
/// Binary quantized vectors
|
||||
binary_vectors: Vec<BinaryQuantizedVector>,
|
||||
/// Original vectors for reranking
|
||||
original_vectors: Vec<Vec<f32>>,
|
||||
/// Rerank factor (rerank top k * factor candidates)
|
||||
rerank_factor: usize,
|
||||
}
|
||||
|
||||
impl BinarySearcher {
|
||||
/// Create a new binary searcher
|
||||
pub fn new(vectors: Vec<Vec<f32>>, rerank_factor: usize) -> Self {
|
||||
let binary_vectors: Vec<_> = vectors
|
||||
.iter()
|
||||
.map(|v| BinaryQuantizedVector::from_f32(v))
|
||||
.collect();
|
||||
|
||||
Self {
|
||||
binary_vectors,
|
||||
original_vectors: vectors,
|
||||
rerank_factor,
|
||||
}
|
||||
}
|
||||
|
||||
/// Search for k nearest neighbors
|
||||
pub fn search(&self, query: &[f32], k: usize) -> Vec<(usize, f32)> {
|
||||
let query_binary = BinaryQuantizedVector::from_f32(query);
|
||||
|
||||
// Stage 1: Fast Hamming distance search
|
||||
let mut candidates: Vec<(usize, u32)> = self
|
||||
.binary_vectors
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, bv)| (i, query_binary.hamming_distance(bv)))
|
||||
.collect();
|
||||
|
||||
// Sort by Hamming distance
|
||||
candidates.sort_by_key(|(_, d)| *d);
|
||||
|
||||
// Take top k * rerank_factor candidates
|
||||
let n_candidates = (k * self.rerank_factor).min(candidates.len());
|
||||
let top_candidates: Vec<usize> = candidates
|
||||
.iter()
|
||||
.take(n_candidates)
|
||||
.map(|(i, _)| *i)
|
||||
.collect();
|
||||
|
||||
// Stage 2: Rerank with full precision distance
|
||||
let mut reranked: Vec<(usize, f32)> = top_candidates
|
||||
.iter()
|
||||
.map(|&i| {
|
||||
let dist: f32 = query
|
||||
.iter()
|
||||
.zip(self.original_vectors[i].iter())
|
||||
.map(|(a, b)| (a - b).powi(2))
|
||||
.sum::<f32>()
|
||||
.sqrt();
|
||||
(i, dist)
|
||||
})
|
||||
.collect();
|
||||
|
||||
reranked.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
|
||||
reranked.truncate(k);
|
||||
reranked
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Tests
|
||||
// ============================================================================
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Sign-based packing: positive -> 1, non-positive -> 0, with input
    // index i stored at bit (i % 8) of byte (i / 8).
    #[test]
    fn test_quantize() {
        let v = vec![0.5, -0.3, 0.1, -0.8, 0.2, -0.1, 0.9, -0.5];
        let q = quantize(&v);

        assert_eq!(q.len(), 1);
        // Bits: 1, 0, 1, 0, 1, 0, 1, 0 = 0b01010101 = 85
        assert_eq!(q[0], 0b01010101);
    }

    // Scalar Hamming distance on a single-byte pair.
    #[test]
    fn test_hamming_distance() {
        let a = vec![0b11110000];
        let b = vec![0b10101010];
        // XOR: 0b01011010, popcount = 4
        assert_eq!(hamming_distance(&a, &b), 4);
    }

    // Binary quantization is a fixed 32x compression: 1024 dims pack into
    // 128 bytes.
    #[test]
    fn test_compression_ratio() {
        let v = BinaryQuantizedVector::from_f32(&vec![0.0; 1024]);
        assert_eq!(v.compression_ratio(), 32.0);
        assert_eq!(v.data.len(), 128); // 1024 bits = 128 bytes
    }

    // The SIMD dispatcher must agree exactly with the scalar reference;
    // 128 bytes is large enough to take the AVX2 path when available.
    #[test]
    fn test_simd_matches_scalar() {
        let a: Vec<u8> = (0..128).collect();
        let b: Vec<u8> = (0..128).map(|i| 255 - i).collect();

        let scalar = hamming_distance(&a, &b);
        let simd = hamming_distance_simd(&a, &b);

        assert_eq!(scalar, simd);
    }

    // End-to-end two-stage search: right count, ascending distances.
    #[test]
    fn test_binary_searcher() {
        let vectors: Vec<Vec<f32>> = (0..100)
            .map(|i| vec![i as f32 * 0.1, (100 - i) as f32 * 0.1, 0.5])
            .collect();

        let searcher = BinarySearcher::new(vectors.clone(), 4);

        let query = vec![5.0, 5.0, 0.5];
        let results = searcher.search(&query, 5);

        assert_eq!(results.len(), 5);
        // Results should be ordered by distance
        for i in 1..results.len() {
            assert!(results[i].1 >= results[i - 1].1);
        }
    }

    // Bit accessor matches the quantization rule per dimension.
    #[test]
    fn test_get_bit() {
        let v = vec![1.0, -1.0, 1.0, -1.0];
        let bv = BinaryQuantizedVector::from_f32(&v);

        assert!(bv.get_bit(0));
        assert!(!bv.get_bit(1));
        assert!(bv.get_bit(2));
        assert!(!bv.get_bit(3));
    }
}
|
||||
63
vendor/ruvector/crates/ruvector-postgres/src/quantization/mod.rs
vendored
Normal file
63
vendor/ruvector/crates/ruvector-postgres/src/quantization/mod.rs
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
//! Vector quantization for memory reduction
|
||||
//!
|
||||
//! Provides various quantization methods:
|
||||
//! - Scalar (SQ8): 4x compression
|
||||
//! - Product (PQ): 8-32x compression
|
||||
//! - Binary: 32x compression
|
||||
|
||||
pub mod binary;
|
||||
pub mod product;
|
||||
pub mod scalar;
|
||||
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
/// Global quantization table memory tracking
static TABLE_MEMORY_BYTES: AtomicUsize = AtomicUsize::new(0);

/// Get quantization table memory in MB
pub fn get_table_memory_mb() -> f64 {
    let bytes = TABLE_MEMORY_BYTES.load(Ordering::Relaxed);
    bytes as f64 / (1024.0 * 1024.0)
}

/// Track table memory allocation
///
/// Adds `bytes` to the process-wide counter; there is no matching
/// deallocation hook, so the counter is monotonically increasing.
pub fn track_table_allocation(bytes: usize) {
    TABLE_MEMORY_BYTES.fetch_add(bytes, Ordering::Relaxed);
}
|
||||
|
||||
/// Quantization type
///
/// Selects the storage scheme; `Display`/`FromStr` round-trip through the
/// short names used in configuration strings ("none", "sq8", "pq", "binary").
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuantizationType {
    /// No quantization (full precision)
    None,
    /// Scalar quantization (f32 -> i8)
    Scalar,
    /// Product quantization (subspace division)
    Product,
    /// Binary quantization (f32 -> 1 bit)
    Binary,
}

impl std::fmt::Display for QuantizationType {
    /// Render the canonical short lowercase name.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            QuantizationType::None => "none",
            QuantizationType::Scalar => "sq8",
            QuantizationType::Product => "pq",
            QuantizationType::Binary => "binary",
        };
        f.write_str(name)
    }
}

impl std::str::FromStr for QuantizationType {
    type Err = String;

    /// Parse a case-insensitive scheme name, accepting common aliases
    /// ("sq", "bq", the empty string for `None`, ...).
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let lowered = s.to_lowercase();
        match lowered.as_str() {
            "none" | "" => Ok(QuantizationType::None),
            "scalar" | "sq8" | "sq" => Ok(QuantizationType::Scalar),
            "product" | "pq" => Ok(QuantizationType::Product),
            "binary" | "bq" => Ok(QuantizationType::Binary),
            _ => Err(format!("Unknown quantization type: {}", s)),
        }
    }
}
|
||||
380
vendor/ruvector/crates/ruvector-postgres/src/quantization/product.rs
vendored
Normal file
380
vendor/ruvector/crates/ruvector-postgres/src/quantization/product.rs
vendored
Normal file
@@ -0,0 +1,380 @@
|
||||
//! Product Quantization (PQ)
|
||||
//!
|
||||
//! Compresses vectors by dividing into subspaces and quantizing each
|
||||
//! independently. Achieves 8-32x compression with precomputed distance tables.
|
||||
|
||||
use rand::prelude::SliceRandom;
|
||||
use rand::Rng;
|
||||
|
||||
/// Product Quantization configuration
#[derive(Debug, Clone)]
pub struct PQConfig {
    /// Number of subspaces (subvectors)
    pub m: usize,
    /// Number of centroids per subspace (typically 256 for 8-bit codes)
    pub k: usize,
    /// Random seed
    pub seed: u64,
}

impl Default for PQConfig {
    /// Defaults: 8 subspaces x 256 centroids (one byte per code), seed 42.
    fn default() -> Self {
        PQConfig {
            m: 8,
            k: 256,
            seed: 42,
        }
    }
}
|
||||
|
||||
/// Product Quantization index
///
/// Owns one k-means codebook per subspace (learned by `train`); `encode`
/// replaces each subvector with the id of its nearest centroid.
pub struct ProductQuantizer {
    /// Configuration
    config: PQConfig,
    /// Dimensions per subspace
    dims_per_subspace: usize,
    /// Total dimensions
    dimensions: usize,
    /// Centroids for each subspace: [m][k][dims_per_subspace]
    centroids: Vec<Vec<Vec<f32>>>,
    /// Whether trained
    trained: bool,
}

impl ProductQuantizer {
    /// Create a new product quantizer
    ///
    /// # Panics
    ///
    /// Panics if `dimensions` is not divisible by `config.m`.
    ///
    /// NOTE(review): `config.k` is not validated here, but `encode` casts
    /// centroid ids to `u8`, so k > 256 would silently truncate — confirm
    /// callers keep k <= 256.
    pub fn new(dimensions: usize, config: PQConfig) -> Self {
        assert!(
            dimensions % config.m == 0,
            "Dimensions must be divisible by number of subspaces"
        );

        let dims_per_subspace = dimensions / config.m;

        Self {
            config,
            dims_per_subspace,
            dimensions,
            centroids: Vec::new(),
            trained: false,
        }
    }

    /// Train the quantizer on sample vectors
    ///
    /// Runs k-means independently in each of the `m` subspaces and stores
    /// the resulting codebooks; deterministic for a fixed `config.seed`.
    /// Re-training replaces any previous codebooks.
    /// NOTE(review): the iteration count is hard-coded to 10 — confirm
    /// that converges well enough for the target data.
    pub fn train(&mut self, vectors: &[Vec<f32>]) {
        use rand::prelude::*;
        use rand_chacha::ChaCha8Rng;

        let mut rng = ChaCha8Rng::seed_from_u64(self.config.seed);

        self.centroids = Vec::with_capacity(self.config.m);

        for subspace in 0..self.config.m {
            let start = subspace * self.dims_per_subspace;
            let end = start + self.dims_per_subspace;

            // Extract subvectors
            let subvectors: Vec<Vec<f32>> =
                vectors.iter().map(|v| v[start..end].to_vec()).collect();

            // Run k-means on this subspace
            let centroids = self.kmeans(&subvectors, self.config.k, 10, &mut rng);
            self.centroids.push(centroids);
        }

        self.trained = true;
    }

    /// K-means clustering
    ///
    /// Lloyd's algorithm: centroids are initialized from a random sample of
    /// the input, `k` is capped at the number of vectors, and a cluster that
    /// receives no assignments keeps its previous centroid for that round.
    fn kmeans<R: Rng>(
        &self,
        vectors: &[Vec<f32>],
        k: usize,
        iterations: usize,
        rng: &mut R,
    ) -> Vec<Vec<f32>> {
        if vectors.is_empty() || k == 0 {
            return Vec::new();
        }

        let dims = vectors[0].len();
        let k = k.min(vectors.len());

        // Initialize centroids randomly
        let mut indices: Vec<usize> = (0..vectors.len()).collect();
        indices.shuffle(rng);

        let mut centroids: Vec<Vec<f32>> = indices
            .iter()
            .take(k)
            .map(|&i| vectors[i].clone())
            .collect();

        for _ in 0..iterations {
            // Assign vectors to nearest centroid
            let mut assignments: Vec<Vec<usize>> = vec![Vec::new(); k];

            for (i, v) in vectors.iter().enumerate() {
                let nearest = self.find_nearest(v, &centroids);
                assignments[nearest].push(i);
            }

            // Update centroids to the mean of their assigned vectors
            for (c, assigned) in assignments.iter().enumerate() {
                if assigned.is_empty() {
                    continue;
                }

                let mut new_centroid = vec![0.0f32; dims];
                for &i in assigned {
                    for (j, &val) in vectors[i].iter().enumerate() {
                        new_centroid[j] += val;
                    }
                }

                let count = assigned.len() as f32;
                for val in &mut new_centroid {
                    *val /= count;
                }

                centroids[c] = new_centroid;
            }
        }

        centroids
    }

    /// Find nearest centroid index
    ///
    /// Linear scan by squared Euclidean distance; ties resolve to the
    /// lowest index.
    fn find_nearest(&self, vector: &[f32], centroids: &[Vec<f32>]) -> usize {
        let mut best = 0;
        let mut best_dist = f32::MAX;

        for (i, c) in centroids.iter().enumerate() {
            let dist: f32 = vector
                .iter()
                .zip(c.iter())
                .map(|(a, b)| (a - b).powi(2))
                .sum();

            if dist < best_dist {
                best_dist = dist;
                best = i;
            }
        }

        best
    }

    /// Encode a vector to PQ codes
    ///
    /// Returns one centroid id (as `u8`) per subspace.
    ///
    /// # Panics
    ///
    /// Panics if the quantizer is untrained or `vector.len()` differs from
    /// the configured dimensionality.
    pub fn encode(&self, vector: &[f32]) -> Vec<u8> {
        assert!(self.trained, "Quantizer must be trained");
        assert_eq!(vector.len(), self.dimensions);

        let mut codes = Vec::with_capacity(self.config.m);

        for subspace in 0..self.config.m {
            let start = subspace * self.dims_per_subspace;
            let end = start + self.dims_per_subspace;
            let subvector = &vector[start..end];

            // NOTE(review): `as u8` truncates if k > 256 — see `new`.
            let nearest = self.find_nearest(subvector, &self.centroids[subspace]);
            codes.push(nearest as u8);
        }

        codes
    }

    /// Decode PQ codes back to approximate vector
    ///
    /// Concatenates each code's centroid; this is a lossy reconstruction.
    pub fn decode(&self, codes: &[u8]) -> Vec<f32> {
        assert!(self.trained, "Quantizer must be trained");
        assert_eq!(codes.len(), self.config.m);

        let mut vector = Vec::with_capacity(self.dimensions);

        for (subspace, &code) in codes.iter().enumerate() {
            let centroid = &self.centroids[subspace][code as usize];
            vector.extend_from_slice(centroid);
        }

        vector
    }

    /// Compute asymmetric distance (query to encoded vector)
    /// More accurate than symmetric but slower
    ///
    /// Euclidean distance between the raw query and the decoded centroids.
    pub fn asymmetric_distance(&self, query: &[f32], codes: &[u8]) -> f32 {
        assert_eq!(query.len(), self.dimensions);
        assert_eq!(codes.len(), self.config.m);

        let mut distance_sq = 0.0f32;

        for (subspace, &code) in codes.iter().enumerate() {
            let start = subspace * self.dims_per_subspace;
            let end = start + self.dims_per_subspace;
            let query_sub = &query[start..end];
            let centroid = &self.centroids[subspace][code as usize];

            for (q, c) in query_sub.iter().zip(centroid.iter()) {
                distance_sq += (q - c).powi(2);
            }
        }

        distance_sq.sqrt()
    }

    /// Precompute distance table for a query
    /// Returns: [m][k] distances from query subvector to each centroid
    ///
    /// Entries are *squared* partial distances; `table_distance` sums them
    /// and takes the final square root.
    pub fn precompute_distance_table(&self, query: &[f32]) -> Vec<Vec<f32>> {
        assert_eq!(query.len(), self.dimensions);

        let mut table = Vec::with_capacity(self.config.m);

        for subspace in 0..self.config.m {
            let start = subspace * self.dims_per_subspace;
            let end = start + self.dims_per_subspace;
            let query_sub = &query[start..end];

            let distances: Vec<f32> = self.centroids[subspace]
                .iter()
                .map(|c| {
                    query_sub
                        .iter()
                        .zip(c.iter())
                        .map(|(q, v)| (q - v).powi(2))
                        .sum::<f32>()
                })
                .collect();

            table.push(distances);
        }

        table
    }

    /// Fast distance using precomputed table
    ///
    /// Mathematically equivalent to `asymmetric_distance` for the query the
    /// table was built from.
    pub fn table_distance(&self, table: &[Vec<f32>], codes: &[u8]) -> f32 {
        let mut distance_sq = 0.0f32;

        for (subspace, &code) in codes.iter().enumerate() {
            distance_sq += table[subspace][code as usize];
        }

        distance_sq.sqrt()
    }

    /// Memory per encoded vector in bytes
    pub fn bytes_per_vector(&self) -> usize {
        self.config.m // One byte per subspace
    }

    /// Compression ratio
    ///
    /// f32 storage (4 bytes/dim) versus one code byte per subspace.
    pub fn compression_ratio(&self) -> f32 {
        (self.dimensions * 4) as f32 / self.config.m as f32
    }
}
|
||||
|
||||
/// Encoded vector with its codes
#[derive(Debug, Clone)]
pub struct PQVector {
    /// One centroid id per subspace, as produced by `ProductQuantizer::encode`.
    pub codes: Vec<u8>,
}

impl PQVector {
    /// Total footprint in bytes: struct header plus one byte per code.
    pub fn memory_size(&self) -> usize {
        self.codes.len() + std::mem::size_of::<Self>()
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Tests
|
||||
// ============================================================================
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use rand::prelude::*;
    use rand_chacha::ChaCha8Rng;

    // Deterministic pseudo-random vectors in [-1, 1) so runs reproduce.
    fn random_vectors(n: usize, dims: usize, seed: u64) -> Vec<Vec<f32>> {
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        (0..n)
            .map(|_| (0..dims).map(|_| rng.gen_range(-1.0..1.0)).collect())
            .collect()
    }

    // End-to-end: train, encode, decode, then bound reconstruction error.
    #[test]
    fn test_train_and_encode() {
        let dims = 128;
        let config = PQConfig {
            m: 8,
            k: 64,
            seed: 42,
        };

        let mut pq = ProductQuantizer::new(dims, config);

        let training = random_vectors(1000, dims, 42);
        pq.train(&training);

        // Encode a vector
        let vector = random_vectors(1, dims, 123)[0].clone();
        let codes = pq.encode(&vector);

        assert_eq!(codes.len(), 8);

        // Decode and check distance
        let decoded = pq.decode(&codes);
        let error: f32 = vector
            .iter()
            .zip(decoded.iter())
            .map(|(a, b)| (a - b).powi(2))
            .sum::<f32>()
            .sqrt();

        // Error should be reasonable
        assert!(error < 2.0, "Reconstruction error too high: {}", error);
    }

    // The precomputed-table distance must agree with the direct asymmetric
    // distance (same math, different evaluation order).
    #[test]
    fn test_distance_table() {
        let dims = 64;
        let config = PQConfig {
            m: 4,
            k: 16,
            seed: 42,
        };

        let mut pq = ProductQuantizer::new(dims, config);
        let training = random_vectors(500, dims, 42);
        pq.train(&training);

        let query = random_vectors(1, dims, 123)[0].clone();
        let target = random_vectors(1, dims, 456)[0].clone();
        let codes = pq.encode(&target);

        // Compare asymmetric and table distances
        let asym_dist = pq.asymmetric_distance(&query, &codes);

        let table = pq.precompute_distance_table(&query);
        let table_dist = pq.table_distance(&table, &codes);

        assert!((asym_dist - table_dist).abs() < 0.001);
    }

    // 1536 dims at m=48: 6144 bytes of f32 compress to 48 one-byte codes.
    #[test]
    fn test_compression_ratio() {
        let dims = 1536;
        let config = PQConfig {
            m: 48,
            k: 256,
            seed: 42,
        };

        let pq = ProductQuantizer::new(dims, config);

        // Original: 1536 * 4 = 6144 bytes
        // Compressed: 48 bytes
        // Ratio: 128x
        assert_eq!(pq.bytes_per_vector(), 48);
        assert!((pq.compression_ratio() - 128.0).abs() < 0.1);
    }
}
|
||||
227
vendor/ruvector/crates/ruvector-postgres/src/quantization/scalar.rs
vendored
Normal file
227
vendor/ruvector/crates/ruvector-postgres/src/quantization/scalar.rs
vendored
Normal file
@@ -0,0 +1,227 @@
|
||||
//! Scalar Quantization (SQ8)
|
||||
//!
|
||||
//! Compresses f32 vectors to i8, achieving 4x memory reduction
|
||||
//! with minimal accuracy loss.
|
||||
|
||||
/// Quantize f32 vector to i8
///
/// Maps values linearly from `[min, max]` onto the 255 levels `-127..=127`
/// and returns `(quantized_data, scale, offset)`, where level `q` decodes
/// as `(q + 127) * scale + offset`.
pub fn quantize(vector: &[f32]) -> (Vec<i8>, f32, f32) {
    if vector.is_empty() {
        return (Vec::new(), 1.0, 0.0);
    }

    // Find min and max of the input range.
    let mut min = f32::MAX;
    let mut max = f32::MIN;

    for &v in vector {
        if v < min {
            min = v;
        }
        if v > max {
            max = v;
        }
    }

    let range = max - min;
    // 254 intervals span the 255 representable levels; a degenerate
    // (constant) vector gets scale 1.0 so decoding still works.
    let scale = if range > 0.0 { range / 254.0 } else { 1.0 };
    let offset = min;

    // Quantize to i8 (-127 to 127). Rounding to the nearest level — instead
    // of the previous truncation toward zero — halves the worst-case error
    // and keeps `max` from landing one level short when the float division
    // yields e.g. 253.99997 instead of 254.0.
    let quantized: Vec<i8> = vector
        .iter()
        .map(|&v| {
            let level = ((v - offset) / scale).round().clamp(0.0, 254.0);
            (level - 127.0) as i8
        })
        .collect();

    (quantized, scale, offset)
}
|
||||
|
||||
/// Dequantize i8 vector back to f32
///
/// Inverts `quantize`: level `q` maps back to `(q + 127) * scale + offset`.
pub fn dequantize(quantized: &[i8], scale: f32, offset: f32) -> Vec<f32> {
    let mut out = Vec::with_capacity(quantized.len());
    for &q in quantized {
        out.push((q as f32 + 127.0) * scale + offset);
    }
    out
}
|
||||
|
||||
/// Calculate squared Euclidean distance between quantized vectors
///
/// Accumulates in i32 so the per-element squares (max 255^2) cannot wrap.
/// Length equality is only checked in debug builds.
pub fn distance_sq(a: &[i8], b: &[i8]) -> i32 {
    debug_assert_eq!(a.len(), b.len());

    let mut total = 0i32;
    for (&x, &y) in a.iter().zip(b.iter()) {
        let diff = i32::from(x) - i32::from(y);
        total += diff * diff;
    }
    total
}
|
||||
|
||||
/// Calculate Euclidean distance between quantized vectors
|
||||
pub fn distance(a: &[i8], b: &[i8], scale: f32) -> f32 {
|
||||
(distance_sq(a, b) as f32).sqrt() * scale
|
||||
}
|
||||
|
||||
/// Quantized vector with metadata
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ScalarQuantizedVector {
|
||||
pub data: Vec<i8>,
|
||||
pub scale: f32,
|
||||
pub offset: f32,
|
||||
}
|
||||
|
||||
impl ScalarQuantizedVector {
|
||||
/// Create from f32 vector
|
||||
pub fn from_f32(vector: &[f32]) -> Self {
|
||||
let (data, scale, offset) = quantize(vector);
|
||||
Self {
|
||||
data,
|
||||
scale,
|
||||
offset,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert back to f32
|
||||
pub fn to_f32(&self) -> Vec<f32> {
|
||||
dequantize(&self.data, self.scale, self.offset)
|
||||
}
|
||||
|
||||
/// Calculate distance to another quantized vector
|
||||
pub fn distance(&self, other: &Self) -> f32 {
|
||||
let max_scale = self.scale.max(other.scale);
|
||||
distance(&self.data, &other.data, max_scale)
|
||||
}
|
||||
|
||||
/// Memory size in bytes
|
||||
pub fn memory_size(&self) -> usize {
|
||||
std::mem::size_of::<Self>() + self.data.len()
|
||||
}
|
||||
|
||||
/// Compression ratio compared to f32
|
||||
pub fn compression_ratio(&self) -> f32 {
|
||||
4.0 // f32 (4 bytes) -> i8 (1 byte)
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// SIMD-optimized distance (for larger vectors)
|
||||
// ============================================================================
|
||||
|
||||
/// AVX2 implementation of squared Euclidean distance over i8 values.
///
/// Widens each 32-byte chunk to i16 lanes, squares and pair-sums with
/// `vpmaddwd`, accumulates in i32 lanes, then horizontally sums; the tail
/// is handled by a scalar loop.
///
/// # Safety
///
/// The caller must ensure the `avx2` CPU feature is available at runtime
/// and that `a.len() == b.len()`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn distance_sq_avx2(a: &[i8], b: &[i8]) -> i32 {
    use std::arch::x86_64::*;

    let n = a.len();
    let mut sum = _mm256_setzero_si256();

    let chunks = n / 32;
    for i in 0..chunks {
        let offset = i * 32;

        // Unaligned 32-byte loads are in bounds: offset + 32 <= n.
        let va = _mm256_loadu_si256(a.as_ptr().add(offset) as *const __m256i);
        let vb = _mm256_loadu_si256(b.as_ptr().add(offset) as *const __m256i);

        // Subtract (with sign extension trick for i8):
        // widening to i16 first keeps the subtraction from wrapping,
        // since the i8 difference can reach +/-255.
        let diff_lo = _mm256_sub_epi16(
            _mm256_cvtepi8_epi16(_mm256_castsi256_si128(va)),
            _mm256_cvtepi8_epi16(_mm256_castsi256_si128(vb)),
        );
        let diff_hi = _mm256_sub_epi16(
            _mm256_cvtepi8_epi16(_mm256_extracti128_si256(va, 1)),
            _mm256_cvtepi8_epi16(_mm256_extracti128_si256(vb, 1)),
        );

        // Square and accumulate: vpmaddwd multiplies adjacent i16 pairs
        // and sums each pair into one i32 lane.
        let sq_lo = _mm256_madd_epi16(diff_lo, diff_lo);
        let sq_hi = _mm256_madd_epi16(diff_hi, diff_hi);

        sum = _mm256_add_epi32(sum, sq_lo);
        sum = _mm256_add_epi32(sum, sq_hi);
    }

    // Horizontal sum: fold 256 -> 128 -> 64 -> 32 bits to collapse the
    // eight i32 lanes into one scalar.
    let sum128_lo = _mm256_castsi256_si128(sum);
    let sum128_hi = _mm256_extracti128_si256(sum, 1);
    let sum128 = _mm_add_epi32(sum128_lo, sum128_hi);

    let sum64 = _mm_add_epi32(sum128, _mm_srli_si128(sum128, 8));
    let sum32 = _mm_add_epi32(sum64, _mm_srli_si128(sum64, 4));

    let mut result = _mm_cvtsi128_si32(sum32);

    // Handle remainder (0..32 tail bytes) in scalar code
    for i in (chunks * 32)..n {
        let diff = a[i] as i32 - b[i] as i32;
        result += diff * diff;
    }

    result
}
|
||||
|
||||
/// SIMD-accelerated distance calculation
|
||||
pub fn distance_simd(a: &[i8], b: &[i8], scale: f32) -> f32 {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
if is_x86_feature_detected!("avx2") {
|
||||
return (unsafe { distance_sq_avx2(a, b) } as f32).sqrt() * scale;
|
||||
}
|
||||
}
|
||||
|
||||
distance(a, b, scale)
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Tests
|
||||
// ============================================================================
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Round-tripping through i8 must stay within roughly one quantization
    // step of the original values.
    #[test]
    fn test_quantize_dequantize() {
        let original = vec![0.1, 0.5, -0.3, 0.8, -0.9];
        let (quantized, scale, offset) = quantize(&original);
        let restored = dequantize(&quantized, scale, offset);

        for (o, r) in original.iter().zip(restored.iter()) {
            assert!((o - r).abs() < 0.02, "orig={}, restored={}", o, r);
        }
    }

    // Quantized distance approximates the true Euclidean distance.
    #[test]
    fn test_distance() {
        let a = vec![1.0, 0.0, 0.0];
        let b = vec![0.0, 1.0, 0.0];

        let qa = ScalarQuantizedVector::from_f32(&a);
        let qb = ScalarQuantizedVector::from_f32(&b);

        let dist = qa.distance(&qb);
        // Euclidean distance should be sqrt(2) ≈ 1.414
        assert!((dist - 1.414).abs() < 0.2, "dist={}", dist);
    }

    // SQ8 is a fixed 4x compression: exactly one byte per original f32.
    #[test]
    fn test_compression_ratio() {
        let v = ScalarQuantizedVector::from_f32(&vec![0.0; 1000]);
        assert_eq!(v.compression_ratio(), 4.0);
        assert_eq!(v.data.len(), 1000); // 1000 i8 = 1000 bytes
    }

    // SIMD and scalar paths must agree; the loose tolerance absorbs the
    // sqrt-then-square round trip applied to the SIMD result.
    #[test]
    fn test_simd_matches_scalar() {
        let a: Vec<i8> = (0..128).map(|i| i as i8).collect();
        let b: Vec<i8> = (0..128).map(|i| -(i as i8)).collect();

        let scalar_result = distance_sq(&a, &b);
        let simd_result = (distance_simd(&a, &b, 1.0).powi(2)) as i32;

        assert!((scalar_result - simd_result).abs() < 10);
    }
}
|
||||
Reference in New Issue
Block a user