Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
371
vendor/ruvector/crates/ruvector-postgres/src/quantization/binary.rs
vendored
Normal file
371
vendor/ruvector/crates/ruvector-postgres/src/quantization/binary.rs
vendored
Normal file
@@ -0,0 +1,371 @@
|
||||
//! Binary Quantization
|
||||
//!
|
||||
//! Compresses vectors to 1 bit per dimension, achieving 32x memory reduction.
|
||||
//! Uses Hamming distance for fast comparison.
|
||||
|
||||
/// Quantize f32 vector to binary (1 bit per dimension)
///
/// Strictly positive values become 1; zero and negative values become 0.
/// Bit `i` of the input lands in byte `i / 8`, bit position `i % 8` (LSB
/// first); the output has `ceil(len / 8)` bytes.
pub fn quantize(vector: &[f32]) -> Vec<u8> {
    vector
        .chunks(8)
        .map(|chunk| {
            chunk.iter().enumerate().fold(0u8, |byte, (bit, &v)| {
                if v > 0.0 {
                    byte | (1 << bit)
                } else {
                    byte
                }
            })
        })
        .collect()
}
|
||||
|
||||
/// Quantize to 1 bit per dimension using `threshold` as the cut point.
///
/// Values strictly greater than `threshold` map to 1; all others map to 0.
/// Packing layout matches [`quantize`]: bit `i % 8` of byte `i / 8`.
pub fn quantize_with_threshold(vector: &[f32], threshold: f32) -> Vec<u8> {
    vector
        .chunks(8)
        .map(|chunk| {
            chunk.iter().enumerate().fold(0u8, |byte, (bit, &v)| {
                if v > threshold {
                    byte | (1 << bit)
                } else {
                    byte
                }
            })
        })
        .collect()
}
|
||||
|
||||
/// Calculate Hamming distance between binary vectors
///
/// Counts the differing bits between two equal-length packed bit vectors.
/// Length equality is only checked in debug builds.
pub fn hamming_distance(a: &[u8], b: &[u8]) -> u32 {
    debug_assert_eq!(a.len(), b.len());

    let mut total = 0u32;
    for (x, y) in a.iter().zip(b.iter()) {
        total += (x ^ y).count_ones();
    }
    total
}
|
||||
|
||||
/// SIMD-optimized Hamming distance using POPCNT
///
/// Processes the inputs 8 bytes at a time as `u64` words, then handles the
/// remaining tail bytes with scalar popcount.
///
/// # Safety
///
/// The caller must ensure the `popcnt` CPU feature is available at runtime
/// and that `a.len() == b.len()`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "popcnt")]
unsafe fn hamming_distance_popcnt(a: &[u8], b: &[u8]) -> u32 {
    use std::arch::x86_64::*;

    let n = a.len();
    let mut count = 0u32;

    // Process 8 bytes at a time
    let chunks = n / 8;
    for i in 0..chunks {
        let offset = i * 8;
        // SAFETY: offset + 8 <= n, so both 8-byte reads are in bounds.
        // `read_unaligned` is required: `&[u8]` carries no u64 alignment
        // guarantee, so the previous `*(ptr as *const u64)` dereference was
        // undefined behavior on unaligned data.
        let va = std::ptr::read_unaligned(a.as_ptr().add(offset) as *const u64);
        let vb = std::ptr::read_unaligned(b.as_ptr().add(offset) as *const u64);
        count += _popcnt64((va ^ vb) as i64) as u32;
    }

    // Handle remainder (0..8 tail bytes) with scalar popcount
    for i in (chunks * 8)..n {
        count += (a[i] ^ b[i]).count_ones();
    }

    count
}
|
||||
|
||||
/// AVX2-optimized Hamming distance using vpshufb popcount
///
/// Uses the SWAR (SIMD Within A Register) technique with lookup tables.
/// Processes 32 bytes per iteration, which is 4x faster than scalar POPCNT
/// for large vectors (1024+ dimensions).
///
/// # Safety
///
/// The caller must ensure the `avx2` CPU feature is available at runtime
/// and that `a.len() == b.len()`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn hamming_distance_avx2(a: &[u8], b: &[u8]) -> u32 {
    use std::arch::x86_64::*;

    let n = a.len();

    // Lookup table for popcount of 4-bit values: entry i holds the number
    // of set bits in nibble i, repeated across both 128-bit lanes so
    // vpshufb can index it per byte.
    let lookup = _mm256_setr_epi8(
        0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3,
        3, 4,
    );
    let low_mask = _mm256_set1_epi8(0x0F);

    let mut total = _mm256_setzero_si256();

    // Process 32 bytes at a time
    let chunks = n / 32;
    for i in 0..chunks {
        let offset = i * 32;
        // Unaligned loads are in bounds: offset + 32 <= n.
        let va = _mm256_loadu_si256(a.as_ptr().add(offset) as *const __m256i);
        let vb = _mm256_loadu_si256(b.as_ptr().add(offset) as *const __m256i);

        // XOR the vectors
        let xor = _mm256_xor_si256(va, vb);

        // Split into low and high nibbles
        let lo = _mm256_and_si256(xor, low_mask);
        let hi = _mm256_and_si256(_mm256_srli_epi16(xor, 4), low_mask);

        // Lookup popcount for each nibble
        let popcnt_lo = _mm256_shuffle_epi8(lookup, lo);
        let popcnt_hi = _mm256_shuffle_epi8(lookup, hi);

        // Sum nibble popcounts (per-byte max is 8, so i8 lanes cannot
        // overflow)
        let popcnt = _mm256_add_epi8(popcnt_lo, popcnt_hi);

        // Accumulate using sad (sum of absolute differences from zero),
        // which sums each group of 8 bytes into one 64-bit lane.
        let sad = _mm256_sad_epu8(popcnt, _mm256_setzero_si256());
        total = _mm256_add_epi64(total, sad);
    }

    // Horizontal sum of the 4 64-bit values
    let sum128_lo = _mm256_castsi256_si128(total);
    let sum128_hi = _mm256_extracti128_si256(total, 1);
    let sum128 = _mm_add_epi64(sum128_lo, sum128_hi);
    let sum64 = _mm_add_epi64(sum128, _mm_srli_si128(sum128, 8));
    let mut count = _mm_cvtsi128_si64(sum64) as u32;

    // Handle remainder with scalar POPCNT
    for i in (chunks * 32)..n {
        count += (a[i] ^ b[i]).count_ones();
    }

    count
}
|
||||
|
||||
/// Calculate Hamming distance with SIMD optimization
|
||||
///
|
||||
/// Automatically selects the best implementation:
|
||||
/// - AVX2 vpshufb for large vectors (>= 128 bytes / 1024 bits)
|
||||
/// - POPCNT for medium vectors (>= 8 bytes)
|
||||
/// - Scalar for small vectors
|
||||
pub fn hamming_distance_simd(a: &[u8], b: &[u8]) -> u32 {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
let n = a.len();
|
||||
|
||||
// For large vectors, AVX2 vpshufb is fastest
|
||||
if n >= 128 && is_x86_feature_detected!("avx2") {
|
||||
return unsafe { hamming_distance_avx2(a, b) };
|
||||
}
|
||||
|
||||
// For medium vectors, use POPCNT
|
||||
if is_x86_feature_detected!("popcnt") {
|
||||
return unsafe { hamming_distance_popcnt(a, b) };
|
||||
}
|
||||
}
|
||||
|
||||
hamming_distance(a, b)
|
||||
}
|
||||
|
||||
/// Normalize Hamming distance to [0, 1] range
|
||||
pub fn normalized_hamming_distance(a: &[u8], b: &[u8], dimensions: usize) -> f32 {
|
||||
let dist = hamming_distance_simd(a, b);
|
||||
dist as f32 / dimensions as f32
|
||||
}
|
||||
|
||||
/// Binary quantized vector
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct BinaryQuantizedVector {
|
||||
pub data: Vec<u8>,
|
||||
pub dimensions: usize,
|
||||
}
|
||||
|
||||
impl BinaryQuantizedVector {
|
||||
/// Create from f32 vector
|
||||
pub fn from_f32(vector: &[f32]) -> Self {
|
||||
Self {
|
||||
data: quantize(vector),
|
||||
dimensions: vector.len(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create from f32 vector with threshold
|
||||
pub fn from_f32_threshold(vector: &[f32], threshold: f32) -> Self {
|
||||
Self {
|
||||
data: quantize_with_threshold(vector, threshold),
|
||||
dimensions: vector.len(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate Hamming distance to another binary vector
|
||||
pub fn hamming_distance(&self, other: &Self) -> u32 {
|
||||
debug_assert_eq!(self.dimensions, other.dimensions);
|
||||
hamming_distance_simd(&self.data, &other.data)
|
||||
}
|
||||
|
||||
/// Calculate normalized distance [0, 1]
|
||||
pub fn normalized_distance(&self, other: &Self) -> f32 {
|
||||
self.hamming_distance(other) as f32 / self.dimensions as f32
|
||||
}
|
||||
|
||||
/// Memory size in bytes
|
||||
pub fn memory_size(&self) -> usize {
|
||||
std::mem::size_of::<Self>() + self.data.len()
|
||||
}
|
||||
|
||||
/// Compression ratio compared to f32
|
||||
pub fn compression_ratio(&self) -> f32 {
|
||||
32.0 // f32 (32 bits) -> 1 bit
|
||||
}
|
||||
|
||||
/// Get bit at position
|
||||
pub fn get_bit(&self, pos: usize) -> bool {
|
||||
debug_assert!(pos < self.dimensions);
|
||||
let byte_idx = pos / 8;
|
||||
let bit_idx = pos % 8;
|
||||
(self.data[byte_idx] >> bit_idx) & 1 == 1
|
||||
}
|
||||
|
||||
/// Count number of 1 bits
|
||||
pub fn popcount(&self) -> u32 {
|
||||
self.data.iter().map(|&b| b.count_ones()).sum()
|
||||
}
|
||||
}
|
||||
|
||||
/// Two-stage search with binary quantization
|
||||
///
|
||||
/// 1. Fast Hamming distance filtering using binary vectors
|
||||
/// 2. Rerank top candidates with full precision distance
|
||||
pub struct BinarySearcher {
|
||||
/// Binary quantized vectors
|
||||
binary_vectors: Vec<BinaryQuantizedVector>,
|
||||
/// Original vectors for reranking
|
||||
original_vectors: Vec<Vec<f32>>,
|
||||
/// Rerank factor (rerank top k * factor candidates)
|
||||
rerank_factor: usize,
|
||||
}
|
||||
|
||||
impl BinarySearcher {
|
||||
/// Create a new binary searcher
|
||||
pub fn new(vectors: Vec<Vec<f32>>, rerank_factor: usize) -> Self {
|
||||
let binary_vectors: Vec<_> = vectors
|
||||
.iter()
|
||||
.map(|v| BinaryQuantizedVector::from_f32(v))
|
||||
.collect();
|
||||
|
||||
Self {
|
||||
binary_vectors,
|
||||
original_vectors: vectors,
|
||||
rerank_factor,
|
||||
}
|
||||
}
|
||||
|
||||
/// Search for k nearest neighbors
|
||||
pub fn search(&self, query: &[f32], k: usize) -> Vec<(usize, f32)> {
|
||||
let query_binary = BinaryQuantizedVector::from_f32(query);
|
||||
|
||||
// Stage 1: Fast Hamming distance search
|
||||
let mut candidates: Vec<(usize, u32)> = self
|
||||
.binary_vectors
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, bv)| (i, query_binary.hamming_distance(bv)))
|
||||
.collect();
|
||||
|
||||
// Sort by Hamming distance
|
||||
candidates.sort_by_key(|(_, d)| *d);
|
||||
|
||||
// Take top k * rerank_factor candidates
|
||||
let n_candidates = (k * self.rerank_factor).min(candidates.len());
|
||||
let top_candidates: Vec<usize> = candidates
|
||||
.iter()
|
||||
.take(n_candidates)
|
||||
.map(|(i, _)| *i)
|
||||
.collect();
|
||||
|
||||
// Stage 2: Rerank with full precision distance
|
||||
let mut reranked: Vec<(usize, f32)> = top_candidates
|
||||
.iter()
|
||||
.map(|&i| {
|
||||
let dist: f32 = query
|
||||
.iter()
|
||||
.zip(self.original_vectors[i].iter())
|
||||
.map(|(a, b)| (a - b).powi(2))
|
||||
.sum::<f32>()
|
||||
.sqrt();
|
||||
(i, dist)
|
||||
})
|
||||
.collect();
|
||||
|
||||
reranked.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
|
||||
reranked.truncate(k);
|
||||
reranked
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Tests
|
||||
// ============================================================================
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Sign-based packing: positive -> 1, non-positive -> 0, with input
    // index i stored at bit (i % 8) of byte (i / 8).
    #[test]
    fn test_quantize() {
        let v = vec![0.5, -0.3, 0.1, -0.8, 0.2, -0.1, 0.9, -0.5];
        let q = quantize(&v);

        assert_eq!(q.len(), 1);
        // Bits: 1, 0, 1, 0, 1, 0, 1, 0 = 0b01010101 = 85
        assert_eq!(q[0], 0b01010101);
    }

    // Scalar Hamming distance on a single-byte pair.
    #[test]
    fn test_hamming_distance() {
        let a = vec![0b11110000];
        let b = vec![0b10101010];
        // XOR: 0b01011010, popcount = 4
        assert_eq!(hamming_distance(&a, &b), 4);
    }

    // Binary quantization is a fixed 32x compression: 1024 dims pack into
    // 128 bytes.
    #[test]
    fn test_compression_ratio() {
        let v = BinaryQuantizedVector::from_f32(&vec![0.0; 1024]);
        assert_eq!(v.compression_ratio(), 32.0);
        assert_eq!(v.data.len(), 128); // 1024 bits = 128 bytes
    }

    // The SIMD dispatcher must agree exactly with the scalar reference;
    // 128 bytes is large enough to take the AVX2 path when available.
    #[test]
    fn test_simd_matches_scalar() {
        let a: Vec<u8> = (0..128).collect();
        let b: Vec<u8> = (0..128).map(|i| 255 - i).collect();

        let scalar = hamming_distance(&a, &b);
        let simd = hamming_distance_simd(&a, &b);

        assert_eq!(scalar, simd);
    }

    // End-to-end two-stage search: right count, ascending distances.
    #[test]
    fn test_binary_searcher() {
        let vectors: Vec<Vec<f32>> = (0..100)
            .map(|i| vec![i as f32 * 0.1, (100 - i) as f32 * 0.1, 0.5])
            .collect();

        let searcher = BinarySearcher::new(vectors.clone(), 4);

        let query = vec![5.0, 5.0, 0.5];
        let results = searcher.search(&query, 5);

        assert_eq!(results.len(), 5);
        // Results should be ordered by distance
        for i in 1..results.len() {
            assert!(results[i].1 >= results[i - 1].1);
        }
    }

    // Bit accessor matches the quantization rule per dimension.
    #[test]
    fn test_get_bit() {
        let v = vec![1.0, -1.0, 1.0, -1.0];
        let bv = BinaryQuantizedVector::from_f32(&v);

        assert!(bv.get_bit(0));
        assert!(!bv.get_bit(1));
        assert!(bv.get_bit(2));
        assert!(!bv.get_bit(3));
    }
}
|
||||
63
vendor/ruvector/crates/ruvector-postgres/src/quantization/mod.rs
vendored
Normal file
63
vendor/ruvector/crates/ruvector-postgres/src/quantization/mod.rs
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
//! Vector quantization for memory reduction
|
||||
//!
|
||||
//! Provides various quantization methods:
|
||||
//! - Scalar (SQ8): 4x compression
|
||||
//! - Product (PQ): 8-32x compression
|
||||
//! - Binary: 32x compression
|
||||
|
||||
pub mod binary;
|
||||
pub mod product;
|
||||
pub mod scalar;
|
||||
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
/// Global quantization table memory tracking
static TABLE_MEMORY_BYTES: AtomicUsize = AtomicUsize::new(0);

/// Get quantization table memory in MB
pub fn get_table_memory_mb() -> f64 {
    let bytes = TABLE_MEMORY_BYTES.load(Ordering::Relaxed);
    bytes as f64 / (1024.0 * 1024.0)
}

/// Track table memory allocation
///
/// Adds `bytes` to the process-wide counter; there is no matching
/// deallocation hook, so the counter is monotonically increasing.
pub fn track_table_allocation(bytes: usize) {
    TABLE_MEMORY_BYTES.fetch_add(bytes, Ordering::Relaxed);
}
|
||||
|
||||
/// Quantization type
///
/// Selects the storage scheme; `Display`/`FromStr` round-trip through the
/// short names used in configuration strings ("none", "sq8", "pq", "binary").
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuantizationType {
    /// No quantization (full precision)
    None,
    /// Scalar quantization (f32 -> i8)
    Scalar,
    /// Product quantization (subspace division)
    Product,
    /// Binary quantization (f32 -> 1 bit)
    Binary,
}

impl std::fmt::Display for QuantizationType {
    /// Render the canonical short lowercase name.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            QuantizationType::None => "none",
            QuantizationType::Scalar => "sq8",
            QuantizationType::Product => "pq",
            QuantizationType::Binary => "binary",
        };
        f.write_str(name)
    }
}

impl std::str::FromStr for QuantizationType {
    type Err = String;

    /// Parse a case-insensitive scheme name, accepting common aliases
    /// ("sq", "bq", the empty string for `None`, ...).
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let lowered = s.to_lowercase();
        match lowered.as_str() {
            "none" | "" => Ok(QuantizationType::None),
            "scalar" | "sq8" | "sq" => Ok(QuantizationType::Scalar),
            "product" | "pq" => Ok(QuantizationType::Product),
            "binary" | "bq" => Ok(QuantizationType::Binary),
            _ => Err(format!("Unknown quantization type: {}", s)),
        }
    }
}
|
||||
380
vendor/ruvector/crates/ruvector-postgres/src/quantization/product.rs
vendored
Normal file
380
vendor/ruvector/crates/ruvector-postgres/src/quantization/product.rs
vendored
Normal file
@@ -0,0 +1,380 @@
|
||||
//! Product Quantization (PQ)
|
||||
//!
|
||||
//! Compresses vectors by dividing into subspaces and quantizing each
|
||||
//! independently. Achieves 8-32x compression with precomputed distance tables.
|
||||
|
||||
use rand::prelude::SliceRandom;
|
||||
use rand::Rng;
|
||||
|
||||
/// Product Quantization configuration
#[derive(Debug, Clone)]
pub struct PQConfig {
    /// Number of subspaces (subvectors)
    pub m: usize,
    /// Number of centroids per subspace (typically 256 for 8-bit codes)
    pub k: usize,
    /// Random seed
    pub seed: u64,
}

impl Default for PQConfig {
    /// Defaults: 8 subspaces x 256 centroids (one byte per code), seed 42.
    fn default() -> Self {
        PQConfig {
            m: 8,
            k: 256,
            seed: 42,
        }
    }
}
|
||||
|
||||
/// Product Quantization index
///
/// Owns one k-means codebook per subspace (learned by `train`); `encode`
/// replaces each subvector with the id of its nearest centroid.
pub struct ProductQuantizer {
    /// Configuration
    config: PQConfig,
    /// Dimensions per subspace
    dims_per_subspace: usize,
    /// Total dimensions
    dimensions: usize,
    /// Centroids for each subspace: [m][k][dims_per_subspace]
    centroids: Vec<Vec<Vec<f32>>>,
    /// Whether trained
    trained: bool,
}

impl ProductQuantizer {
    /// Create a new product quantizer
    ///
    /// # Panics
    ///
    /// Panics if `dimensions` is not divisible by `config.m`.
    ///
    /// NOTE(review): `config.k` is not validated here, but `encode` casts
    /// centroid ids to `u8`, so k > 256 would silently truncate — confirm
    /// callers keep k <= 256.
    pub fn new(dimensions: usize, config: PQConfig) -> Self {
        assert!(
            dimensions % config.m == 0,
            "Dimensions must be divisible by number of subspaces"
        );

        let dims_per_subspace = dimensions / config.m;

        Self {
            config,
            dims_per_subspace,
            dimensions,
            centroids: Vec::new(),
            trained: false,
        }
    }

    /// Train the quantizer on sample vectors
    ///
    /// Runs k-means independently in each of the `m` subspaces and stores
    /// the resulting codebooks; deterministic for a fixed `config.seed`.
    /// Re-training replaces any previous codebooks.
    /// NOTE(review): the iteration count is hard-coded to 10 — confirm
    /// that converges well enough for the target data.
    pub fn train(&mut self, vectors: &[Vec<f32>]) {
        use rand::prelude::*;
        use rand_chacha::ChaCha8Rng;

        let mut rng = ChaCha8Rng::seed_from_u64(self.config.seed);

        self.centroids = Vec::with_capacity(self.config.m);

        for subspace in 0..self.config.m {
            let start = subspace * self.dims_per_subspace;
            let end = start + self.dims_per_subspace;

            // Extract subvectors
            let subvectors: Vec<Vec<f32>> =
                vectors.iter().map(|v| v[start..end].to_vec()).collect();

            // Run k-means on this subspace
            let centroids = self.kmeans(&subvectors, self.config.k, 10, &mut rng);
            self.centroids.push(centroids);
        }

        self.trained = true;
    }

    /// K-means clustering
    ///
    /// Lloyd's algorithm: centroids are initialized from a random sample of
    /// the input, `k` is capped at the number of vectors, and a cluster that
    /// receives no assignments keeps its previous centroid for that round.
    fn kmeans<R: Rng>(
        &self,
        vectors: &[Vec<f32>],
        k: usize,
        iterations: usize,
        rng: &mut R,
    ) -> Vec<Vec<f32>> {
        if vectors.is_empty() || k == 0 {
            return Vec::new();
        }

        let dims = vectors[0].len();
        let k = k.min(vectors.len());

        // Initialize centroids randomly
        let mut indices: Vec<usize> = (0..vectors.len()).collect();
        indices.shuffle(rng);

        let mut centroids: Vec<Vec<f32>> = indices
            .iter()
            .take(k)
            .map(|&i| vectors[i].clone())
            .collect();

        for _ in 0..iterations {
            // Assign vectors to nearest centroid
            let mut assignments: Vec<Vec<usize>> = vec![Vec::new(); k];

            for (i, v) in vectors.iter().enumerate() {
                let nearest = self.find_nearest(v, &centroids);
                assignments[nearest].push(i);
            }

            // Update centroids to the mean of their assigned vectors
            for (c, assigned) in assignments.iter().enumerate() {
                if assigned.is_empty() {
                    continue;
                }

                let mut new_centroid = vec![0.0f32; dims];
                for &i in assigned {
                    for (j, &val) in vectors[i].iter().enumerate() {
                        new_centroid[j] += val;
                    }
                }

                let count = assigned.len() as f32;
                for val in &mut new_centroid {
                    *val /= count;
                }

                centroids[c] = new_centroid;
            }
        }

        centroids
    }

    /// Find nearest centroid index
    ///
    /// Linear scan by squared Euclidean distance; ties resolve to the
    /// lowest index.
    fn find_nearest(&self, vector: &[f32], centroids: &[Vec<f32>]) -> usize {
        let mut best = 0;
        let mut best_dist = f32::MAX;

        for (i, c) in centroids.iter().enumerate() {
            let dist: f32 = vector
                .iter()
                .zip(c.iter())
                .map(|(a, b)| (a - b).powi(2))
                .sum();

            if dist < best_dist {
                best_dist = dist;
                best = i;
            }
        }

        best
    }

    /// Encode a vector to PQ codes
    ///
    /// Returns one centroid id (as `u8`) per subspace.
    ///
    /// # Panics
    ///
    /// Panics if the quantizer is untrained or `vector.len()` differs from
    /// the configured dimensionality.
    pub fn encode(&self, vector: &[f32]) -> Vec<u8> {
        assert!(self.trained, "Quantizer must be trained");
        assert_eq!(vector.len(), self.dimensions);

        let mut codes = Vec::with_capacity(self.config.m);

        for subspace in 0..self.config.m {
            let start = subspace * self.dims_per_subspace;
            let end = start + self.dims_per_subspace;
            let subvector = &vector[start..end];

            // NOTE(review): `as u8` truncates if k > 256 — see `new`.
            let nearest = self.find_nearest(subvector, &self.centroids[subspace]);
            codes.push(nearest as u8);
        }

        codes
    }

    /// Decode PQ codes back to approximate vector
    ///
    /// Concatenates each code's centroid; this is a lossy reconstruction.
    pub fn decode(&self, codes: &[u8]) -> Vec<f32> {
        assert!(self.trained, "Quantizer must be trained");
        assert_eq!(codes.len(), self.config.m);

        let mut vector = Vec::with_capacity(self.dimensions);

        for (subspace, &code) in codes.iter().enumerate() {
            let centroid = &self.centroids[subspace][code as usize];
            vector.extend_from_slice(centroid);
        }

        vector
    }

    /// Compute asymmetric distance (query to encoded vector)
    /// More accurate than symmetric but slower
    ///
    /// Euclidean distance between the raw query and the decoded centroids.
    pub fn asymmetric_distance(&self, query: &[f32], codes: &[u8]) -> f32 {
        assert_eq!(query.len(), self.dimensions);
        assert_eq!(codes.len(), self.config.m);

        let mut distance_sq = 0.0f32;

        for (subspace, &code) in codes.iter().enumerate() {
            let start = subspace * self.dims_per_subspace;
            let end = start + self.dims_per_subspace;
            let query_sub = &query[start..end];
            let centroid = &self.centroids[subspace][code as usize];

            for (q, c) in query_sub.iter().zip(centroid.iter()) {
                distance_sq += (q - c).powi(2);
            }
        }

        distance_sq.sqrt()
    }

    /// Precompute distance table for a query
    /// Returns: [m][k] distances from query subvector to each centroid
    ///
    /// Entries are *squared* partial distances; `table_distance` sums them
    /// and takes the final square root.
    pub fn precompute_distance_table(&self, query: &[f32]) -> Vec<Vec<f32>> {
        assert_eq!(query.len(), self.dimensions);

        let mut table = Vec::with_capacity(self.config.m);

        for subspace in 0..self.config.m {
            let start = subspace * self.dims_per_subspace;
            let end = start + self.dims_per_subspace;
            let query_sub = &query[start..end];

            let distances: Vec<f32> = self.centroids[subspace]
                .iter()
                .map(|c| {
                    query_sub
                        .iter()
                        .zip(c.iter())
                        .map(|(q, v)| (q - v).powi(2))
                        .sum::<f32>()
                })
                .collect();

            table.push(distances);
        }

        table
    }

    /// Fast distance using precomputed table
    ///
    /// Mathematically equivalent to `asymmetric_distance` for the query the
    /// table was built from.
    pub fn table_distance(&self, table: &[Vec<f32>], codes: &[u8]) -> f32 {
        let mut distance_sq = 0.0f32;

        for (subspace, &code) in codes.iter().enumerate() {
            distance_sq += table[subspace][code as usize];
        }

        distance_sq.sqrt()
    }

    /// Memory per encoded vector in bytes
    pub fn bytes_per_vector(&self) -> usize {
        self.config.m // One byte per subspace
    }

    /// Compression ratio
    ///
    /// f32 storage (4 bytes/dim) versus one code byte per subspace.
    pub fn compression_ratio(&self) -> f32 {
        (self.dimensions * 4) as f32 / self.config.m as f32
    }
}
|
||||
|
||||
/// Encoded vector with its codes
#[derive(Debug, Clone)]
pub struct PQVector {
    /// One centroid id per subspace, as produced by `ProductQuantizer::encode`.
    pub codes: Vec<u8>,
}

impl PQVector {
    /// Total footprint in bytes: struct header plus one byte per code.
    pub fn memory_size(&self) -> usize {
        self.codes.len() + std::mem::size_of::<Self>()
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Tests
|
||||
// ============================================================================
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use rand::prelude::*;
    use rand_chacha::ChaCha8Rng;

    // Deterministic pseudo-random vectors in [-1, 1) so runs reproduce.
    fn random_vectors(n: usize, dims: usize, seed: u64) -> Vec<Vec<f32>> {
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        (0..n)
            .map(|_| (0..dims).map(|_| rng.gen_range(-1.0..1.0)).collect())
            .collect()
    }

    // End-to-end: train, encode, decode, then bound reconstruction error.
    #[test]
    fn test_train_and_encode() {
        let dims = 128;
        let config = PQConfig {
            m: 8,
            k: 64,
            seed: 42,
        };

        let mut pq = ProductQuantizer::new(dims, config);

        let training = random_vectors(1000, dims, 42);
        pq.train(&training);

        // Encode a vector
        let vector = random_vectors(1, dims, 123)[0].clone();
        let codes = pq.encode(&vector);

        assert_eq!(codes.len(), 8);

        // Decode and check distance
        let decoded = pq.decode(&codes);
        let error: f32 = vector
            .iter()
            .zip(decoded.iter())
            .map(|(a, b)| (a - b).powi(2))
            .sum::<f32>()
            .sqrt();

        // Error should be reasonable
        assert!(error < 2.0, "Reconstruction error too high: {}", error);
    }

    // The precomputed-table distance must agree with the direct asymmetric
    // distance (same math, different evaluation order).
    #[test]
    fn test_distance_table() {
        let dims = 64;
        let config = PQConfig {
            m: 4,
            k: 16,
            seed: 42,
        };

        let mut pq = ProductQuantizer::new(dims, config);
        let training = random_vectors(500, dims, 42);
        pq.train(&training);

        let query = random_vectors(1, dims, 123)[0].clone();
        let target = random_vectors(1, dims, 456)[0].clone();
        let codes = pq.encode(&target);

        // Compare asymmetric and table distances
        let asym_dist = pq.asymmetric_distance(&query, &codes);

        let table = pq.precompute_distance_table(&query);
        let table_dist = pq.table_distance(&table, &codes);

        assert!((asym_dist - table_dist).abs() < 0.001);
    }

    // 1536 dims at m=48: 6144 bytes of f32 compress to 48 one-byte codes.
    #[test]
    fn test_compression_ratio() {
        let dims = 1536;
        let config = PQConfig {
            m: 48,
            k: 256,
            seed: 42,
        };

        let pq = ProductQuantizer::new(dims, config);

        // Original: 1536 * 4 = 6144 bytes
        // Compressed: 48 bytes
        // Ratio: 128x
        assert_eq!(pq.bytes_per_vector(), 48);
        assert!((pq.compression_ratio() - 128.0).abs() < 0.1);
    }
}
|
||||
227
vendor/ruvector/crates/ruvector-postgres/src/quantization/scalar.rs
vendored
Normal file
227
vendor/ruvector/crates/ruvector-postgres/src/quantization/scalar.rs
vendored
Normal file
@@ -0,0 +1,227 @@
|
||||
//! Scalar Quantization (SQ8)
|
||||
//!
|
||||
//! Compresses f32 vectors to i8, achieving 4x memory reduction
|
||||
//! with minimal accuracy loss.
|
||||
|
||||
/// Quantize f32 vector to i8
///
/// Maps values linearly from `[min, max]` onto the 255 levels `-127..=127`
/// and returns `(quantized_data, scale, offset)`, where level `q` decodes
/// as `(q + 127) * scale + offset`.
pub fn quantize(vector: &[f32]) -> (Vec<i8>, f32, f32) {
    if vector.is_empty() {
        return (Vec::new(), 1.0, 0.0);
    }

    // Find min and max of the input range.
    let mut min = f32::MAX;
    let mut max = f32::MIN;

    for &v in vector {
        if v < min {
            min = v;
        }
        if v > max {
            max = v;
        }
    }

    let range = max - min;
    // 254 intervals span the 255 representable levels; a degenerate
    // (constant) vector gets scale 1.0 so decoding still works.
    let scale = if range > 0.0 { range / 254.0 } else { 1.0 };
    let offset = min;

    // Quantize to i8 (-127 to 127). Rounding to the nearest level — instead
    // of the previous truncation toward zero — halves the worst-case error
    // and keeps `max` from landing one level short when the float division
    // yields e.g. 253.99997 instead of 254.0.
    let quantized: Vec<i8> = vector
        .iter()
        .map(|&v| {
            let level = ((v - offset) / scale).round().clamp(0.0, 254.0);
            (level - 127.0) as i8
        })
        .collect();

    (quantized, scale, offset)
}
|
||||
|
||||
/// Dequantize i8 vector back to f32
///
/// Inverts `quantize`: level `q` maps back to `(q + 127) * scale + offset`.
pub fn dequantize(quantized: &[i8], scale: f32, offset: f32) -> Vec<f32> {
    let mut out = Vec::with_capacity(quantized.len());
    for &q in quantized {
        out.push((q as f32 + 127.0) * scale + offset);
    }
    out
}
|
||||
|
||||
/// Calculate squared Euclidean distance between quantized vectors
///
/// Accumulates in i32 so the per-element squares (max 255^2) cannot wrap.
/// Length equality is only checked in debug builds.
pub fn distance_sq(a: &[i8], b: &[i8]) -> i32 {
    debug_assert_eq!(a.len(), b.len());

    let mut total = 0i32;
    for (&x, &y) in a.iter().zip(b.iter()) {
        let diff = i32::from(x) - i32::from(y);
        total += diff * diff;
    }
    total
}
|
||||
|
||||
/// Calculate Euclidean distance between quantized vectors
|
||||
pub fn distance(a: &[i8], b: &[i8], scale: f32) -> f32 {
|
||||
(distance_sq(a, b) as f32).sqrt() * scale
|
||||
}
|
||||
|
||||
/// Quantized vector with metadata
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ScalarQuantizedVector {
|
||||
pub data: Vec<i8>,
|
||||
pub scale: f32,
|
||||
pub offset: f32,
|
||||
}
|
||||
|
||||
impl ScalarQuantizedVector {
|
||||
/// Create from f32 vector
|
||||
pub fn from_f32(vector: &[f32]) -> Self {
|
||||
let (data, scale, offset) = quantize(vector);
|
||||
Self {
|
||||
data,
|
||||
scale,
|
||||
offset,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert back to f32
|
||||
pub fn to_f32(&self) -> Vec<f32> {
|
||||
dequantize(&self.data, self.scale, self.offset)
|
||||
}
|
||||
|
||||
/// Calculate distance to another quantized vector
|
||||
pub fn distance(&self, other: &Self) -> f32 {
|
||||
let max_scale = self.scale.max(other.scale);
|
||||
distance(&self.data, &other.data, max_scale)
|
||||
}
|
||||
|
||||
/// Memory size in bytes
|
||||
pub fn memory_size(&self) -> usize {
|
||||
std::mem::size_of::<Self>() + self.data.len()
|
||||
}
|
||||
|
||||
/// Compression ratio compared to f32
|
||||
pub fn compression_ratio(&self) -> f32 {
|
||||
4.0 // f32 (4 bytes) -> i8 (1 byte)
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// SIMD-optimized distance (for larger vectors)
|
||||
// ============================================================================
|
||||
|
||||
/// AVX2 implementation of squared Euclidean distance over i8 values.
///
/// Widens each 32-byte chunk to i16 lanes, squares and pair-sums with
/// `vpmaddwd`, accumulates in i32 lanes, then horizontally sums; the tail
/// is handled by a scalar loop.
///
/// # Safety
///
/// The caller must ensure the `avx2` CPU feature is available at runtime
/// and that `a.len() == b.len()`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn distance_sq_avx2(a: &[i8], b: &[i8]) -> i32 {
    use std::arch::x86_64::*;

    let n = a.len();
    let mut sum = _mm256_setzero_si256();

    let chunks = n / 32;
    for i in 0..chunks {
        let offset = i * 32;

        // Unaligned 32-byte loads are in bounds: offset + 32 <= n.
        let va = _mm256_loadu_si256(a.as_ptr().add(offset) as *const __m256i);
        let vb = _mm256_loadu_si256(b.as_ptr().add(offset) as *const __m256i);

        // Subtract (with sign extension trick for i8):
        // widening to i16 first keeps the subtraction from wrapping,
        // since the i8 difference can reach +/-255.
        let diff_lo = _mm256_sub_epi16(
            _mm256_cvtepi8_epi16(_mm256_castsi256_si128(va)),
            _mm256_cvtepi8_epi16(_mm256_castsi256_si128(vb)),
        );
        let diff_hi = _mm256_sub_epi16(
            _mm256_cvtepi8_epi16(_mm256_extracti128_si256(va, 1)),
            _mm256_cvtepi8_epi16(_mm256_extracti128_si256(vb, 1)),
        );

        // Square and accumulate: vpmaddwd multiplies adjacent i16 pairs
        // and sums each pair into one i32 lane.
        let sq_lo = _mm256_madd_epi16(diff_lo, diff_lo);
        let sq_hi = _mm256_madd_epi16(diff_hi, diff_hi);

        sum = _mm256_add_epi32(sum, sq_lo);
        sum = _mm256_add_epi32(sum, sq_hi);
    }

    // Horizontal sum: fold 256 -> 128 -> 64 -> 32 bits to collapse the
    // eight i32 lanes into one scalar.
    let sum128_lo = _mm256_castsi256_si128(sum);
    let sum128_hi = _mm256_extracti128_si256(sum, 1);
    let sum128 = _mm_add_epi32(sum128_lo, sum128_hi);

    let sum64 = _mm_add_epi32(sum128, _mm_srli_si128(sum128, 8));
    let sum32 = _mm_add_epi32(sum64, _mm_srli_si128(sum64, 4));

    let mut result = _mm_cvtsi128_si32(sum32);

    // Handle remainder (0..32 tail bytes) in scalar code
    for i in (chunks * 32)..n {
        let diff = a[i] as i32 - b[i] as i32;
        result += diff * diff;
    }

    result
}
|
||||
|
||||
/// SIMD-accelerated distance calculation
|
||||
pub fn distance_simd(a: &[i8], b: &[i8], scale: f32) -> f32 {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
if is_x86_feature_detected!("avx2") {
|
||||
return (unsafe { distance_sq_avx2(a, b) } as f32).sqrt() * scale;
|
||||
}
|
||||
}
|
||||
|
||||
distance(a, b, scale)
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Tests
|
||||
// ============================================================================
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Round-tripping through i8 must stay within roughly one quantization
    // step of the original values.
    #[test]
    fn test_quantize_dequantize() {
        let original = vec![0.1, 0.5, -0.3, 0.8, -0.9];
        let (quantized, scale, offset) = quantize(&original);
        let restored = dequantize(&quantized, scale, offset);

        for (o, r) in original.iter().zip(restored.iter()) {
            assert!((o - r).abs() < 0.02, "orig={}, restored={}", o, r);
        }
    }

    // Quantized distance approximates the true Euclidean distance.
    #[test]
    fn test_distance() {
        let a = vec![1.0, 0.0, 0.0];
        let b = vec![0.0, 1.0, 0.0];

        let qa = ScalarQuantizedVector::from_f32(&a);
        let qb = ScalarQuantizedVector::from_f32(&b);

        let dist = qa.distance(&qb);
        // Euclidean distance should be sqrt(2) ≈ 1.414
        assert!((dist - 1.414).abs() < 0.2, "dist={}", dist);
    }

    // SQ8 is a fixed 4x compression: exactly one byte per original f32.
    #[test]
    fn test_compression_ratio() {
        let v = ScalarQuantizedVector::from_f32(&vec![0.0; 1000]);
        assert_eq!(v.compression_ratio(), 4.0);
        assert_eq!(v.data.len(), 1000); // 1000 i8 = 1000 bytes
    }

    // SIMD and scalar paths must agree; the loose tolerance absorbs the
    // sqrt-then-square round trip applied to the SIMD result.
    #[test]
    fn test_simd_matches_scalar() {
        let a: Vec<i8> = (0..128).map(|i| i as i8).collect();
        let b: Vec<i8> = (0..128).map(|i| -(i as i8)).collect();

        let scalar_result = distance_sq(&a, &b);
        let simd_result = (distance_simd(&a, &b, 1.0).powi(2)) as i32;

        assert!((scalar_result - simd_result).abs() < 10);
    }
}
|
||||
Reference in New Issue
Block a user