Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions
--- a/crates/ruvector-router-core/src/quantization.rs
+++ b/crates/ruvector-router-core/src/quantization.rs
@@ -0,0 +1,299 @@
+//! Quantization techniques for memory compression
+
+use crate::error::{Result, VectorDbError};
+use crate::types::QuantizationType;
+use serde::{Deserialize, Serialize};
+
+/// Quantized (compressed) representation of a vector.
+///
+/// Each variant carries the payload produced by one quantization scheme
+/// together with the parameters [`dequantize`] reads to rebuild an
+/// approximate `f32` vector.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum QuantizedVector {
+    /// No quantization - the full-precision `f32` values are stored unchanged
+    None(Vec<f32>),
+    /// Scalar quantization to one unsigned 8-bit code (`u8`) per dimension
+    Scalar {
+        /// Quantized values, one `u8` code per input element
+        data: Vec<u8>,
+        /// Minimum value of the input; added back during dequantization
+        min: f32,
+        /// Scale factor applied when quantizing; dequantization divides by it
+        scale: f32,
+    },
+    /// Product quantization
+    Product {
+        /// Codebook indices, one `u8` code per subspace
+        codes: Vec<u8>,
+        /// Number of subspaces
+        subspaces: usize,
+    },
+    /// Binary quantization (1 bit per dimension)
+    Binary {
+        /// Packed bits, 8 dimensions per byte, least-significant bit first
+        data: Vec<u8>,
+        /// Threshold value; a set bit marks an element strictly above it
+        threshold: f32,
+        /// Number of original dimensions
+        dimensions: usize,
+    },
+}
+
+/// Compress `vector` with the quantization scheme selected by `qtype`.
+///
+/// # Errors
+///
+/// Only product quantization can fail; its error is returned unchanged.
+pub fn quantize(vector: &[f32], qtype: QuantizationType) -> Result<QuantizedVector> {
+    let quantized = match qtype {
+        // The only fallible scheme: hand its `Result` straight back.
+        QuantizationType::Product { subspaces, k } => {
+            return product_quantize(vector, subspaces, k);
+        }
+        QuantizationType::Binary => binary_quantize(vector),
+        QuantizationType::Scalar => scalar_quantize(vector),
+        QuantizationType::None => QuantizedVector::None(vector.to_vec()),
+    };
+
+    Ok(quantized)
+}
+
+/// Dequantize a quantized vector back to float32
+///
+/// * `None` returns a copy of the stored values.
+/// * `Scalar` and `Binary` are reconstructed approximately from their stored
+///   parameters.
+/// * `Product` is a placeholder: the codebooks are not stored in the value, so
+///   a zero-filled vector is returned instead of a real reconstruction.
+///
+/// This function never panics on a `Product` value whose `subspaces` field is
+/// zero (for example one obtained by deserialization); it yields an empty
+/// vector in that case.
+pub fn dequantize(quantized: &QuantizedVector) -> Vec<f32> {
+    match quantized {
+        QuantizedVector::None(values) => values.clone(),
+        QuantizedVector::Scalar { data, min, scale } => scalar_dequantize(data, *min, *scale),
+        QuantizedVector::Product { codes, subspaces } => {
+            // Placeholder - would need codebooks stored separately.
+            // `checked_div` guards against `subspaces == 0`, which would
+            // otherwise abort with an integer division by zero.
+            let per_subspace = codes.len().checked_div(*subspaces).unwrap_or(0);
+            vec![0.0; codes.len() * per_subspace]
+        }
+        QuantizedVector::Binary {
+            data,
+            threshold,
+            dimensions,
+        } => binary_dequantize(data, *threshold, *dimensions),
+    }
+}
+
+/// Scalar quantization to unsigned 8-bit codes
+///
+/// The input range `[min, max]` is mapped linearly onto `0..=255` and each
+/// element is rounded to the nearest code. The returned value stores `min`
+/// and the `scale` factor needed to invert the mapping.
+///
+/// Edge cases:
+/// * An empty input produces empty `data` with `min = 0.0` and `scale = 1.0`,
+///   so the stored parameters are always finite for empty vectors.
+/// * A constant input (`max == min`) uses `scale = 1.0`; every code is 0.
+/// * NaN elements are skipped by `f32::min` / `f32::max` when finding the
+///   range and are encoded as code 0.
+fn scalar_quantize(vector: &[f32]) -> QuantizedVector {
+    // An empty input has no minimum: the fold below would leave `min` at
+    // +infinity, a value that cannot be represented in formats such as JSON.
+    if vector.is_empty() {
+        return QuantizedVector::Scalar {
+            data: Vec::new(),
+            min: 0.0,
+            scale: 1.0,
+        };
+    }
+
+    // Single pass for both extremes.
+    let (min, max) = vector
+        .iter()
+        .fold((f32::INFINITY, f32::NEG_INFINITY), |(lo, hi), &v| {
+            (lo.min(v), hi.max(v))
+        });
+
+    // A constant vector has zero range; a unit scale avoids dividing by zero.
+    let scale = if max > min { 255.0 / (max - min) } else { 1.0 };
+
+    let data: Vec<u8> = vector
+        .iter()
+        // Round to the nearest code: a bare `as u8` cast truncates toward
+        // zero, which doubles the worst-case error and biases it downward.
+        .map(|&v| ((v - min) * scale).round().clamp(0.0, 255.0) as u8)
+        .collect();
+
+    QuantizedVector::Scalar { data, min, scale }
+}
+
+/// Reconstruct approximate `f32` values from scalar-quantized codes.
+///
+/// Quantization computes `code = (value - min) * scale`, so the inverse is
+/// `value = code / scale + min`. Dividing by `scale` (rather than multiplying)
+/// is what makes this the inverse of that mapping.
+fn scalar_dequantize(data: &[u8], min: f32, scale: f32) -> Vec<f32> {
+    let mut restored = Vec::with_capacity(data.len());
+    for &code in data {
+        restored.push(f32::from(code) / scale + min);
+    }
+    restored
+}
+
+/// Product quantization (simplified version)
+///
+/// Splits `vector` into `subspaces` equally sized contiguous chunks and emits
+/// one `u8` code per chunk. This is a placeholder: the code is derived from
+/// the chunk's element sum instead of a trained codebook, and `_k` (the
+/// codebook size) is currently unused.
+///
+/// # Errors
+///
+/// Returns `VectorDbError::Quantization` when `subspaces` is zero or when the
+/// vector length is not divisible by `subspaces`.
+fn product_quantize(vector: &[f32], subspaces: usize, _k: usize) -> Result<QuantizedVector> {
+    // Zero subspaces must be rejected explicitly: an empty vector counts as a
+    // multiple of zero, so the divisibility check alone would let it through
+    // to the division below, which would panic.
+    if subspaces == 0 {
+        return Err(VectorDbError::Quantization(
+            "Number of subspaces must be greater than zero".to_string(),
+        ));
+    }
+
+    if !vector.len().is_multiple_of(subspaces) {
+        return Err(VectorDbError::Quantization(
+            "Vector length must be divisible by number of subspaces".to_string(),
+        ));
+    }
+
+    // Simplified: just store subspace indices
+    // In production, this would involve k-means clustering per subspace
+    let subspace_dim = vector.len() / subspaces;
+    let codes: Vec<u8> = (0..subspaces)
+        .map(|i| {
+            let start = i * subspace_dim;
+            let subvec = &vector[start..start + subspace_dim];
+            // Placeholder: hash to a code (0-255). The float-to-`u32` cast
+            // saturates, so negative or NaN sums map to code 0.
+            (subvec.iter().sum::<f32>() as u32 % 256) as u8
+        })
+        .collect();
+
+    Ok(QuantizedVector::Product { codes, subspaces })
+}
+
+/// Binary quantization (1 bit per dimension)
+///
+/// The threshold is the arithmetic mean of the input. Element `i` sets bit
+/// `i % 8` of byte `i / 8` (least-significant bit first) when it is strictly
+/// greater than the threshold; all other bits stay 0.
+///
+/// An empty input yields empty `data`, `dimensions == 0` and a threshold of
+/// `0.0` rather than the NaN that `0.0 / 0.0` would produce.
+fn binary_quantize(vector: &[f32]) -> QuantizedVector {
+    let dimensions = vector.len();
+
+    // Guard the mean against the empty case so the stored threshold is never
+    // NaN for empty input.
+    let threshold = if vector.is_empty() {
+        0.0
+    } else {
+        vector.iter().sum::<f32>() / dimensions as f32
+    };
+
+    // One byte per 8 dimensions, rounded up; unused trailing bits remain 0.
+    let mut data = vec![0u8; dimensions.div_ceil(8)];
+
+    for (i, &val) in vector.iter().enumerate() {
+        if val > threshold {
+            data[i / 8] |= 1 << (i % 8);
+        }
+    }
+
+    QuantizedVector::Binary {
+        data,
+        threshold,
+        dimensions,
+    }
+}
+
+/// Dequantize binary quantized vector
+///
+/// Bits are read least-significant first within each byte. A set bit becomes
+/// `threshold + 1.0` and a clear bit becomes `threshold - 1.0`, so each
+/// reconstructed value lies on the same side of the threshold as the bit says.
+///
+/// At most `dimensions` values are produced. If `data` holds fewer than
+/// `dimensions` bits, only `data.len() * 8` values are returned. The
+/// allocation is bounded by the bits actually available, so an oversized
+/// `dimensions` (for example from a deserialized value) cannot trigger a
+/// capacity-overflow panic.
+fn binary_dequantize(data: &[u8], threshold: f32, dimensions: usize) -> Vec<f32> {
+    let above = threshold + 1.0;
+    let below = threshold - 1.0;
+
+    // Never reserve more than can actually be filled.
+    let available_bits = data.len().saturating_mul(8);
+    let mut result = Vec::with_capacity(dimensions.min(available_bits));
+
+    result.extend(
+        data.iter()
+            .flat_map(|&byte| (0..8).map(move |bit_idx| (byte >> bit_idx) & 1))
+            // Drop the padding bits of the final byte.
+            .take(dimensions)
+            .map(|bit| if bit == 1 { above } else { below }),
+    );
+
+    result
+}
+
+/// Estimate how many times smaller a quantized vector is than the original.
+///
+/// The result is the uncompressed size (`original_dims` float32 values)
+/// divided by the estimated quantized size in bytes for `qtype`.
+pub fn calculate_compression_ratio(original_dims: usize, qtype: QuantizationType) -> f32 {
+    // Size of one float32 element in bytes.
+    const F32_BYTES: usize = 4;
+
+    let full_size = original_dims * F32_BYTES;
+    let compressed_size = match qtype {
+        // 1 bit per dim + threshold
+        QuantizationType::Binary => original_dims.div_ceil(8) + 4,
+        // u8 per subspace + overhead
+        QuantizationType::Product { subspaces, .. } => subspaces + 4,
+        // u8 per dim + min + scale
+        QuantizationType::Scalar => original_dims + 8,
+        // Stored as-is, so the ratio is 1
+        QuantizationType::None => full_size,
+    };
+
+    full_size as f32 / compressed_size as f32
+}
+
+#[cfg(test)]
+mod tests {
+    //! Unit tests for the quantization round trips and size estimates.
+
+    use super::*;
+
+    /// Scalar quantization of a small range stays within a coarse tolerance.
+    #[test]
+    fn test_scalar_quantization() {
+        let vector = vec![1.0, 2.0, 3.0, 4.0, 5.0];
+        let quantized = scalar_quantize(&vector);
+        let dequantized = dequantize(&quantized);
+
+        // Check approximate equality (quantization loses precision)
+        for (orig, deq) in vector.iter().zip(dequantized.iter()) {
+            assert!((orig - deq).abs() < 0.1);
+        }
+    }
+
+    /// Binary quantization packs one bit per element, least-significant first.
+    #[test]
+    fn test_binary_quantization() {
+        let vector = vec![1.0, 5.0, 2.0, 8.0, 3.0];
+        let quantized = binary_quantize(&vector);
+
+        match quantized {
+            QuantizedVector::Binary {
+                data, dimensions, ..
+            } => {
+                // Mean is 3.8, so only indices 1 (5.0) and 3 (8.0) are above
+                // it: bits 1 and 3 of the single byte.
+                assert_eq!(data, vec![0b0000_1010]);
+                assert_eq!(dimensions, 5);
+            }
+            _ => panic!("Expected binary quantization"),
+        }
+    }
+
+    /// The size estimates land near the theoretical 4x and 32x savings.
+    #[test]
+    fn test_compression_ratio() {
+        let ratio = calculate_compression_ratio(384, QuantizationType::Scalar);
+        assert!(ratio > 3.0); // Should be close to 4x
+
+        let ratio = calculate_compression_ratio(384, QuantizationType::Binary);
+        assert!(ratio > 20.0); // Should be close to 32x
+    }
+
+    /// Quantize -> dequantize stays within the 8-bit step size for several
+    /// value ranges.
+    #[test]
+    fn test_scalar_quantization_roundtrip() {
+        let test_vectors = vec![
+            vec![1.0, 2.0, 3.0, 4.0, 5.0],
+            vec![-10.0, -5.0, 0.0, 5.0, 10.0],
+            vec![0.1, 0.2, 0.3, 0.4, 0.5],
+            vec![100.0, 200.0, 300.0, 400.0, 500.0],
+        ];
+
+        for vector in test_vectors {
+            let quantized = scalar_quantize(&vector);
+            let dequantized = dequantize(&quantized);
+
+            assert_eq!(vector.len(), dequantized.len());
+
+            // The bound depends only on the vector, so compute it once.
+            // With 8-bit quantization, max error is roughly (max-min)/255
+            let max = vector.iter().copied().fold(f32::NEG_INFINITY, f32::max);
+            let min = vector.iter().copied().fold(f32::INFINITY, f32::min);
+            let max_error = (max - min) / 255.0 * 2.0; // Allow 2x for rounding
+
+            for (orig, deq) in vector.iter().zip(dequantized.iter()) {
+                assert!(
+                    (orig - deq).abs() < max_error,
+                    "Roundtrip error too large: orig={}, deq={}, error={}",
+                    orig,
+                    deq,
+                    (orig - deq).abs()
+                );
+            }
+        }
+    }
+
+    /// Constant vectors and very wide ranges do not break the round trip.
+    #[test]
+    fn test_scalar_quantization_edge_cases() {
+        // Test with all same values
+        let same_values = vec![5.0, 5.0, 5.0, 5.0];
+        let quantized = scalar_quantize(&same_values);
+        let dequantized = dequantize(&quantized);
+
+        for (orig, deq) in same_values.iter().zip(dequantized.iter()) {
+            assert!((orig - deq).abs() < 0.01);
+        }
+
+        // Test with extreme ranges
+        let extreme = vec![f32::MIN / 1e10, 0.0, f32::MAX / 1e10];
+        let quantized = scalar_quantize(&extreme);
+        let dequantized = dequantize(&quantized);
+
+        assert_eq!(extreme.len(), dequantized.len());
+    }
+
+    /// Binary round trip keeps every element on its side of the threshold.
+    #[test]
+    fn test_binary_quantization_roundtrip() {
+        let vector = vec![1.0, -1.0, 2.0, -2.0, 0.5, -0.5];
+        let quantized = binary_quantize(&vector);
+        let dequantized = dequantize(&quantized);
+
+        // Binary quantization doesn't preserve exact values,
+        // but should preserve the sign relative to threshold
+        assert_eq!(
+            vector.len(),
+            dequantized.len(),
+            "Dequantized vector should have same length as original"
+        );
+
+        match quantized {
+            QuantizedVector::Binary {
+                threshold,
+                dimensions,
+                ..
+            } => {
+                assert_eq!(dimensions, vector.len());
+                for (orig, deq) in vector.iter().zip(dequantized.iter()) {
+                    // Check that both have same relationship to threshold
+                    let orig_above = orig > &threshold;
+                    let deq_above = deq > &threshold;
+                    assert_eq!(orig_above, deq_above);
+                }
+            }
+            _ => panic!("Expected binary quantization"),
+        }
+    }
+
+    /// Product quantization emits exactly one code per subspace.
+    #[test]
+    fn test_product_quantization_code_count() {
+        let vector = vec![1.0, 2.0, 3.0, 4.0];
+        let qtype = QuantizationType::Product {
+            subspaces: 2,
+            k: 256,
+        };
+
+        match quantize(&vector, qtype) {
+            Ok(QuantizedVector::Product { codes, subspaces }) => {
+                assert_eq!(codes.len(), 2);
+                assert_eq!(subspaces, 2);
+            }
+            _ => panic!("Expected product quantization"),
+        }
+    }
+
+    /// A length that is not a multiple of the subspace count is rejected.
+    #[test]
+    fn test_product_quantization_rejects_indivisible_length() {
+        let vector = vec![1.0, 2.0, 3.0, 4.0];
+        let qtype = QuantizationType::Product {
+            subspaces: 3,
+            k: 256,
+        };
+
+        assert!(quantize(&vector, qtype).is_err());
+    }
+}