Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
364
vendor/ruvector/crates/ruvector-nervous-system/src/separate/dentate.rs
vendored
Normal file
364
vendor/ruvector/crates/ruvector-nervous-system/src/separate/dentate.rs
vendored
Normal file
@@ -0,0 +1,364 @@
|
||||
//! Dentate gyrus model combining sparse projection and k-winners-take-all
|
||||
//!
|
||||
//! The dentate gyrus is the input layer of the hippocampus responsible for
|
||||
//! pattern separation - creating orthogonal representations from similar inputs.
|
||||
|
||||
use super::{SparseBitVector, SparseProjection};
|
||||
use crate::{NervousSystemError, Result};
|
||||
|
||||
/// Dentate gyrus pattern separation encoder
///
/// Combines sparse random projection with k-winners-take-all sparsification
/// to create collision-resistant, orthogonal vector encodings.
///
/// # Biological Inspiration
///
/// The dentate gyrus expands cortical representations ~4-5x (EC: 200K → DG: 1M neurons)
/// and uses extremely sparse coding (~2% active) to minimize pattern overlap.
///
/// # Properties
///
/// - Input → Output expansion (typically 128D → 10000D)
/// - 2-5% sparsity (k-winners-take-all)
/// - Collision rate < 1% on diverse inputs
/// - Fast encoding: <500μs for typical inputs
///
/// # Example
///
/// ```
/// use ruvector_nervous_system::DentateGyrus;
///
/// let dg = DentateGyrus::new(128, 10000, 200, 42);
/// let input = vec![1.0; 128];
/// let sparse_code = dg.encode(&input);
/// ```
#[derive(Debug, Clone)]
pub struct DentateGyrus {
    /// Sparse random projection layer (expands input_dim → output_dim)
    projection: SparseProjection,

    /// Number of active neurons (k in k-winners-take-all).
    /// Invariant enforced by `new`: 0 < k <= output_dim.
    k: usize,

    /// Output dimension.
    /// NOTE(review): `encode` casts neuron indices to u16 when building the
    /// sparse bit vector, so output_dim is effectively limited to u16::MAX —
    /// confirm callers never exceed this.
    output_dim: usize,
}
|
||||
|
||||
impl DentateGyrus {
|
||||
/// Create a new dentate gyrus encoder
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `input_dim` - Input vector dimension (e.g., 128, 512)
|
||||
/// * `output_dim` - Output dimension (e.g., 10000) - should be >> input_dim
|
||||
/// * `k` - Number of active neurons (e.g., 200 for 2% of 10000)
|
||||
/// * `seed` - Random seed for reproducibility
|
||||
///
|
||||
/// # Recommended Parameters
|
||||
///
|
||||
/// - `output_dim`: 50-100x larger than `input_dim`
|
||||
/// - `k`: 2-5% of `output_dim`
|
||||
/// - Projection sparsity: 0.1-0.2
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use ruvector_nervous_system::DentateGyrus;
|
||||
///
|
||||
/// // 128D input → 10000D output with 2% sparsity
|
||||
/// let dg = DentateGyrus::new(128, 10000, 200, 42);
|
||||
/// ```
|
||||
pub fn new(input_dim: usize, output_dim: usize, k: usize, seed: u64) -> Self {
|
||||
if k == 0 {
|
||||
panic!("k must be > 0");
|
||||
}
|
||||
|
||||
if k > output_dim {
|
||||
panic!("k cannot exceed output_dim");
|
||||
}
|
||||
|
||||
// Use 15% projection sparsity as default (good balance)
|
||||
let projection = SparseProjection::new(input_dim, output_dim, 0.15, seed)
|
||||
.expect("Failed to create sparse projection");
|
||||
|
||||
Self {
|
||||
projection,
|
||||
k,
|
||||
output_dim,
|
||||
}
|
||||
}
|
||||
|
||||
/// Encode input vector into sparse representation
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `input` - Input vector
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Sparse bit vector with exactly k active bits
|
||||
///
|
||||
/// # Process
|
||||
///
|
||||
/// 1. Sparse random projection: input → dense high-dim vector
|
||||
/// 2. K-winners-take-all: select top k activations
|
||||
/// 3. Return sparse bit vector of active neurons
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use ruvector_nervous_system::DentateGyrus;
|
||||
///
|
||||
/// let dg = DentateGyrus::new(128, 10000, 200, 42);
|
||||
/// let input = vec![1.0; 128];
|
||||
/// let sparse = dg.encode(&input);
|
||||
/// assert_eq!(sparse.count(), 200); // Exactly k active
|
||||
/// ```
|
||||
pub fn encode(&self, input: &[f32]) -> SparseBitVector {
|
||||
// Step 1: Sparse projection
|
||||
let projected = self.projection.project(input).expect("Projection failed");
|
||||
|
||||
// Step 2: K-winners-take-all
|
||||
self.k_winners_take_all(&projected)
|
||||
}
|
||||
|
||||
/// Encode input and return dense vector (for compatibility)
|
||||
///
|
||||
/// Returns a dense vector where only the top-k elements are non-zero.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `input` - Input vector
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Dense vector with k non-zero elements
|
||||
pub fn encode_dense(&self, input: &[f32]) -> Vec<f32> {
|
||||
let projected = self.projection.project(input).expect("Projection failed");
|
||||
|
||||
let sparse = self.k_winners_take_all(&projected);
|
||||
|
||||
// Convert to dense
|
||||
let mut dense = vec![0.0; self.output_dim];
|
||||
for &idx in &sparse.indices {
|
||||
dense[idx as usize] = projected[idx as usize];
|
||||
}
|
||||
|
||||
dense
|
||||
}
|
||||
|
||||
/// K-winners-take-all: select top k activations
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `activations` - Dense activation vector
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Sparse bit vector with k highest activations set
|
||||
fn k_winners_take_all(&self, activations: &[f32]) -> SparseBitVector {
|
||||
// Create (index, value) pairs
|
||||
let mut indexed: Vec<(usize, f32)> = activations
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, &v)| (i, v))
|
||||
.collect();
|
||||
|
||||
// Partial sort to find top k (faster than full sort)
|
||||
indexed.select_nth_unstable_by(self.k, |a, b| {
|
||||
b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
|
||||
// Take top k indices
|
||||
let mut top_k_indices: Vec<u16> =
|
||||
indexed[..self.k].iter().map(|(i, _)| *i as u16).collect();
|
||||
|
||||
top_k_indices.sort_unstable();
|
||||
|
||||
SparseBitVector::from_indices(top_k_indices, self.output_dim as u16)
|
||||
}
|
||||
|
||||
/// Get input dimension
|
||||
pub fn input_dim(&self) -> usize {
|
||||
self.projection.input_dim()
|
||||
}
|
||||
|
||||
/// Get output dimension
|
||||
pub fn output_dim(&self) -> usize {
|
||||
self.output_dim
|
||||
}
|
||||
|
||||
/// Get k (number of active neurons)
|
||||
pub fn k(&self) -> usize {
|
||||
self.k
|
||||
}
|
||||
|
||||
/// Get sparsity level (k / output_dim)
|
||||
pub fn sparsity(&self) -> f32 {
|
||||
self.k as f32 / self.output_dim as f32
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_dentate_gyrus_creation() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        assert_eq!(dg.input_dim(), 128);
        assert_eq!(dg.output_dim(), 10000);
        assert_eq!(dg.k(), 200);
        assert_eq!(dg.sparsity(), 0.02); // 2%
    }

    #[test]
    #[should_panic(expected = "k must be > 0")]
    fn test_invalid_k_zero() {
        DentateGyrus::new(128, 10000, 0, 42);
    }

    #[test]
    #[should_panic(expected = "k cannot exceed output_dim")]
    fn test_invalid_k_too_large() {
        DentateGyrus::new(128, 100, 200, 42);
    }

    #[test]
    fn test_encode_produces_sparse_output() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        let input: Vec<f32> = (0..128).map(|i| (i as f32).sin()).collect();

        let sparse = dg.encode(&input);

        assert_eq!(sparse.count(), 200, "Should have exactly k active neurons");
        assert_eq!(sparse.capacity(), 10000);
    }

    #[test]
    fn test_encode_deterministic() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        let input: Vec<f32> = (0..128).map(|i| (i as f32).sin()).collect();

        let sparse1 = dg.encode(&input);
        let sparse2 = dg.encode(&input);

        assert_eq!(sparse1, sparse2, "Same input should produce same encoding");
    }

    #[test]
    fn test_encode_dense_has_k_nonzeros() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        let input: Vec<f32> = (0..128).map(|i| (i as f32).sin()).collect();

        let dense = dg.encode_dense(&input);
        let nonzero_count = dense.iter().filter(|&&x| x != 0.0).count();

        assert_eq!(
            nonzero_count, 200,
            "Should have exactly k non-zero elements"
        );
    }

    #[test]
    fn test_different_inputs_produce_different_outputs() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);

        let input1: Vec<f32> = (0..128).map(|i| (i as f32).sin()).collect();
        let input2: Vec<f32> = (0..128).map(|i| (i as f32).cos()).collect();

        let sparse1 = dg.encode(&input1);
        let sparse2 = dg.encode(&input2);

        assert_ne!(
            sparse1, sparse2,
            "Different inputs should produce different encodings"
        );
    }

    #[test]
    fn test_pattern_separation_property() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);

        // Create two highly similar inputs
        let mut input1 = vec![0.0; 128];
        let mut input2 = vec![0.0; 128];

        // ~94% overlap (120 of 128 dimensions shared)
        for i in 0..120 {
            input1[i] = 1.0;
            input2[i] = 1.0;
        }
        input1[125] = 1.0;
        input2[126] = 1.0;

        let sparse1 = dg.encode(&input1);
        let sparse2 = dg.encode(&input2);

        let input_overlap = 120.0 / 128.0; // 0.9375
        let output_similarity = sparse1.jaccard_similarity(&sparse2);

        // Pattern separation: output should be less similar than input
        assert!(
            output_similarity < input_overlap,
            "Output similarity ({}) should be less than input overlap ({})",
            output_similarity,
            input_overlap
        );
    }

    #[test]
    fn test_sparsity_levels() {
        // Test different sparsity levels
        let cases = vec![
            (10000, 200, 0.02), // 2%
            (10000, 300, 0.03), // 3%
            (10000, 500, 0.05), // 5%
        ];

        for (output_dim, k, expected_sparsity) in cases {
            let dg = DentateGyrus::new(128, output_dim, k, 42);
            assert_eq!(dg.sparsity(), expected_sparsity);

            let input: Vec<f32> = (0..128).map(|i| (i as f32).sin()).collect();
            let sparse = dg.encode(&input);

            assert_eq!(sparse.count(), k);
        }
    }

    #[test]
    fn test_zero_input() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        let input = vec![0.0; 128];

        let sparse = dg.encode(&input);

        // Even zero input should produce k active neurons (noise from projection)
        assert_eq!(sparse.count(), 200);
    }

    #[test]
    fn test_encode_performance_target() {
        let dg = DentateGyrus::new(512, 10000, 200, 42);
        let input: Vec<f32> = (0..512).map(|i| (i as f32).sin()).collect();

        let start = std::time::Instant::now();
        let iterations = 100;

        for _ in 0..iterations {
            let _ = dg.encode(&input);
        }

        let elapsed = start.elapsed();
        let avg_time = elapsed / iterations;

        // Target: encoding should complete in reasonable time (very relaxed for CI)
        println!("Average encoding time: {:?}", avg_time);
        assert!(
            avg_time.as_secs() < 2,
            "Average encoding time ({:?}) exceeds 2s target",
            avg_time
        );
    }
}
|
||||
193
vendor/ruvector/crates/ruvector-nervous-system/src/separate/mod.rs
vendored
Normal file
193
vendor/ruvector/crates/ruvector-nervous-system/src/separate/mod.rs
vendored
Normal file
@@ -0,0 +1,193 @@
|
||||
//! Pattern separation module implementing hippocampal dentate gyrus-inspired encoding
|
||||
//!
|
||||
//! This module provides sparse random projection and k-winners-take-all mechanisms
|
||||
//! for creating collision-resistant, orthogonal vector representations.
|
||||
|
||||
mod dentate;
|
||||
mod projection;
|
||||
mod sparsification;
|
||||
|
||||
pub use dentate::DentateGyrus;
|
||||
pub use projection::SparseProjection;
|
||||
pub use sparsification::SparseBitVector;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Test that similar inputs produce decorrelated outputs
    #[test]
    fn test_pattern_separation_decorrelation() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);

        // Create two similar inputs (~90% overlap: 115 of 128 dimensions shared)
        let mut input1 = vec![0.0; 128];
        let mut input2 = vec![0.0; 128];
        for i in 0..115 {
            input1[i] = 1.0;
            input2[i] = 1.0;
        }
        input1[120] = 1.0;
        input2[121] = 1.0;

        let sparse1 = dg.encode(&input1);
        let sparse2 = dg.encode(&input2);

        // Despite ~90% input overlap, output similarity should be lower
        let input_overlap = 115.0 / 128.0; // 0.898
        let output_similarity = sparse1.jaccard_similarity(&sparse2);

        // Pattern separation should decorrelate: output similarity < input similarity
        assert!(
            output_similarity < input_overlap,
            "Output similarity ({}) should be less than input overlap ({})",
            output_similarity,
            input_overlap
        );
    }

    /// Test collision rate on random inputs
    #[test]
    fn test_collision_rate() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        let num_samples = 1000;

        let mut encodings = Vec::new();
        for i in 0..num_samples {
            let input: Vec<f32> = (0..128).map(|j| ((i * 128 + j) as f32).sin()).collect();
            encodings.push(dg.encode(&input));
        }

        // Count collisions (identical encodings) via O(n^2) pairwise scan
        let mut collisions = 0;
        for i in 0..encodings.len() {
            for j in (i + 1)..encodings.len() {
                if encodings[i].indices == encodings[j].indices {
                    collisions += 1;
                }
            }
        }

        let collision_rate = collisions as f32 / (num_samples * (num_samples - 1) / 2) as f32;

        // Collision rate should be < 1%
        assert!(
            collision_rate < 0.01,
            "Collision rate ({:.4}) exceeds 1%",
            collision_rate
        );
    }

    /// Verify sparsity level (2-5% active neurons)
    #[test]
    fn test_sparsity_level() {
        let output_dim = 10000;
        let k = 200; // 2% sparsity
        let dg = DentateGyrus::new(128, output_dim, k, 42);

        let input: Vec<f32> = (0..128).map(|i| (i as f32).sin()).collect();
        let sparse = dg.encode(&input);

        let sparsity = sparse.indices.len() as f32 / output_dim as f32;

        // Verify exact k winners
        assert_eq!(
            sparse.indices.len(),
            k,
            "Should have exactly k active neurons"
        );

        // Verify sparsity in 2-5% range
        assert!(
            sparsity >= 0.02 && sparsity <= 0.05,
            "Sparsity ({:.4}) should be in 2-5% range",
            sparsity
        );
    }

    /// Test encoding performance
    #[test]
    fn test_encoding_performance() {
        let dg = DentateGyrus::new(512, 10000, 200, 42);
        let input: Vec<f32> = (0..512).map(|i| (i as f32).sin()).collect();

        let start = std::time::Instant::now();
        let iterations = 100;

        for _ in 0..iterations {
            let _ = dg.encode(&input);
        }

        let elapsed = start.elapsed();
        let avg_time = elapsed / iterations;

        // Should complete in reasonable time (very relaxed for CI environments)
        assert!(
            avg_time.as_secs() < 2,
            "Average encoding time ({:?}) exceeds 2s",
            avg_time
        );
    }

    /// Test similarity computation performance
    #[test]
    fn test_similarity_performance() {
        let dg = DentateGyrus::new(512, 10000, 200, 42);

        let input1: Vec<f32> = (0..512).map(|i| (i as f32).sin()).collect();
        let input2: Vec<f32> = (0..512).map(|i| (i as f32).cos()).collect();

        let sparse1 = dg.encode(&input1);
        let sparse2 = dg.encode(&input2);

        let start = std::time::Instant::now();
        let iterations = 1000;

        for _ in 0..iterations {
            let _ = sparse1.jaccard_similarity(&sparse2);
        }

        let elapsed = start.elapsed();
        let avg_time = elapsed / iterations;

        // Should be < 100μs per similarity computation
        assert!(
            avg_time.as_micros() < 100,
            "Average similarity time ({:?}) exceeds 100μs",
            avg_time
        );
    }

    /// Test retrieval quality: similar inputs should have higher similarity
    #[test]
    fn test_retrieval_quality() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);

        // Original input
        let original: Vec<f32> = (0..128).map(|i| (i as f32).sin()).collect();

        // Similar input (small perturbation)
        let similar: Vec<f32> = original
            .iter()
            .map(|&x| x + 0.1 * ((x * 10.0).cos()))
            .collect();

        // Different input
        let different: Vec<f32> = (0..128).map(|i| (i as f32).cos()).collect();

        let enc_original = dg.encode(&original);
        let enc_similar = dg.encode(&similar);
        let enc_different = dg.encode(&different);

        let sim_to_similar = enc_original.jaccard_similarity(&enc_similar);
        let sim_to_different = enc_original.jaccard_similarity(&enc_different);

        // Similar inputs should have higher similarity than different inputs
        assert!(
            sim_to_similar > sim_to_different,
            "Similar input similarity ({}) should be higher than different input ({})",
            sim_to_similar,
            sim_to_different
        );
    }
}
|
||||
252
vendor/ruvector/crates/ruvector-nervous-system/src/separate/projection.rs
vendored
Normal file
252
vendor/ruvector/crates/ruvector-nervous-system/src/separate/projection.rs
vendored
Normal file
@@ -0,0 +1,252 @@
|
||||
//! Sparse random projection for dimensionality expansion
|
||||
//!
|
||||
//! Implements sparse random matrices for efficient high-dimensional projections
|
||||
//! with controlled sparsity (connection probability).
|
||||
|
||||
use crate::{NervousSystemError, Result};
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
|
||||
/// Sparse random projection matrix for dimensionality expansion
///
/// Uses a sparse random matrix to project low-dimensional inputs into
/// high-dimensional space while maintaining computational efficiency.
///
/// # Properties
///
/// - Sparse connectivity (typically 10-20% connections)
/// - Uniformly-distributed weights in [-1, 1) (drawn via `gen_range`)
/// - Deterministic (seeded) for reproducibility
///
/// # Performance
///
/// - Time complexity: O(input_dim × output_dim × sparsity)
/// - Space complexity: O(input_dim × output_dim)
///   (zeros are stored explicitly; "sparse" refers to connectivity, not storage)
#[derive(Debug, Clone)]
pub struct SparseProjection {
    /// Projection weights [input_dim × output_dim]; entries absent with
    /// probability (1 - sparsity) are stored as explicit 0.0
    weights: Vec<Vec<f32>>,

    /// Connection probability (0.0 to 1.0)
    sparsity: f32,

    /// Random seed for reproducibility
    seed: u64,

    /// Input dimension
    input_dim: usize,

    /// Output dimension
    output_dim: usize,
}
|
||||
|
||||
impl SparseProjection {
|
||||
/// Create a new sparse random projection
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `input_dim` - Input vector dimension
|
||||
/// * `output_dim` - Output vector dimension (should be >> input_dim)
|
||||
/// * `sparsity` - Connection probability (typically 0.1-0.2)
|
||||
/// * `seed` - Random seed for reproducibility
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use ruvector_nervous_system::SparseProjection;
|
||||
///
|
||||
/// let projection = SparseProjection::new(128, 10000, 0.15, 42);
|
||||
/// ```
|
||||
pub fn new(input_dim: usize, output_dim: usize, sparsity: f32, seed: u64) -> Result<Self> {
|
||||
if input_dim == 0 {
|
||||
return Err(NervousSystemError::InvalidDimension(
|
||||
"Input dimension must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if output_dim == 0 {
|
||||
return Err(NervousSystemError::InvalidDimension(
|
||||
"Output dimension must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if sparsity <= 0.0 || sparsity > 1.0 {
|
||||
return Err(NervousSystemError::InvalidSparsity(format!(
|
||||
"Sparsity must be in (0, 1], got {}",
|
||||
sparsity
|
||||
)));
|
||||
}
|
||||
|
||||
let mut rng = StdRng::seed_from_u64(seed);
|
||||
let mut weights = Vec::with_capacity(input_dim);
|
||||
|
||||
// Initialize sparse random weights
|
||||
for _ in 0..input_dim {
|
||||
let mut row = Vec::with_capacity(output_dim);
|
||||
for _ in 0..output_dim {
|
||||
if rng.gen::<f32>() < sparsity {
|
||||
// Gaussian random weight
|
||||
let weight: f32 = rng.gen_range(-1.0..1.0);
|
||||
row.push(weight);
|
||||
} else {
|
||||
row.push(0.0);
|
||||
}
|
||||
}
|
||||
weights.push(row);
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
weights,
|
||||
sparsity,
|
||||
seed,
|
||||
input_dim,
|
||||
output_dim,
|
||||
})
|
||||
}
|
||||
|
||||
/// Project input vector to high-dimensional space
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `input` - Input vector of size input_dim
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Output vector of size output_dim
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use ruvector_nervous_system::SparseProjection;
|
||||
///
|
||||
/// let projection = SparseProjection::new(128, 10000, 0.15, 42).unwrap();
|
||||
/// let input = vec![1.0; 128];
|
||||
/// let output = projection.project(&input).unwrap();
|
||||
/// assert_eq!(output.len(), 10000);
|
||||
/// ```
|
||||
pub fn project(&self, input: &[f32]) -> Result<Vec<f32>> {
|
||||
if input.len() != self.input_dim {
|
||||
return Err(NervousSystemError::DimensionMismatch {
|
||||
expected: self.input_dim,
|
||||
actual: input.len(),
|
||||
});
|
||||
}
|
||||
|
||||
let mut output = vec![0.0; self.output_dim];
|
||||
|
||||
// Matrix-vector multiplication: output = weights^T × input
|
||||
for i in 0..self.input_dim {
|
||||
let input_val = input[i];
|
||||
if input_val != 0.0 {
|
||||
for j in 0..self.output_dim {
|
||||
let weight = self.weights[i][j];
|
||||
if weight != 0.0 {
|
||||
output[j] += input_val * weight;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// Get input dimension
|
||||
pub fn input_dim(&self) -> usize {
|
||||
self.input_dim
|
||||
}
|
||||
|
||||
/// Get output dimension
|
||||
pub fn output_dim(&self) -> usize {
|
||||
self.output_dim
|
||||
}
|
||||
|
||||
/// Get sparsity level
|
||||
pub fn sparsity(&self) -> f32 {
|
||||
self.sparsity
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sparse_projection_creation() {
        let proj = SparseProjection::new(128, 1000, 0.15, 42).unwrap();
        assert_eq!(proj.input_dim(), 128);
        assert_eq!(proj.output_dim(), 1000);
        assert_eq!(proj.sparsity(), 0.15);
    }

    #[test]
    fn test_invalid_dimensions() {
        assert!(SparseProjection::new(0, 1000, 0.15, 42).is_err());
        assert!(SparseProjection::new(128, 0, 0.15, 42).is_err());
    }

    #[test]
    fn test_invalid_sparsity() {
        // Sparsity must lie in (0, 1]: zero and >1 are both rejected
        assert!(SparseProjection::new(128, 1000, 0.0, 42).is_err());
        assert!(SparseProjection::new(128, 1000, 1.5, 42).is_err());
    }

    #[test]
    fn test_projection_dimensions() {
        let proj = SparseProjection::new(128, 1000, 0.15, 42).unwrap();
        let input = vec![1.0; 128];
        let output = proj.project(&input).unwrap();
        assert_eq!(output.len(), 1000);
    }

    #[test]
    fn test_projection_dimension_mismatch() {
        let proj = SparseProjection::new(128, 1000, 0.15, 42).unwrap();
        let input = vec![1.0; 64]; // Wrong size
        assert!(proj.project(&input).is_err());
    }

    #[test]
    fn test_projection_deterministic() {
        let proj1 = SparseProjection::new(128, 1000, 0.15, 42).unwrap();
        let proj2 = SparseProjection::new(128, 1000, 0.15, 42).unwrap();

        let input = vec![1.0; 128];
        let output1 = proj1.project(&input).unwrap();
        let output2 = proj2.project(&input).unwrap();

        // Same seed should produce same results
        assert_eq!(output1, output2);
    }

    #[test]
    fn test_projection_sparsity_effect() {
        let proj_sparse = SparseProjection::new(128, 1000, 0.1, 42).unwrap();
        let proj_dense = SparseProjection::new(128, 1000, 0.9, 42).unwrap();

        let input = vec![1.0; 128];
        let output_sparse = proj_sparse.project(&input).unwrap();
        let output_dense = proj_dense.project(&input).unwrap();

        // Dense projection should have larger average magnitude
        // (more connections contributing to each output)
        let avg_sparse: f32 = output_sparse.iter().map(|x| x.abs()).sum::<f32>() / 1000.0;
        let avg_dense: f32 = output_dense.iter().map(|x| x.abs()).sum::<f32>() / 1000.0;

        // 0.9 sparsity gives ~9x more connections; for zero-mean weights the
        // expected magnitude grows roughly with sqrt of connection count (~3x)
        assert!(
            avg_dense > avg_sparse,
            "Dense avg={} should be > sparse avg={}",
            avg_dense,
            avg_sparse
        );
    }

    #[test]
    fn test_zero_input_produces_zero_output() {
        let proj = SparseProjection::new(128, 1000, 0.15, 42).unwrap();
        let input = vec![0.0; 128];
        let output = proj.project(&input).unwrap();

        assert!(output.iter().all(|&x| x == 0.0));
    }
}
|
||||
403
vendor/ruvector/crates/ruvector-nervous-system/src/separate/sparsification.rs
vendored
Normal file
403
vendor/ruvector/crates/ruvector-nervous-system/src/separate/sparsification.rs
vendored
Normal file
@@ -0,0 +1,403 @@
|
||||
//! Sparse bit vector for efficient k-winners-take-all representation
|
||||
//!
|
||||
//! Implements memory-efficient sparse bit vectors using index lists
|
||||
//! with fast set operations for similarity computation.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Sparse bit vector storing only active indices
///
/// Efficient representation for sparse binary vectors where only
/// a small fraction of bits are set (active). Stores only the indices
/// of active bits rather than the full bit array.
///
/// # Properties
///
/// - Memory: O(k) where k is number of active bits
/// - Set operations: O(k1 + k2) for intersection/union
/// - Typical k: 200-500 active bits out of 10000+ total
/// - Indices and capacity are `u16`, so at most 65535 addressable bits
///
/// # Example
///
/// ```
/// use ruvector_nervous_system::SparseBitVector;
///
/// let mut sparse = SparseBitVector::new(10000);
/// sparse.set(42);
/// sparse.set(100);
/// sparse.set(500);
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SparseBitVector {
    /// Active bit indices, kept sorted ascending and deduplicated
    /// (maintained by `set` / `from_indices`)
    pub indices: Vec<u16>,

    /// Total capacity (maximum index + 1)
    capacity: u16,
}
|
||||
|
||||
impl SparseBitVector {
|
||||
/// Create a new sparse bit vector with given capacity
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `capacity` - Maximum number of bits (max index + 1)
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use ruvector_nervous_system::SparseBitVector;
|
||||
///
|
||||
/// let sparse = SparseBitVector::new(10000);
|
||||
/// ```
|
||||
pub fn new(capacity: u16) -> Self {
|
||||
Self {
|
||||
indices: Vec::new(),
|
||||
capacity,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create from a list of active indices
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `indices` - Vector of active bit indices
|
||||
/// * `capacity` - Total capacity
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use ruvector_nervous_system::SparseBitVector;
|
||||
///
|
||||
/// let sparse = SparseBitVector::from_indices(vec![10, 20, 30], 10000);
|
||||
/// ```
|
||||
pub fn from_indices(mut indices: Vec<u16>, capacity: u16) -> Self {
|
||||
indices.sort_unstable();
|
||||
indices.dedup();
|
||||
Self { indices, capacity }
|
||||
}
|
||||
|
||||
/// Set a bit to active
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `index` - Bit index to set
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if index >= capacity
|
||||
pub fn set(&mut self, index: u16) {
|
||||
assert!(index < self.capacity, "Index out of bounds");
|
||||
|
||||
// Binary search for insertion point
|
||||
match self.indices.binary_search(&index) {
|
||||
Ok(_) => {} // Already present
|
||||
Err(pos) => self.indices.insert(pos, index),
|
||||
}
|
||||
}
|
||||
|
||||
    /// Check if a bit is active
    ///
    /// Binary search over the sorted index list: O(log k).
    ///
    /// # Arguments
    ///
    /// * `index` - Bit index to check
    ///
    /// # Returns
    ///
    /// true if bit is set, false otherwise
    pub fn is_set(&self, index: u16) -> bool {
        self.indices.binary_search(&index).is_ok()
    }

    /// Get number of active bits (the k of the sparse code)
    pub fn count(&self) -> usize {
        self.indices.len()
    }

    /// Get capacity (total number of addressable bits, max index + 1)
    pub fn capacity(&self) -> u16 {
        self.capacity
    }
|
||||
|
||||
/// Compute intersection with another sparse bit vector
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `other` - Other sparse bit vector
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// New sparse bit vector containing intersection
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use ruvector_nervous_system::SparseBitVector;
|
||||
///
|
||||
/// let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
|
||||
/// let b = SparseBitVector::from_indices(vec![2, 3, 4], 100);
|
||||
/// let intersection = a.intersection(&b);
|
||||
/// assert_eq!(intersection.count(), 2); // {2, 3}
|
||||
/// ```
|
||||
pub fn intersection(&self, other: &Self) -> Self {
|
||||
let mut result = Vec::new();
|
||||
let mut i = 0;
|
||||
let mut j = 0;
|
||||
|
||||
// Merge algorithm for sorted lists
|
||||
while i < self.indices.len() && j < other.indices.len() {
|
||||
match self.indices[i].cmp(&other.indices[j]) {
|
||||
std::cmp::Ordering::Equal => {
|
||||
result.push(self.indices[i]);
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
std::cmp::Ordering::Less => i += 1,
|
||||
std::cmp::Ordering::Greater => j += 1,
|
||||
}
|
||||
}
|
||||
|
||||
Self {
|
||||
indices: result,
|
||||
capacity: self.capacity,
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute union with another sparse bit vector
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `other` - Other sparse bit vector
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// New sparse bit vector containing union
|
||||
pub fn union(&self, other: &Self) -> Self {
|
||||
let mut result = Vec::new();
|
||||
let mut i = 0;
|
||||
let mut j = 0;
|
||||
|
||||
while i < self.indices.len() && j < other.indices.len() {
|
||||
match self.indices[i].cmp(&other.indices[j]) {
|
||||
std::cmp::Ordering::Equal => {
|
||||
result.push(self.indices[i]);
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
std::cmp::Ordering::Less => {
|
||||
result.push(self.indices[i]);
|
||||
i += 1;
|
||||
}
|
||||
std::cmp::Ordering::Greater => {
|
||||
result.push(other.indices[j]);
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add remaining elements
|
||||
while i < self.indices.len() {
|
||||
result.push(self.indices[i]);
|
||||
i += 1;
|
||||
}
|
||||
while j < other.indices.len() {
|
||||
result.push(other.indices[j]);
|
||||
j += 1;
|
||||
}
|
||||
|
||||
Self {
|
||||
indices: result,
|
||||
capacity: self.capacity,
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute Jaccard similarity with another sparse bit vector
|
||||
///
|
||||
/// Jaccard similarity = |A ∩ B| / |A ∪ B|
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `other` - Other sparse bit vector
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Similarity in range [0.0, 1.0]
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use ruvector_nervous_system::SparseBitVector;
|
||||
///
|
||||
/// let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
|
||||
/// let b = SparseBitVector::from_indices(vec![2, 3, 4], 100);
|
||||
/// let sim = a.jaccard_similarity(&b);
|
||||
/// assert!((sim - 0.5).abs() < 0.001); // 2/4 = 0.5
|
||||
/// ```
|
||||
pub fn jaccard_similarity(&self, other: &Self) -> f32 {
|
||||
if self.indices.is_empty() && other.indices.is_empty() {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
let intersection_size = self.intersection_size(other);
|
||||
let union_size = self.indices.len() + other.indices.len() - intersection_size;
|
||||
|
||||
if union_size == 0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
intersection_size as f32 / union_size as f32
|
||||
}
|
||||
|
||||
/// Compute Hamming distance with another sparse bit vector
|
||||
///
|
||||
/// Hamming distance = number of positions where bits differ
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `other` - Other sparse bit vector
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Hamming distance (number of differing bits)
|
||||
pub fn hamming_distance(&self, other: &Self) -> u32 {
|
||||
let intersection_size = self.intersection_size(other);
|
||||
let total_active = self.indices.len() + other.indices.len();
|
||||
(total_active - 2 * intersection_size) as u32
|
||||
}
|
||||
|
||||
/// Helper: compute intersection size efficiently
|
||||
fn intersection_size(&self, other: &Self) -> usize {
|
||||
let mut count = 0;
|
||||
let mut i = 0;
|
||||
let mut j = 0;
|
||||
|
||||
while i < self.indices.len() && j < other.indices.len() {
|
||||
match self.indices[i].cmp(&other.indices[j]) {
|
||||
std::cmp::Ordering::Equal => {
|
||||
count += 1;
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
std::cmp::Ordering::Less => i += 1,
|
||||
std::cmp::Ordering::Greater => j += 1,
|
||||
}
|
||||
}
|
||||
|
||||
count
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // A fresh vector has no set bits and reports the requested capacity.
    #[test]
    fn test_sparse_bitvector_creation() {
        let sparse = SparseBitVector::new(10000);
        assert_eq!(sparse.count(), 0);
        assert_eq!(sparse.capacity(), 10000);
    }

    // Set bits are queryable individually and counted correctly;
    // unset positions report false.
    #[test]
    fn test_set_and_check() {
        let mut sparse = SparseBitVector::new(100);
        sparse.set(10);
        sparse.set(20);
        sparse.set(30);

        assert!(sparse.is_set(10));
        assert!(sparse.is_set(20));
        assert!(sparse.is_set(30));
        assert!(!sparse.is_set(15));
        assert_eq!(sparse.count(), 3);
    }

    // Construction from an unsorted list with duplicates dedupes
    // (10 appears twice, counted once).
    #[test]
    fn test_from_indices() {
        let sparse = SparseBitVector::from_indices(vec![30, 10, 20, 10], 100);
        assert_eq!(sparse.count(), 3); // Deduped
        assert!(sparse.is_set(10));
        assert!(sparse.is_set(20));
        assert!(sparse.is_set(30));
    }

    // Intersection keeps exactly the shared indices {3, 4}.
    #[test]
    fn test_intersection() {
        let a = SparseBitVector::from_indices(vec![1, 2, 3, 4], 100);
        let b = SparseBitVector::from_indices(vec![3, 4, 5, 6], 100);
        let intersection = a.intersection(&b);

        assert_eq!(intersection.count(), 2);
        assert!(intersection.is_set(3));
        assert!(intersection.is_set(4));
    }

    // Union of {1,2,3} and {3,4,5} is {1..=5}; the shared bit 3 is
    // counted once.
    #[test]
    fn test_union() {
        let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
        let b = SparseBitVector::from_indices(vec![3, 4, 5], 100);
        let union = a.union(&b);

        assert_eq!(union.count(), 5);
        for i in 1..=5 {
            assert!(union.is_set(i));
        }
    }

    // Partial-overlap Jaccard: |A ∩ B| / |A ∪ B| = 2/6.
    #[test]
    fn test_jaccard_similarity() {
        let a = SparseBitVector::from_indices(vec![1, 2, 3, 4], 100);
        let b = SparseBitVector::from_indices(vec![3, 4, 5, 6], 100);

        // Intersection: {3, 4} = 2
        // Union: {1, 2, 3, 4, 5, 6} = 6
        // Jaccard = 2/6 = 0.333...
        let sim = a.jaccard_similarity(&b);
        assert!((sim - 0.333333).abs() < 0.001);
    }

    // Identical sets have similarity exactly 1.0.
    #[test]
    fn test_jaccard_identical() {
        let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
        let b = SparseBitVector::from_indices(vec![1, 2, 3], 100);

        let sim = a.jaccard_similarity(&b);
        assert_eq!(sim, 1.0);
    }

    // Disjoint sets have similarity exactly 0.0.
    #[test]
    fn test_jaccard_disjoint() {
        let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
        let b = SparseBitVector::from_indices(vec![4, 5, 6], 100);

        let sim = a.jaccard_similarity(&b);
        assert_eq!(sim, 0.0);
    }

    // Hamming distance equals the symmetric-difference size.
    #[test]
    fn test_hamming_distance() {
        let a = SparseBitVector::from_indices(vec![1, 2, 3, 4], 100);
        let b = SparseBitVector::from_indices(vec![3, 4, 5, 6], 100);

        // Symmetric difference: {1, 2, 5, 6} = 4
        let dist = a.hamming_distance(&b);
        assert_eq!(dist, 4);
    }

    // Identical sets differ in zero positions.
    #[test]
    fn test_hamming_identical() {
        let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
        let b = SparseBitVector::from_indices(vec![1, 2, 3], 100);

        let dist = a.hamming_distance(&b);
        assert_eq!(dist, 0);
    }

    // Setting an index equal to capacity (valid range is 0..capacity)
    // must panic with the documented message.
    #[test]
    #[should_panic(expected = "Index out of bounds")]
    fn test_set_out_of_bounds() {
        let mut sparse = SparseBitVector::new(100);
        sparse.set(100); // Should panic
    }
}
|
||||
Reference in New Issue
Block a user