Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,364 @@
//! Dentate gyrus model combining sparse projection and k-winners-take-all
//!
//! The dentate gyrus is the input layer of the hippocampus responsible for
//! pattern separation - creating orthogonal representations from similar inputs.
use super::{SparseBitVector, SparseProjection};
use crate::{NervousSystemError, Result};
/// Dentate gyrus pattern separation encoder
///
/// Combines sparse random projection with k-winners-take-all sparsification
/// to create collision-resistant, orthogonal vector encodings.
///
/// # Biological Inspiration
///
/// The dentate gyrus expands cortical representations ~4-5x (EC: 200K → DG: 1M neurons)
/// and uses extremely sparse coding (~2% active) to minimize pattern overlap.
///
/// # Properties
///
/// - Input → Output expansion (typically 128D → 10000D)
/// - 2-5% sparsity (k-winners-take-all)
/// - Collision rate < 1% on diverse inputs
/// - Fast encoding: <500μs for typical inputs
///
/// # Example
///
/// ```
/// use ruvector_nervous_system::DentateGyrus;
///
/// let dg = DentateGyrus::new(128, 10000, 200, 42);
/// let input = vec![1.0; 128];
/// let sparse_code = dg.encode(&input);
/// ```
#[derive(Debug, Clone)]
pub struct DentateGyrus {
    /// Sparse random projection layer (expands input_dim -> output_dim)
    projection: SparseProjection,
    /// Number of active neurons (k in k-winners-take-all).
    /// Invariant: 0 < k <= output_dim, enforced by `new`.
    k: usize,
    /// Output dimension (total neuron count after expansion)
    output_dim: usize,
}
impl DentateGyrus {
    /// Create a new dentate gyrus encoder
    ///
    /// # Arguments
    ///
    /// * `input_dim` - Input vector dimension (e.g., 128, 512)
    /// * `output_dim` - Output dimension (e.g., 10000) - should be >> input_dim
    /// * `k` - Number of active neurons (e.g., 200 for 2% of 10000)
    /// * `seed` - Random seed for reproducibility
    ///
    /// # Recommended Parameters
    ///
    /// - `output_dim`: 50-100x larger than `input_dim`
    /// - `k`: 2-5% of `output_dim`
    /// - Projection sparsity: 0.1-0.2
    ///
    /// # Panics
    ///
    /// Panics if `k == 0`, if `k > output_dim`, or if `output_dim` exceeds
    /// `u16::MAX` (active indices are stored as `u16` in the sparse output,
    /// so a larger dimension would silently truncate).
    ///
    /// # Example
    ///
    /// ```
    /// use ruvector_nervous_system::DentateGyrus;
    ///
    /// // 128D input → 10000D output with 2% sparsity
    /// let dg = DentateGyrus::new(128, 10000, 200, 42);
    /// ```
    pub fn new(input_dim: usize, output_dim: usize, k: usize, seed: u64) -> Self {
        if k == 0 {
            panic!("k must be > 0");
        }
        if k > output_dim {
            panic!("k cannot exceed output_dim");
        }
        // SparseBitVector stores indices and capacity as u16; a larger
        // output_dim would wrap silently in `encode`, so fail loudly here.
        if output_dim > u16::MAX as usize {
            panic!("output_dim cannot exceed u16::MAX");
        }
        // Use 15% projection sparsity as default (good balance)
        let projection = SparseProjection::new(input_dim, output_dim, 0.15, seed)
            .expect("Failed to create sparse projection");
        Self {
            projection,
            k,
            output_dim,
        }
    }

    /// Encode input vector into sparse representation
    ///
    /// # Arguments
    ///
    /// * `input` - Input vector
    ///
    /// # Returns
    ///
    /// Sparse bit vector with exactly k active bits
    ///
    /// # Process
    ///
    /// 1. Sparse random projection: input → dense high-dim vector
    /// 2. K-winners-take-all: select top k activations
    /// 3. Return sparse bit vector of active neurons
    ///
    /// # Example
    ///
    /// ```
    /// use ruvector_nervous_system::DentateGyrus;
    ///
    /// let dg = DentateGyrus::new(128, 10000, 200, 42);
    /// let input = vec![1.0; 128];
    /// let sparse = dg.encode(&input);
    /// assert_eq!(sparse.count(), 200); // Exactly k active
    /// ```
    pub fn encode(&self, input: &[f32]) -> SparseBitVector {
        // Step 1: Sparse projection
        let projected = self.projection.project(input).expect("Projection failed");
        // Step 2: K-winners-take-all
        self.k_winners_take_all(&projected)
    }

    /// Encode input and return dense vector (for compatibility)
    ///
    /// Returns a dense vector where only the top-k elements are non-zero;
    /// the retained values are the original projection activations.
    ///
    /// # Arguments
    ///
    /// * `input` - Input vector
    ///
    /// # Returns
    ///
    /// Dense vector of length `output_dim` with k non-zero elements
    pub fn encode_dense(&self, input: &[f32]) -> Vec<f32> {
        let projected = self.projection.project(input).expect("Projection failed");
        let sparse = self.k_winners_take_all(&projected);
        // Scatter the winning activations back into a dense vector
        let mut dense = vec![0.0; self.output_dim];
        for &idx in &sparse.indices {
            dense[idx as usize] = projected[idx as usize];
        }
        dense
    }

    /// K-winners-take-all: select top k activations
    ///
    /// # Arguments
    ///
    /// * `activations` - Dense activation vector (length == output_dim)
    ///
    /// # Returns
    ///
    /// Sparse bit vector with the k highest activations set
    /// (ties among equal values are broken arbitrarily)
    fn k_winners_take_all(&self, activations: &[f32]) -> SparseBitVector {
        // Degenerate case: k covers every neuron, so all indices win.
        // (select_nth_unstable_by panics when the pivot index >= len.)
        if self.k >= activations.len() {
            let all: Vec<u16> = (0..activations.len() as u16).collect();
            return SparseBitVector::from_indices(all, self.output_dim as u16);
        }
        // Create (index, value) pairs
        let mut indexed: Vec<(usize, f32)> =
            activations.iter().copied().enumerate().collect();
        // Partial sort so the k largest activations occupy indexed[..k]
        // (descending comparator; NaN compares as Equal, which keeps the
        // partition total but leaves NaN placement unspecified).
        indexed.select_nth_unstable_by(self.k, |a, b| {
            b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)
        });
        // Take top k indices, sorted for SparseBitVector's merge operations
        let mut top_k_indices: Vec<u16> =
            indexed[..self.k].iter().map(|&(i, _)| i as u16).collect();
        top_k_indices.sort_unstable();
        SparseBitVector::from_indices(top_k_indices, self.output_dim as u16)
    }

    /// Get input dimension
    pub fn input_dim(&self) -> usize {
        self.projection.input_dim()
    }

    /// Get output dimension
    pub fn output_dim(&self) -> usize {
        self.output_dim
    }

    /// Get k (number of active neurons)
    pub fn k(&self) -> usize {
        self.k
    }

    /// Get sparsity level (k / output_dim)
    pub fn sparsity(&self) -> f32 {
        self.k as f32 / self.output_dim as f32
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Constructor wires dimensions and k through to the accessors.
    #[test]
    fn test_dentate_gyrus_creation() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        assert_eq!(dg.input_dim(), 128);
        assert_eq!(dg.output_dim(), 10000);
        assert_eq!(dg.k(), 200);
        assert_eq!(dg.sparsity(), 0.02); // 2%
    }

    #[test]
    #[should_panic(expected = "k must be > 0")]
    fn test_invalid_k_zero() {
        DentateGyrus::new(128, 10000, 0, 42);
    }

    #[test]
    #[should_panic(expected = "k cannot exceed output_dim")]
    fn test_invalid_k_too_large() {
        DentateGyrus::new(128, 100, 200, 42);
    }

    // Encoding must always produce exactly k winners at full capacity.
    #[test]
    fn test_encode_produces_sparse_output() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        let input: Vec<f32> = (0..128).map(|i| (i as f32).sin()).collect();
        let sparse = dg.encode(&input);
        assert_eq!(sparse.count(), 200, "Should have exactly k active neurons");
        assert_eq!(sparse.capacity(), 10000);
    }

    // Seeded projection makes encoding a pure function of the input.
    #[test]
    fn test_encode_deterministic() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        let input: Vec<f32> = (0..128).map(|i| (i as f32).sin()).collect();
        let sparse1 = dg.encode(&input);
        let sparse2 = dg.encode(&input);
        assert_eq!(sparse1, sparse2, "Same input should produce same encoding");
    }

    #[test]
    fn test_encode_dense_has_k_nonzeros() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        let input: Vec<f32> = (0..128).map(|i| (i as f32).sin()).collect();
        let dense = dg.encode_dense(&input);
        let nonzero_count = dense.iter().filter(|&&x| x != 0.0).count();
        assert_eq!(
            nonzero_count, 200,
            "Should have exactly k non-zero elements"
        );
    }

    #[test]
    fn test_different_inputs_produce_different_outputs() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        let input1: Vec<f32> = (0..128).map(|i| (i as f32).sin()).collect();
        let input2: Vec<f32> = (0..128).map(|i| (i as f32).cos()).collect();
        let sparse1 = dg.encode(&input1);
        let sparse2 = dg.encode(&input2);
        assert_ne!(
            sparse1, sparse2,
            "Different inputs should produce different encodings"
        );
    }

    // Core dentate-gyrus property: near-identical inputs should map to
    // encodings that overlap less than the inputs do.
    #[test]
    fn test_pattern_separation_property() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        // Create two highly similar inputs
        let mut input1 = vec![0.0; 128];
        let mut input2 = vec![0.0; 128];
        // 95% overlap
        for i in 0..120 {
            input1[i] = 1.0;
            input2[i] = 1.0;
        }
        input1[125] = 1.0;
        input2[126] = 1.0;
        let sparse1 = dg.encode(&input1);
        let sparse2 = dg.encode(&input2);
        let input_overlap = 120.0 / 128.0; // 0.9375
        let output_similarity = sparse1.jaccard_similarity(&sparse2);
        // Pattern separation: output should be less similar than input
        assert!(
            output_similarity < input_overlap,
            "Output similarity ({}) should be less than input overlap ({})",
            output_similarity,
            input_overlap
        );
    }

    #[test]
    fn test_sparsity_levels() {
        // Test different sparsity levels
        let cases = vec![
            (10000, 200, 0.02), // 2%
            (10000, 300, 0.03), // 3%
            (10000, 500, 0.05), // 5%
        ];
        for (output_dim, k, expected_sparsity) in cases {
            let dg = DentateGyrus::new(128, output_dim, k, 42);
            assert_eq!(dg.sparsity(), expected_sparsity);
            let input: Vec<f32> = (0..128).map(|i| (i as f32).sin()).collect();
            let sparse = dg.encode(&input);
            assert_eq!(sparse.count(), k);
        }
    }

    #[test]
    fn test_zero_input() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        let input = vec![0.0; 128];
        let sparse = dg.encode(&input);
        // Even zero input should produce k active neurons (noise from projection)
        assert_eq!(sparse.count(), 200);
    }

    // Smoke benchmark only — bound is intentionally loose for CI machines.
    #[test]
    fn test_encode_performance_target() {
        let dg = DentateGyrus::new(512, 10000, 200, 42);
        let input: Vec<f32> = (0..512).map(|i| (i as f32).sin()).collect();
        let start = std::time::Instant::now();
        let iterations = 100;
        for _ in 0..iterations {
            let _ = dg.encode(&input);
        }
        let elapsed = start.elapsed();
        let avg_time = elapsed / iterations;
        // Target: encoding should complete in reasonable time (very relaxed for CI)
        println!("Average encoding time: {:?}", avg_time);
        assert!(
            avg_time.as_secs() < 2,
            "Average encoding time ({:?}) exceeds 2s target",
            avg_time
        );
    }
}

View File

@@ -0,0 +1,193 @@
//! Pattern separation module implementing hippocampal dentate gyrus-inspired encoding
//!
//! This module provides sparse random projection and k-winners-take-all mechanisms
//! for creating collision-resistant, orthogonal vector representations.

// Internal submodules; the public surface is the re-exports below.
mod dentate;
mod projection;
mod sparsification;

// Public API: encoder, projection layer, and sparse code container.
pub use dentate::DentateGyrus;
pub use projection::SparseProjection;
pub use sparsification::SparseBitVector;
#[cfg(test)]
mod tests {
    use super::*;

    /// Test that similar inputs produce decorrelated outputs
    #[test]
    fn test_pattern_separation_decorrelation() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        // Create two similar inputs (90% overlap)
        let mut input1 = vec![0.0; 128];
        let mut input2 = vec![0.0; 128];
        for i in 0..115 {
            input1[i] = 1.0;
            input2[i] = 1.0;
        }
        input1[120] = 1.0;
        input2[121] = 1.0;
        let sparse1 = dg.encode(&input1);
        let sparse2 = dg.encode(&input2);
        // Despite 90% input overlap, output similarity should be lower
        let input_overlap = 115.0 / 128.0; // 0.898
        let output_similarity = sparse1.jaccard_similarity(&sparse2);
        // Pattern separation should decorrelate: output similarity < input similarity
        assert!(
            output_similarity < input_overlap,
            "Output similarity ({}) should be less than input overlap ({})",
            output_similarity,
            input_overlap
        );
    }

    /// Test collision rate on random inputs
    /// (pairwise comparison over 1000 encodings; O(n^2) but fast enough)
    #[test]
    fn test_collision_rate() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        let num_samples = 1000;
        let mut encodings = Vec::new();
        for i in 0..num_samples {
            let input: Vec<f32> = (0..128).map(|j| ((i * 128 + j) as f32).sin()).collect();
            encodings.push(dg.encode(&input));
        }
        // Count collisions (identical encodings)
        let mut collisions = 0;
        for i in 0..encodings.len() {
            for j in (i + 1)..encodings.len() {
                if encodings[i].indices == encodings[j].indices {
                    collisions += 1;
                }
            }
        }
        let collision_rate = collisions as f32 / (num_samples * (num_samples - 1) / 2) as f32;
        // Collision rate should be < 1%
        assert!(
            collision_rate < 0.01,
            "Collision rate ({:.4}) exceeds 1%",
            collision_rate
        );
    }

    /// Verify sparsity level (2-5% active neurons)
    #[test]
    fn test_sparsity_level() {
        let output_dim = 10000;
        let k = 200; // 2% sparsity
        let dg = DentateGyrus::new(128, output_dim, k, 42);
        let input: Vec<f32> = (0..128).map(|i| (i as f32).sin()).collect();
        let sparse = dg.encode(&input);
        let sparsity = sparse.indices.len() as f32 / output_dim as f32;
        // Verify exact k winners
        assert_eq!(
            sparse.indices.len(),
            k,
            "Should have exactly k active neurons"
        );
        // Verify sparsity in 2-5% range
        assert!(
            sparsity >= 0.02 && sparsity <= 0.05,
            "Sparsity ({:.4}) should be in 2-5% range",
            sparsity
        );
    }

    /// Test encoding performance (loose bound; smoke test only)
    #[test]
    fn test_encoding_performance() {
        let dg = DentateGyrus::new(512, 10000, 200, 42);
        let input: Vec<f32> = (0..512).map(|i| (i as f32).sin()).collect();
        let start = std::time::Instant::now();
        let iterations = 100;
        for _ in 0..iterations {
            let _ = dg.encode(&input);
        }
        let elapsed = start.elapsed();
        let avg_time = elapsed / iterations;
        // Should complete in reasonable time (very relaxed for CI environments)
        assert!(
            avg_time.as_secs() < 2,
            "Average encoding time ({:?}) exceeds 2s",
            avg_time
        );
    }

    /// Test similarity computation performance
    #[test]
    fn test_similarity_performance() {
        let dg = DentateGyrus::new(512, 10000, 200, 42);
        let input1: Vec<f32> = (0..512).map(|i| (i as f32).sin()).collect();
        let input2: Vec<f32> = (0..512).map(|i| (i as f32).cos()).collect();
        let sparse1 = dg.encode(&input1);
        let sparse2 = dg.encode(&input2);
        let start = std::time::Instant::now();
        let iterations = 1000;
        for _ in 0..iterations {
            let _ = sparse1.jaccard_similarity(&sparse2);
        }
        let elapsed = start.elapsed();
        let avg_time = elapsed / iterations;
        // Should be < 100μs per similarity computation
        assert!(
            avg_time.as_micros() < 100,
            "Average similarity time ({:?}) exceeds 100μs",
            avg_time
        );
    }

    /// Test retrieval quality: similar inputs should have higher similarity
    #[test]
    fn test_retrieval_quality() {
        let dg = DentateGyrus::new(128, 10000, 200, 42);
        // Original input
        let original: Vec<f32> = (0..128).map(|i| (i as f32).sin()).collect();
        // Similar input (small perturbation)
        let similar: Vec<f32> = original
            .iter()
            .map(|&x| x + 0.1 * ((x * 10.0).cos()))
            .collect();
        // Different input
        let different: Vec<f32> = (0..128).map(|i| (i as f32).cos()).collect();
        let enc_original = dg.encode(&original);
        let enc_similar = dg.encode(&similar);
        let enc_different = dg.encode(&different);
        let sim_to_similar = enc_original.jaccard_similarity(&enc_similar);
        let sim_to_different = enc_original.jaccard_similarity(&enc_different);
        // Similar inputs should have higher similarity than different inputs
        assert!(
            sim_to_similar > sim_to_different,
            "Similar input similarity ({}) should be higher than different input ({})",
            sim_to_similar,
            sim_to_different
        );
    }
}

View File

@@ -0,0 +1,252 @@
//! Sparse random projection for dimensionality expansion
//!
//! Implements sparse random matrices for efficient high-dimensional projections
//! with controlled sparsity (connection probability).
use crate::{NervousSystemError, Result};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
/// Sparse random projection matrix for dimensionality expansion
///
/// Uses a sparse random matrix to project low-dimensional inputs into
/// high-dimensional space while maintaining computational efficiency.
///
/// # Properties
///
/// - Sparse connectivity (typically 10-20% connections)
/// - Uniform random weights in [-1, 1) on active connections, zero elsewhere
///   (see `new`; weights are drawn with `gen_range`, not a Gaussian)
/// - Deterministic (seeded) for reproducibility
///
/// # Performance
///
/// - Time complexity: O(input_dim × output_dim × sparsity)
/// - Space complexity: O(input_dim × output_dim) — the matrix is stored
///   densely even though most entries are zero
#[derive(Debug, Clone)]
pub struct SparseProjection {
    /// Projection weights [input_dim × output_dim]; a zero entry marks an
    /// absent connection
    weights: Vec<Vec<f32>>,
    /// Connection probability (0.0 to 1.0)
    sparsity: f32,
    /// Random seed used to generate `weights`
    /// NOTE(review): stored but never read back by any method here —
    /// presumably kept for Debug output/diagnostics; confirm before removing
    seed: u64,
    /// Input dimension
    input_dim: usize,
    /// Output dimension
    output_dim: usize,
}
impl SparseProjection {
    /// Create a new sparse random projection
    ///
    /// # Arguments
    ///
    /// * `input_dim` - Input vector dimension
    /// * `output_dim` - Output vector dimension (should be >> input_dim)
    /// * `sparsity` - Connection probability (typically 0.1-0.2)
    /// * `seed` - Random seed for reproducibility
    ///
    /// # Errors
    ///
    /// Returns `InvalidDimension` if either dimension is zero, and
    /// `InvalidSparsity` if `sparsity` is not in (0, 1] (NaN is rejected).
    ///
    /// # Example
    ///
    /// ```
    /// use ruvector_nervous_system::SparseProjection;
    ///
    /// let projection = SparseProjection::new(128, 10000, 0.15, 42);
    /// ```
    pub fn new(input_dim: usize, output_dim: usize, sparsity: f32, seed: u64) -> Result<Self> {
        if input_dim == 0 {
            return Err(NervousSystemError::InvalidDimension(
                "Input dimension must be > 0".to_string(),
            ));
        }
        if output_dim == 0 {
            return Err(NervousSystemError::InvalidDimension(
                "Output dimension must be > 0".to_string(),
            ));
        }
        // Written as a negated conjunction so NaN — which fails every
        // comparison — is rejected instead of silently accepted.
        if !(sparsity > 0.0 && sparsity <= 1.0) {
            return Err(NervousSystemError::InvalidSparsity(format!(
                "Sparsity must be in (0, 1], got {}",
                sparsity
            )));
        }
        let mut rng = StdRng::seed_from_u64(seed);
        let mut weights = Vec::with_capacity(input_dim);
        // Initialize sparse random weights: each connection exists with
        // probability `sparsity` and gets a uniform weight in [-1, 1).
        for _ in 0..input_dim {
            let mut row = Vec::with_capacity(output_dim);
            for _ in 0..output_dim {
                if rng.gen::<f32>() < sparsity {
                    // Uniform random weight in [-1, 1)
                    let weight: f32 = rng.gen_range(-1.0..1.0);
                    row.push(weight);
                } else {
                    row.push(0.0);
                }
            }
            weights.push(row);
        }
        Ok(Self {
            weights,
            sparsity,
            seed,
            input_dim,
            output_dim,
        })
    }

    /// Project input vector to high-dimensional space
    ///
    /// # Arguments
    ///
    /// * `input` - Input vector of size input_dim
    ///
    /// # Returns
    ///
    /// Output vector of size output_dim
    ///
    /// # Errors
    ///
    /// Returns `DimensionMismatch` if `input.len() != input_dim`.
    ///
    /// # Example
    ///
    /// ```
    /// use ruvector_nervous_system::SparseProjection;
    ///
    /// let projection = SparseProjection::new(128, 10000, 0.15, 42).unwrap();
    /// let input = vec![1.0; 128];
    /// let output = projection.project(&input).unwrap();
    /// assert_eq!(output.len(), 10000);
    /// ```
    pub fn project(&self, input: &[f32]) -> Result<Vec<f32>> {
        if input.len() != self.input_dim {
            return Err(NervousSystemError::DimensionMismatch {
                expected: self.input_dim,
                actual: input.len(),
            });
        }
        let mut output = vec![0.0; self.output_dim];
        // Matrix-vector multiplication: output = weights^T × input.
        // Iterate with zip instead of indexing so the compiler can elide
        // per-element bounds checks; zero inputs/weights are skipped.
        for (&input_val, row) in input.iter().zip(&self.weights) {
            if input_val != 0.0 {
                for (out, &weight) in output.iter_mut().zip(row) {
                    if weight != 0.0 {
                        *out += input_val * weight;
                    }
                }
            }
        }
        Ok(output)
    }

    /// Get input dimension
    pub fn input_dim(&self) -> usize {
        self.input_dim
    }

    /// Get output dimension
    pub fn output_dim(&self) -> usize {
        self.output_dim
    }

    /// Get sparsity level
    pub fn sparsity(&self) -> f32 {
        self.sparsity
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sparse_projection_creation() {
        let proj = SparseProjection::new(128, 1000, 0.15, 42).unwrap();
        assert_eq!(proj.input_dim(), 128);
        assert_eq!(proj.output_dim(), 1000);
        assert_eq!(proj.sparsity(), 0.15);
    }

    #[test]
    fn test_invalid_dimensions() {
        assert!(SparseProjection::new(0, 1000, 0.15, 42).is_err());
        assert!(SparseProjection::new(128, 0, 0.15, 42).is_err());
    }

    // Sparsity must lie in (0, 1]; 0.0 and 1.5 are both out of range.
    #[test]
    fn test_invalid_sparsity() {
        assert!(SparseProjection::new(128, 1000, 0.0, 42).is_err());
        assert!(SparseProjection::new(128, 1000, 1.5, 42).is_err());
    }

    #[test]
    fn test_projection_dimensions() {
        let proj = SparseProjection::new(128, 1000, 0.15, 42).unwrap();
        let input = vec![1.0; 128];
        let output = proj.project(&input).unwrap();
        assert_eq!(output.len(), 1000);
    }

    #[test]
    fn test_projection_dimension_mismatch() {
        let proj = SparseProjection::new(128, 1000, 0.15, 42).unwrap();
        let input = vec![1.0; 64]; // Wrong size
        assert!(proj.project(&input).is_err());
    }

    // Same seed ⇒ same weight matrix ⇒ identical projections.
    #[test]
    fn test_projection_deterministic() {
        let proj1 = SparseProjection::new(128, 1000, 0.15, 42).unwrap();
        let proj2 = SparseProjection::new(128, 1000, 0.15, 42).unwrap();
        let input = vec![1.0; 128];
        let output1 = proj1.project(&input).unwrap();
        let output2 = proj2.project(&input).unwrap();
        // Same seed should produce same results
        assert_eq!(output1, output2);
    }

    #[test]
    fn test_projection_sparsity_effect() {
        let proj_sparse = SparseProjection::new(128, 1000, 0.1, 42).unwrap();
        let proj_dense = SparseProjection::new(128, 1000, 0.9, 42).unwrap();
        let input = vec![1.0; 128];
        let output_sparse = proj_sparse.project(&input).unwrap();
        let output_dense = proj_dense.project(&input).unwrap();
        // Dense projection should have larger average magnitude
        // (more connections contributing to each output)
        let avg_sparse: f32 = output_sparse.iter().map(|x| x.abs()).sum::<f32>() / 1000.0;
        let avg_dense: f32 = output_dense.iter().map(|x| x.abs()).sum::<f32>() / 1000.0;
        // 0.9 sparsity means 9x more connections, so roughly sqrt(9) = 3x larger magnitude
        assert!(
            avg_dense > avg_sparse,
            "Dense avg={} should be > sparse avg={}",
            avg_dense,
            avg_sparse
        );
    }

    // Linearity: projecting the zero vector yields the zero vector.
    #[test]
    fn test_zero_input_produces_zero_output() {
        let proj = SparseProjection::new(128, 1000, 0.15, 42).unwrap();
        let input = vec![0.0; 128];
        let output = proj.project(&input).unwrap();
        assert!(output.iter().all(|&x| x == 0.0));
    }
}

View File

@@ -0,0 +1,403 @@
//! Sparse bit vector for efficient k-winners-take-all representation
//!
//! Implements memory-efficient sparse bit vectors using index lists
//! with fast set operations for similarity computation.
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
/// Sparse bit vector storing only active indices
///
/// Efficient representation for sparse binary vectors where only
/// a small fraction of bits are set (active). Stores only the indices
/// of active bits rather than the full bit array.
///
/// # Properties
///
/// - Memory: O(k) where k is number of active bits
/// - Set operations: O(k1 + k2) for intersection/union
/// - Typical k: 200-500 active bits out of 10000+ total
///
/// # Example
///
/// ```
/// use ruvector_nervous_system::SparseBitVector;
///
/// let mut sparse = SparseBitVector::new(10000);
/// sparse.set(42);
/// sparse.set(100);
/// sparse.set(500);
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SparseBitVector {
    /// Sorted, deduplicated list of active bit indices.
    /// Kept ordered so binary search and linear merge operations work;
    /// public field — callers mutating it directly must preserve the order.
    pub indices: Vec<u16>,
    /// Total capacity (maximum index + 1)
    capacity: u16,
}
impl SparseBitVector {
    /// Create a new empty sparse bit vector with given capacity
    ///
    /// # Arguments
    ///
    /// * `capacity` - Maximum number of bits (max index + 1)
    ///
    /// # Example
    ///
    /// ```
    /// use ruvector_nervous_system::SparseBitVector;
    ///
    /// let sparse = SparseBitVector::new(10000);
    /// ```
    pub fn new(capacity: u16) -> Self {
        Self {
            indices: Vec::new(),
            capacity,
        }
    }

    /// Create from a list of active indices
    ///
    /// Indices are sorted and deduplicated; duplicates are silently dropped.
    ///
    /// # Arguments
    ///
    /// * `indices` - Vector of active bit indices
    /// * `capacity` - Total capacity
    ///   NOTE(review): indices are not validated against `capacity` here,
    ///   unlike `set` — confirm whether out-of-range inputs can occur
    ///
    /// # Example
    ///
    /// ```
    /// use ruvector_nervous_system::SparseBitVector;
    ///
    /// let sparse = SparseBitVector::from_indices(vec![10, 20, 30], 10000);
    /// ```
    pub fn from_indices(mut indices: Vec<u16>, capacity: u16) -> Self {
        indices.sort_unstable();
        indices.dedup();
        Self { indices, capacity }
    }

    /// Set a bit to active (no-op if already set)
    ///
    /// # Arguments
    ///
    /// * `index` - Bit index to set
    ///
    /// # Panics
    ///
    /// Panics if index >= capacity
    pub fn set(&mut self, index: u16) {
        assert!(index < self.capacity, "Index out of bounds");
        // Binary search keeps `indices` sorted on insert
        match self.indices.binary_search(&index) {
            Ok(_) => {} // Already present
            Err(pos) => self.indices.insert(pos, index),
        }
    }

    /// Check if a bit is active
    ///
    /// # Arguments
    ///
    /// * `index` - Bit index to check
    ///
    /// # Returns
    ///
    /// true if bit is set, false otherwise
    pub fn is_set(&self, index: u16) -> bool {
        self.indices.binary_search(&index).is_ok()
    }

    /// Get number of active bits
    pub fn count(&self) -> usize {
        self.indices.len()
    }

    /// Get capacity
    pub fn capacity(&self) -> u16 {
        self.capacity
    }

    /// Compute intersection with another sparse bit vector
    ///
    /// The result takes `self`'s capacity.
    /// NOTE(review): assumes both vectors share the same capacity — confirm
    /// at call sites; a mismatch is not detected here.
    ///
    /// # Arguments
    ///
    /// * `other` - Other sparse bit vector
    ///
    /// # Returns
    ///
    /// New sparse bit vector containing intersection
    ///
    /// # Example
    ///
    /// ```
    /// use ruvector_nervous_system::SparseBitVector;
    ///
    /// let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
    /// let b = SparseBitVector::from_indices(vec![2, 3, 4], 100);
    /// let intersection = a.intersection(&b);
    /// assert_eq!(intersection.count(), 2); // {2, 3}
    /// ```
    pub fn intersection(&self, other: &Self) -> Self {
        // Upper bound on intersection size is the smaller operand
        let mut result = Vec::with_capacity(self.indices.len().min(other.indices.len()));
        let mut i = 0;
        let mut j = 0;
        // Linear merge over two sorted index lists: O(k1 + k2)
        while i < self.indices.len() && j < other.indices.len() {
            match self.indices[i].cmp(&other.indices[j]) {
                std::cmp::Ordering::Equal => {
                    result.push(self.indices[i]);
                    i += 1;
                    j += 1;
                }
                std::cmp::Ordering::Less => i += 1,
                std::cmp::Ordering::Greater => j += 1,
            }
        }
        Self {
            indices: result,
            capacity: self.capacity,
        }
    }

    /// Compute union with another sparse bit vector
    ///
    /// The result takes `self`'s capacity (see `intersection` note).
    ///
    /// # Arguments
    ///
    /// * `other` - Other sparse bit vector
    ///
    /// # Returns
    ///
    /// New sparse bit vector containing union
    pub fn union(&self, other: &Self) -> Self {
        // Upper bound on union size is the sum of both operands
        let mut result = Vec::with_capacity(self.indices.len() + other.indices.len());
        let mut i = 0;
        let mut j = 0;
        // Linear merge over two sorted index lists: O(k1 + k2)
        while i < self.indices.len() && j < other.indices.len() {
            match self.indices[i].cmp(&other.indices[j]) {
                std::cmp::Ordering::Equal => {
                    result.push(self.indices[i]);
                    i += 1;
                    j += 1;
                }
                std::cmp::Ordering::Less => {
                    result.push(self.indices[i]);
                    i += 1;
                }
                std::cmp::Ordering::Greater => {
                    result.push(other.indices[j]);
                    j += 1;
                }
            }
        }
        // Append whichever tail remains (at most one slice is non-empty)
        result.extend_from_slice(&self.indices[i..]);
        result.extend_from_slice(&other.indices[j..]);
        Self {
            indices: result,
            capacity: self.capacity,
        }
    }

    /// Compute Jaccard similarity with another sparse bit vector
    ///
    /// Jaccard similarity = |A ∩ B| / |A ∪ B|
    ///
    /// # Arguments
    ///
    /// * `other` - Other sparse bit vector
    ///
    /// # Returns
    ///
    /// Similarity in range [0.0, 1.0]; two empty vectors are defined
    /// as identical (similarity 1.0)
    ///
    /// # Example
    ///
    /// ```
    /// use ruvector_nervous_system::SparseBitVector;
    ///
    /// let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
    /// let b = SparseBitVector::from_indices(vec![2, 3, 4], 100);
    /// let sim = a.jaccard_similarity(&b);
    /// assert!((sim - 0.5).abs() < 0.001); // 2/4 = 0.5
    /// ```
    pub fn jaccard_similarity(&self, other: &Self) -> f32 {
        if self.indices.is_empty() && other.indices.is_empty() {
            return 1.0;
        }
        let intersection_size = self.intersection_size(other);
        // |A ∪ B| = |A| + |B| - |A ∩ B| (inclusion-exclusion)
        let union_size = self.indices.len() + other.indices.len() - intersection_size;
        if union_size == 0 {
            // Unreachable after the empty-empty check above; kept as a
            // defensive guard against division by zero
            return 0.0;
        }
        intersection_size as f32 / union_size as f32
    }

    /// Compute Hamming distance with another sparse bit vector
    ///
    /// Hamming distance = number of positions where bits differ
    /// (size of the symmetric difference of the two index sets)
    ///
    /// # Arguments
    ///
    /// * `other` - Other sparse bit vector
    ///
    /// # Returns
    ///
    /// Hamming distance (number of differing bits)
    pub fn hamming_distance(&self, other: &Self) -> u32 {
        let intersection_size = self.intersection_size(other);
        // |A Δ B| = |A| + |B| - 2|A ∩ B|
        let total_active = self.indices.len() + other.indices.len();
        (total_active - 2 * intersection_size) as u32
    }

    /// Helper: compute intersection size without materializing the result
    fn intersection_size(&self, other: &Self) -> usize {
        let mut count = 0;
        let mut i = 0;
        let mut j = 0;
        while i < self.indices.len() && j < other.indices.len() {
            match self.indices[i].cmp(&other.indices[j]) {
                std::cmp::Ordering::Equal => {
                    count += 1;
                    i += 1;
                    j += 1;
                }
                std::cmp::Ordering::Less => i += 1,
                std::cmp::Ordering::Greater => j += 1,
            }
        }
        count
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sparse_bitvector_creation() {
        let sparse = SparseBitVector::new(10000);
        assert_eq!(sparse.count(), 0);
        assert_eq!(sparse.capacity(), 10000);
    }

    #[test]
    fn test_set_and_check() {
        let mut sparse = SparseBitVector::new(100);
        sparse.set(10);
        sparse.set(20);
        sparse.set(30);
        assert!(sparse.is_set(10));
        assert!(sparse.is_set(20));
        assert!(sparse.is_set(30));
        assert!(!sparse.is_set(15));
        assert_eq!(sparse.count(), 3);
    }

    // from_indices must sort and drop duplicate indices.
    #[test]
    fn test_from_indices() {
        let sparse = SparseBitVector::from_indices(vec![30, 10, 20, 10], 100);
        assert_eq!(sparse.count(), 3); // Deduped
        assert!(sparse.is_set(10));
        assert!(sparse.is_set(20));
        assert!(sparse.is_set(30));
    }

    #[test]
    fn test_intersection() {
        let a = SparseBitVector::from_indices(vec![1, 2, 3, 4], 100);
        let b = SparseBitVector::from_indices(vec![3, 4, 5, 6], 100);
        let intersection = a.intersection(&b);
        assert_eq!(intersection.count(), 2);
        assert!(intersection.is_set(3));
        assert!(intersection.is_set(4));
    }

    #[test]
    fn test_union() {
        let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
        let b = SparseBitVector::from_indices(vec![3, 4, 5], 100);
        let union = a.union(&b);
        assert_eq!(union.count(), 5);
        for i in 1..=5 {
            assert!(union.is_set(i));
        }
    }

    #[test]
    fn test_jaccard_similarity() {
        let a = SparseBitVector::from_indices(vec![1, 2, 3, 4], 100);
        let b = SparseBitVector::from_indices(vec![3, 4, 5, 6], 100);
        // Intersection: {3, 4} = 2
        // Union: {1, 2, 3, 4, 5, 6} = 6
        // Jaccard = 2/6 = 0.333...
        let sim = a.jaccard_similarity(&b);
        assert!((sim - 0.333333).abs() < 0.001);
    }

    #[test]
    fn test_jaccard_identical() {
        let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
        let b = SparseBitVector::from_indices(vec![1, 2, 3], 100);
        let sim = a.jaccard_similarity(&b);
        assert_eq!(sim, 1.0);
    }

    #[test]
    fn test_jaccard_disjoint() {
        let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
        let b = SparseBitVector::from_indices(vec![4, 5, 6], 100);
        let sim = a.jaccard_similarity(&b);
        assert_eq!(sim, 0.0);
    }

    #[test]
    fn test_hamming_distance() {
        let a = SparseBitVector::from_indices(vec![1, 2, 3, 4], 100);
        let b = SparseBitVector::from_indices(vec![3, 4, 5, 6], 100);
        // Symmetric difference: {1, 2, 5, 6} = 4
        let dist = a.hamming_distance(&b);
        assert_eq!(dist, 4);
    }

    #[test]
    fn test_hamming_identical() {
        let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
        let b = SparseBitVector::from_indices(vec![1, 2, 3], 100);
        let dist = a.hamming_distance(&b);
        assert_eq!(dist, 0);
    }

    // set() enforces index < capacity (capacity itself is out of range).
    #[test]
    #[should_panic(expected = "Index out of bounds")]
    fn test_set_out_of_bounds() {
        let mut sparse = SparseBitVector::new(100);
        sparse.set(100); // Should panic
    }
}
}