Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
501
vendor/ruvector/crates/ruvector-nervous-system/src/hdc/memory.rs
vendored
Normal file
501
vendor/ruvector/crates/ruvector-nervous-system/src/hdc/memory.rs
vendored
Normal file
@@ -0,0 +1,501 @@
|
||||
//! Associative memory for hyperdimensional computing
|
||||
//!
|
||||
//! Provides high-capacity storage and retrieval of hypervector patterns
|
||||
//! with 10^40 representational capacity.
|
||||
|
||||
use super::vector::Hypervector;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Associative memory for storing and retrieving hypervectors
|
||||
///
|
||||
/// # Capacity
|
||||
///
|
||||
/// - Theoretical: 10^40 distinct patterns
|
||||
/// - Practical: Limited by available memory (~1.2KB per entry)
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// - Store: O(1)
|
||||
/// - Retrieve: O(N) where N is number of stored items
|
||||
/// - Can be optimized to O(log N) with spatial indexing
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, HdcMemory};
|
||||
///
|
||||
/// let mut memory = HdcMemory::new();
|
||||
///
|
||||
/// // Store concepts
|
||||
/// let concept_a = Hypervector::random();
|
||||
/// let concept_b = Hypervector::random();
|
||||
///
|
||||
/// memory.store("animal", concept_a.clone());
|
||||
/// memory.store("plant", concept_b);
|
||||
///
|
||||
/// // Retrieve similar concepts
|
||||
/// let results = memory.retrieve(&concept_a, 0.8);
|
||||
/// assert_eq!(results[0].0, "animal");
|
||||
/// assert!(results[0].1 > 0.99);
|
||||
/// ```
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct HdcMemory {
|
||||
items: HashMap<String, Hypervector>,
|
||||
}
|
||||
|
||||
impl HdcMemory {
|
||||
/// Creates a new empty associative memory
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
items: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a memory with pre-allocated capacity
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::HdcMemory;
|
||||
///
|
||||
/// let memory = HdcMemory::with_capacity(1000);
|
||||
/// ```
|
||||
pub fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
items: HashMap::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
/// Stores a hypervector with a key
|
||||
///
|
||||
/// If the key already exists, the value is overwritten.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, HdcMemory};
|
||||
///
|
||||
/// let mut memory = HdcMemory::new();
|
||||
/// let vector = Hypervector::random();
|
||||
///
|
||||
/// memory.store("my_key", vector);
|
||||
/// ```
|
||||
pub fn store(&mut self, key: impl Into<String>, value: Hypervector) {
|
||||
self.items.insert(key.into(), value);
|
||||
}
|
||||
|
||||
/// Retrieves vectors similar to the query above a threshold
|
||||
///
|
||||
/// Returns a vector of (key, similarity) pairs sorted by similarity (descending).
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `query` - The query hypervector
|
||||
/// * `threshold` - Minimum similarity (0.0 to 1.0) to include in results
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// O(N) where N is the number of stored items. Each comparison is <100ns.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, HdcMemory};
|
||||
///
|
||||
/// let mut memory = HdcMemory::new();
|
||||
/// let v1 = Hypervector::random();
|
||||
///
|
||||
/// memory.store("item1", v1.clone());
|
||||
/// memory.store("item2", Hypervector::random());
|
||||
///
|
||||
/// let results = memory.retrieve(&v1, 0.9);
|
||||
/// assert!(!results.is_empty());
|
||||
/// assert_eq!(results[0].0, "item1");
|
||||
/// ```
|
||||
pub fn retrieve(&self, query: &Hypervector, threshold: f32) -> Vec<(String, f32)> {
|
||||
let mut results: Vec<_> = self
|
||||
.items
|
||||
.iter()
|
||||
.map(|(key, vector)| (key.clone(), query.similarity(vector)))
|
||||
.filter(|(_, sim)| *sim >= threshold)
|
||||
.collect();
|
||||
|
||||
// Sort by similarity descending (NaN-safe: treat NaN as less than any value)
|
||||
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Less));
|
||||
|
||||
results
|
||||
}
|
||||
|
||||
/// Retrieves the top-k most similar vectors
|
||||
///
|
||||
/// Returns at most k results, sorted by similarity (descending).
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, HdcMemory};
|
||||
///
|
||||
/// let mut memory = HdcMemory::new();
|
||||
///
|
||||
/// for i in 0..10 {
|
||||
/// memory.store(format!("item{}", i), Hypervector::random());
|
||||
/// }
|
||||
///
|
||||
/// let query = Hypervector::random();
|
||||
/// let top5 = memory.retrieve_top_k(&query, 5);
|
||||
///
|
||||
/// assert!(top5.len() <= 5);
|
||||
/// ```
|
||||
pub fn retrieve_top_k(&self, query: &Hypervector, k: usize) -> Vec<(String, f32)> {
|
||||
let mut results: Vec<_> = self
|
||||
.items
|
||||
.iter()
|
||||
.map(|(key, vector)| (key.clone(), query.similarity(vector)))
|
||||
.collect();
|
||||
|
||||
// Partial sort to get top k (NaN-safe)
|
||||
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Less));
|
||||
|
||||
results.into_iter().take(k).collect()
|
||||
}
|
||||
|
||||
/// Gets a stored vector by key
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, HdcMemory};
|
||||
///
|
||||
/// let mut memory = HdcMemory::new();
|
||||
/// let vector = Hypervector::random();
|
||||
///
|
||||
/// memory.store("key", vector.clone());
|
||||
///
|
||||
/// let retrieved = memory.get("key").unwrap();
|
||||
/// assert_eq!(&vector, retrieved);
|
||||
/// ```
|
||||
pub fn get(&self, key: &str) -> Option<&Hypervector> {
|
||||
self.items.get(key)
|
||||
}
|
||||
|
||||
/// Checks if a key exists in memory
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, HdcMemory};
|
||||
///
|
||||
/// let mut memory = HdcMemory::new();
|
||||
///
|
||||
/// assert!(!memory.contains_key("key"));
|
||||
/// memory.store("key", Hypervector::random());
|
||||
/// assert!(memory.contains_key("key"));
|
||||
/// ```
|
||||
pub fn contains_key(&self, key: &str) -> bool {
|
||||
self.items.contains_key(key)
|
||||
}
|
||||
|
||||
/// Removes a vector by key
|
||||
///
|
||||
/// Returns the removed vector if it existed.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, HdcMemory};
|
||||
///
|
||||
/// let mut memory = HdcMemory::new();
|
||||
/// let vector = Hypervector::random();
|
||||
///
|
||||
/// memory.store("key", vector.clone());
|
||||
/// let removed = memory.remove("key").unwrap();
|
||||
/// assert_eq!(vector, removed);
|
||||
/// assert!(!memory.contains_key("key"));
|
||||
/// ```
|
||||
pub fn remove(&mut self, key: &str) -> Option<Hypervector> {
|
||||
self.items.remove(key)
|
||||
}
|
||||
|
||||
/// Returns the number of stored vectors
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, HdcMemory};
|
||||
///
|
||||
/// let mut memory = HdcMemory::new();
|
||||
/// assert_eq!(memory.len(), 0);
|
||||
///
|
||||
/// memory.store("key", Hypervector::random());
|
||||
/// assert_eq!(memory.len(), 1);
|
||||
/// ```
|
||||
pub fn len(&self) -> usize {
|
||||
self.items.len()
|
||||
}
|
||||
|
||||
/// Checks if the memory is empty
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::HdcMemory;
|
||||
///
|
||||
/// let memory = HdcMemory::new();
|
||||
/// assert!(memory.is_empty());
|
||||
/// ```
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.items.is_empty()
|
||||
}
|
||||
|
||||
/// Clears all stored vectors
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, HdcMemory};
|
||||
///
|
||||
/// let mut memory = HdcMemory::new();
|
||||
/// memory.store("key", Hypervector::random());
|
||||
///
|
||||
/// memory.clear();
|
||||
/// assert!(memory.is_empty());
|
||||
/// ```
|
||||
pub fn clear(&mut self) {
|
||||
self.items.clear();
|
||||
}
|
||||
|
||||
/// Returns an iterator over all keys
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, HdcMemory};
|
||||
///
|
||||
/// let mut memory = HdcMemory::new();
|
||||
/// memory.store("key1", Hypervector::random());
|
||||
/// memory.store("key2", Hypervector::random());
|
||||
///
|
||||
/// let keys: Vec<_> = memory.keys().collect();
|
||||
/// assert_eq!(keys.len(), 2);
|
||||
/// ```
|
||||
pub fn keys(&self) -> impl Iterator<Item = &String> {
|
||||
self.items.keys()
|
||||
}
|
||||
|
||||
/// Returns an iterator over all (key, vector) pairs
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, HdcMemory};
|
||||
///
|
||||
/// let mut memory = HdcMemory::new();
|
||||
/// memory.store("key", Hypervector::random());
|
||||
///
|
||||
/// for (key, vector) in memory.iter() {
|
||||
/// println!("{}: {:?}", key, vector);
|
||||
/// }
|
||||
/// ```
|
||||
pub fn iter(&self) -> impl Iterator<Item = (&String, &Hypervector)> {
|
||||
self.items.iter()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for HdcMemory {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // A freshly constructed memory holds nothing.
    #[test]
    fn test_new_memory_empty() {
        let fresh = HdcMemory::new();
        assert!(fresh.is_empty());
        assert_eq!(fresh.len(), 0);
    }

    // A stored vector can be read back unchanged under its key.
    #[test]
    fn test_store_and_get() {
        let original = Hypervector::random();
        let mut memory = HdcMemory::new();

        memory.store("key", original.clone());

        assert_eq!(memory.len(), 1);
        assert_eq!(memory.get("key").unwrap(), &original);
    }

    // Storing twice under one key keeps only the latest value.
    #[test]
    fn test_store_overwrite() {
        let first = Hypervector::from_seed(1);
        let second = Hypervector::from_seed(2);
        let mut memory = HdcMemory::new();

        memory.store("key", first);
        memory.store("key", second.clone());

        assert_eq!(memory.len(), 1);
        assert_eq!(memory.get("key").unwrap(), &second);
    }

    // Querying with the stored vector itself yields exactly that entry.
    #[test]
    fn test_retrieve_exact_match() {
        let stored = Hypervector::random();
        let mut memory = HdcMemory::new();
        memory.store("exact", stored.clone());

        let hits = memory.retrieve(&stored, 0.99);

        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].0, "exact");
        assert!(hits[0].1 > 0.99);
    }

    // The threshold controls how many entries come back.
    #[test]
    fn test_retrieve_threshold() {
        let probe = Hypervector::from_seed(1);
        let mut memory = HdcMemory::new();
        memory.store("v1", probe.clone());
        memory.store("v2", Hypervector::from_seed(2));
        memory.store("v3", Hypervector::from_seed(3));

        // High threshold should return only exact match
        assert_eq!(memory.retrieve(&probe, 0.99).len(), 1);

        // Low threshold (-1.0 is min similarity) should return all
        assert_eq!(memory.retrieve(&probe, -1.0).len(), 3);
    }

    // Results come back ordered from most to least similar.
    #[test]
    fn test_retrieve_sorted() {
        let mut memory = HdcMemory::new();
        for seed in 0..5 {
            memory.store(format!("v{}", seed), Hypervector::from_seed(seed));
        }

        let hits = memory.retrieve(&Hypervector::from_seed(0), 0.0);

        // Should be sorted by similarity descending
        for pair in hits.windows(2) {
            assert!(pair[0].1 >= pair[1].1);
        }
    }

    // Top-k returns exactly k entries, best first, when enough are stored.
    #[test]
    fn test_retrieve_top_k() {
        let mut memory = HdcMemory::new();
        for seed in 0..10 {
            memory.store(format!("v{}", seed), Hypervector::from_seed(seed));
        }

        let probe = Hypervector::random();
        let best = memory.retrieve_top_k(&probe, 3);

        assert_eq!(best.len(), 3);

        // Should be sorted
        assert!(best[0].1 >= best[1].1);
        assert!(best[1].1 >= best[2].1);
    }

    // Asking for more than is stored returns everything.
    #[test]
    fn test_retrieve_top_k_more_than_stored() {
        let mut memory = HdcMemory::new();
        for n in 0..3 {
            memory.store(format!("v{}", n), Hypervector::random());
        }

        let hits = memory.retrieve_top_k(&Hypervector::random(), 10);
        assert_eq!(hits.len(), 3);
    }

    // `contains_key` reflects whether the key has been stored.
    #[test]
    fn test_contains_key() {
        let mut memory = HdcMemory::new();
        assert!(!memory.contains_key("key"));

        memory.store("key", Hypervector::random());
        assert!(memory.contains_key("key"));
    }

    // Removing hands back the stored vector and forgets the key.
    #[test]
    fn test_remove() {
        let original = Hypervector::random();
        let mut memory = HdcMemory::new();

        memory.store("key", original.clone());
        assert_eq!(memory.len(), 1);

        let taken = memory.remove("key").unwrap();
        assert_eq!(taken, original);
        assert_eq!(memory.len(), 0);
        assert!(!memory.contains_key("key"));
    }

    // Clearing drops every entry.
    #[test]
    fn test_clear() {
        let mut memory = HdcMemory::new();
        for n in 0..5 {
            memory.store(format!("v{}", n), Hypervector::random());
        }
        assert_eq!(memory.len(), 5);

        memory.clear();

        assert_eq!(memory.len(), 0);
        assert!(memory.is_empty());
    }

    // The key iterator visits every stored key.
    #[test]
    fn test_keys_iterator() {
        let mut memory = HdcMemory::new();
        for name in ["key1", "key2", "key3"] {
            memory.store(name, Hypervector::random());
        }

        let keys: Vec<_> = memory.keys().collect();
        assert_eq!(keys.len(), 3);
    }

    // The pair iterator visits every stored entry.
    #[test]
    fn test_iter() {
        let mut memory = HdcMemory::new();
        for seed in 0..3 {
            memory.store(format!("v{}", seed), Hypervector::from_seed(seed));
        }

        let mut visited = 0;
        for (key, vector) in memory.iter() {
            assert!(key.starts_with("v"));
            assert!(vector.popcount() > 0);
            visited += 1;
        }

        assert_eq!(visited, 3);
    }

    // Pre-allocating capacity does not add entries.
    #[test]
    fn test_with_capacity() {
        let memory = HdcMemory::with_capacity(100);
        assert!(memory.is_empty());
    }
}
|
||||
50
vendor/ruvector/crates/ruvector-nervous-system/src/hdc/mod.rs
vendored
Normal file
50
vendor/ruvector/crates/ruvector-nervous-system/src/hdc/mod.rs
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
//! Hyperdimensional Computing (HDC) module
|
||||
//!
|
||||
//! Implements binary hypervectors with SIMD-optimized operations for
|
||||
//! ultra-fast pattern matching and associative memory.
|
||||
|
||||
mod memory;
|
||||
mod ops;
|
||||
mod similarity;
|
||||
mod vector;
|
||||
|
||||
pub use memory::HdcMemory;
|
||||
pub use ops::{bind, bind_multiple, bundle, invert, permute};
|
||||
pub use similarity::{
|
||||
batch_similarities, cosine_similarity, find_similar, hamming_distance, jaccard_similarity,
|
||||
normalized_hamming, pairwise_similarities, top_k_similar,
|
||||
};
|
||||
pub use vector::{HdcError, Hypervector};
|
||||
|
||||
/// Number of bits in a hypervector (10,000)
pub const HYPERVECTOR_BITS: usize = 10_000;

/// Number of u64 words needed to store HYPERVECTOR_BITS (157 = ceil(10000/64))
///
/// Derived from `HYPERVECTOR_BITS` so the two constants cannot drift apart.
/// The last word carries `HYPERVECTOR_U64_LEN * 64 - HYPERVECTOR_BITS` bits
/// beyond the logical dimension.
pub const HYPERVECTOR_U64_LEN: usize = HYPERVECTOR_BITS.div_ceil(64);
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // The two size constants hold their documented values and agree with each other.
    #[test]
    fn test_constants() {
        assert_eq!(HYPERVECTOR_BITS, 10_000);
        assert_eq!(HYPERVECTOR_U64_LEN, 157);
        assert!(HYPERVECTOR_BITS <= HYPERVECTOR_U64_LEN * 64);
    }

    // Smoke test: every re-exported item used here resolves and can be called.
    #[test]
    fn test_module_exports() {
        // Verify all exports are accessible
        let first = Hypervector::random();
        let second = Hypervector::random();

        let _bound = bind(&first, &second);
        let _bundled = bundle(&[first.clone(), second.clone()]);
        let _dist = hamming_distance(&first, &second);
        let _sim = cosine_similarity(&first, &second);

        let mut memory = HdcMemory::new();
        memory.store("test", first.clone());
    }
}
|
||||
256
vendor/ruvector/crates/ruvector-nervous-system/src/hdc/ops.rs
vendored
Normal file
256
vendor/ruvector/crates/ruvector-nervous-system/src/hdc/ops.rs
vendored
Normal file
@@ -0,0 +1,256 @@
|
||||
//! HDC operations: binding, bundling, permutation
|
||||
|
||||
use super::vector::{HdcError, Hypervector};
|
||||
use super::HYPERVECTOR_U64_LEN;
|
||||
|
||||
/// Binds two hypervectors using XOR
|
||||
///
|
||||
/// This is a convenience function equivalent to `v1.bind(&v2)`.
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// <50ns on modern CPUs
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, bind};
|
||||
///
|
||||
/// let a = Hypervector::random();
|
||||
/// let b = Hypervector::random();
|
||||
/// let bound = bind(&a, &b);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn bind(v1: &Hypervector, v2: &Hypervector) -> Hypervector {
|
||||
v1.bind(v2)
|
||||
}
|
||||
|
||||
/// Bundles multiple hypervectors by majority voting
|
||||
///
|
||||
/// This is a convenience function equivalent to `Hypervector::bundle(vectors)`.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, bundle};
|
||||
///
|
||||
/// let v1 = Hypervector::random();
|
||||
/// let v2 = Hypervector::random();
|
||||
/// let v3 = Hypervector::random();
|
||||
///
|
||||
/// let bundled = bundle(&[v1, v2, v3]).unwrap();
|
||||
/// ```
|
||||
pub fn bundle(vectors: &[Hypervector]) -> Result<Hypervector, HdcError> {
|
||||
Hypervector::bundle(vectors)
|
||||
}
|
||||
|
||||
/// Permutes a hypervector by rotating bits
|
||||
///
|
||||
/// Permutation creates a new representation that is orthogonal to the original,
|
||||
/// useful for encoding sequences and positions.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, permute};
|
||||
///
|
||||
/// let v = Hypervector::random();
|
||||
/// let p1 = permute(&v, 1);
|
||||
/// let p2 = permute(&v, 2);
|
||||
///
|
||||
/// // Permuted vectors are orthogonal
|
||||
/// assert!(v.similarity(&p1) < 0.6);
|
||||
/// assert!(p1.similarity(&p2) < 0.6);
|
||||
/// ```
|
||||
pub fn permute(v: &Hypervector, shift: usize) -> Hypervector {
|
||||
if shift == 0 {
|
||||
return v.clone();
|
||||
}
|
||||
|
||||
let mut result = Hypervector::zero();
|
||||
let total_bits = HYPERVECTOR_U64_LEN * 64;
|
||||
let shift = shift % total_bits; // Normalize shift
|
||||
|
||||
// Rotate bits left by shift positions
|
||||
for i in 0..total_bits {
|
||||
let src_idx = i;
|
||||
let dst_idx = (i + shift) % total_bits;
|
||||
|
||||
let src_word = src_idx / 64;
|
||||
let src_bit = src_idx % 64;
|
||||
let dst_word = dst_idx / 64;
|
||||
let dst_bit = dst_idx % 64;
|
||||
|
||||
let bit = (v.bits()[src_word] >> src_bit) & 1;
|
||||
result.bits[dst_word] |= bit << dst_bit;
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Inverts all bits in a hypervector
|
||||
///
|
||||
/// Useful for negation and creating opposite representations.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, invert};
|
||||
///
|
||||
/// let v = Hypervector::random();
|
||||
/// let inv = invert(&v);
|
||||
///
|
||||
/// // Similarity should be near 0 (opposite)
|
||||
/// assert!(v.similarity(&inv) < 0.1);
|
||||
/// ```
|
||||
pub fn invert(v: &Hypervector) -> Hypervector {
|
||||
let mut result = Hypervector::zero();
|
||||
|
||||
for i in 0..HYPERVECTOR_U64_LEN {
|
||||
result.bits[i] = !v.bits()[i];
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Binds multiple vectors in sequence
|
||||
///
|
||||
/// Equivalent to `v1.bind(&v2).bind(&v3)...`
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, bind_multiple};
|
||||
///
|
||||
/// let v1 = Hypervector::random();
|
||||
/// let v2 = Hypervector::random();
|
||||
/// let v3 = Hypervector::random();
|
||||
///
|
||||
/// let bound = bind_multiple(&[v1, v2, v3]).unwrap();
|
||||
/// ```
|
||||
pub fn bind_multiple(vectors: &[Hypervector]) -> Result<Hypervector, HdcError> {
|
||||
if vectors.is_empty() {
|
||||
return Err(HdcError::EmptyVectorSet);
|
||||
}
|
||||
|
||||
let mut result = vectors[0].clone();
|
||||
|
||||
for v in &vectors[1..] {
|
||||
result = result.bind(v);
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // The free function and the method produce the same binding.
    #[test]
    fn test_bind_function() {
        let lhs = Hypervector::random();
        let rhs = Hypervector::random();

        assert_eq!(bind(&lhs, &rhs), lhs.bind(&rhs));
    }

    // The free function and the associated function produce the same bundle.
    #[test]
    fn test_bundle_function() {
        let first = Hypervector::random();
        let second = Hypervector::random();

        let via_function = bundle(&[first.clone(), second.clone()]).unwrap();
        let via_type = Hypervector::bundle(&[first, second]).unwrap();

        assert_eq!(via_function, via_type);
    }

    // A zero shift leaves the vector unchanged.
    #[test]
    fn test_permute_zero_is_identity() {
        let original = Hypervector::random();
        assert_eq!(original, permute(&original, 0));
    }

    // Different shifts give vectors that look unrelated to each other.
    #[test]
    fn test_permute_creates_orthogonal() {
        let original = Hypervector::random();
        let shifted_once = permute(&original, 1);
        let shifted_twice = permute(&original, 2);

        // Permuted vectors should be mostly orthogonal
        assert!(original.similarity(&shifted_once) < 0.6);
        assert!(shifted_once.similarity(&shifted_twice) < 0.6);
    }

    // Rotating by the complementary amount restores the original.
    #[test]
    fn test_permute_inverse() {
        let width = HYPERVECTOR_U64_LEN * 64;
        let original = Hypervector::random();

        let forward = permute(&original, 100);
        let restored = permute(&forward, width - 100);

        assert_eq!(original, restored);
    }

    // An inverted vector scores low against its source.
    #[test]
    fn test_invert_creates_opposite() {
        let original = Hypervector::random();

        // Inverted vector should have opposite bits
        let sim = original.similarity(&invert(&original));
        assert!(sim < 0.1, "similarity: {}", sim);
    }

    // Inverting twice restores the original.
    #[test]
    fn test_invert_double_is_identity() {
        let original = Hypervector::random();
        let restored = invert(&invert(&original));

        assert_eq!(original, restored);
    }

    // Binding a single vector returns that vector.
    #[test]
    fn test_bind_multiple_single() {
        let only = Hypervector::random();
        let bound = bind_multiple(&[only.clone()]).unwrap();

        assert_eq!(bound, only);
    }

    // Binding two vectors matches a direct `bind`.
    #[test]
    fn test_bind_multiple_two() {
        let a = Hypervector::random();
        let b = Hypervector::random();

        let via_slice = bind_multiple(&[a.clone(), b.clone()]).unwrap();

        assert_eq!(via_slice, a.bind(&b));
    }

    // Binding three vectors matches chained `bind` calls.
    #[test]
    fn test_bind_multiple_three() {
        let a = Hypervector::random();
        let b = Hypervector::random();
        let c = Hypervector::random();

        let via_slice = bind_multiple(&[a.clone(), b.clone(), c.clone()]).unwrap();

        assert_eq!(via_slice, a.bind(&b).bind(&c));
    }

    // An empty slice is rejected with the dedicated error.
    #[test]
    fn test_bind_multiple_empty_error() {
        let outcome = bind_multiple(&[]);
        assert!(matches!(outcome, Err(HdcError::EmptyVectorSet)));
    }
}
|
||||
396
vendor/ruvector/crates/ruvector-nervous-system/src/hdc/similarity.rs
vendored
Normal file
396
vendor/ruvector/crates/ruvector-nervous-system/src/hdc/similarity.rs
vendored
Normal file
@@ -0,0 +1,396 @@
|
||||
//! Similarity and distance metrics for hypervectors
|
||||
|
||||
use super::vector::Hypervector;
|
||||
use super::HYPERVECTOR_BITS;
|
||||
|
||||
/// Computes Hamming distance between two hypervectors
|
||||
///
|
||||
/// Returns the number of bits that differ between the two vectors.
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// <100ns with SIMD popcount instruction
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, hamming_distance};
|
||||
///
|
||||
/// let a = Hypervector::random();
|
||||
/// let b = Hypervector::random();
|
||||
/// let dist = hamming_distance(&a, &b);
|
||||
/// assert!(dist > 0);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn hamming_distance(v1: &Hypervector, v2: &Hypervector) -> u32 {
|
||||
v1.hamming_distance(v2)
|
||||
}
|
||||
|
||||
/// Computes cosine similarity approximation for binary hypervectors
|
||||
///
|
||||
/// For binary vectors, cosine similarity ≈ 1 - 2*hamming_distance/dimension
|
||||
///
|
||||
/// Returns a value in [0.0, 1.0] where:
|
||||
/// - 1.0 = identical vectors
|
||||
/// - 0.5 = orthogonal/random vectors
|
||||
/// - 0.0 = opposite vectors
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, cosine_similarity};
|
||||
///
|
||||
/// let a = Hypervector::random();
|
||||
/// let b = a.clone();
|
||||
/// let sim = cosine_similarity(&a, &b);
|
||||
/// assert!((sim - 1.0).abs() < 0.001);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn cosine_similarity(v1: &Hypervector, v2: &Hypervector) -> f32 {
|
||||
v1.similarity(v2)
|
||||
}
|
||||
|
||||
/// Computes normalized Hamming similarity [0.0, 1.0]
|
||||
///
|
||||
/// This is equivalent to `1.0 - (hamming_distance / dimension)`
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, normalized_hamming};
|
||||
///
|
||||
/// let a = Hypervector::random();
|
||||
/// let sim = normalized_hamming(&a, &a);
|
||||
/// assert!((sim - 1.0).abs() < 0.001);
|
||||
/// ```
|
||||
pub fn normalized_hamming(v1: &Hypervector, v2: &Hypervector) -> f32 {
|
||||
let hamming = v1.hamming_distance(v2);
|
||||
1.0 - (hamming as f32 / HYPERVECTOR_BITS as f32)
|
||||
}
|
||||
|
||||
/// Computes Jaccard similarity coefficient
|
||||
///
|
||||
/// Jaccard = |intersection| / |union| for binary vectors
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, jaccard_similarity};
|
||||
///
|
||||
/// let a = Hypervector::random();
|
||||
/// let b = Hypervector::random();
|
||||
/// let sim = jaccard_similarity(&a, &b);
|
||||
/// assert!(sim >= 0.0 && sim <= 1.0);
|
||||
/// ```
|
||||
pub fn jaccard_similarity(v1: &Hypervector, v2: &Hypervector) -> f32 {
|
||||
let mut intersection = 0u32;
|
||||
let mut union = 0u32;
|
||||
|
||||
let bits1 = v1.bits();
|
||||
let bits2 = v2.bits();
|
||||
|
||||
for i in 0..bits1.len() {
|
||||
let and = bits1[i] & bits2[i];
|
||||
let or = bits1[i] | bits2[i];
|
||||
|
||||
intersection += and.count_ones();
|
||||
union += or.count_ones();
|
||||
}
|
||||
|
||||
if union == 0 {
|
||||
1.0 // Both vectors are zero
|
||||
} else {
|
||||
intersection as f32 / union as f32
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds the k most similar vectors from a set
|
||||
///
|
||||
/// Returns indices and similarities of top-k matches, sorted by similarity (descending).
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, top_k_similar};
|
||||
///
|
||||
/// let query = Hypervector::random();
|
||||
/// let candidates: Vec<_> = (0..10).map(|_| Hypervector::random()).collect();
|
||||
///
|
||||
/// let top3 = top_k_similar(&query, &candidates, 3);
|
||||
/// assert_eq!(top3.len(), 3);
|
||||
/// assert!(top3[0].1 >= top3[1].1); // Sorted descending
|
||||
/// ```
|
||||
pub fn top_k_similar(
|
||||
query: &Hypervector,
|
||||
candidates: &[Hypervector],
|
||||
k: usize,
|
||||
) -> Vec<(usize, f32)> {
|
||||
let mut similarities: Vec<_> = candidates
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(idx, candidate)| (idx, query.similarity(candidate)))
|
||||
.collect();
|
||||
|
||||
// Partial sort to get top k (NaN-safe)
|
||||
similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Less));
|
||||
|
||||
similarities.into_iter().take(k).collect()
|
||||
}
|
||||
|
||||
/// Computes pairwise similarity matrix
|
||||
///
|
||||
/// Returns NxN matrix where result\[i\]\[j\] = similarity(vectors\[i\], vectors\[j\])
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, pairwise_similarities};
|
||||
///
|
||||
/// let vectors: Vec<_> = (0..5).map(|_| Hypervector::random()).collect();
|
||||
/// let matrix = pairwise_similarities(&vectors);
|
||||
///
|
||||
/// assert_eq!(matrix.len(), 5);
|
||||
/// assert_eq!(matrix[0].len(), 5);
|
||||
/// assert!((matrix[0][0] - 1.0).abs() < 0.001); // Diagonal is 1.0
|
||||
/// ```
|
||||
pub fn pairwise_similarities(vectors: &[Hypervector]) -> Vec<Vec<f32>> {
|
||||
let n = vectors.len();
|
||||
let mut matrix = vec![vec![0.0; n]; n];
|
||||
|
||||
for i in 0..n {
|
||||
matrix[i][i] = 1.0; // Diagonal
|
||||
|
||||
for j in (i + 1)..n {
|
||||
let sim = vectors[i].similarity(&vectors[j]);
|
||||
matrix[i][j] = sim;
|
||||
matrix[j][i] = sim; // Symmetric
|
||||
}
|
||||
}
|
||||
|
||||
matrix
|
||||
}
|
||||
|
||||
/// Computes batch similarities of query against all candidates
|
||||
///
|
||||
/// Optimized for computing one-to-many similarities efficiently.
|
||||
/// Uses loop unrolling for better CPU pipeline utilization.
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// ~20ns per similarity (amortized over batch)
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, batch_similarities};
|
||||
///
|
||||
/// let query = Hypervector::random();
|
||||
/// let candidates: Vec<_> = (0..100).map(|_| Hypervector::random()).collect();
|
||||
///
|
||||
/// let sims = batch_similarities(&query, &candidates);
|
||||
/// assert_eq!(sims.len(), 100);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn batch_similarities(query: &Hypervector, candidates: &[Hypervector]) -> Vec<f32> {
|
||||
let n = candidates.len();
|
||||
let mut results = Vec::with_capacity(n);
|
||||
|
||||
// Process in chunks of 4 for better cache utilization
|
||||
let chunks = n / 4;
|
||||
let remainder = n % 4;
|
||||
|
||||
for i in 0..chunks {
|
||||
let base = i * 4;
|
||||
results.push(query.similarity(&candidates[base]));
|
||||
results.push(query.similarity(&candidates[base + 1]));
|
||||
results.push(query.similarity(&candidates[base + 2]));
|
||||
results.push(query.similarity(&candidates[base + 3]));
|
||||
}
|
||||
|
||||
// Handle remainder
|
||||
let base = chunks * 4;
|
||||
for i in 0..remainder {
|
||||
results.push(query.similarity(&candidates[base + i]));
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
|
||||
/// Finds indices of all vectors with similarity above threshold
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::{Hypervector, find_similar};
|
||||
///
|
||||
/// let query = Hypervector::from_seed(42);
|
||||
/// let candidates: Vec<_> = (0..100).map(|i| Hypervector::from_seed(i)).collect();
|
||||
///
|
||||
/// let matches = find_similar(&query, &candidates, 0.9);
|
||||
/// assert!(matches.contains(&42)); // Should find itself
|
||||
/// ```
|
||||
pub fn find_similar(query: &Hypervector, candidates: &[Hypervector], threshold: f32) -> Vec<usize> {
|
||||
candidates
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, candidate)| {
|
||||
if query.similarity(candidate) >= threshold {
|
||||
Some(idx)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Hamming distance ----

    #[test]
    fn test_hamming_distance_identical() {
        // A vector has no differing bits when compared with itself.
        let sample = Hypervector::random();
        let distance = hamming_distance(&sample, &sample);

        assert_eq!(distance, 0);
    }

    #[test]
    fn test_hamming_distance_random() {
        let left = Hypervector::random();
        let right = Hypervector::random();

        // Independent random vectors should differ in roughly half their bits.
        let dist = hamming_distance(&left, &right);
        assert!(4000 < dist && dist < 6000, "distance: {}", dist);
    }

    // ---- Cosine similarity ----

    #[test]
    fn test_cosine_similarity_identical() {
        let sample = Hypervector::random();

        let score = cosine_similarity(&sample, &sample);
        let error = (score - 1.0).abs();

        assert!(error < 0.001);
    }

    #[test]
    fn test_cosine_similarity_bounds() {
        let left = Hypervector::random();
        let right = Hypervector::random();

        // The test only requires the score to stay inside [-1, 1].
        let sim = cosine_similarity(&left, &right);
        assert!(
            (-1.0..=1.0).contains(&sim),
            "similarity out of bounds: {}",
            sim
        );
    }

    // ---- Normalized Hamming similarity ----

    #[test]
    fn test_normalized_hamming_identical() {
        let sample = Hypervector::random();

        let score = normalized_hamming(&sample, &sample);
        let error = (score - 1.0).abs();

        assert!(error < 0.001);
    }

    #[test]
    fn test_normalized_hamming_random() {
        let left = Hypervector::random();
        let right = Hypervector::random();

        // Independent random vectors are expected to land near 0.5.
        let sim = normalized_hamming(&left, &right);
        assert!(0.3 < sim && sim < 0.7, "similarity: {}", sim);
    }

    // ---- Jaccard similarity ----

    #[test]
    fn test_jaccard_identical() {
        let sample = Hypervector::random();

        let score = jaccard_similarity(&sample, &sample);
        let error = (score - 1.0).abs();

        assert!(error < 0.001);
    }

    #[test]
    fn test_jaccard_zero_vectors() {
        // Two all-zero vectors are expected to score as identical (1.0).
        let empty_a = Hypervector::zero();
        let empty_b = Hypervector::zero();

        let score = jaccard_similarity(&empty_a, &empty_b);
        assert!((score - 1.0).abs() < 0.001);
    }

    #[test]
    fn test_jaccard_bounds() {
        let left = Hypervector::random();
        let right = Hypervector::random();

        let score = jaccard_similarity(&left, &right);
        assert!((0.0..=1.0).contains(&score));
    }

    // ---- Top-k retrieval ----

    #[test]
    fn test_top_k_similar() {
        let query = Hypervector::from_seed(0);
        let pool: Vec<_> = (1..11).map(Hypervector::from_seed).collect();

        let best_three = top_k_similar(&query, &pool, 3);
        assert_eq!(best_three.len(), 3);

        // Scores must come back in non-increasing order.
        assert!(best_three[0].1 >= best_three[1].1);
        assert!(best_three[1].1 >= best_three[2].1);
    }

    #[test]
    fn test_top_k_more_than_candidates() {
        let query = Hypervector::random();
        let pool: Vec<_> = (0..5).map(|_| Hypervector::random()).collect();

        // Asking for more results than candidates returns every candidate.
        let hits = top_k_similar(&query, &pool, 10);
        assert_eq!(hits.len(), 5);
    }

    // ---- Pairwise similarity matrix ----

    #[test]
    fn test_pairwise_similarities_diagonal() {
        let inputs: Vec<_> = (0..5).map(Hypervector::from_seed).collect();
        let matrix = pairwise_similarities(&inputs);

        assert_eq!(matrix.len(), 5);

        // Every vector is fully similar to itself.
        for k in 0..5 {
            let self_score = matrix[k][k];
            assert!((self_score - 1.0).abs() < 0.001);
        }
    }

    #[test]
    fn test_pairwise_similarities_symmetric() {
        let inputs: Vec<_> = (0..5).map(Hypervector::from_seed).collect();
        let matrix = pairwise_similarities(&inputs);

        // Entry (row, col) must mirror entry (col, row).
        for row in 0..5 {
            for col in 0..5 {
                let gap = (matrix[row][col] - matrix[col][row]).abs();
                assert!(gap < 0.001);
            }
        }
    }

    #[test]
    fn test_pairwise_similarities_bounds() {
        let inputs: Vec<_> = (0..5).map(|_| Hypervector::random()).collect();
        let matrix = pairwise_similarities(&inputs);

        // Every cell has to stay inside [-1, 1].
        for &sim in matrix.iter().flatten() {
            assert!(
                (-1.0..=1.0).contains(&sim),
                "similarity out of bounds: {}",
                sim
            );
        }
    }
}
|
||||
464
vendor/ruvector/crates/ruvector-nervous-system/src/hdc/vector.rs
vendored
Normal file
464
vendor/ruvector/crates/ruvector-nervous-system/src/hdc/vector.rs
vendored
Normal file
@@ -0,0 +1,464 @@
|
||||
//! Hypervector data type and basic operations
|
||||
|
||||
use super::{HYPERVECTOR_BITS, HYPERVECTOR_U64_LEN};
|
||||
use rand::Rng;
|
||||
use std::fmt;
|
||||
|
||||
/// Error types for HDC operations
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum HdcError {
|
||||
#[error("Invalid hypervector dimension: expected {expected}, got {got}")]
|
||||
InvalidDimension { expected: usize, got: usize },
|
||||
|
||||
#[error("Empty vector set provided")]
|
||||
EmptyVectorSet,
|
||||
|
||||
#[error("Serialization error: {0}")]
|
||||
SerializationError(String),
|
||||
}
|
||||
|
||||
/// A binary hypervector with 10,000 bits packed into 156 u64 words
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// - Memory: 156 * 8 = 1,248 bytes per vector
|
||||
/// - XOR binding: <50ns (single CPU cycle per u64)
|
||||
/// - Similarity: <100ns (SIMD popcount)
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::Hypervector;
|
||||
///
|
||||
/// let v1 = Hypervector::random();
|
||||
/// let v2 = Hypervector::random();
|
||||
/// let bound = v1.bind(&v2);
|
||||
/// let sim = v1.similarity(&v2);
|
||||
/// ```
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
pub struct Hypervector {
|
||||
pub(crate) bits: [u64; HYPERVECTOR_U64_LEN],
|
||||
}
|
||||
|
||||
impl Hypervector {
|
||||
/// Creates a new hypervector with all bits set to zero
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::Hypervector;
|
||||
///
|
||||
/// let zero = Hypervector::zero();
|
||||
/// assert_eq!(zero.popcount(), 0);
|
||||
/// ```
|
||||
pub fn zero() -> Self {
|
||||
Self {
|
||||
bits: [0u64; HYPERVECTOR_U64_LEN],
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a random hypervector with ~50% bits set
|
||||
///
|
||||
/// Uses thread-local RNG for performance.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::Hypervector;
|
||||
///
|
||||
/// let random = Hypervector::random();
|
||||
/// let count = random.popcount();
|
||||
/// // Should be around 5000 ± 150
|
||||
/// assert!(count > 4500 && count < 5500);
|
||||
/// ```
|
||||
pub fn random() -> Self {
|
||||
let mut rng = rand::thread_rng();
|
||||
let mut bits = [0u64; HYPERVECTOR_U64_LEN];
|
||||
|
||||
for word in bits.iter_mut() {
|
||||
*word = rng.gen();
|
||||
}
|
||||
|
||||
Self { bits }
|
||||
}
|
||||
|
||||
/// Creates a hypervector from a seed for reproducibility
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::Hypervector;
|
||||
///
|
||||
/// let v1 = Hypervector::from_seed(42);
|
||||
/// let v2 = Hypervector::from_seed(42);
|
||||
/// assert_eq!(v1, v2);
|
||||
/// ```
|
||||
pub fn from_seed(seed: u64) -> Self {
|
||||
use rand::SeedableRng;
|
||||
let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
|
||||
let mut bits = [0u64; HYPERVECTOR_U64_LEN];
|
||||
|
||||
for word in bits.iter_mut() {
|
||||
*word = rng.gen();
|
||||
}
|
||||
|
||||
Self { bits }
|
||||
}
|
||||
|
||||
/// Binds two hypervectors using XOR
|
||||
///
|
||||
/// Binding is associative, commutative, and self-inverse:
|
||||
/// - `a.bind(b) == b.bind(a)`
|
||||
/// - `a.bind(b).bind(b) == a`
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// <50ns on modern CPUs (single cycle XOR per u64)
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::Hypervector;
|
||||
///
|
||||
/// let a = Hypervector::random();
|
||||
/// let b = Hypervector::random();
|
||||
/// let bound = a.bind(&b);
|
||||
///
|
||||
/// // Self-inverse property
|
||||
/// assert_eq!(bound.bind(&b), a);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn bind(&self, other: &Self) -> Self {
|
||||
let mut result = Self::zero();
|
||||
|
||||
for i in 0..HYPERVECTOR_U64_LEN {
|
||||
result.bits[i] = self.bits[i] ^ other.bits[i];
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Computes similarity between two hypervectors
|
||||
///
|
||||
/// Returns a value in [0.0, 1.0] where:
|
||||
/// - 1.0 = identical vectors
|
||||
/// - 0.5 = random/orthogonal vectors
|
||||
/// - 0.0 = completely opposite vectors
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// <100ns with SIMD popcount
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::Hypervector;
|
||||
///
|
||||
/// let a = Hypervector::random();
|
||||
/// let b = a.clone();
|
||||
/// assert!((a.similarity(&b) - 1.0).abs() < 0.001);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn similarity(&self, other: &Self) -> f32 {
|
||||
let hamming = self.hamming_distance(other);
|
||||
1.0 - (2.0 * hamming as f32 / HYPERVECTOR_BITS as f32)
|
||||
}
|
||||
|
||||
/// Computes Hamming distance (number of differing bits)
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// <50ns with SIMD popcount instruction and loop unrolling
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::Hypervector;
|
||||
///
|
||||
/// let a = Hypervector::random();
|
||||
/// assert_eq!(a.hamming_distance(&a), 0);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn hamming_distance(&self, other: &Self) -> u32 {
|
||||
// Unrolled loop for better instruction-level parallelism
|
||||
// Process 4 u64s at a time to maximize CPU pipeline utilization
|
||||
let mut d0 = 0u32;
|
||||
let mut d1 = 0u32;
|
||||
let mut d2 = 0u32;
|
||||
let mut d3 = 0u32;
|
||||
|
||||
let chunks = HYPERVECTOR_U64_LEN / 4;
|
||||
let remainder = HYPERVECTOR_U64_LEN % 4;
|
||||
|
||||
// Main unrolled loop (4 words per iteration)
|
||||
for i in 0..chunks {
|
||||
let base = i * 4;
|
||||
d0 += (self.bits[base] ^ other.bits[base]).count_ones();
|
||||
d1 += (self.bits[base + 1] ^ other.bits[base + 1]).count_ones();
|
||||
d2 += (self.bits[base + 2] ^ other.bits[base + 2]).count_ones();
|
||||
d3 += (self.bits[base + 3] ^ other.bits[base + 3]).count_ones();
|
||||
}
|
||||
|
||||
// Handle remaining elements
|
||||
let base = chunks * 4;
|
||||
for i in 0..remainder {
|
||||
d0 += (self.bits[base + i] ^ other.bits[base + i]).count_ones();
|
||||
}
|
||||
|
||||
d0 + d1 + d2 + d3
|
||||
}
|
||||
|
||||
/// Counts the number of set bits (population count)
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::Hypervector;
|
||||
///
|
||||
/// let zero = Hypervector::zero();
|
||||
/// assert_eq!(zero.popcount(), 0);
|
||||
///
|
||||
/// let random = Hypervector::random();
|
||||
/// let count = random.popcount();
|
||||
/// // Should be around 5000 for random vectors
|
||||
/// assert!(count > 4500 && count < 5500);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn popcount(&self) -> u32 {
|
||||
self.bits.iter().map(|&w| w.count_ones()).sum()
|
||||
}
|
||||
|
||||
/// Bundles multiple vectors by majority voting on each bit
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// Optimized word-level implementation: O(n * 157 words) instead of O(n * 10000 bits)
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruvector_nervous_system::hdc::Hypervector;
|
||||
///
|
||||
/// let v1 = Hypervector::random();
|
||||
/// let v2 = Hypervector::random();
|
||||
/// let v3 = Hypervector::random();
|
||||
///
|
||||
/// let bundled = Hypervector::bundle(&[v1.clone(), v2, v3]).unwrap();
|
||||
/// // Bundled vector is similar to all inputs
|
||||
/// assert!(bundled.similarity(&v1) > 0.3);
|
||||
/// ```
|
||||
pub fn bundle(vectors: &[Self]) -> Result<Self, HdcError> {
|
||||
if vectors.is_empty() {
|
||||
return Err(HdcError::EmptyVectorSet);
|
||||
}
|
||||
|
||||
if vectors.len() == 1 {
|
||||
return Ok(vectors[0].clone());
|
||||
}
|
||||
|
||||
let n = vectors.len();
|
||||
let threshold = n / 2;
|
||||
let mut result = Self::zero();
|
||||
|
||||
// Process word by word (64 bits at a time)
|
||||
for word_idx in 0..HYPERVECTOR_U64_LEN {
|
||||
// Count bits at each position within this word using bit-parallel counting
|
||||
let mut counts = [0u8; 64];
|
||||
|
||||
for vector in vectors {
|
||||
let word = vector.bits[word_idx];
|
||||
// Unroll inner loop for cache efficiency
|
||||
for bit_pos in 0..64 {
|
||||
counts[bit_pos] += ((word >> bit_pos) & 1) as u8;
|
||||
}
|
||||
}
|
||||
|
||||
// Build result word from majority votes
|
||||
let mut result_word = 0u64;
|
||||
for (bit_pos, &count) in counts.iter().enumerate() {
|
||||
if count as usize > threshold {
|
||||
result_word |= 1u64 << bit_pos;
|
||||
}
|
||||
}
|
||||
result.bits[word_idx] = result_word;
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Fast bundle for exactly 3 vectors using bitwise majority
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// Single-pass bitwise operation: ~500ns for 10,000 bits
|
||||
#[inline]
|
||||
pub fn bundle_3(a: &Self, b: &Self, c: &Self) -> Self {
|
||||
let mut result = Self::zero();
|
||||
|
||||
// Majority of 3 bits: (a & b) | (b & c) | (a & c)
|
||||
for i in 0..HYPERVECTOR_U64_LEN {
|
||||
let wa = a.bits[i];
|
||||
let wb = b.bits[i];
|
||||
let wc = c.bits[i];
|
||||
result.bits[i] = (wa & wb) | (wb & wc) | (wa & wc);
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Returns the internal bit array (for advanced use cases)
|
||||
#[inline]
|
||||
pub fn bits(&self) -> &[u64; HYPERVECTOR_U64_LEN] {
|
||||
&self.bits
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Hypervector {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"Hypervector {{ bits: {} set / {} total }}",
|
||||
self.popcount(),
|
||||
HYPERVECTOR_BITS
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Hypervector {
|
||||
fn default() -> Self {
|
||||
Self::zero()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Construction ----

    #[test]
    fn test_zero_vector() {
        let empty = Hypervector::zero();

        assert_eq!(empty.popcount(), 0);
        assert_eq!(empty.hamming_distance(&empty), 0);
    }

    #[test]
    fn test_random_vector_properties() {
        let sample = Hypervector::random();
        let count = sample.popcount();

        // About half of the bits should be set; the window is deliberately wide.
        assert!((4501..5500).contains(&count), "popcount: {}", count);
    }

    #[test]
    fn test_from_seed_deterministic() {
        let first = Hypervector::from_seed(42);
        let repeat = Hypervector::from_seed(42);
        let different = Hypervector::from_seed(43);

        // Same seed, same vector; a different seed must not collide.
        assert_eq!(first, repeat);
        assert_ne!(first, different);
    }

    // ---- Binding ----

    #[test]
    fn test_bind_commutative() {
        let left = Hypervector::random();
        let right = Hypervector::random();

        let forward = left.bind(&right);
        let backward = right.bind(&left);

        assert_eq!(forward, backward);
    }

    #[test]
    fn test_bind_self_inverse() {
        let payload = Hypervector::random();
        let key = Hypervector::random();

        // Binding twice with the same key must give the payload back.
        let recovered = payload.bind(&key).bind(&key);

        assert_eq!(payload, recovered);
    }

    // ---- Similarity and distance ----

    #[test]
    fn test_similarity_bounds() {
        let left = Hypervector::random();
        let right = Hypervector::random();

        // 1 - 2 * hamming / dim is expected to stay inside [-1, 1].
        let sim = left.similarity(&right);
        assert!(
            (-1.0..=1.0).contains(&sim),
            "similarity out of bounds: {}",
            sim
        );
    }

    #[test]
    fn test_similarity_identical() {
        let sample = Hypervector::random();

        let score = sample.similarity(&sample);
        let error = (score - 1.0).abs();

        assert!(error < 0.001);
    }

    #[test]
    fn test_similarity_random_approximately_zero() {
        let left = Hypervector::random();
        let right = Hypervector::random();

        // Random vectors differ in about half their bits, and
        // 1 - 2 * (5000 / 10000) = 0, so the score should sit near zero.
        let sim = left.similarity(&right);
        assert!(-0.2 < sim && sim < 0.2, "similarity: {}", sim);
    }

    #[test]
    fn test_hamming_distance_identical() {
        let sample = Hypervector::random();
        let distance = sample.hamming_distance(&sample);

        assert_eq!(distance, 0);
    }

    // ---- Bundling ----

    #[test]
    fn test_bundle_single_vector() {
        let only = Hypervector::random();

        // A one-element bundle is just that element.
        let bundled = Hypervector::bundle(&[only.clone()]).unwrap();

        assert_eq!(bundled, only);
    }

    #[test]
    fn test_bundle_empty_error() {
        let outcome = Hypervector::bundle(&[]);

        assert!(matches!(outcome, Err(HdcError::EmptyVectorSet)));
    }

    #[test]
    fn test_bundle_majority_vote() {
        let first = Hypervector::from_seed(1);
        let second = Hypervector::from_seed(2);
        let third = Hypervector::from_seed(3);

        let bundled = Hypervector::bundle(&[first.clone(), second.clone(), third]).unwrap();

        // The bundle should stay recognisably close to its inputs.
        assert!(bundled.similarity(&first) > 0.3);
        assert!(bundled.similarity(&second) > 0.3);
    }

    #[test]
    fn test_bundle_odd_count() {
        let members: Vec<_> = (0..5).map(Hypervector::from_seed).collect();
        let bundled = Hypervector::bundle(&members).unwrap();

        for member in members.iter() {
            assert!(bundled.similarity(member) > 0.3);
        }
    }

    // ---- Formatting ----

    #[test]
    fn test_debug_format() {
        let empty = Hypervector::zero();
        let rendered = format!("{:?}", empty);

        assert!(rendered.contains("bits: 0 set"));
    }
}
|
||||
Reference in New Issue
Block a user