git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
273 lines
7.9 KiB
Rust
273 lines
7.9 KiB
Rust
//! Hyperdimensional Computing (HDC) WASM bindings
|
|
//!
|
|
//! 10,000-bit binary hypervectors with ultra-fast operations:
|
|
//! - XOR binding: <50ns
|
|
//! - Hamming similarity: <100ns via SIMD
|
|
//! - 10^40 representational capacity
|
|
|
|
use wasm_bindgen::prelude::*;
|
|
|
|
/// Number of bits in a hypervector
|
|
const HYPERVECTOR_BITS: usize = 10_000;
|
|
|
|
/// Number of u64 words needed (ceil(10000/64) = 157)
|
|
const HYPERVECTOR_U64_LEN: usize = 157;
|
|
|
|
/// A binary hypervector with 10,000 bits
|
|
///
|
|
/// # Performance
|
|
/// - Memory: 1,248 bytes per vector
|
|
/// - XOR binding: <50ns
|
|
/// - Similarity: <100ns with SIMD popcount
|
|
#[wasm_bindgen]
|
|
pub struct Hypervector {
|
|
bits: Vec<u64>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl Hypervector {
|
|
/// Create a zero hypervector
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new() -> Hypervector {
|
|
Self {
|
|
bits: vec![0u64; HYPERVECTOR_U64_LEN],
|
|
}
|
|
}
|
|
|
|
/// Create a random hypervector with ~50% bits set
|
|
#[wasm_bindgen]
|
|
pub fn random() -> Hypervector {
|
|
use rand::Rng;
|
|
let mut rng = rand::thread_rng();
|
|
let bits: Vec<u64> = (0..HYPERVECTOR_U64_LEN).map(|_| rng.gen()).collect();
|
|
Self { bits }
|
|
}
|
|
|
|
/// Create a hypervector from a seed for reproducibility
|
|
#[wasm_bindgen]
|
|
pub fn from_seed(seed: u64) -> Hypervector {
|
|
use rand::{Rng, SeedableRng};
|
|
let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
|
|
let bits: Vec<u64> = (0..HYPERVECTOR_U64_LEN).map(|_| rng.gen()).collect();
|
|
Self { bits }
|
|
}
|
|
|
|
/// Bind two hypervectors using XOR
|
|
///
|
|
/// Binding is associative, commutative, and self-inverse:
|
|
/// - a.bind(b) == b.bind(a)
|
|
/// - a.bind(b).bind(b) == a
|
|
#[wasm_bindgen]
|
|
pub fn bind(&self, other: &Hypervector) -> Hypervector {
|
|
let bits: Vec<u64> = self
|
|
.bits
|
|
.iter()
|
|
.zip(other.bits.iter())
|
|
.map(|(&a, &b)| a ^ b)
|
|
.collect();
|
|
Self { bits }
|
|
}
|
|
|
|
/// Compute similarity between two hypervectors
|
|
///
|
|
/// Returns a value in [-1.0, 1.0] where:
|
|
/// - 1.0 = identical vectors
|
|
/// - 0.0 = random/orthogonal vectors
|
|
/// - -1.0 = completely opposite vectors
|
|
#[wasm_bindgen]
|
|
pub fn similarity(&self, other: &Hypervector) -> f32 {
|
|
let hamming = self.hamming_distance(other);
|
|
1.0 - (2.0 * hamming as f32 / HYPERVECTOR_BITS as f32)
|
|
}
|
|
|
|
/// Compute Hamming distance (number of differing bits)
|
|
#[wasm_bindgen]
|
|
pub fn hamming_distance(&self, other: &Hypervector) -> u32 {
|
|
// Unrolled loop for better instruction-level parallelism
|
|
let mut d0 = 0u32;
|
|
let mut d1 = 0u32;
|
|
let mut d2 = 0u32;
|
|
let mut d3 = 0u32;
|
|
|
|
let chunks = HYPERVECTOR_U64_LEN / 4;
|
|
let remainder = HYPERVECTOR_U64_LEN % 4;
|
|
|
|
for i in 0..chunks {
|
|
let base = i * 4;
|
|
d0 += (self.bits[base] ^ other.bits[base]).count_ones();
|
|
d1 += (self.bits[base + 1] ^ other.bits[base + 1]).count_ones();
|
|
d2 += (self.bits[base + 2] ^ other.bits[base + 2]).count_ones();
|
|
d3 += (self.bits[base + 3] ^ other.bits[base + 3]).count_ones();
|
|
}
|
|
|
|
let base = chunks * 4;
|
|
for i in 0..remainder {
|
|
d0 += (self.bits[base + i] ^ other.bits[base + i]).count_ones();
|
|
}
|
|
|
|
d0 + d1 + d2 + d3
|
|
}
|
|
|
|
/// Count the number of set bits (population count)
|
|
#[wasm_bindgen]
|
|
pub fn popcount(&self) -> u32 {
|
|
self.bits.iter().map(|&w| w.count_ones()).sum()
|
|
}
|
|
|
|
/// Bundle multiple vectors by majority voting on each bit
|
|
#[wasm_bindgen]
|
|
pub fn bundle_3(a: &Hypervector, b: &Hypervector, c: &Hypervector) -> Hypervector {
|
|
// Majority of 3 bits: (a & b) | (b & c) | (a & c)
|
|
let bits: Vec<u64> = (0..HYPERVECTOR_U64_LEN)
|
|
.map(|i| {
|
|
let wa = a.bits[i];
|
|
let wb = b.bits[i];
|
|
let wc = c.bits[i];
|
|
(wa & wb) | (wb & wc) | (wa & wc)
|
|
})
|
|
.collect();
|
|
Self { bits }
|
|
}
|
|
|
|
/// Get the raw bits as Uint8Array (for serialization)
|
|
#[wasm_bindgen]
|
|
pub fn to_bytes(&self) -> js_sys::Uint8Array {
|
|
let bytes: Vec<u8> = self.bits.iter().flat_map(|&w| w.to_le_bytes()).collect();
|
|
js_sys::Uint8Array::from(bytes.as_slice())
|
|
}
|
|
|
|
/// Create from raw bytes
|
|
#[wasm_bindgen]
|
|
pub fn from_bytes(bytes: &[u8]) -> Result<Hypervector, JsValue> {
|
|
if bytes.len() != HYPERVECTOR_U64_LEN * 8 {
|
|
return Err(JsValue::from_str(&format!(
|
|
"Invalid byte length: expected {}, got {}",
|
|
HYPERVECTOR_U64_LEN * 8,
|
|
bytes.len()
|
|
)));
|
|
}
|
|
|
|
let bits: Vec<u64> = bytes
|
|
.chunks_exact(8)
|
|
.map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap()))
|
|
.collect();
|
|
|
|
Ok(Self { bits })
|
|
}
|
|
|
|
/// Get number of bits
|
|
#[wasm_bindgen(getter)]
|
|
pub fn dimension(&self) -> usize {
|
|
HYPERVECTOR_BITS
|
|
}
|
|
}
|
|
|
|
impl Default for Hypervector {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
/// HDC Memory for storing and retrieving hypervectors by label
|
|
#[wasm_bindgen]
|
|
pub struct HdcMemory {
|
|
labels: Vec<String>,
|
|
vectors: Vec<Hypervector>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl HdcMemory {
|
|
/// Create a new empty HDC memory
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new() -> HdcMemory {
|
|
Self {
|
|
labels: Vec::new(),
|
|
vectors: Vec::new(),
|
|
}
|
|
}
|
|
|
|
/// Store a hypervector with a label
|
|
#[wasm_bindgen]
|
|
pub fn store(&mut self, label: &str, vector: Hypervector) {
|
|
// Check if label exists
|
|
if let Some(idx) = self.labels.iter().position(|l| l == label) {
|
|
self.vectors[idx] = vector;
|
|
} else {
|
|
self.labels.push(label.to_string());
|
|
self.vectors.push(vector);
|
|
}
|
|
}
|
|
|
|
/// Retrieve vectors similar to query above threshold
|
|
///
|
|
/// Returns array of [label, similarity] pairs
|
|
#[wasm_bindgen]
|
|
pub fn retrieve(&self, query: &Hypervector, threshold: f32) -> JsValue {
|
|
let mut results: Vec<(String, f32)> = Vec::new();
|
|
|
|
for (label, vector) in self.labels.iter().zip(self.vectors.iter()) {
|
|
let sim = query.similarity(vector);
|
|
if sim >= threshold {
|
|
results.push((label.clone(), sim));
|
|
}
|
|
}
|
|
|
|
// Sort by similarity descending
|
|
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
|
|
|
serde_wasm_bindgen::to_value(&results).unwrap_or(JsValue::NULL)
|
|
}
|
|
|
|
/// Find the k most similar vectors to query
|
|
#[wasm_bindgen]
|
|
pub fn top_k(&self, query: &Hypervector, k: usize) -> JsValue {
|
|
let mut similarities: Vec<(String, f32)> = self
|
|
.labels
|
|
.iter()
|
|
.zip(self.vectors.iter())
|
|
.map(|(label, vector)| (label.clone(), query.similarity(vector)))
|
|
.collect();
|
|
|
|
similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
|
similarities.truncate(k);
|
|
|
|
serde_wasm_bindgen::to_value(&similarities).unwrap_or(JsValue::NULL)
|
|
}
|
|
|
|
/// Get number of stored vectors
|
|
#[wasm_bindgen(getter)]
|
|
pub fn size(&self) -> usize {
|
|
self.vectors.len()
|
|
}
|
|
|
|
/// Clear all stored vectors
|
|
#[wasm_bindgen]
|
|
pub fn clear(&mut self) {
|
|
self.labels.clear();
|
|
self.vectors.clear();
|
|
}
|
|
|
|
/// Check if a label exists
|
|
#[wasm_bindgen]
|
|
pub fn has(&self, label: &str) -> bool {
|
|
self.labels.iter().any(|l| l == label)
|
|
}
|
|
|
|
/// Get a vector by label
|
|
#[wasm_bindgen]
|
|
pub fn get(&self, label: &str) -> Option<Hypervector> {
|
|
self.labels
|
|
.iter()
|
|
.position(|l| l == label)
|
|
.map(|idx| Hypervector {
|
|
bits: self.vectors[idx].bits.clone(),
|
|
})
|
|
}
|
|
}
|
|
|
|
impl Default for HdcMemory {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|