Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
@@ -0,0 +1,585 @@
|
||||
//! # Tier 4: Compositional Hyperdimensional Computing
|
||||
//!
|
||||
//! SOTA application: Zero-shot concept composition via HDC binding.
|
||||
//!
|
||||
//! ## The Problem
|
||||
//! Traditional embeddings:
|
||||
//! - Fixed vocabulary at training time
|
||||
//! - Cannot represent "red dog" if never seen together
|
||||
//! - Composition requires retraining
|
||||
//! - No algebraic structure for reasoning
|
||||
//!
|
||||
//! ## What Changes
|
||||
//! - HDC: concepts are binary hypervectors (10,000 bits)
|
||||
//! - XOR binding: combine concepts preserving similarity
|
||||
//! - Bundling: create superpositions (sets of concepts)
|
||||
//! - Algebra: unbind to recover components
|
||||
//!
|
||||
//! ## Why This Matters
|
||||
//! - Zero-shot: represent any combination of known concepts
|
||||
//! - Sub-100ns operations: composition is just XOR
|
||||
//! - Distributed: no central vocabulary server
|
||||
//! - Interpretable: can unbind to see what's in a representation
|
||||
//!
|
||||
//! This is what embeddings should have been: compositional by construction.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
// ============================================================================
|
||||
// Hypervector Operations
|
||||
// ============================================================================
|
||||
|
||||
/// Number of bits in hypervector
const DIM: usize = 10_000;

/// Number of u64 words needed to hold `DIM` bits (rounded up)
const WORDS: usize = (DIM + 63) / 64;

/// Binary hypervector of `DIM` bits packed into `WORDS` 64-bit words.
///
/// Bit `i` lives in word `i / 64` at position `i % 64`. Any bits of the final
/// word at or above `DIM` are padding and are kept at zero by every
/// constructor here, so that `popcount`, `hamming_distance` and `similarity`
/// only ever see the `DIM` meaningful bits.
#[derive(Clone, PartialEq, Eq)]
pub struct Hypervector {
    bits: [u64; WORDS],
}

impl Hypervector {
    /// Mask that keeps only the bits of the last word that lie below `DIM`.
    const TAIL_MASK: u64 = if DIM % 64 == 0 {
        u64::MAX
    } else {
        (1u64 << ((DIM % 64) as u32)) - 1
    };

    /// Create zero vector
    pub fn zeros() -> Self {
        Self { bits: [0; WORDS] }
    }

    /// Create a pseudo-random vector (approximately 50% ones) from `seed`.
    ///
    /// The same seed always yields the same vector. The generator is
    /// xorshift64 (shifts 13 / 7 / 17), which maps the all-zero state to
    /// itself; a seed of `0` is therefore replaced by a fixed non-zero
    /// constant so that it does not produce an all-zero vector.
    pub fn random(seed: u64) -> Self {
        let mut state = if seed == 0 {
            0x9E37_79B9_7F4A_7C15
        } else {
            seed
        };

        let mut bits = [0u64; WORDS];
        for word in &mut bits {
            // Xorshift64
            state ^= state << 13;
            state ^= state >> 7;
            state ^= state << 17;
            *word = state;
        }

        // Clear the padding above DIM: `bundle` and `permute` never copy those
        // bits, so leaving them set would make distances inconsistent.
        bits[WORDS - 1] &= Self::TAIL_MASK;

        Self { bits }
    }

    /// Create from seed string (deterministic)
    ///
    /// The string is folded into a `u64` with a multiply-by-31 rolling hash
    /// and handed to [`Hypervector::random`].
    pub fn from_seed(seed: &str) -> Self {
        let hash = seed
            .bytes()
            .fold(0u64, |acc, b| acc.wrapping_mul(31).wrapping_add(u64::from(b)));
        Self::random(hash)
    }

    /// XOR binding: A ⊗ B
    /// Key property: (A ⊗ B) is dissimilar to both A and B
    /// but (A ⊗ B) ⊗ B = A (unbinding)
    pub fn bind(&self, other: &Self) -> Self {
        let mut bits = self.bits;
        for (dst, src) in bits.iter_mut().zip(other.bits.iter()) {
            *dst ^= *src;
        }
        Self { bits }
    }

    /// Unbind: given A ⊗ B and B, recover A
    /// Since XOR is its own inverse: A ⊗ B ⊗ B = A
    pub fn unbind(&self, key: &Self) -> Self {
        self.bind(key) // Same as bind
    }

    /// Bundle (superposition): per-bit strict majority vote.
    ///
    /// A result bit is 1 only when it is 1 in *more than* `len / 2` inputs, so
    /// ties in an even-sized bundle resolve to 0 (for exactly two inputs this
    /// is a bitwise AND). An empty slice yields the zero vector and a single
    /// input is returned unchanged.
    pub fn bundle(vectors: &[Self]) -> Self {
        match vectors {
            [] => return Self::zeros(),
            [only] => return only.clone(),
            _ => {}
        }

        let threshold = vectors.len() / 2;
        let mut result = Self::zeros();

        for bit_idx in 0..DIM {
            let word_idx = bit_idx / 64;
            let bit_pos = bit_idx % 64;

            let votes = vectors
                .iter()
                .filter(|v| (v.bits[word_idx] >> bit_pos) & 1 == 1)
                .count();

            if votes > threshold {
                result.bits[word_idx] |= 1u64 << bit_pos;
            }
        }

        result
    }

    /// Permute: cyclically rotate the `DIM` bits by `shift` positions
    /// (creates sequence-sensitive binding).
    ///
    /// Bit `i` moves to `(i + shift) % DIM`; `shift` is reduced modulo `DIM`
    /// first, so a multiple of `DIM` returns an unchanged copy.
    pub fn permute(&self, shift: usize) -> Self {
        let shift = shift % DIM;
        if shift == 0 {
            return self.clone();
        }

        let mut result = Self::zeros();

        for src in 0..DIM {
            if (self.bits[src / 64] >> (src % 64)) & 1 == 1 {
                let dst = (src + shift) % DIM;
                result.bits[dst / 64] |= 1u64 << (dst % 64);
            }
        }

        result
    }

    /// Hamming distance (number of differing bits)
    pub fn hamming_distance(&self, other: &Self) -> u32 {
        self.bits
            .iter()
            .zip(other.bits.iter())
            .map(|(a, b)| (a ^ b).count_ones())
            .sum()
    }

    /// Cosine-like similarity: 1 - 2 * (distance / DIM)
    ///
    /// Identical vectors score 1.0; vectors differing in half of the `DIM`
    /// bits score 0.0.
    pub fn similarity(&self, other: &Self) -> f32 {
        let dist = self.hamming_distance(other);
        1.0 - 2.0 * (dist as f32 / DIM as f32)
    }

    /// Count ones
    pub fn popcount(&self) -> u32 {
        self.bits.iter().map(|w| w.count_ones()).sum()
    }
}
|
||||
|
||||
impl std::fmt::Debug for Hypervector {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "HV(popcount={})", self.popcount())
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Concept Memory
|
||||
// ============================================================================
|
||||
|
||||
/// Memory of atomic concepts
|
||||
pub struct ConceptMemory {
|
||||
/// Named concepts
|
||||
concepts: HashMap<String, Hypervector>,
|
||||
/// Role vectors for binding positions
|
||||
roles: HashMap<String, Hypervector>,
|
||||
}
|
||||
|
||||
impl ConceptMemory {
|
||||
pub fn new() -> Self {
|
||||
let mut mem = Self {
|
||||
concepts: HashMap::new(),
|
||||
roles: HashMap::new(),
|
||||
};
|
||||
|
||||
// Create role vectors for structured binding
|
||||
mem.roles.insert(
|
||||
"subject".to_string(),
|
||||
Hypervector::from_seed("role:subject"),
|
||||
);
|
||||
mem.roles.insert(
|
||||
"predicate".to_string(),
|
||||
Hypervector::from_seed("role:predicate"),
|
||||
);
|
||||
mem.roles
|
||||
.insert("object".to_string(), Hypervector::from_seed("role:object"));
|
||||
mem.roles.insert(
|
||||
"modifier".to_string(),
|
||||
Hypervector::from_seed("role:modifier"),
|
||||
);
|
||||
mem.roles.insert(
|
||||
"position_1".to_string(),
|
||||
Hypervector::from_seed("role:position_1"),
|
||||
);
|
||||
mem.roles.insert(
|
||||
"position_2".to_string(),
|
||||
Hypervector::from_seed("role:position_2"),
|
||||
);
|
||||
mem.roles.insert(
|
||||
"position_3".to_string(),
|
||||
Hypervector::from_seed("role:position_3"),
|
||||
);
|
||||
|
||||
mem
|
||||
}
|
||||
|
||||
/// Add a new atomic concept
|
||||
pub fn learn(&mut self, name: &str) -> Hypervector {
|
||||
if let Some(v) = self.concepts.get(name) {
|
||||
return v.clone();
|
||||
}
|
||||
|
||||
let v = Hypervector::from_seed(&format!("concept:{}", name));
|
||||
self.concepts.insert(name.to_string(), v.clone());
|
||||
v
|
||||
}
|
||||
|
||||
/// Get a concept (learn if new)
|
||||
pub fn get(&mut self, name: &str) -> Hypervector {
|
||||
self.learn(name)
|
||||
}
|
||||
|
||||
/// Get a role vector
|
||||
pub fn role(&self, name: &str) -> Option<&Hypervector> {
|
||||
self.roles.get(name)
|
||||
}
|
||||
|
||||
/// Bind concept to role
|
||||
pub fn bind_role(&self, concept: &Hypervector, role: &str) -> Option<Hypervector> {
|
||||
self.roles.get(role).map(|r| concept.bind(r))
|
||||
}
|
||||
|
||||
/// Unbind role to recover concept
|
||||
pub fn unbind_role(&self, bound: &Hypervector, role: &str) -> Option<Hypervector> {
|
||||
self.roles.get(role).map(|r| bound.unbind(r))
|
||||
}
|
||||
|
||||
/// Query: find best matching concept
|
||||
pub fn query(&self, hv: &Hypervector) -> Vec<(String, f32)> {
|
||||
let mut results: Vec<_> = self
|
||||
.concepts
|
||||
.iter()
|
||||
.map(|(name, v)| (name.clone(), hv.similarity(v)))
|
||||
.collect();
|
||||
|
||||
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
results
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Compositional Structures
|
||||
// ============================================================================
|
||||
|
||||
/// Compose "modifier concept" pairs (e.g., "red" + "dog")
|
||||
pub fn compose_modifier(memory: &mut ConceptMemory, modifier: &str, concept: &str) -> Hypervector {
|
||||
let m = memory.get(modifier);
|
||||
let c = memory.get(concept);
|
||||
|
||||
// Bind modifier to modifier role, then bundle with concept
|
||||
let m_bound = m.bind(memory.role("modifier").unwrap());
|
||||
let c_bound = c.bind(memory.role("subject").unwrap());
|
||||
|
||||
Hypervector::bundle(&[m_bound, c_bound])
|
||||
}
|
||||
|
||||
/// Compose a sequence (e.g., "A then B then C")
|
||||
pub fn compose_sequence(memory: &mut ConceptMemory, items: &[&str]) -> Hypervector {
|
||||
let mut parts = Vec::new();
|
||||
|
||||
for (i, item) in items.iter().enumerate() {
|
||||
let v = memory.get(item);
|
||||
// Permute by position to create order-sensitive representation
|
||||
parts.push(v.permute(i * 10));
|
||||
}
|
||||
|
||||
Hypervector::bundle(&parts)
|
||||
}
|
||||
|
||||
/// Compose a relation triple (subject, predicate, object)
|
||||
pub fn compose_triple(
|
||||
memory: &mut ConceptMemory,
|
||||
subject: &str,
|
||||
predicate: &str,
|
||||
object: &str,
|
||||
) -> Hypervector {
|
||||
let s = memory.get(subject).bind(memory.role("subject").unwrap());
|
||||
let p = memory
|
||||
.get(predicate)
|
||||
.bind(memory.role("predicate").unwrap());
|
||||
let o = memory.get(object).bind(memory.role("object").unwrap());
|
||||
|
||||
Hypervector::bundle(&[s, p, o])
|
||||
}
|
||||
|
||||
/// Query a composed structure for a specific role
|
||||
pub fn query_role(memory: &ConceptMemory, composed: &Hypervector, role: &str) -> Hypervector {
|
||||
composed.unbind(memory.role(role).unwrap())
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Analogical Reasoning
|
||||
// ============================================================================
|
||||
|
||||
/// Solve analogy: A is to B as C is to ?
|
||||
/// Using: D = C ⊗ (B ⊗ A⁻¹) where A⁻¹ = A (self-inverse)
|
||||
pub fn analogy(memory: &mut ConceptMemory, a: &str, b: &str, c: &str) -> Hypervector {
|
||||
let a_vec = memory.get(a);
|
||||
let b_vec = memory.get(b);
|
||||
let c_vec = memory.get(c);
|
||||
|
||||
// Relationship: B ⊗ A (since XOR is self-inverse)
|
||||
let relationship = b_vec.bind(&a_vec);
|
||||
|
||||
// Apply to C
|
||||
c_vec.bind(&relationship)
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Example Usage
|
||||
// ============================================================================
|
||||
|
||||
fn main() {
|
||||
println!("=== Tier 4: Compositional Hyperdimensional Computing ===\n");
|
||||
|
||||
let mut memory = ConceptMemory::new();
|
||||
|
||||
// Learn atomic concepts
|
||||
println!("Learning atomic concepts...");
|
||||
let concepts = [
|
||||
"dog", "cat", "bird", "red", "blue", "big", "small", "run", "fly", "swim", "chase", "eat",
|
||||
"king", "queen", "man", "woman", "prince", "princess",
|
||||
];
|
||||
|
||||
for concept in &concepts {
|
||||
memory.learn(concept);
|
||||
}
|
||||
println!(" Learned {} concepts\n", concepts.len());
|
||||
|
||||
// Demonstrate composition
|
||||
println!("=== Modifier + Concept Composition ===");
|
||||
|
||||
let red_dog = compose_modifier(&mut memory, "red", "dog");
|
||||
let blue_dog = compose_modifier(&mut memory, "blue", "dog");
|
||||
let red_cat = compose_modifier(&mut memory, "red", "cat");
|
||||
|
||||
println!(
|
||||
"'red dog' vs 'blue dog' similarity: {:.3}",
|
||||
red_dog.similarity(&blue_dog)
|
||||
);
|
||||
println!(
|
||||
"'red dog' vs 'red cat' similarity: {:.3}",
|
||||
red_dog.similarity(&red_cat)
|
||||
);
|
||||
println!(
|
||||
"'blue dog' vs 'red cat' similarity: {:.3}",
|
||||
blue_dog.similarity(&red_cat)
|
||||
);
|
||||
|
||||
// Query composed structure
|
||||
println!("\nQuerying 'red dog' for modifier role:");
|
||||
let recovered = query_role(&memory, &red_dog, "modifier");
|
||||
let matches = memory.query(&recovered);
|
||||
println!(" Top matches: {:?}", &matches[..3.min(matches.len())]);
|
||||
|
||||
// Sequence composition
|
||||
println!("\n=== Sequence Composition ===");
|
||||
|
||||
let seq1 = compose_sequence(&mut memory, &["run", "jump", "fly"]);
|
||||
let seq2 = compose_sequence(&mut memory, &["run", "jump", "swim"]);
|
||||
let seq3 = compose_sequence(&mut memory, &["fly", "jump", "run"]);
|
||||
|
||||
println!(
|
||||
"'run→jump→fly' vs 'run→jump→swim': {:.3}",
|
||||
seq1.similarity(&seq2)
|
||||
);
|
||||
println!(
|
||||
"'run→jump→fly' vs 'fly→jump→run': {:.3}",
|
||||
seq1.similarity(&seq3)
|
||||
);
|
||||
println!(" (Order matters: same elements, different sequence = different representation)");
|
||||
|
||||
// Triple composition
|
||||
println!("\n=== Relation Triple Composition ===");
|
||||
|
||||
let triple1 = compose_triple(&mut memory, "dog", "chase", "cat");
|
||||
let triple2 = compose_triple(&mut memory, "cat", "chase", "bird");
|
||||
let triple3 = compose_triple(&mut memory, "dog", "eat", "cat");
|
||||
|
||||
println!(
|
||||
"'dog chase cat' vs 'cat chase bird': {:.3}",
|
||||
triple1.similarity(&triple2)
|
||||
);
|
||||
println!(
|
||||
"'dog chase cat' vs 'dog eat cat': {:.3}",
|
||||
triple1.similarity(&triple3)
|
||||
);
|
||||
|
||||
// Query subject from triple
|
||||
println!("\nQuerying 'dog chase cat' for subject:");
|
||||
let subject_query = query_role(&memory, &triple1, "subject");
|
||||
let subject_matches = memory.query(&subject_query);
|
||||
println!(
|
||||
" Top matches: {:?}",
|
||||
&subject_matches[..3.min(subject_matches.len())]
|
||||
);
|
||||
|
||||
// Analogical reasoning
|
||||
println!("\n=== Analogical Reasoning ===");
|
||||
println!("Solving: 'king' is to 'queen' as 'man' is to ?");
|
||||
|
||||
let answer = analogy(&mut memory, "king", "queen", "man");
|
||||
let analogy_matches = memory.query(&answer);
|
||||
println!(
|
||||
" Top matches: {:?}",
|
||||
&analogy_matches[..5.min(analogy_matches.len())]
|
||||
);
|
||||
println!(" Expected: 'woman' should be near the top");
|
||||
|
||||
// Zero-shot composition
|
||||
println!("\n=== Zero-Shot Composition ===");
|
||||
println!("Composing 'big blue cat' (never seen together):");
|
||||
|
||||
// Multi-modifier composition
|
||||
let big = memory.get("big").bind(memory.role("modifier").unwrap());
|
||||
let blue = memory
|
||||
.get("blue")
|
||||
.bind(memory.role("modifier").unwrap())
|
||||
.permute(5);
|
||||
let cat = memory.get("cat").bind(memory.role("subject").unwrap());
|
||||
let big_blue_cat = Hypervector::bundle(&[big, blue, cat]);
|
||||
|
||||
// Compare to similar compositions
|
||||
let small_red_dog = {
|
||||
let small = memory.get("small").bind(memory.role("modifier").unwrap());
|
||||
let red = memory
|
||||
.get("red")
|
||||
.bind(memory.role("modifier").unwrap())
|
||||
.permute(5);
|
||||
let dog = memory.get("dog").bind(memory.role("subject").unwrap());
|
||||
Hypervector::bundle(&[small, red, dog])
|
||||
};
|
||||
|
||||
let big_blue_dog = {
|
||||
let big = memory.get("big").bind(memory.role("modifier").unwrap());
|
||||
let blue = memory
|
||||
.get("blue")
|
||||
.bind(memory.role("modifier").unwrap())
|
||||
.permute(5);
|
||||
let dog = memory.get("dog").bind(memory.role("subject").unwrap());
|
||||
Hypervector::bundle(&[big, blue, dog])
|
||||
};
|
||||
|
||||
println!(
|
||||
"'big blue cat' vs 'small red dog': {:.3}",
|
||||
big_blue_cat.similarity(&small_red_dog)
|
||||
);
|
||||
println!(
|
||||
"'big blue cat' vs 'big blue dog': {:.3}",
|
||||
big_blue_cat.similarity(&big_blue_dog)
|
||||
);
|
||||
println!(" (Sharing modifiers increases similarity)");
|
||||
|
||||
// Performance test
|
||||
println!("\n=== Performance ===");
|
||||
let start = std::time::Instant::now();
|
||||
let iterations = 10_000;
|
||||
|
||||
let v1 = Hypervector::random(42);
|
||||
let v2 = Hypervector::random(123);
|
||||
|
||||
for _ in 0..iterations {
|
||||
let _ = v1.bind(&v2);
|
||||
}
|
||||
let bind_time = start.elapsed();
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
for _ in 0..iterations {
|
||||
let _ = v1.similarity(&v2);
|
||||
}
|
||||
let sim_time = start.elapsed();
|
||||
|
||||
println!(
|
||||
"Bind (XOR) time: {:.1}ns per op",
|
||||
bind_time.as_nanos() as f64 / iterations as f64
|
||||
);
|
||||
println!(
|
||||
"Similarity time: {:.1}ns per op",
|
||||
sim_time.as_nanos() as f64 / iterations as f64
|
||||
);
|
||||
|
||||
println!("\n=== Key Benefits ===");
|
||||
println!("- Zero-shot: compose any combination of known concepts");
|
||||
println!("- Sub-100ns: composition is just XOR operations");
|
||||
println!("- Algebraic: unbind to recover components");
|
||||
println!("- Distributed: no central vocabulary server");
|
||||
println!("- Interpretable: query reveals structure");
|
||||
println!("\nThis is what embeddings should have been: compositional by construction.");
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Unbinding with the key used for binding must give back the original.
    #[test]
    fn test_bind_unbind() {
        let original = Hypervector::random(42);
        let key = Hypervector::random(123);

        let round_trip = original.bind(&key).unbind(&key);

        // Recovered should be very similar to original
        assert!(round_trip.similarity(&original) > 0.95);
    }

    /// A bound pair should look unrelated to either of its inputs.
    #[test]
    fn test_binding_dissimilarity() {
        let left = Hypervector::random(42);
        let right = Hypervector::random(123);

        let pair = left.bind(&right);

        // Bound should be dissimilar to both components
        for component in [&left, &right] {
            assert!(pair.similarity(component).abs() < 0.2);
        }
    }

    /// Two bundles that share one member should be measurably similar.
    #[test]
    fn test_bundle_similarity() {
        let shared = Hypervector::random(42);
        let other_b = Hypervector::random(123);
        let other_c = Hypervector::random(456);

        let with_b = Hypervector::bundle(&[shared.clone(), other_b]);
        let with_c = Hypervector::bundle(&[shared, other_c]);

        // Bundles with shared component should be somewhat similar
        let sim = with_b.similarity(&with_c);
        assert!(sim > 0.2); // Some similarity due to shared component
    }

    /// Compositions sharing a modifier or a noun should not be orthogonal.
    #[test]
    fn test_composition() {
        let mut memory = ConceptMemory::new();

        let red_dog = compose_modifier(&mut memory, "red", "dog");
        let red_cat = compose_modifier(&mut memory, "red", "cat");
        let blue_dog = compose_modifier(&mut memory, "blue", "dog");

        // Pair sharing the modifier, and pair sharing the noun.
        let shared_modifier = red_dog.similarity(&red_cat);
        let shared_noun = red_dog.similarity(&blue_dog);

        // Both should show some similarity due to shared component
        assert!(shared_modifier.abs() > 0.1);
        assert!(shared_noun.abs() > 0.1);
    }

    /// Reversing a sequence must change its representation.
    #[test]
    fn test_sequence_order() {
        let mut memory = ConceptMemory::new();

        let forward = compose_sequence(&mut memory, &["a", "b", "c"]);
        let backward = compose_sequence(&mut memory, &["c", "b", "a"]);

        // Different order should produce different representations
        assert!(forward.similarity(&backward) < 0.5);
    }
}
|
||||
Reference in New Issue
Block a user