// Vendored via git subtree.
// git-subtree-dir: vendor/ruvector
// git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
//! # Tier 4: Compositional Hyperdimensional Computing
//!
//! SOTA application: Zero-shot concept composition via HDC binding.
//!
//! ## The Problem
//! Traditional embeddings:
//! - Fixed vocabulary at training time
//! - Cannot represent "red dog" if never seen together
//! - Composition requires retraining
//! - No algebraic structure for reasoning
//!
//! ## What Changes
//! - HDC: concepts are binary hypervectors (10,000 bits)
//! - XOR binding: combine concepts preserving similarity
//! - Bundling: create superpositions (sets of concepts)
//! - Algebra: unbind to recover components
//!
//! ## Why This Matters
//! - Zero-shot: represent any combination of known concepts
//! - Sub-100ns operations: composition is just XOR
//! - Distributed: no central vocabulary server
//! - Interpretable: can unbind to see what's in a representation
//!
//! This is what embeddings should have been: compositional by construction.

use std::collections::HashMap;

// ============================================================================
// Hypervector Operations
// ============================================================================

/// Number of bits in a hypervector.
const DIM: usize = 10_000;

/// Number of `u64` words needed to hold `DIM` bits.
const WORDS: usize = (DIM + 63) / 64;

/// Mask of the bits in the final storage word that fall inside `DIM`.
///
/// `WORDS * 64` is larger than `DIM` whenever `DIM` is not a multiple of 64,
/// so the last word carries padding bits. Those bits must stay zero, otherwise
/// `hamming_distance`/`popcount` (which scan whole words) disagree with
/// `bundle`/`permute` (which only touch bits `0..DIM`).
const LAST_WORD_MASK: u64 = if DIM % 64 == 0 {
    u64::MAX
} else {
    (1u64 << (DIM % 64)) - 1
};

/// Xorshift64 state used in place of a zero seed (zero is a fixed point of
/// xorshift and would yield the all-zero vector).
const ZERO_SEED_FALLBACK: u64 = 0x9E37_79B9_7F4A_7C15;

/// Binary hypervector stored as packed `u64` words.
///
/// All operations work word-wise or bit-wise on the packed storage.
/// Invariant: bits at positions `DIM..WORDS * 64` are always zero.
#[derive(Clone, PartialEq, Eq)]
pub struct Hypervector {
    bits: [u64; WORDS],
}

impl Hypervector {
    /// Creates the all-zero vector.
    pub fn zeros() -> Self {
        Self { bits: [0; WORDS] }
    }

    /// Creates a pseudo-random vector from `seed` using xorshift64.
    ///
    /// The same seed always produces the same vector. A seed of `0` is
    /// replaced by a fixed non-zero constant, because xorshift64 never leaves
    /// the zero state and would otherwise return the all-zero vector.
    pub fn random(seed: u64) -> Self {
        let mut state = if seed == 0 { ZERO_SEED_FALLBACK } else { seed };
        let mut bits = [0u64; WORDS];

        for word in &mut bits {
            // Xorshift64 step.
            state ^= state << 13;
            state ^= state >> 7;
            state ^= state << 17;
            *word = state;
        }

        // Keep the padding bits beyond DIM clear (see `LAST_WORD_MASK`).
        bits[WORDS - 1] &= LAST_WORD_MASK;

        Self { bits }
    }

    /// Creates a vector deterministically from a seed string.
    ///
    /// The string is reduced to a `u64` with a wrapping base-31 polynomial
    /// hash of its bytes, which is then passed to [`Hypervector::random`].
    pub fn from_seed(seed: &str) -> Self {
        let hash = seed
            .bytes()
            .fold(0u64, |acc, b| acc.wrapping_mul(31).wrapping_add(u64::from(b)));
        Self::random(hash)
    }

    /// XOR binding: `A ⊗ B`.
    ///
    /// Binding is its own inverse, so `(A ⊗ B) ⊗ B == A` exactly
    /// (see [`Hypervector::unbind`]).
    pub fn bind(&self, other: &Self) -> Self {
        let mut result = self.clone();
        for (dst, src) in result.bits.iter_mut().zip(&other.bits) {
            *dst ^= *src;
        }
        result
    }

    /// Unbind: given `A ⊗ B` and the key `B`, recovers `A`.
    ///
    /// Identical to [`Hypervector::bind`] because XOR is self-inverse.
    pub fn unbind(&self, key: &Self) -> Self {
        self.bind(key)
    }

    /// Bundle (superposition) by strict bitwise majority vote.
    ///
    /// A result bit is 1 only when more than half of the inputs have that bit
    /// set; ties (possible with an even number of inputs) resolve to 0, so
    /// bundling exactly two vectors yields their bitwise AND.
    ///
    /// An empty slice yields the zero vector and a single-element slice yields
    /// a copy of that element.
    pub fn bundle(vectors: &[Self]) -> Self {
        match vectors {
            [] => return Self::zeros(),
            [only] => return only.clone(),
            _ => {}
        }

        let threshold = vectors.len() / 2;
        let mut result = Self::zeros();

        for bit_idx in 0..DIM {
            let (word_idx, bit_pos) = (bit_idx / 64, bit_idx % 64);

            let votes = vectors
                .iter()
                .filter(|v| (v.bits[word_idx] >> bit_pos) & 1 == 1)
                .count();

            if votes > threshold {
                result.bits[word_idx] |= 1u64 << bit_pos;
            }
        }

        result
    }

    /// Permute: cyclically rotates the `DIM` bits by `shift` positions.
    ///
    /// Bit `i` moves to position `(i + shift) % DIM`. `shift` is reduced
    /// modulo `DIM`, and a resulting shift of zero returns a copy of `self`.
    pub fn permute(&self, shift: usize) -> Self {
        let shift = shift % DIM;
        if shift == 0 {
            return self.clone();
        }

        let mut result = Self::zeros();

        for bit_idx in 0..DIM {
            if (self.bits[bit_idx / 64] >> (bit_idx % 64)) & 1 == 1 {
                let new_idx = (bit_idx + shift) % DIM;
                result.bits[new_idx / 64] |= 1u64 << (new_idx % 64);
            }
        }

        result
    }

    /// Hamming distance: the number of bit positions where the vectors differ.
    pub fn hamming_distance(&self, other: &Self) -> u32 {
        self.bits
            .iter()
            .zip(&other.bits)
            .map(|(a, b)| (a ^ b).count_ones())
            .sum()
    }

    /// Cosine-like similarity: `1 - 2 * (distance / DIM)`.
    ///
    /// Identical vectors score `1.0`; vectors differing in half of their bits
    /// score `0.0`.
    pub fn similarity(&self, other: &Self) -> f32 {
        let dist = self.hamming_distance(other);
        1.0 - 2.0 * (dist as f32 / DIM as f32)
    }

    /// Number of bits set to 1.
    pub fn popcount(&self) -> u32 {
        self.bits.iter().map(|w| w.count_ones()).sum()
    }
}

impl std::fmt::Debug for Hypervector {
    /// Prints a compact summary (the popcount) instead of all `DIM` bits.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "HV(popcount={})", self.popcount())
    }
}

// ============================================================================
|
|
// Concept Memory
|
|
// ============================================================================
|
|
|
|
/// Memory of atomic concepts
|
|
pub struct ConceptMemory {
|
|
/// Named concepts
|
|
concepts: HashMap<String, Hypervector>,
|
|
/// Role vectors for binding positions
|
|
roles: HashMap<String, Hypervector>,
|
|
}
|
|
|
|
impl ConceptMemory {
|
|
pub fn new() -> Self {
|
|
let mut mem = Self {
|
|
concepts: HashMap::new(),
|
|
roles: HashMap::new(),
|
|
};
|
|
|
|
// Create role vectors for structured binding
|
|
mem.roles.insert(
|
|
"subject".to_string(),
|
|
Hypervector::from_seed("role:subject"),
|
|
);
|
|
mem.roles.insert(
|
|
"predicate".to_string(),
|
|
Hypervector::from_seed("role:predicate"),
|
|
);
|
|
mem.roles
|
|
.insert("object".to_string(), Hypervector::from_seed("role:object"));
|
|
mem.roles.insert(
|
|
"modifier".to_string(),
|
|
Hypervector::from_seed("role:modifier"),
|
|
);
|
|
mem.roles.insert(
|
|
"position_1".to_string(),
|
|
Hypervector::from_seed("role:position_1"),
|
|
);
|
|
mem.roles.insert(
|
|
"position_2".to_string(),
|
|
Hypervector::from_seed("role:position_2"),
|
|
);
|
|
mem.roles.insert(
|
|
"position_3".to_string(),
|
|
Hypervector::from_seed("role:position_3"),
|
|
);
|
|
|
|
mem
|
|
}
|
|
|
|
/// Add a new atomic concept
|
|
pub fn learn(&mut self, name: &str) -> Hypervector {
|
|
if let Some(v) = self.concepts.get(name) {
|
|
return v.clone();
|
|
}
|
|
|
|
let v = Hypervector::from_seed(&format!("concept:{}", name));
|
|
self.concepts.insert(name.to_string(), v.clone());
|
|
v
|
|
}
|
|
|
|
/// Get a concept (learn if new)
|
|
pub fn get(&mut self, name: &str) -> Hypervector {
|
|
self.learn(name)
|
|
}
|
|
|
|
/// Get a role vector
|
|
pub fn role(&self, name: &str) -> Option<&Hypervector> {
|
|
self.roles.get(name)
|
|
}
|
|
|
|
/// Bind concept to role
|
|
pub fn bind_role(&self, concept: &Hypervector, role: &str) -> Option<Hypervector> {
|
|
self.roles.get(role).map(|r| concept.bind(r))
|
|
}
|
|
|
|
/// Unbind role to recover concept
|
|
pub fn unbind_role(&self, bound: &Hypervector, role: &str) -> Option<Hypervector> {
|
|
self.roles.get(role).map(|r| bound.unbind(r))
|
|
}
|
|
|
|
/// Query: find best matching concept
|
|
pub fn query(&self, hv: &Hypervector) -> Vec<(String, f32)> {
|
|
let mut results: Vec<_> = self
|
|
.concepts
|
|
.iter()
|
|
.map(|(name, v)| (name.clone(), hv.similarity(v)))
|
|
.collect();
|
|
|
|
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
|
results
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// Compositional Structures
|
|
// ============================================================================
|
|
|
|
/// Compose "modifier concept" pairs (e.g., "red" + "dog")
|
|
pub fn compose_modifier(memory: &mut ConceptMemory, modifier: &str, concept: &str) -> Hypervector {
|
|
let m = memory.get(modifier);
|
|
let c = memory.get(concept);
|
|
|
|
// Bind modifier to modifier role, then bundle with concept
|
|
let m_bound = m.bind(memory.role("modifier").unwrap());
|
|
let c_bound = c.bind(memory.role("subject").unwrap());
|
|
|
|
Hypervector::bundle(&[m_bound, c_bound])
|
|
}
|
|
|
|
/// Compose a sequence (e.g., "A then B then C")
|
|
pub fn compose_sequence(memory: &mut ConceptMemory, items: &[&str]) -> Hypervector {
|
|
let mut parts = Vec::new();
|
|
|
|
for (i, item) in items.iter().enumerate() {
|
|
let v = memory.get(item);
|
|
// Permute by position to create order-sensitive representation
|
|
parts.push(v.permute(i * 10));
|
|
}
|
|
|
|
Hypervector::bundle(&parts)
|
|
}
|
|
|
|
/// Compose a relation triple (subject, predicate, object)
|
|
pub fn compose_triple(
|
|
memory: &mut ConceptMemory,
|
|
subject: &str,
|
|
predicate: &str,
|
|
object: &str,
|
|
) -> Hypervector {
|
|
let s = memory.get(subject).bind(memory.role("subject").unwrap());
|
|
let p = memory
|
|
.get(predicate)
|
|
.bind(memory.role("predicate").unwrap());
|
|
let o = memory.get(object).bind(memory.role("object").unwrap());
|
|
|
|
Hypervector::bundle(&[s, p, o])
|
|
}
|
|
|
|
/// Query a composed structure for a specific role
|
|
pub fn query_role(memory: &ConceptMemory, composed: &Hypervector, role: &str) -> Hypervector {
|
|
composed.unbind(memory.role(role).unwrap())
|
|
}
|
|
|
|
// ============================================================================
|
|
// Analogical Reasoning
|
|
// ============================================================================
|
|
|
|
/// Solve analogy: A is to B as C is to ?
|
|
/// Using: D = C ⊗ (B ⊗ A⁻¹) where A⁻¹ = A (self-inverse)
|
|
pub fn analogy(memory: &mut ConceptMemory, a: &str, b: &str, c: &str) -> Hypervector {
|
|
let a_vec = memory.get(a);
|
|
let b_vec = memory.get(b);
|
|
let c_vec = memory.get(c);
|
|
|
|
// Relationship: B ⊗ A (since XOR is self-inverse)
|
|
let relationship = b_vec.bind(&a_vec);
|
|
|
|
// Apply to C
|
|
c_vec.bind(&relationship)
|
|
}
|
|
|
|
// ============================================================================
|
|
// Example Usage
|
|
// ============================================================================
|
|
|
|
fn main() {
|
|
println!("=== Tier 4: Compositional Hyperdimensional Computing ===\n");
|
|
|
|
let mut memory = ConceptMemory::new();
|
|
|
|
// Learn atomic concepts
|
|
println!("Learning atomic concepts...");
|
|
let concepts = [
|
|
"dog", "cat", "bird", "red", "blue", "big", "small", "run", "fly", "swim", "chase", "eat",
|
|
"king", "queen", "man", "woman", "prince", "princess",
|
|
];
|
|
|
|
for concept in &concepts {
|
|
memory.learn(concept);
|
|
}
|
|
println!(" Learned {} concepts\n", concepts.len());
|
|
|
|
// Demonstrate composition
|
|
println!("=== Modifier + Concept Composition ===");
|
|
|
|
let red_dog = compose_modifier(&mut memory, "red", "dog");
|
|
let blue_dog = compose_modifier(&mut memory, "blue", "dog");
|
|
let red_cat = compose_modifier(&mut memory, "red", "cat");
|
|
|
|
println!(
|
|
"'red dog' vs 'blue dog' similarity: {:.3}",
|
|
red_dog.similarity(&blue_dog)
|
|
);
|
|
println!(
|
|
"'red dog' vs 'red cat' similarity: {:.3}",
|
|
red_dog.similarity(&red_cat)
|
|
);
|
|
println!(
|
|
"'blue dog' vs 'red cat' similarity: {:.3}",
|
|
blue_dog.similarity(&red_cat)
|
|
);
|
|
|
|
// Query composed structure
|
|
println!("\nQuerying 'red dog' for modifier role:");
|
|
let recovered = query_role(&memory, &red_dog, "modifier");
|
|
let matches = memory.query(&recovered);
|
|
println!(" Top matches: {:?}", &matches[..3.min(matches.len())]);
|
|
|
|
// Sequence composition
|
|
println!("\n=== Sequence Composition ===");
|
|
|
|
let seq1 = compose_sequence(&mut memory, &["run", "jump", "fly"]);
|
|
let seq2 = compose_sequence(&mut memory, &["run", "jump", "swim"]);
|
|
let seq3 = compose_sequence(&mut memory, &["fly", "jump", "run"]);
|
|
|
|
println!(
|
|
"'run→jump→fly' vs 'run→jump→swim': {:.3}",
|
|
seq1.similarity(&seq2)
|
|
);
|
|
println!(
|
|
"'run→jump→fly' vs 'fly→jump→run': {:.3}",
|
|
seq1.similarity(&seq3)
|
|
);
|
|
println!(" (Order matters: same elements, different sequence = different representation)");
|
|
|
|
// Triple composition
|
|
println!("\n=== Relation Triple Composition ===");
|
|
|
|
let triple1 = compose_triple(&mut memory, "dog", "chase", "cat");
|
|
let triple2 = compose_triple(&mut memory, "cat", "chase", "bird");
|
|
let triple3 = compose_triple(&mut memory, "dog", "eat", "cat");
|
|
|
|
println!(
|
|
"'dog chase cat' vs 'cat chase bird': {:.3}",
|
|
triple1.similarity(&triple2)
|
|
);
|
|
println!(
|
|
"'dog chase cat' vs 'dog eat cat': {:.3}",
|
|
triple1.similarity(&triple3)
|
|
);
|
|
|
|
// Query subject from triple
|
|
println!("\nQuerying 'dog chase cat' for subject:");
|
|
let subject_query = query_role(&memory, &triple1, "subject");
|
|
let subject_matches = memory.query(&subject_query);
|
|
println!(
|
|
" Top matches: {:?}",
|
|
&subject_matches[..3.min(subject_matches.len())]
|
|
);
|
|
|
|
// Analogical reasoning
|
|
println!("\n=== Analogical Reasoning ===");
|
|
println!("Solving: 'king' is to 'queen' as 'man' is to ?");
|
|
|
|
let answer = analogy(&mut memory, "king", "queen", "man");
|
|
let analogy_matches = memory.query(&answer);
|
|
println!(
|
|
" Top matches: {:?}",
|
|
&analogy_matches[..5.min(analogy_matches.len())]
|
|
);
|
|
println!(" Expected: 'woman' should be near the top");
|
|
|
|
// Zero-shot composition
|
|
println!("\n=== Zero-Shot Composition ===");
|
|
println!("Composing 'big blue cat' (never seen together):");
|
|
|
|
// Multi-modifier composition
|
|
let big = memory.get("big").bind(memory.role("modifier").unwrap());
|
|
let blue = memory
|
|
.get("blue")
|
|
.bind(memory.role("modifier").unwrap())
|
|
.permute(5);
|
|
let cat = memory.get("cat").bind(memory.role("subject").unwrap());
|
|
let big_blue_cat = Hypervector::bundle(&[big, blue, cat]);
|
|
|
|
// Compare to similar compositions
|
|
let small_red_dog = {
|
|
let small = memory.get("small").bind(memory.role("modifier").unwrap());
|
|
let red = memory
|
|
.get("red")
|
|
.bind(memory.role("modifier").unwrap())
|
|
.permute(5);
|
|
let dog = memory.get("dog").bind(memory.role("subject").unwrap());
|
|
Hypervector::bundle(&[small, red, dog])
|
|
};
|
|
|
|
let big_blue_dog = {
|
|
let big = memory.get("big").bind(memory.role("modifier").unwrap());
|
|
let blue = memory
|
|
.get("blue")
|
|
.bind(memory.role("modifier").unwrap())
|
|
.permute(5);
|
|
let dog = memory.get("dog").bind(memory.role("subject").unwrap());
|
|
Hypervector::bundle(&[big, blue, dog])
|
|
};
|
|
|
|
println!(
|
|
"'big blue cat' vs 'small red dog': {:.3}",
|
|
big_blue_cat.similarity(&small_red_dog)
|
|
);
|
|
println!(
|
|
"'big blue cat' vs 'big blue dog': {:.3}",
|
|
big_blue_cat.similarity(&big_blue_dog)
|
|
);
|
|
println!(" (Sharing modifiers increases similarity)");
|
|
|
|
// Performance test
|
|
println!("\n=== Performance ===");
|
|
let start = std::time::Instant::now();
|
|
let iterations = 10_000;
|
|
|
|
let v1 = Hypervector::random(42);
|
|
let v2 = Hypervector::random(123);
|
|
|
|
for _ in 0..iterations {
|
|
let _ = v1.bind(&v2);
|
|
}
|
|
let bind_time = start.elapsed();
|
|
|
|
let start = std::time::Instant::now();
|
|
for _ in 0..iterations {
|
|
let _ = v1.similarity(&v2);
|
|
}
|
|
let sim_time = start.elapsed();
|
|
|
|
println!(
|
|
"Bind (XOR) time: {:.1}ns per op",
|
|
bind_time.as_nanos() as f64 / iterations as f64
|
|
);
|
|
println!(
|
|
"Similarity time: {:.1}ns per op",
|
|
sim_time.as_nanos() as f64 / iterations as f64
|
|
);
|
|
|
|
println!("\n=== Key Benefits ===");
|
|
println!("- Zero-shot: compose any combination of known concepts");
|
|
println!("- Sub-100ns: composition is just XOR operations");
|
|
println!("- Algebraic: unbind to recover components");
|
|
println!("- Distributed: no central vocabulary server");
|
|
println!("- Interpretable: query reveals structure");
|
|
println!("\nThis is what embeddings should have been: compositional by construction.");
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Unbinding with the same key must give back the original vector.
    #[test]
    fn test_bind_unbind() {
        let a = Hypervector::random(42);
        let b = Hypervector::random(123);

        let bound = a.bind(&b);
        let recovered = bound.unbind(&b);

        // Recovered should be very similar to original
        assert!(recovered.similarity(&a) > 0.95);
    }

    /// A bound pair should look unrelated to either of its components.
    #[test]
    fn test_binding_dissimilarity() {
        let a = Hypervector::random(42);
        let b = Hypervector::random(123);

        let bound = a.bind(&b);

        // Bound should be dissimilar to both components
        assert!(bound.similarity(&a).abs() < 0.2);
        assert!(bound.similarity(&b).abs() < 0.2);
    }

    /// Bundles that share a component should remain measurably similar.
    #[test]
    fn test_bundle_similarity() {
        let a = Hypervector::random(42);
        let b = Hypervector::random(123);
        let c = Hypervector::random(456);

        let bundle_ab = Hypervector::bundle(&[a.clone(), b]);
        let bundle_ac = Hypervector::bundle(&[a, c]);

        // Bundles with shared component should be somewhat similar
        let sim = bundle_ab.similarity(&bundle_ac);
        assert!(sim > 0.2); // Some similarity due to shared A
    }

    /// Compositions sharing either the modifier or the noun should correlate.
    #[test]
    fn test_composition() {
        let mut memory = ConceptMemory::new();

        let red_dog = compose_modifier(&mut memory, "red", "dog");
        let red_cat = compose_modifier(&mut memory, "red", "cat");
        let blue_dog = compose_modifier(&mut memory, "blue", "dog");

        let rd_rc = red_dog.similarity(&red_cat);
        let rd_bd = red_dog.similarity(&blue_dog);

        // Both should show some similarity due to shared component
        assert!(rd_rc.abs() > 0.1);
        assert!(rd_bd.abs() > 0.1);
    }

    /// Reversing a sequence must change its representation.
    #[test]
    fn test_sequence_order() {
        let mut memory = ConceptMemory::new();

        let seq1 = compose_sequence(&mut memory, &["a", "b", "c"]);
        let seq2 = compose_sequence(&mut memory, &["c", "b", "a"]);

        // Different order should produce different representations
        assert!(seq1.similarity(&seq2) < 0.5);
    }
}