Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
280
vendor/ruvector/crates/prime-radiant/src/attention/adapter.rs
vendored
Normal file
280
vendor/ruvector/crates/prime-radiant/src/attention/adapter.rs
vendored
Normal file
@@ -0,0 +1,280 @@
|
||||
//! Adapter to ruvector-attention
|
||||
//!
|
||||
//! Wraps attention mechanisms for coherence computation.
|
||||
|
||||
use super::{AttentionCoherenceConfig, AttentionError, Result};
|
||||
|
||||
/// Adapter wrapping ruvector-attention functionality.
///
/// Stateless apart from the stored configuration: every method reads
/// only `self.config` (e.g. `temperature`) and its arguments.
#[derive(Debug)]
pub struct AttentionAdapter {
    /// Configuration (temperature is the only field read by this adapter)
    config: AttentionCoherenceConfig,
}
|
||||
|
||||
impl AttentionAdapter {
|
||||
/// Create a new adapter
|
||||
pub fn new(config: AttentionCoherenceConfig) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
|
||||
/// Compute attention scores for node states
|
||||
///
|
||||
/// Returns a vector of attention scores (one per node).
|
||||
pub fn compute_scores(&self, node_states: &[&[f32]]) -> Result<Vec<f32>> {
|
||||
if node_states.is_empty() {
|
||||
return Err(AttentionError::EmptyInput("node_states".to_string()));
|
||||
}
|
||||
|
||||
let n = node_states.len();
|
||||
|
||||
// Validate dimensions
|
||||
let dim = node_states[0].len();
|
||||
for (i, state) in node_states.iter().enumerate() {
|
||||
if state.len() != dim {
|
||||
return Err(AttentionError::DimensionMismatch {
|
||||
expected: dim,
|
||||
actual: state.len(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Compute pairwise similarities
|
||||
let mut similarity_matrix = vec![vec![0.0f32; n]; n];
|
||||
for i in 0..n {
|
||||
for j in 0..n {
|
||||
if i != j {
|
||||
similarity_matrix[i][j] =
|
||||
self.cosine_similarity(node_states[i], node_states[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute attention scores as normalized sum of similarities
|
||||
let mut scores = Vec::with_capacity(n);
|
||||
for i in 0..n {
|
||||
let sum: f32 = similarity_matrix[i].iter().sum();
|
||||
let avg = sum / (n - 1).max(1) as f32;
|
||||
// Normalize to [0, 1]
|
||||
let normalized = (avg + 1.0) / 2.0; // cosine is in [-1, 1]
|
||||
scores.push(normalized.clamp(0.0, 1.0));
|
||||
}
|
||||
|
||||
Ok(scores)
|
||||
}
|
||||
|
||||
/// Compute attention over query and keys
|
||||
pub fn compute_attention(
|
||||
&self,
|
||||
query: &[f32],
|
||||
keys: &[&[f32]],
|
||||
values: &[&[f32]],
|
||||
) -> Result<Vec<f32>> {
|
||||
if keys.is_empty() || values.is_empty() {
|
||||
return Err(AttentionError::EmptyInput("keys/values".to_string()));
|
||||
}
|
||||
|
||||
if keys.len() != values.len() {
|
||||
return Err(AttentionError::InvalidConfig(
|
||||
"keys and values must have same length".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let dim = query.len();
|
||||
|
||||
// Compute scaled dot-product attention
|
||||
let scale = 1.0 / (dim as f32).sqrt();
|
||||
|
||||
let logits: Vec<f32> = keys
|
||||
.iter()
|
||||
.map(|k| self.dot_product(query, k) * scale / self.config.temperature)
|
||||
.collect();
|
||||
|
||||
let weights = self.stable_softmax(&logits);
|
||||
|
||||
// Weighted sum of values
|
||||
self.weighted_sum(&weights, values)
|
||||
}
|
||||
|
||||
/// Compute sparse attention (top-k)
|
||||
pub fn compute_sparse_attention(
|
||||
&self,
|
||||
query: &[f32],
|
||||
keys: &[&[f32]],
|
||||
values: &[&[f32]],
|
||||
k: usize,
|
||||
) -> Result<Vec<f32>> {
|
||||
if keys.is_empty() || values.is_empty() {
|
||||
return Err(AttentionError::EmptyInput("keys/values".to_string()));
|
||||
}
|
||||
|
||||
let k = k.min(keys.len());
|
||||
let dim = query.len();
|
||||
let scale = 1.0 / (dim as f32).sqrt();
|
||||
|
||||
// Get top-k scores
|
||||
let mut scores: Vec<(usize, f32)> = keys
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, k)| (i, self.dot_product(query, k) * scale))
|
||||
.collect();
|
||||
|
||||
scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
let top_k: Vec<(usize, f32)> = scores.into_iter().take(k).collect();
|
||||
|
||||
// Compute attention over selected
|
||||
let logits: Vec<f32> = top_k
|
||||
.iter()
|
||||
.map(|(_, s)| s / self.config.temperature)
|
||||
.collect();
|
||||
|
||||
let weights = self.stable_softmax(&logits);
|
||||
|
||||
let selected_values: Vec<&[f32]> = top_k.iter().map(|(i, _)| values[*i]).collect();
|
||||
|
||||
self.weighted_sum(&weights, &selected_values)
|
||||
}
|
||||
|
||||
// === Helper methods ===
|
||||
|
||||
fn dot_product(&self, a: &[f32], b: &[f32]) -> f32 {
|
||||
let len = a.len().min(b.len());
|
||||
let mut sum = 0.0f32;
|
||||
|
||||
// Unrolled for performance
|
||||
let chunks = len / 4;
|
||||
let remainder = len % 4;
|
||||
|
||||
for i in 0..chunks {
|
||||
let base = i * 4;
|
||||
sum += a[base] * b[base];
|
||||
sum += a[base + 1] * b[base + 1];
|
||||
sum += a[base + 2] * b[base + 2];
|
||||
sum += a[base + 3] * b[base + 3];
|
||||
}
|
||||
|
||||
let base = chunks * 4;
|
||||
for i in 0..remainder {
|
||||
sum += a[base + i] * b[base + i];
|
||||
}
|
||||
|
||||
sum
|
||||
}
|
||||
|
||||
fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
|
||||
let dot = self.dot_product(a, b);
|
||||
let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
|
||||
if norm_a < 1e-10 || norm_b < 1e-10 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
(dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
|
||||
}
|
||||
|
||||
fn stable_softmax(&self, logits: &[f32]) -> Vec<f32> {
|
||||
if logits.is_empty() {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let max_logit = logits.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
|
||||
let exp_logits: Vec<f32> = logits.iter().map(|&l| (l - max_logit).exp()).collect();
|
||||
let sum: f32 = exp_logits.iter().sum();
|
||||
|
||||
if sum > 0.0 {
|
||||
exp_logits.iter().map(|&e| e / sum).collect()
|
||||
} else {
|
||||
// Fallback to uniform
|
||||
vec![1.0 / logits.len() as f32; logits.len()]
|
||||
}
|
||||
}
|
||||
|
||||
fn weighted_sum(&self, weights: &[f32], values: &[&[f32]]) -> Result<Vec<f32>> {
|
||||
if weights.is_empty() || values.is_empty() {
|
||||
return Err(AttentionError::EmptyInput("weights/values".to_string()));
|
||||
}
|
||||
|
||||
let dim = values[0].len();
|
||||
let mut output = vec![0.0f32; dim];
|
||||
|
||||
for (weight, value) in weights.iter().zip(values.iter()) {
|
||||
for (o, &v) in output.iter_mut().zip(value.iter()) {
|
||||
*o += weight * v;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Scores must exist for every node and live in [0, 1].
    #[test]
    fn test_compute_scores() {
        let config = AttentionCoherenceConfig::default();
        let adapter = AttentionAdapter::new(config);

        let states: Vec<Vec<f32>> = (0..5).map(|i| vec![0.1 * (i + 1) as f32; 16]).collect();
        let state_refs: Vec<&[f32]> = states.iter().map(|s| s.as_slice()).collect();

        let scores = adapter.compute_scores(&state_refs).unwrap();

        assert_eq!(scores.len(), 5);
        for score in &scores {
            assert!(*score >= 0.0 && *score <= 1.0);
        }
    }

    // Dense attention output has the value dimension (16), not the key count.
    #[test]
    fn test_compute_attention() {
        let config = AttentionCoherenceConfig::default();
        let adapter = AttentionAdapter::new(config);

        let query = vec![0.5f32; 16];
        let keys: Vec<Vec<f32>> = (0..10).map(|i| vec![0.1 * (i + 1) as f32; 16]).collect();
        let values: Vec<Vec<f32>> = (0..10).map(|i| vec![i as f32; 16]).collect();

        let key_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
        let value_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();

        let output = adapter
            .compute_attention(&query, &key_refs, &value_refs)
            .unwrap();

        assert_eq!(output.len(), 16);
    }

    // Top-k (5 of 20) attention still returns a full-dimension output.
    #[test]
    fn test_sparse_attention() {
        let config = AttentionCoherenceConfig::default();
        let adapter = AttentionAdapter::new(config);

        let query = vec![0.5f32; 16];
        let keys: Vec<Vec<f32>> = (0..20).map(|i| vec![0.1 * (i + 1) as f32; 16]).collect();
        let values: Vec<Vec<f32>> = (0..20).map(|i| vec![i as f32; 16]).collect();

        let key_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
        let value_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();

        let output = adapter
            .compute_sparse_attention(&query, &key_refs, &value_refs, 5)
            .unwrap();

        assert_eq!(output.len(), 16);
    }

    // Identical vectors -> +1, opposite vectors -> -1 (within tolerance).
    #[test]
    fn test_cosine_similarity() {
        let config = AttentionCoherenceConfig::default();
        let adapter = AttentionAdapter::new(config);

        let a = vec![1.0, 0.0, 0.0, 0.0];
        let b = vec![1.0, 0.0, 0.0, 0.0];
        let c = vec![-1.0, 0.0, 0.0, 0.0];

        assert!((adapter.cosine_similarity(&a, &b) - 1.0).abs() < 0.01);
        assert!((adapter.cosine_similarity(&a, &c) + 1.0).abs() < 0.01);
    }
}
|
||||
228
vendor/ruvector/crates/prime-radiant/src/attention/config.rs
vendored
Normal file
228
vendor/ruvector/crates/prime-radiant/src/attention/config.rs
vendored
Normal file
@@ -0,0 +1,228 @@
|
||||
//! Attention Coherence Configuration
|
||||
//!
|
||||
//! Configuration for attention-weighted residual computation.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Configuration for attention-weighted coherence.
///
/// Invariants enforced by `validate`: `dimension > 0`, `temperature > 0`,
/// `stable_threshold > freeze_threshold`, `num_experts > 0`,
/// `moe_top_k <= num_experts`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttentionCoherenceConfig {
    /// State vector dimension
    pub dimension: usize,

    /// Number of neighbors for coherence graph construction
    pub k_neighbors: usize,

    /// Temperature for attention softmax (must be positive; see `validate`)
    pub temperature: f32,

    /// Base attention width (full width used in stable mode)
    pub base_width: usize,

    // Topology gating configuration
    /// Coherence at or above this value selects stable mode
    pub stable_threshold: f32,
    /// Coherence below this value selects freeze mode
    pub freeze_threshold: f32,
    /// Coherence update period (ticks)
    pub coherence_update_period: usize,

    // MoE configuration
    /// Number of MoE experts
    pub num_experts: usize,
    /// Top-k experts to use (must not exceed `num_experts`)
    pub moe_top_k: usize,
    /// Expert capacity factor
    pub expert_capacity: f32,

    // Diffusion configuration
    /// Enable diffusion smoothing
    pub enable_diffusion: bool,
    /// Diffusion time parameter (total integration time)
    pub diffusion_time: f32,
    /// Number of diffusion steps (upper bound on steps performed)
    pub diffusion_steps: usize,
    /// Sigma for diffusion kernel
    pub diffusion_sigma: f32,
}
|
||||
|
||||
impl Default for AttentionCoherenceConfig {
    /// Medium-sized defaults; these values satisfy `validate`.
    fn default() -> Self {
        Self {
            dimension: 64,
            k_neighbors: 8,
            temperature: 1.0,
            base_width: 64,
            stable_threshold: 0.7,
            freeze_threshold: 0.3,
            coherence_update_period: 16,
            num_experts: 4,
            moe_top_k: 2,
            expert_capacity: 1.25,
            enable_diffusion: false,
            diffusion_time: 1.0,
            diffusion_steps: 5,
            diffusion_sigma: 1.0,
        }
    }
}
|
||||
|
||||
impl AttentionCoherenceConfig {
|
||||
/// Create configuration for small collections
|
||||
pub fn small() -> Self {
|
||||
Self {
|
||||
dimension: 32,
|
||||
k_neighbors: 4,
|
||||
base_width: 32,
|
||||
num_experts: 2,
|
||||
diffusion_steps: 3,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Create configuration for large collections
|
||||
pub fn large() -> Self {
|
||||
Self {
|
||||
dimension: 128,
|
||||
k_neighbors: 16,
|
||||
base_width: 128,
|
||||
num_experts: 8,
|
||||
moe_top_k: 3,
|
||||
diffusion_steps: 10,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Validate configuration
|
||||
pub fn validate(&self) -> Result<(), String> {
|
||||
if self.dimension == 0 {
|
||||
return Err("dimension must be positive".to_string());
|
||||
}
|
||||
if self.temperature <= 0.0 {
|
||||
return Err("temperature must be positive".to_string());
|
||||
}
|
||||
if self.stable_threshold <= self.freeze_threshold {
|
||||
return Err("stable_threshold must be greater than freeze_threshold".to_string());
|
||||
}
|
||||
if self.num_experts == 0 {
|
||||
return Err("num_experts must be positive".to_string());
|
||||
}
|
||||
if self.moe_top_k > self.num_experts {
|
||||
return Err("moe_top_k cannot exceed num_experts".to_string());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get width reduction factor for cautious mode
|
||||
pub fn cautious_width_factor(&self) -> f32 {
|
||||
0.5
|
||||
}
|
||||
|
||||
/// Get width for given coherence score
|
||||
pub fn width_for_coherence(&self, coherence: f32) -> usize {
|
||||
if coherence >= self.stable_threshold {
|
||||
self.base_width
|
||||
} else if coherence >= self.freeze_threshold {
|
||||
((self.base_width as f32) * self.cautious_width_factor()) as usize
|
||||
} else {
|
||||
1 // Freeze mode: single element
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Attention mode based on coherence state.
///
/// Selected by comparing a coherence score against the configured
/// `stable_threshold` / `freeze_threshold` (see `from_coherence`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AttentionMode {
    /// Full attention, normal updates (coherence >= stable_threshold)
    Stable,
    /// Reduced width, increased sparsity (between the two thresholds)
    Cautious,
    /// Retrieval only, no updates (coherence < freeze_threshold)
    Freeze,
}
|
||||
|
||||
impl AttentionMode {
|
||||
/// Determine mode from coherence score
|
||||
pub fn from_coherence(coherence: f32, config: &AttentionCoherenceConfig) -> Self {
|
||||
if coherence >= config.stable_threshold {
|
||||
Self::Stable
|
||||
} else if coherence >= config.freeze_threshold {
|
||||
Self::Cautious
|
||||
} else {
|
||||
Self::Freeze
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if updates are allowed
|
||||
pub fn allows_updates(&self) -> bool {
|
||||
matches!(self, Self::Stable | Self::Cautious)
|
||||
}
|
||||
|
||||
/// Get name
|
||||
pub fn name(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Stable => "stable",
|
||||
Self::Cautious => "cautious",
|
||||
Self::Freeze => "freeze",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for AttentionMode {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.name())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // The defaults must satisfy their own invariants.
    #[test]
    fn test_default_config() {
        let config = AttentionCoherenceConfig::default();
        assert!(config.validate().is_ok());
    }

    // Thresholds are 0.7 / 0.3 by default: 0.8 -> Stable, 0.5 -> Cautious,
    // 0.2 -> Freeze.
    #[test]
    fn test_mode_from_coherence() {
        let config = AttentionCoherenceConfig::default();

        assert_eq!(
            AttentionMode::from_coherence(0.8, &config),
            AttentionMode::Stable
        );
        assert_eq!(
            AttentionMode::from_coherence(0.5, &config),
            AttentionMode::Cautious
        );
        assert_eq!(
            AttentionMode::from_coherence(0.2, &config),
            AttentionMode::Freeze
        );
    }

    // Full width when stable, halved when cautious, 1 when frozen.
    #[test]
    fn test_width_for_coherence() {
        let config = AttentionCoherenceConfig {
            base_width: 64,
            stable_threshold: 0.7,
            freeze_threshold: 0.3,
            ..Default::default()
        };

        assert_eq!(config.width_for_coherence(0.8), 64);
        assert_eq!(config.width_for_coherence(0.5), 32);
        assert_eq!(config.width_for_coherence(0.2), 1);
    }

    // Inverted thresholds must be rejected by validate().
    #[test]
    fn test_invalid_config() {
        let config = AttentionCoherenceConfig {
            stable_threshold: 0.3,
            freeze_threshold: 0.7, // Invalid: freeze > stable
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }
}
|
||||
336
vendor/ruvector/crates/prime-radiant/src/attention/diffusion.rs
vendored
Normal file
336
vendor/ruvector/crates/prime-radiant/src/attention/diffusion.rs
vendored
Normal file
@@ -0,0 +1,336 @@
|
||||
//! PDE Diffusion-Based Energy Smoothing
|
||||
//!
|
||||
//! Applies heat diffusion to smooth energy across the coherence graph.
|
||||
|
||||
use super::{AttentionCoherenceConfig, AttentionError, Result};
|
||||
|
||||
/// Result of diffusion smoothing.
///
/// Produced by `DiffusionSmoothing::smooth`; captures per-node and
/// per-edge energies after smoothing plus the totals before/after so
/// callers can judge how much energy was redistributed.
#[derive(Debug, Clone)]
pub struct SmoothedEnergy {
    /// Node energies after smoothing
    pub node_energies: Vec<f32>,
    /// Edge energies after smoothing, as (source, target, energy)
    pub edge_energies: Vec<(usize, usize, f32)>,
    /// Total energy before smoothing
    pub initial_total: f32,
    /// Total energy after smoothing
    pub final_total: f32,
    /// Number of diffusion steps applied
    pub steps_applied: usize,
    /// Convergence achieved (change or total-energy drift below tolerance)
    pub converged: bool,
}
|
||||
|
||||
impl SmoothedEnergy {
|
||||
/// Get energy ratio (final/initial)
|
||||
pub fn energy_ratio(&self) -> f32 {
|
||||
if self.initial_total > 0.0 {
|
||||
self.final_total / self.initial_total
|
||||
} else {
|
||||
1.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if energy was reduced
|
||||
pub fn energy_reduced(&self) -> bool {
|
||||
self.final_total < self.initial_total
|
||||
}
|
||||
|
||||
/// Get smoothing factor
|
||||
pub fn smoothing_factor(&self) -> f32 {
|
||||
1.0 - self.energy_ratio()
|
||||
}
|
||||
}
|
||||
|
||||
/// PDE diffusion smoother for energy propagation
///
/// Uses heat diffusion equation to smooth energy across the graph,
/// reducing sharp energy gradients while preserving total energy.
#[derive(Debug)]
pub struct DiffusionSmoothing {
    /// Configuration (reads diffusion_time, diffusion_steps, diffusion_sigma)
    config: AttentionCoherenceConfig,
}
|
||||
|
||||
impl DiffusionSmoothing {
|
||||
/// Create a new diffusion smoother
|
||||
pub fn new(config: AttentionCoherenceConfig) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
|
||||
/// Apply diffusion smoothing to edge energies
|
||||
///
|
||||
/// Uses the graph Laplacian to diffuse energy from high-energy
|
||||
/// regions to low-energy regions.
|
||||
pub fn smooth(
|
||||
&self,
|
||||
edge_energies: &[(usize, usize, f32)],
|
||||
node_states: &[&[f32]],
|
||||
steps: usize,
|
||||
) -> Result<SmoothedEnergy> {
|
||||
if edge_energies.is_empty() {
|
||||
return Ok(SmoothedEnergy {
|
||||
node_energies: vec![],
|
||||
edge_energies: vec![],
|
||||
initial_total: 0.0,
|
||||
final_total: 0.0,
|
||||
steps_applied: 0,
|
||||
converged: true,
|
||||
});
|
||||
}
|
||||
|
||||
let n = node_states.len();
|
||||
if n == 0 {
|
||||
return Err(AttentionError::EmptyInput("node_states".to_string()));
|
||||
}
|
||||
|
||||
// Build adjacency and compute initial node energies
|
||||
let (adjacency, mut node_energies) = self.build_graph(edge_energies, n);
|
||||
|
||||
let initial_total: f32 = node_energies.iter().sum();
|
||||
|
||||
// Build Laplacian-like diffusion kernel
|
||||
let kernel = self.build_diffusion_kernel(&adjacency, node_states, n);
|
||||
|
||||
// Apply diffusion steps
|
||||
let actual_steps = steps.min(self.config.diffusion_steps);
|
||||
let dt = self.config.diffusion_time / actual_steps.max(1) as f32;
|
||||
|
||||
let mut converged = false;
|
||||
for step in 0..actual_steps {
|
||||
let prev_energies = node_energies.clone();
|
||||
|
||||
// Diffusion step: e_new = e_old + dt * L * e_old
|
||||
node_energies = self.diffusion_step(&node_energies, &kernel, dt);
|
||||
|
||||
// Check convergence
|
||||
let change: f32 = node_energies
|
||||
.iter()
|
||||
.zip(prev_energies.iter())
|
||||
.map(|(a, b)| (a - b).abs())
|
||||
.sum();
|
||||
|
||||
if change < 1e-6 {
|
||||
converged = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// Early termination if energy is stable
|
||||
if step > 2 {
|
||||
let current_total: f32 = node_energies.iter().sum();
|
||||
if (current_total - initial_total).abs() / initial_total.max(1e-10) < 1e-4 {
|
||||
converged = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reconstruct edge energies from smoothed node energies
|
||||
let smoothed_edges = self.reconstruct_edge_energies(edge_energies, &node_energies);
|
||||
|
||||
let final_total: f32 = node_energies.iter().sum();
|
||||
|
||||
Ok(SmoothedEnergy {
|
||||
node_energies,
|
||||
edge_energies: smoothed_edges,
|
||||
initial_total,
|
||||
final_total,
|
||||
steps_applied: actual_steps,
|
||||
converged,
|
||||
})
|
||||
}
|
||||
|
||||
/// Build graph from edge energies
|
||||
fn build_graph(
|
||||
&self,
|
||||
edge_energies: &[(usize, usize, f32)],
|
||||
n: usize,
|
||||
) -> (Vec<Vec<(usize, f32)>>, Vec<f32>) {
|
||||
let mut adjacency: Vec<Vec<(usize, f32)>> = vec![vec![]; n];
|
||||
let mut node_energies = vec![0.0f32; n];
|
||||
|
||||
for &(src, dst, energy) in edge_energies {
|
||||
if src < n && dst < n {
|
||||
adjacency[src].push((dst, energy));
|
||||
adjacency[dst].push((src, energy));
|
||||
|
||||
// Distribute edge energy to nodes
|
||||
node_energies[src] += energy / 2.0;
|
||||
node_energies[dst] += energy / 2.0;
|
||||
}
|
||||
}
|
||||
|
||||
(adjacency, node_energies)
|
||||
}
|
||||
|
||||
/// Build diffusion kernel based on graph structure
|
||||
fn build_diffusion_kernel(
|
||||
&self,
|
||||
adjacency: &[Vec<(usize, f32)>],
|
||||
node_states: &[&[f32]],
|
||||
n: usize,
|
||||
) -> Vec<Vec<f32>> {
|
||||
let sigma_sq = self.config.diffusion_sigma * self.config.diffusion_sigma;
|
||||
|
||||
let mut kernel = vec![vec![0.0f32; n]; n];
|
||||
|
||||
for i in 0..n {
|
||||
let degree = adjacency[i].len() as f32;
|
||||
|
||||
for &(j, _edge_weight) in &adjacency[i] {
|
||||
// Compute similarity-based weight
|
||||
let sim = self.cosine_similarity(node_states[i], node_states[j]);
|
||||
let weight = (sim / sigma_sq).exp();
|
||||
|
||||
kernel[i][j] = weight;
|
||||
}
|
||||
|
||||
// Diagonal: negative sum of off-diagonals (Laplacian property)
|
||||
let row_sum: f32 = kernel[i].iter().sum();
|
||||
kernel[i][i] = -row_sum;
|
||||
|
||||
// Normalize by degree for stability
|
||||
if degree > 0.0 {
|
||||
for k in 0..n {
|
||||
kernel[i][k] /= degree;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kernel
|
||||
}
|
||||
|
||||
/// Perform one diffusion step
|
||||
fn diffusion_step(&self, energies: &[f32], kernel: &[Vec<f32>], dt: f32) -> Vec<f32> {
|
||||
let n = energies.len();
|
||||
let mut new_energies = vec![0.0f32; n];
|
||||
|
||||
for i in 0..n {
|
||||
// e_new[i] = e[i] + dt * sum_j(K[i][j] * e[j])
|
||||
let diffusion: f32 = kernel[i]
|
||||
.iter()
|
||||
.zip(energies.iter())
|
||||
.map(|(&k, &e)| k * e)
|
||||
.sum();
|
||||
|
||||
new_energies[i] = (energies[i] + dt * diffusion).max(0.0);
|
||||
}
|
||||
|
||||
new_energies
|
||||
}
|
||||
|
||||
/// Reconstruct edge energies from smoothed node energies
|
||||
fn reconstruct_edge_energies(
|
||||
&self,
|
||||
original_edges: &[(usize, usize, f32)],
|
||||
node_energies: &[f32],
|
||||
) -> Vec<(usize, usize, f32)> {
|
||||
original_edges
|
||||
.iter()
|
||||
.map(|&(src, dst, original)| {
|
||||
let src_energy = node_energies.get(src).copied().unwrap_or(0.0);
|
||||
let dst_energy = node_energies.get(dst).copied().unwrap_or(0.0);
|
||||
|
||||
// New edge energy is average of endpoint node energies
|
||||
// scaled by original proportion
|
||||
let avg_node_energy = (src_energy + dst_energy) / 2.0;
|
||||
|
||||
// Blend original and smoothed
|
||||
let alpha = 0.5; // Smoothing blend factor
|
||||
let smoothed = alpha * avg_node_energy + (1.0 - alpha) * original;
|
||||
|
||||
(src, dst, smoothed.max(0.0))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
|
||||
let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
|
||||
let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
|
||||
if norm_a < 1e-10 || norm_b < 1e-10 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
(dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // With no edges, smoothing is a no-op that reports convergence.
    #[test]
    fn test_empty_input() {
        let config = AttentionCoherenceConfig::default();
        let smoother = DiffusionSmoothing::new(config);

        let result = smoother.smooth(&[], &[], 5).unwrap();
        assert!(result.converged);
        assert_eq!(result.initial_total, 0.0);
    }

    // A 4-node path graph: one smoothed energy per input edge, and the
    // step count stays within the configured budget.
    #[test]
    fn test_basic_smoothing() {
        let config = AttentionCoherenceConfig {
            diffusion_time: 1.0,
            diffusion_steps: 10,
            diffusion_sigma: 1.0,
            ..Default::default()
        };
        let smoother = DiffusionSmoothing::new(config);

        let states: Vec<Vec<f32>> = (0..4).map(|i| vec![0.1 * (i + 1) as f32; 8]).collect();
        let state_refs: Vec<&[f32]> = states.iter().map(|s| s.as_slice()).collect();

        let edges = vec![(0, 1, 1.0), (1, 2, 2.0), (2, 3, 0.5)];

        let result = smoother.smooth(&edges, &state_refs, 5).unwrap();

        assert_eq!(result.edge_energies.len(), 3);
        assert!(result.steps_applied <= 10);
    }

    // Diffusion redistributes energy but should not wildly change the total.
    #[test]
    fn test_energy_conservation() {
        let config = AttentionCoherenceConfig {
            diffusion_time: 0.5,
            diffusion_steps: 5,
            diffusion_sigma: 1.0,
            ..Default::default()
        };
        let smoother = DiffusionSmoothing::new(config);

        let states: Vec<Vec<f32>> = (0..3).map(|_| vec![1.0; 4]).collect();
        let state_refs: Vec<&[f32]> = states.iter().map(|s| s.as_slice()).collect();

        let edges = vec![(0, 1, 1.0), (1, 2, 1.0)];

        let result = smoother.smooth(&edges, &state_refs, 3).unwrap();

        // Energy should be roughly conserved (within tolerance)
        let ratio = result.energy_ratio();
        assert!(
            ratio > 0.5 && ratio < 2.0,
            "Energy ratio {} out of expected range",
            ratio
        );
    }

    // Accessor arithmetic on a hand-built result: ratio 1.0/2.0 = 0.5.
    #[test]
    fn test_smoothed_energy_methods() {
        let smoothed = SmoothedEnergy {
            node_energies: vec![0.5, 0.5],
            edge_energies: vec![(0, 1, 0.8)],
            initial_total: 2.0,
            final_total: 1.0,
            steps_applied: 5,
            converged: true,
        };

        assert_eq!(smoothed.energy_ratio(), 0.5);
        assert!(smoothed.energy_reduced());
        assert_eq!(smoothed.smoothing_factor(), 0.5);
    }
}
|
||||
404
vendor/ruvector/crates/prime-radiant/src/attention/mod.rs
vendored
Normal file
404
vendor/ruvector/crates/prime-radiant/src/attention/mod.rs
vendored
Normal file
@@ -0,0 +1,404 @@
|
||||
//! Attention-Weighted Residuals Module
|
||||
//!
|
||||
//! Computes attention-weighted coherence using multiple mechanisms:
|
||||
//! - Topology-gated attention (structural coherence as permission signal)
|
||||
//! - Mixture of Experts (specialized residual processing)
|
||||
//! - PDE diffusion (smooth energy propagation)
|
||||
//!
|
||||
//! Leverages `ruvector-attention` for the underlying attention implementations.
|
||||
//!
|
||||
//! # Features
|
||||
//!
|
||||
//! - Three attention modes: Stable, Cautious, Freeze
|
||||
//! - MoE routing for specialized residual experts
|
||||
//! - Diffusion-based energy smoothing
|
||||
//! - Attention score computation for residual weighting
|
||||
|
||||
mod adapter;
|
||||
mod config;
|
||||
mod diffusion;
|
||||
mod moe;
|
||||
mod topology;
|
||||
|
||||
pub use adapter::AttentionAdapter;
|
||||
pub use config::AttentionCoherenceConfig;
|
||||
pub use diffusion::{DiffusionSmoothing, SmoothedEnergy};
|
||||
pub use moe::{ExpertRouting, MoEResidualProcessor};
|
||||
pub use topology::{AttentionScore, TopologyGate, TopologyGateResult};
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Node identifier type
|
||||
pub type NodeId = u64;
|
||||
|
||||
/// Edge identifier type
|
||||
pub type EdgeId = (NodeId, NodeId);
|
||||
|
||||
/// Result type for attention operations
|
||||
pub type Result<T> = std::result::Result<T, AttentionError>;
|
||||
|
||||
/// Errors in attention-weighted coherence computation.
///
/// Display text comes from the `thiserror` `#[error]` attributes.
#[derive(Debug, Clone, thiserror::Error)]
pub enum AttentionError {
    /// Invalid dimension (state vectors of differing lengths)
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch {
        /// Dimension the operation expected
        expected: usize,
        /// Dimension actually received
        actual: usize,
    },

    /// Empty input; the payload names the offending argument
    #[error("Empty input: {0}")]
    EmptyInput(String),

    /// Invalid configuration
    #[error("Invalid configuration: {0}")]
    InvalidConfig(String),

    /// Computation failed
    #[error("Computation failed: {0}")]
    ComputationFailed(String),

    /// Mode not supported
    #[error("Mode not supported in current state: {0}")]
    ModeNotSupported(String),
}
|
||||
|
||||
/// Main attention-weighted coherence engine
///
/// Combines topology-gated attention, MoE routing, and PDE diffusion
/// to compute attention-weighted residuals for coherence analysis.
///
/// Each component receives its own clone of the shared configuration
/// at construction time.
#[derive(Debug)]
pub struct AttentionCoherence {
    /// Configuration
    config: AttentionCoherenceConfig,
    /// Adapter to attention implementations
    adapter: AttentionAdapter,
    /// Topology gate (mutated when coherence is recomputed)
    topo_gate: TopologyGate,
    /// MoE residual processor
    moe: MoEResidualProcessor,
    /// Diffusion smoother
    diffusion: DiffusionSmoothing,
}
|
||||
|
||||
impl AttentionCoherence {
|
||||
/// Create a new attention coherence engine
|
||||
pub fn new(config: AttentionCoherenceConfig) -> Self {
|
||||
let adapter = AttentionAdapter::new(config.clone());
|
||||
let topo_gate = TopologyGate::new(config.clone());
|
||||
let moe = MoEResidualProcessor::new(config.clone());
|
||||
let diffusion = DiffusionSmoothing::new(config.clone());
|
||||
|
||||
Self {
|
||||
config,
|
||||
adapter,
|
||||
topo_gate,
|
||||
moe,
|
||||
diffusion,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create with default configuration
|
||||
pub fn default_config() -> Self {
|
||||
Self::new(AttentionCoherenceConfig::default())
|
||||
}
|
||||
|
||||
/// Compute attention scores for nodes
|
||||
///
|
||||
/// Returns attention scores indicating structural importance.
|
||||
pub fn compute_attention_scores(
|
||||
&mut self,
|
||||
node_states: &[&[f32]],
|
||||
) -> Result<HashMap<usize, f32>> {
|
||||
if node_states.is_empty() {
|
||||
return Err(AttentionError::EmptyInput("node_states".to_string()));
|
||||
}
|
||||
|
||||
// Update topology gate coherence
|
||||
self.topo_gate.update_coherence(node_states);
|
||||
|
||||
// Compute scores using adapter
|
||||
let scores = self.adapter.compute_scores(node_states)?;
|
||||
|
||||
// Convert to hashmap
|
||||
Ok(scores
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(i, s)| (i, s))
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Compute attention-weighted residuals
|
||||
///
|
||||
/// Weights each edge residual by the attention scores of its endpoints.
|
||||
pub fn weighted_residuals(
|
||||
&mut self,
|
||||
node_states: &[&[f32]],
|
||||
edge_residuals: &[(usize, usize, Vec<f32>)], // (source_idx, target_idx, residual)
|
||||
) -> Result<Vec<WeightedEdgeResidual>> {
|
||||
if node_states.is_empty() {
|
||||
return Err(AttentionError::EmptyInput("node_states".to_string()));
|
||||
}
|
||||
|
||||
// Compute attention scores
|
||||
let scores = self.compute_attention_scores(node_states)?;
|
||||
|
||||
// Weight residuals
|
||||
let mut weighted = Vec::with_capacity(edge_residuals.len());
|
||||
|
||||
for (source, target, residual) in edge_residuals {
|
||||
let source_score = scores.get(source).copied().unwrap_or(1.0);
|
||||
let target_score = scores.get(target).copied().unwrap_or(1.0);
|
||||
|
||||
// Average attention weight
|
||||
let attention_weight = (source_score + target_score) / 2.0;
|
||||
|
||||
// Residual norm squared
|
||||
let residual_norm_sq: f32 = residual.iter().map(|x| x * x).sum();
|
||||
|
||||
// Weighted energy
|
||||
let weighted_energy = residual_norm_sq * attention_weight;
|
||||
|
||||
weighted.push(WeightedEdgeResidual {
|
||||
source_idx: *source,
|
||||
target_idx: *target,
|
||||
source_attention: source_score,
|
||||
target_attention: target_score,
|
||||
attention_weight,
|
||||
residual_norm_sq,
|
||||
weighted_energy,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(weighted)
|
||||
}
|
||||
|
||||
/// Route residual through MoE experts
///
/// Uses specialized experts for different residual characteristics.
/// Thin delegation to the internal MoE processor's `process` method.
///
/// # Errors
/// Returns a dimension-mismatch error when `residual` does not match the
/// configured dimension (checked inside the processor).
pub fn moe_process_residual(
    &self,
    residual: &[f32],
    context: &[f32],
) -> Result<MoEProcessedResidual> {
    self.moe.process(residual, context)
}
|
||||
|
||||
/// Apply diffusion smoothing to energy values
///
/// Smooths energy across the graph using PDE diffusion.
/// Thin delegation to the internal diffusion smoother; `steps` controls
/// how many diffusion iterations are applied.
pub fn smooth_energy(
    &self,
    edge_energies: &[(usize, usize, f32)], // (source, target, energy)
    node_states: &[&[f32]],
    steps: usize,
) -> Result<SmoothedEnergy> {
    self.diffusion.smooth(edge_energies, node_states, steps)
}
|
||||
|
||||
/// Get current topology gate result
///
/// Snapshot of the gate's coherence, mode, width, and staleness.
/// Does not recompute coherence.
pub fn gate_result(&self) -> TopologyGateResult {
    self.topo_gate.current_result()
}
|
||||
|
||||
/// Check if updates are allowed (not in freeze mode)
///
/// Delegates to the topology gate's current mode.
pub fn allows_updates(&self) -> bool {
    self.topo_gate.allows_updates()
}
|
||||
|
||||
/// Get effective attention width based on current mode
///
/// The gate shrinks the width as coherence drops.
pub fn attention_width(&self) -> usize {
    self.topo_gate.attention_width()
}
|
||||
|
||||
/// Get configuration
///
/// Borrow of the coherence configuration this instance was built with.
pub fn config(&self) -> &AttentionCoherenceConfig {
    &self.config
}
|
||||
|
||||
/// Compute full attention-weighted energy analysis
|
||||
pub fn full_analysis(
|
||||
&mut self,
|
||||
node_states: &[&[f32]],
|
||||
edge_residuals: &[(usize, usize, Vec<f32>)],
|
||||
) -> Result<AttentionEnergyAnalysis> {
|
||||
// Get gate result
|
||||
let gate_result = self.topo_gate.current_result();
|
||||
|
||||
// Compute weighted residuals
|
||||
let weighted = self.weighted_residuals(node_states, edge_residuals)?;
|
||||
|
||||
// Compute energies
|
||||
let edge_energies: Vec<(usize, usize, f32)> = weighted
|
||||
.iter()
|
||||
.map(|w| (w.source_idx, w.target_idx, w.weighted_energy))
|
||||
.collect();
|
||||
|
||||
// Apply diffusion if enabled
|
||||
let smoothed = if self.config.enable_diffusion {
|
||||
Some(self.smooth_energy(&edge_energies, node_states, self.config.diffusion_steps)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Aggregate
|
||||
let total_energy: f32 = weighted.iter().map(|w| w.weighted_energy).sum();
|
||||
let avg_attention: f32 =
|
||||
weighted.iter().map(|w| w.attention_weight).sum::<f32>() / weighted.len().max(1) as f32;
|
||||
|
||||
Ok(AttentionEnergyAnalysis {
|
||||
weighted_residuals: weighted,
|
||||
smoothed_energy: smoothed,
|
||||
total_energy,
|
||||
avg_attention_weight: avg_attention,
|
||||
gate_result,
|
||||
num_edges: edge_residuals.len(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of weighting an edge residual by attention
///
/// Plain value type — every field is a `Copy` scalar — so it derives
/// `Copy` and `PartialEq` for cheap passing and direct comparison in tests.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct WeightedEdgeResidual {
    /// Source node index
    pub source_idx: usize,
    /// Target node index
    pub target_idx: usize,
    /// Attention score of source node
    pub source_attention: f32,
    /// Attention score of target node
    pub target_attention: f32,
    /// Combined attention weight (mean of the endpoint scores)
    pub attention_weight: f32,
    /// Squared norm of residual
    pub residual_norm_sq: f32,
    /// Final weighted energy (`residual_norm_sq * attention_weight`)
    pub weighted_energy: f32,
}
|
||||
|
||||
/// Result of processing a residual through MoE
///
/// Derives `PartialEq` so results can be compared directly in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct MoEProcessedResidual {
    /// Output from expert combination (weighted sum of expert outputs)
    pub output: Vec<f32>,
    /// Expert indices that were used
    pub expert_indices: Vec<usize>,
    /// Weights for each expert (parallel to `expert_indices`)
    pub expert_weights: Vec<f32>,
    /// Load balance loss (for training)
    pub load_balance_loss: f32,
}
|
||||
|
||||
/// Complete attention energy analysis
///
/// Produced by `full_analysis`: per-edge weighted residuals plus aggregate
/// statistics and the gate snapshot taken before the analysis refreshed
/// coherence.
#[derive(Debug, Clone)]
pub struct AttentionEnergyAnalysis {
    /// All weighted residuals
    pub weighted_residuals: Vec<WeightedEdgeResidual>,
    /// Smoothed energy (if diffusion enabled; `None` otherwise)
    pub smoothed_energy: Option<SmoothedEnergy>,
    /// Total weighted energy (sum over all edges)
    pub total_energy: f32,
    /// Average attention weight (0.0 when no edges were analyzed)
    pub avg_attention_weight: f32,
    /// Current gate result
    pub gate_result: TopologyGateResult,
    /// Number of edges analyzed
    pub num_edges: usize,
}
|
||||
|
||||
impl AttentionEnergyAnalysis {
|
||||
/// Check if coherent (energy below threshold)
|
||||
pub fn is_coherent(&self, threshold: f32) -> bool {
|
||||
self.total_energy < threshold
|
||||
}
|
||||
|
||||
/// Get highest energy edge
|
||||
pub fn highest_energy_edge(&self) -> Option<&WeightedEdgeResidual> {
|
||||
self.weighted_residuals
|
||||
.iter()
|
||||
.max_by(|a, b| a.weighted_energy.partial_cmp(&b.weighted_energy).unwrap())
|
||||
}
|
||||
|
||||
/// Get edges above threshold
|
||||
pub fn edges_above_threshold(&self, threshold: f32) -> Vec<&WeightedEdgeResidual> {
|
||||
self.weighted_residuals
|
||||
.iter()
|
||||
.filter(|r| r.weighted_energy > threshold)
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Build `n` states of width `dim`; state `i` is the constant vector
    /// `0.1 * (i + 1)`, so states differ in magnitude but not direction.
    fn make_states(n: usize, dim: usize) -> Vec<Vec<f32>> {
        (0..n).map(|i| vec![0.1 * (i + 1) as f32; dim]).collect()
    }

    #[test]
    fn test_basic_coherence() {
        let config = AttentionCoherenceConfig {
            dimension: 16,
            ..Default::default()
        };
        let mut coherence = AttentionCoherence::new(config);

        let states = make_states(5, 16);
        let state_refs: Vec<&[f32]> = states.iter().map(|s| s.as_slice()).collect();

        let scores = coherence.compute_attention_scores(&state_refs).unwrap();

        // One score per node, each inside [0, 1].
        assert_eq!(scores.len(), 5);
        for (_, &score) in &scores {
            assert!(score >= 0.0 && score <= 1.0);
        }
    }

    #[test]
    fn test_weighted_residuals() {
        let config = AttentionCoherenceConfig {
            dimension: 8,
            ..Default::default()
        };
        let mut coherence = AttentionCoherence::new(config);

        let states = make_states(4, 8);
        let state_refs: Vec<&[f32]> = states.iter().map(|s| s.as_slice()).collect();

        // A simple 0-1-2-3 chain of edges with growing residual magnitude.
        let residuals = vec![
            (0, 1, vec![0.1f32; 8]),
            (1, 2, vec![0.2f32; 8]),
            (2, 3, vec![0.3f32; 8]),
        ];

        let weighted = coherence
            .weighted_residuals(&state_refs, &residuals)
            .unwrap();

        // Energies are squared norms times positive attention weights.
        assert_eq!(weighted.len(), 3);
        for w in &weighted {
            assert!(w.weighted_energy >= 0.0);
            assert!(w.attention_weight > 0.0);
        }
    }

    #[test]
    fn test_full_analysis() {
        // Diffusion disabled so `smoothed_energy` stays None.
        let config = AttentionCoherenceConfig {
            dimension: 8,
            enable_diffusion: false,
            ..Default::default()
        };
        let mut coherence = AttentionCoherence::new(config);

        let states = make_states(3, 8);
        let state_refs: Vec<&[f32]> = states.iter().map(|s| s.as_slice()).collect();

        let residuals = vec![(0, 1, vec![0.1f32; 8]), (1, 2, vec![0.2f32; 8])];

        let analysis = coherence.full_analysis(&state_refs, &residuals).unwrap();

        assert_eq!(analysis.num_edges, 2);
        assert!(analysis.total_energy >= 0.0);
        assert!(analysis.avg_attention_weight > 0.0);
    }
}
|
||||
360
vendor/ruvector/crates/prime-radiant/src/attention/moe.rs
vendored
Normal file
360
vendor/ruvector/crates/prime-radiant/src/attention/moe.rs
vendored
Normal file
@@ -0,0 +1,360 @@
|
||||
//! Mixture of Experts Residual Processing
|
||||
//!
|
||||
//! Specialized expert routing for different residual characteristics.
|
||||
|
||||
use super::{AttentionCoherenceConfig, AttentionError, MoEProcessedResidual, Result};
|
||||
|
||||
/// Expert routing decision
///
/// Derives `PartialEq` so routings can be compared in tests. The
/// `expert_indices` and `weights` vectors are parallel.
#[derive(Debug, Clone, PartialEq)]
pub struct ExpertRouting {
    /// Selected expert indices
    pub expert_indices: Vec<usize>,
    /// Weights for each selected expert (parallel to `expert_indices`)
    pub weights: Vec<f32>,
    /// Router logits (before top-k selection), one per expert
    pub router_logits: Vec<f32>,
}

impl ExpertRouting {
    /// Check if a specific expert was selected
    pub fn contains_expert(&self, idx: usize) -> bool {
        self.expert_indices.contains(&idx)
    }

    /// Get weight for a specific expert (0 if not selected)
    ///
    /// Pairs indices with weights via `zip`, so a malformed routing with
    /// mismatched vector lengths yields 0.0 instead of panicking (the
    /// previous `position` + direct index could index out of bounds).
    pub fn weight_for(&self, idx: usize) -> f32 {
        self.expert_indices
            .iter()
            .zip(self.weights.iter())
            .find(|(&i, _)| i == idx)
            .map(|(_, &w)| w)
            .unwrap_or(0.0)
    }
}
|
||||
|
||||
/// Mixture of Experts residual processor
///
/// Routes residuals to specialized experts based on their characteristics.
/// Each expert specializes in different types of residuals.
#[derive(Debug)]
pub struct MoEResidualProcessor {
    /// Configuration (dimension, number of experts, top-k)
    config: AttentionCoherenceConfig,
    /// Expert parameters (weights for each expert), `num_experts` entries
    experts: Vec<ExpertParams>,
    /// Router parameters
    router: RouterParams,
}
|
||||
|
||||
/// Parameters for a single expert
///
/// An expert is an affine map `W * x + bias` applied to the residual.
#[derive(Debug, Clone)]
struct ExpertParams {
    /// Linear transformation weights (dim x dim), row-major
    weights: Vec<Vec<f32>>,
    /// Bias vector
    bias: Vec<f32>,
    /// Expert specialization (for interpretability)
    // NOTE(review): assigned at construction but not read by the visible
    // routing/processing code — informational only for now.
    specialization: ExpertSpecialization,
}
|
||||
|
||||
/// Type of expert specialization
///
/// Labels describing what kind of residual each expert is intended to
/// handle; not consulted by the routing logic visible in this module.
#[derive(Debug, Clone, Copy)]
enum ExpertSpecialization {
    /// High-magnitude residuals
    HighMagnitude,
    /// Low-magnitude residuals
    LowMagnitude,
    /// Sparse residuals
    Sparse,
    /// Dense residuals
    Dense,
}
|
||||
|
||||
/// Router parameters
#[derive(Debug, Clone)]
struct RouterParams {
    /// Router weights (num_experts x dim); logit_i = weights[i] . input
    weights: Vec<Vec<f32>>,
    /// Noise scale for exploration
    // Initialized to 0.0 and not consumed by the visible routing code —
    // presumably reserved for jittered routing during training; confirm.
    jitter_noise: f32,
}
|
||||
|
||||
impl MoEResidualProcessor {
    /// Create a new MoE processor
    ///
    /// Experts start as near-identity linear maps with a per-expert gain so
    /// they are distinguishable from the start; the router is initialized so
    /// each expert is most sensitive to a distinct contiguous slice of the
    /// input dimensions.
    pub fn new(config: AttentionCoherenceConfig) -> Self {
        let num_experts = config.num_experts;
        let dim = config.dimension;

        // Initialize experts with different specializations
        // (labels cycle when num_experts exceeds the list length).
        let specializations = [
            ExpertSpecialization::HighMagnitude,
            ExpertSpecialization::LowMagnitude,
            ExpertSpecialization::Sparse,
            ExpertSpecialization::Dense,
        ];

        let experts: Vec<ExpertParams> = (0..num_experts)
            .map(|i| {
                // Initialize with identity-like transformation: a diagonal
                // matrix whose gain varies with the expert index.
                let weights: Vec<Vec<f32>> = (0..dim)
                    .map(|j| {
                        let mut row = vec![0.0f32; dim];
                        row[j] = 1.0 + 0.1 * (i as f32 - num_experts as f32 / 2.0);
                        row
                    })
                    .collect();

                ExpertParams {
                    weights,
                    bias: vec![0.0; dim],
                    specialization: specializations[i % specializations.len()],
                }
            })
            .collect();

        // Initialize router
        let router_weights: Vec<Vec<f32>> = (0..num_experts)
            .map(|i| {
                // Different experts respond to different features
                let mut row = vec![0.1f32; dim];
                // Make each expert sensitive to different dimensions
                // NOTE(review): assumes dim >= 1 — `dim - 1` underflows for a
                // zero-dimension config; confirm validation happens upstream.
                let start = (i * dim / num_experts).min(dim - 1);
                let end = ((i + 1) * dim / num_experts).min(dim);
                for j in start..end {
                    row[j] = 1.0;
                }
                row
            })
            .collect();

        let router = RouterParams {
            weights: router_weights,
            jitter_noise: 0.0,
        };

        Self {
            config,
            experts,
            router,
        }
    }

    /// Process a residual through MoE
    ///
    /// Routes `residual` to the top-k experts, blends their outputs by the
    /// softmax routing weights, and reports a load-balance loss.
    ///
    /// # Errors
    /// Returns `AttentionError::DimensionMismatch` when `residual` does not
    /// match the configured dimension.
    pub fn process(&self, residual: &[f32], context: &[f32]) -> Result<MoEProcessedResidual> {
        // Validate dimensions
        if residual.len() != self.config.dimension {
            return Err(AttentionError::DimensionMismatch {
                expected: self.config.dimension,
                actual: residual.len(),
            });
        }

        // Route to experts
        let routing = self.route(residual, context);

        // Process through selected experts: weighted sum of expert outputs.
        let mut output = vec![0.0f32; self.config.dimension];

        for (&expert_idx, &weight) in routing.expert_indices.iter().zip(routing.weights.iter()) {
            let expert_output = self.apply_expert(expert_idx, residual);
            for (o, e) in output.iter_mut().zip(expert_output.iter()) {
                *o += weight * e;
            }
        }

        // Compute load balance loss
        let load_balance_loss = self.compute_load_balance_loss(&routing);

        Ok(MoEProcessedResidual {
            output,
            expert_indices: routing.expert_indices,
            expert_weights: routing.weights,
            load_balance_loss,
        })
    }

    /// Route input to experts
    ///
    /// Computes one logit per expert, keeps the top-k, and softmax-normalizes
    /// the kept logits so the returned weights sum to 1.
    /// `_context` is currently unused by the routing decision.
    pub fn route(&self, input: &[f32], _context: &[f32]) -> ExpertRouting {
        // Compute router logits
        let logits: Vec<f32> = self
            .router
            .weights
            .iter()
            .map(|w| self.dot_product(input, w))
            .collect();

        // Top-k selection
        let k = self.config.moe_top_k.min(self.config.num_experts);

        let mut indexed_logits: Vec<(usize, f32)> =
            logits.iter().enumerate().map(|(i, &l)| (i, l)).collect();

        // Sort descending by logit; NaN compares as Equal (never panics).
        indexed_logits.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));

        let top_k: Vec<(usize, f32)> = indexed_logits.into_iter().take(k).collect();

        // Softmax over selected; max-logit subtraction for numerical stability.
        let max_logit = top_k
            .iter()
            .map(|(_, l)| *l)
            .fold(f32::NEG_INFINITY, f32::max);
        let exp_sum: f32 = top_k.iter().map(|(_, l)| (l - max_logit).exp()).sum();

        let expert_indices: Vec<usize> = top_k.iter().map(|(i, _)| *i).collect();
        let weights: Vec<f32> = top_k
            .iter()
            .map(|(_, l)| (l - max_logit).exp() / exp_sum)
            .collect();

        ExpertRouting {
            expert_indices,
            weights,
            router_logits: logits,
        }
    }

    /// Apply a single expert's affine transform: `W * input + bias`.
    fn apply_expert(&self, expert_idx: usize, input: &[f32]) -> Vec<f32> {
        let expert = &self.experts[expert_idx];
        let dim = input.len();

        let mut output = expert.bias.clone();

        // Matrix-vector multiply; the bounds guards tolerate an input whose
        // length differs from the expert's stored dimension.
        for (i, w_row) in expert.weights.iter().enumerate() {
            if i < dim {
                for (j, &x) in input.iter().enumerate() {
                    if j < w_row.len() {
                        output[i] += w_row[j] * x;
                    }
                }
            }
        }

        output
    }

    /// Compute load balance loss
    ///
    /// Squared L2 deviation of the per-expert weight mass from the uniform
    /// 1/num_experts distribution; 0 means perfectly balanced.
    fn compute_load_balance_loss(&self, routing: &ExpertRouting) -> f32 {
        // Count how many times each expert is used
        let mut usage = vec![0.0f32; self.config.num_experts];
        for (&idx, &weight) in routing.expert_indices.iter().zip(routing.weights.iter()) {
            usage[idx] += weight;
        }

        // Ideal uniform distribution
        let ideal = 1.0 / self.config.num_experts as f32;

        // L2 deviation from uniform
        usage.iter().map(|&u| (u - ideal).powi(2)).sum::<f32>()
    }

    /// Plain dot product of two equal-length slices.
    fn dot_product(&self, a: &[f32], b: &[f32]) -> f32 {
        a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
    }

    /// Get expert statistics
    ///
    /// Aggregates routing weights over many routings and normalizes them to
    /// a distribution over experts (all zeros when `routings` is empty).
    pub fn expert_usage(&self, routings: &[ExpertRouting]) -> Vec<f32> {
        let mut usage = vec![0.0f32; self.config.num_experts];

        for routing in routings {
            for (&idx, &weight) in routing.expert_indices.iter().zip(routing.weights.iter()) {
                usage[idx] += weight;
            }
        }

        // Normalize
        let total: f32 = usage.iter().sum();
        if total > 0.0 {
            for u in usage.iter_mut() {
                *u /= total;
            }
        }

        usage
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_moe_creation() {
        let config = AttentionCoherenceConfig {
            dimension: 16,
            num_experts: 4,
            moe_top_k: 2,
            ..Default::default()
        };
        let moe = MoEResidualProcessor::new(config);

        // One parameter set per configured expert.
        assert_eq!(moe.experts.len(), 4);
    }

    #[test]
    fn test_routing() {
        let config = AttentionCoherenceConfig {
            dimension: 8,
            num_experts: 4,
            moe_top_k: 2,
            ..Default::default()
        };
        let moe = MoEResidualProcessor::new(config);

        let input = vec![0.5f32; 8];
        let context = vec![0.1f32; 8];

        let routing = moe.route(&input, &context);

        // Top-k selection keeps exactly k experts.
        assert_eq!(routing.expert_indices.len(), 2);
        assert_eq!(routing.weights.len(), 2);

        // Weights should sum to approximately 1
        let sum: f32 = routing.weights.iter().sum();
        assert!((sum - 1.0).abs() < 0.01);
    }

    #[test]
    fn test_process() {
        let config = AttentionCoherenceConfig {
            dimension: 8,
            num_experts: 4,
            moe_top_k: 2,
            ..Default::default()
        };
        let moe = MoEResidualProcessor::new(config);

        let residual = vec![0.1f32; 8];
        let context = vec![0.1f32; 8];

        let result = moe.process(&residual, &context).unwrap();

        // Output preserves the input dimension; k experts contribute.
        assert_eq!(result.output.len(), 8);
        assert_eq!(result.expert_indices.len(), 2);
        assert!(result.load_balance_loss >= 0.0);
    }

    #[test]
    fn test_expert_usage() {
        let config = AttentionCoherenceConfig {
            dimension: 8,
            num_experts: 4,
            moe_top_k: 2,
            ..Default::default()
        };
        let moe = MoEResidualProcessor::new(config);

        // Ten inputs of increasing magnitude, routed independently.
        let inputs: Vec<Vec<f32>> = (0..10).map(|i| vec![0.1 * (i + 1) as f32; 8]).collect();
        let context = vec![0.1f32; 8];

        let routings: Vec<ExpertRouting> =
            inputs.iter().map(|inp| moe.route(inp, &context)).collect();

        let usage = moe.expert_usage(&routings);

        assert_eq!(usage.len(), 4);
        // Should sum to approximately 1
        let sum: f32 = usage.iter().sum();
        assert!((sum - 1.0).abs() < 0.01);
    }
}
|
||||
381
vendor/ruvector/crates/prime-radiant/src/attention/topology.rs
vendored
Normal file
381
vendor/ruvector/crates/prime-radiant/src/attention/topology.rs
vendored
Normal file
@@ -0,0 +1,381 @@
|
||||
//! Topology-Gated Attention
|
||||
//!
|
||||
//! Uses topological coherence as a permission signal for attention behavior.
|
||||
|
||||
use super::config::AttentionMode;
|
||||
use super::{AttentionCoherenceConfig, AttentionError, Result};
|
||||
|
||||
/// Score from attention computation
///
/// Plain value type — every field is a `Copy` scalar — so it derives
/// `Copy` and `PartialEq` for cheap passing and direct comparison.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct AttentionScore {
    /// Node index
    pub node_idx: usize,
    /// Attention score value
    pub score: f32,
    /// Contribution to coherence
    pub coherence_contribution: f32,
}
|
||||
|
||||
/// Result of topology gate evaluation
///
/// Immutable snapshot of the gate's state at the moment it was queried.
#[derive(Debug, Clone)]
pub struct TopologyGateResult {
    /// Current coherence score
    pub coherence: f32,
    /// Current mode
    pub mode: AttentionMode,
    /// Effective attention width
    pub width: usize,
    /// Whether updates are allowed
    pub allows_updates: bool,
    /// Ticks since last coherence update (staleness indicator)
    pub ticks_since_update: usize,
}
|
||||
|
||||
impl TopologyGateResult {
    /// Create a default result (stable mode)
    ///
    /// Full coherence, the configured base attention width, updates allowed.
    pub fn stable(config: &AttentionCoherenceConfig) -> Self {
        Self {
            coherence: 1.0,
            mode: AttentionMode::Stable,
            width: config.base_width,
            allows_updates: true,
            ticks_since_update: 0,
        }
    }
}
|
||||
|
||||
/// Topology-gated attention controller
///
/// Uses structural coherence as a permission signal for attention behavior:
/// - Stable mode: full attention, normal updates
/// - Cautious mode: reduced width, increased sparsity
/// - Freeze mode: retrieval only, no updates
#[derive(Debug)]
pub struct TopologyGate {
    /// Configuration
    config: AttentionCoherenceConfig,
    /// Current coherence score (clamped to [0, 1] when recomputed)
    coherence: f32,
    /// Current mode (derived from `coherence` and the config thresholds)
    mode: AttentionMode,
    /// Ticks since last coherence update
    ticks_since_update: usize,
    /// Cached coherence metrics (`None` until the first update)
    cached_metrics: Option<CoherenceMetrics>,
}
|
||||
|
||||
impl TopologyGate {
    /// Create a new topology gate
    ///
    /// Starts in stable mode with full coherence until evidence says otherwise.
    pub fn new(config: AttentionCoherenceConfig) -> Self {
        Self {
            coherence: 1.0, // Start optimistic
            mode: AttentionMode::Stable,
            ticks_since_update: 0,
            cached_metrics: None,
            config,
        }
    }

    /// Update coherence from key states
    ///
    /// Recomputes the coherence metrics, derives the attention mode from the
    /// new score, and resets the staleness counter. Empty input is a no-op.
    pub fn update_coherence(&mut self, keys: &[&[f32]]) {
        if keys.is_empty() {
            return;
        }

        let metrics = self.compute_coherence_metrics(keys);
        self.coherence = metrics.coherence_score;
        self.mode = AttentionMode::from_coherence(self.coherence, &self.config);
        self.ticks_since_update = 0;
        self.cached_metrics = Some(metrics);
    }

    /// Tick the coherence counter (one tick per attention step)
    pub fn tick(&mut self) {
        self.ticks_since_update += 1;
    }

    /// Check if coherence update is needed
    ///
    /// True when the update period has elapsed or no metrics were ever cached.
    pub fn needs_update(&self) -> bool {
        self.ticks_since_update >= self.config.coherence_update_period
            || self.cached_metrics.is_none()
    }

    /// Get current mode
    pub fn current_mode(&self) -> AttentionMode {
        self.mode
    }

    /// Get current coherence score
    pub fn current_coherence(&self) -> f32 {
        self.coherence
    }

    /// Check if updates are allowed (delegates to the current mode)
    pub fn allows_updates(&self) -> bool {
        self.mode.allows_updates()
    }

    /// Get effective attention width (config policy applied to coherence)
    pub fn attention_width(&self) -> usize {
        self.config.width_for_coherence(self.coherence)
    }

    /// Get current gate result
    ///
    /// Snapshot of coherence, mode, width, and staleness for callers.
    pub fn current_result(&self) -> TopologyGateResult {
        TopologyGateResult {
            coherence: self.coherence,
            mode: self.mode,
            width: self.attention_width(),
            allows_updates: self.allows_updates(),
            ticks_since_update: self.ticks_since_update,
        }
    }

    /// Compute coherence metrics from keys
    ///
    /// Coherence blends three signals: mean pairwise cosine similarity,
    /// similarity spread (standard deviation), and "boundary mass" — the
    /// average similarity carried by edges to non-k-nearest neighbors.
    fn compute_coherence_metrics(&self, keys: &[&[f32]]) -> CoherenceMetrics {
        if keys.is_empty() {
            return CoherenceMetrics::empty();
        }

        let n = keys.len();
        let k = self.config.k_neighbors.min(n - 1);

        if k == 0 {
            // A single key (or k_neighbors == 0) is trivially coherent.
            return CoherenceMetrics::with_score(1.0);
        }

        // Compute pairwise similarities (self-similarity fixed at 1.0)
        let mut similarities: Vec<Vec<f32>> = Vec::with_capacity(n);
        for i in 0..n {
            let mut row = Vec::with_capacity(n);
            for j in 0..n {
                if i == j {
                    row.push(1.0);
                } else {
                    row.push(self.cosine_similarity(keys[i], keys[j]));
                }
            }
            similarities.push(row);
        }

        // Compute boundary mass (proportion of edges to k nearest neighbors)
        let mut total_boundary_mass = 0.0f32;
        let mut total_edges = 0;

        for i in 0..n {
            // Get k nearest neighbors
            let mut neighbor_sims: Vec<(usize, f32)> = similarities[i]
                .iter()
                .enumerate()
                .filter(|(j, _)| *j != i)
                .map(|(j, &s)| (j, s))
                .collect();

            // Descending by similarity; NaN compares as Equal (never panics).
            neighbor_sims
                .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
            let neighbors: Vec<usize> = neighbor_sims.iter().take(k).map(|(j, _)| *j).collect();

            // Boundary mass: edges to non-neighbors. Negative similarities
            // are clamped to zero so they cannot cancel positive mass.
            for j in 0..n {
                if j != i && !neighbors.contains(&j) {
                    total_boundary_mass += similarities[i][j].max(0.0);
                    total_edges += 1;
                }
            }
        }

        // Compute similarity variance over the strict upper triangle
        // (each unordered pair counted exactly once).
        let all_sims: Vec<f32> = similarities
            .iter()
            .enumerate()
            .flat_map(|(i, row)| {
                row.iter()
                    .enumerate()
                    .filter(move |(j, _)| *j > i)
                    .map(|(_, &s)| s)
            })
            .collect();

        // max(1) guards division when no pairs exist (n == 1; unreachable
        // here because the k == 0 branch already returned).
        let mean_sim: f32 = all_sims.iter().sum::<f32>() / all_sims.len().max(1) as f32;
        let variance: f32 = all_sims.iter().map(|s| (s - mean_sim).powi(2)).sum::<f32>()
            / all_sims.len().max(1) as f32;

        // Coherence score: high similarity, low variance, low boundary mass
        let boundary_ratio = if total_edges > 0 {
            total_boundary_mass / total_edges as f32
        } else {
            0.0
        };

        // Combine metrics
        // High mean similarity and low variance = high coherence
        // High boundary mass = low coherence
        let coherence_score =
            (mean_sim * 0.5 + (1.0 - variance.sqrt()) * 0.3 + (1.0 - boundary_ratio) * 0.2)
                .clamp(0.0, 1.0);

        CoherenceMetrics {
            coherence_score,
            mean_similarity: mean_sim,
            similarity_variance: variance,
            boundary_mass: total_boundary_mass,
            num_nodes: n,
        }
    }

    /// Cosine similarity clamped to [-1, 1]; 0.0 for near-zero vectors.
    fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
        let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
        let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();

        if norm_a < 1e-10 || norm_b < 1e-10 {
            // Degenerate (near-zero-length) vectors are treated as orthogonal.
            return 0.0;
        }

        (dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
    }
}
|
||||
|
||||
/// Coherence metrics computed from key states
///
// NOTE(review): only `coherence_score` is read by the visible gate logic;
// the remaining fields appear to be kept for diagnostics — confirm.
#[derive(Debug, Clone)]
struct CoherenceMetrics {
    /// Overall coherence score (blend of similarity, spread, boundary mass)
    coherence_score: f32,
    /// Mean pairwise similarity
    mean_similarity: f32,
    /// Variance of pairwise similarities
    similarity_variance: f32,
    /// Total boundary mass (edges to non-neighbors)
    boundary_mass: f32,
    /// Number of nodes
    num_nodes: usize,
}
|
||||
|
||||
impl CoherenceMetrics {
|
||||
fn empty() -> Self {
|
||||
Self {
|
||||
coherence_score: 1.0,
|
||||
mean_similarity: 1.0,
|
||||
similarity_variance: 0.0,
|
||||
boundary_mass: 0.0,
|
||||
num_nodes: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn with_score(score: f32) -> Self {
|
||||
Self {
|
||||
coherence_score: score,
|
||||
mean_similarity: score,
|
||||
similarity_variance: 0.0,
|
||||
boundary_mass: 0.0,
|
||||
num_nodes: 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_topology_gate_creation() {
        let config = AttentionCoherenceConfig::default();
        let gate = TopologyGate::new(config);

        // A fresh gate starts optimistic: stable mode, updates allowed.
        assert_eq!(gate.current_mode(), AttentionMode::Stable);
        assert!(gate.allows_updates());
    }

    #[test]
    fn test_update_coherence_similar_keys() {
        let config = AttentionCoherenceConfig::default();
        let mut gate = TopologyGate::new(config);

        // All similar keys = high coherence
        let keys: Vec<Vec<f32>> = (0..10).map(|_| vec![1.0, 0.0, 0.0, 0.0]).collect();
        let key_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();

        gate.update_coherence(&key_refs);

        assert!(gate.current_coherence() > 0.5);
        assert_eq!(gate.current_mode(), AttentionMode::Stable);
    }

    #[test]
    fn test_update_coherence_diverse_keys() {
        let config = AttentionCoherenceConfig {
            stable_threshold: 0.9,
            freeze_threshold: 0.5,
            ..Default::default()
        };
        let mut gate = TopologyGate::new(config);

        // Diverse keys = lower coherence (one-hot keys are mutually orthogonal)
        let keys: Vec<Vec<f32>> = (0..10)
            .map(|i| {
                let mut v = vec![0.0f32; 16];
                v[i % 16] = 1.0;
                v
            })
            .collect();
        let key_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();

        gate.update_coherence(&key_refs);

        // Should trigger cautious or freeze mode due to diversity
        assert!(
            gate.current_mode() == AttentionMode::Cautious
                || gate.current_mode() == AttentionMode::Freeze
        );
    }

    #[test]
    fn test_tick_and_update_period() {
        let config = AttentionCoherenceConfig {
            coherence_update_period: 4,
            ..Default::default()
        };
        let mut gate = TopologyGate::new(config);

        // Initially needs update (no cache)
        assert!(gate.needs_update());

        let keys: Vec<Vec<f32>> = vec![vec![1.0; 8]; 5];
        let key_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();

        gate.update_coherence(&key_refs);
        assert!(!gate.needs_update());

        // Tick 4 times — exactly the configured period, so staleness trips.
        for _ in 0..4 {
            gate.tick();
        }
        assert!(gate.needs_update());
    }

    #[test]
    fn test_attention_width() {
        let config = AttentionCoherenceConfig {
            base_width: 64,
            stable_threshold: 0.7,
            freeze_threshold: 0.3,
            ..Default::default()
        };
        let mut gate = TopologyGate::new(config);

        // High coherence = full width
        gate.coherence = 0.8;
        gate.mode = AttentionMode::from_coherence(0.8, &gate.config);
        assert_eq!(gate.attention_width(), 64);

        // Medium coherence = reduced width
        gate.coherence = 0.5;
        gate.mode = AttentionMode::from_coherence(0.5, &gate.config);
        assert_eq!(gate.attention_width(), 32);

        // Low coherence = minimal width
        gate.coherence = 0.2;
        gate.mode = AttentionMode::from_coherence(0.2, &gate.config);
        assert_eq!(gate.attention_width(), 1);
    }
}
|
||||
Reference in New Issue
Block a user