Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,280 @@
//! Adapter to ruvector-attention
//!
//! Wraps attention mechanisms for coherence computation.
use super::{AttentionCoherenceConfig, AttentionError, Result};
/// Adapter wrapping ruvector-attention functionality.
///
/// Holds an [`AttentionCoherenceConfig`] and exposes dense, sparse (top-k)
/// and per-node score attention computations over plain `f32` slices.
#[derive(Debug)]
pub struct AttentionAdapter {
    /// Configuration; the attention methods read `temperature` from it.
    config: AttentionCoherenceConfig,
}
impl AttentionAdapter {
    /// Create a new adapter from the given configuration.
    pub fn new(config: AttentionCoherenceConfig) -> Self {
        Self { config }
    }

    /// Compute attention scores for node states.
    ///
    /// Each node's score is the average cosine similarity to every other
    /// node, remapped from `[-1, 1]` into `[0, 1]`. Returns one score per
    /// node, in input order.
    ///
    /// # Errors
    ///
    /// Returns [`AttentionError::EmptyInput`] when `node_states` is empty,
    /// and [`AttentionError::DimensionMismatch`] when states differ in length.
    pub fn compute_scores(&self, node_states: &[&[f32]]) -> Result<Vec<f32>> {
        if node_states.is_empty() {
            return Err(AttentionError::EmptyInput("node_states".to_string()));
        }
        let n = node_states.len();
        // Validate that every state shares the first state's dimension.
        let dim = node_states[0].len();
        for state in node_states {
            if state.len() != dim {
                return Err(AttentionError::DimensionMismatch {
                    expected: dim,
                    actual: state.len(),
                });
            }
        }
        // Pairwise cosine similarities; the diagonal stays 0 so a node does
        // not contribute to its own score.
        let mut similarity_matrix = vec![vec![0.0f32; n]; n];
        for i in 0..n {
            for j in 0..n {
                if i != j {
                    similarity_matrix[i][j] =
                        self.cosine_similarity(node_states[i], node_states[j]);
                }
            }
        }
        // Score = mean similarity to the other n-1 nodes, normalized to [0, 1].
        let mut scores = Vec::with_capacity(n);
        for row in &similarity_matrix {
            let sum: f32 = row.iter().sum();
            let avg = sum / (n - 1).max(1) as f32;
            // Cosine similarity lies in [-1, 1]; shift and scale into [0, 1].
            let normalized = (avg + 1.0) / 2.0;
            scores.push(normalized.clamp(0.0, 1.0));
        }
        Ok(scores)
    }

    /// Compute scaled dot-product attention of `query` over `keys`/`values`.
    ///
    /// Logits are scaled by `1/sqrt(dim)` and divided by the configured
    /// softmax temperature. Returns the attention-weighted sum of `values`.
    ///
    /// # Errors
    ///
    /// Returns an error when `keys`/`values` are empty or differ in length.
    pub fn compute_attention(
        &self,
        query: &[f32],
        keys: &[&[f32]],
        values: &[&[f32]],
    ) -> Result<Vec<f32>> {
        if keys.is_empty() || values.is_empty() {
            return Err(AttentionError::EmptyInput("keys/values".to_string()));
        }
        if keys.len() != values.len() {
            return Err(AttentionError::InvalidConfig(
                "keys and values must have same length".to_string(),
            ));
        }
        let dim = query.len();
        // Standard scaled dot-product attention, temperature-adjusted.
        let scale = 1.0 / (dim as f32).sqrt();
        let logits: Vec<f32> = keys
            .iter()
            .map(|k| self.dot_product(query, k) * scale / self.config.temperature)
            .collect();
        let weights = self.stable_softmax(&logits);
        // Weighted sum of values
        self.weighted_sum(&weights, values)
    }

    /// Compute sparse (top-k) attention of `query` over `keys`/`values`.
    ///
    /// Only the `k` highest-scoring keys participate in the softmax; `k`
    /// is clamped to `keys.len()`.
    ///
    /// # Errors
    ///
    /// Returns an error when `keys`/`values` are empty or differ in length.
    pub fn compute_sparse_attention(
        &self,
        query: &[f32],
        keys: &[&[f32]],
        values: &[&[f32]],
        k: usize,
    ) -> Result<Vec<f32>> {
        if keys.is_empty() || values.is_empty() {
            return Err(AttentionError::EmptyInput("keys/values".to_string()));
        }
        // Bug fix: mismatched key/value lengths previously caused an
        // out-of-bounds panic below when indexing `values` by key index.
        if keys.len() != values.len() {
            return Err(AttentionError::InvalidConfig(
                "keys and values must have same length".to_string(),
            ));
        }
        let k = k.min(keys.len());
        let dim = query.len();
        let scale = 1.0 / (dim as f32).sqrt();
        // Rank keys by scaled dot product with the query (descending).
        let mut scores: Vec<(usize, f32)> = keys
            .iter()
            .enumerate()
            .map(|(i, key)| (i, self.dot_product(query, key) * scale))
            .collect();
        scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        let top_k: Vec<(usize, f32)> = scores.into_iter().take(k).collect();
        // Softmax over the selected logits only.
        let logits: Vec<f32> = top_k
            .iter()
            .map(|(_, s)| s / self.config.temperature)
            .collect();
        let weights = self.stable_softmax(&logits);
        let selected_values: Vec<&[f32]> = top_k.iter().map(|(i, _)| values[*i]).collect();
        self.weighted_sum(&weights, &selected_values)
    }

    // === Helper methods ===

    /// Dot product over the common prefix of `a` and `b` (4-way unrolled).
    fn dot_product(&self, a: &[f32], b: &[f32]) -> f32 {
        let len = a.len().min(b.len());
        let mut sum = 0.0f32;
        // Manually unrolled by 4 for throughput.
        let chunks = len / 4;
        let remainder = len % 4;
        for i in 0..chunks {
            let base = i * 4;
            sum += a[base] * b[base];
            sum += a[base + 1] * b[base + 1];
            sum += a[base + 2] * b[base + 2];
            sum += a[base + 3] * b[base + 3];
        }
        let base = chunks * 4;
        for i in 0..remainder {
            sum += a[base + i] * b[base + i];
        }
        sum
    }

    /// Cosine similarity clamped to [-1, 1]; 0 when either norm is near zero.
    fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
        let dot = self.dot_product(a, b);
        let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm_a < 1e-10 || norm_b < 1e-10 {
            return 0.0;
        }
        (dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
    }

    /// Numerically stable softmax (max-subtraction), with a uniform
    /// fallback when every exponent underflows to zero.
    fn stable_softmax(&self, logits: &[f32]) -> Vec<f32> {
        if logits.is_empty() {
            return vec![];
        }
        let max_logit = logits.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
        let exp_logits: Vec<f32> = logits.iter().map(|&l| (l - max_logit).exp()).collect();
        let sum: f32 = exp_logits.iter().sum();
        if sum > 0.0 {
            exp_logits.iter().map(|&e| e / sum).collect()
        } else {
            // Fallback to a uniform distribution.
            vec![1.0 / logits.len() as f32; logits.len()]
        }
    }

    /// Weighted sum of `values`, sized by the first value's dimension.
    fn weighted_sum(&self, weights: &[f32], values: &[&[f32]]) -> Result<Vec<f32>> {
        if weights.is_empty() || values.is_empty() {
            return Err(AttentionError::EmptyInput("weights/values".to_string()));
        }
        let dim = values[0].len();
        let mut output = vec![0.0f32; dim];
        for (weight, value) in weights.iter().zip(values.iter()) {
            for (o, &v) in output.iter_mut().zip(value.iter()) {
                *o += weight * v;
            }
        }
        Ok(output)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build `n` constant-valued state vectors of width `dim`.
    fn ramp_states(n: usize, dim: usize) -> Vec<Vec<f32>> {
        (0..n).map(|i| vec![0.1 * (i + 1) as f32; dim]).collect()
    }

    #[test]
    fn test_compute_scores() {
        let adapter = AttentionAdapter::new(AttentionCoherenceConfig::default());
        let states = ramp_states(5, 16);
        let refs: Vec<&[f32]> = states.iter().map(Vec::as_slice).collect();
        let scores = adapter.compute_scores(&refs).unwrap();
        assert_eq!(scores.len(), 5);
        // Every score must land in the normalized [0, 1] range.
        assert!(scores.iter().all(|s| *s >= 0.0 && *s <= 1.0));
    }

    #[test]
    fn test_compute_attention() {
        let adapter = AttentionAdapter::new(AttentionCoherenceConfig::default());
        let query = vec![0.5f32; 16];
        let keys = ramp_states(10, 16);
        let values: Vec<Vec<f32>> = (0..10).map(|i| vec![i as f32; 16]).collect();
        let key_refs: Vec<&[f32]> = keys.iter().map(Vec::as_slice).collect();
        let value_refs: Vec<&[f32]> = values.iter().map(Vec::as_slice).collect();
        let output = adapter
            .compute_attention(&query, &key_refs, &value_refs)
            .unwrap();
        // Output dimension follows the value dimension.
        assert_eq!(output.len(), 16);
    }

    #[test]
    fn test_sparse_attention() {
        let adapter = AttentionAdapter::new(AttentionCoherenceConfig::default());
        let query = vec![0.5f32; 16];
        let keys = ramp_states(20, 16);
        let values: Vec<Vec<f32>> = (0..20).map(|i| vec![i as f32; 16]).collect();
        let key_refs: Vec<&[f32]> = keys.iter().map(Vec::as_slice).collect();
        let value_refs: Vec<&[f32]> = values.iter().map(Vec::as_slice).collect();
        let output = adapter
            .compute_sparse_attention(&query, &key_refs, &value_refs, 5)
            .unwrap();
        assert_eq!(output.len(), 16);
    }

    #[test]
    fn test_cosine_similarity() {
        let adapter = AttentionAdapter::new(AttentionCoherenceConfig::default());
        let unit_x = vec![1.0, 0.0, 0.0, 0.0];
        let same = vec![1.0, 0.0, 0.0, 0.0];
        let opposite = vec![-1.0, 0.0, 0.0, 0.0];
        // Parallel vectors score +1, antiparallel vectors score -1.
        assert!((adapter.cosine_similarity(&unit_x, &same) - 1.0).abs() < 0.01);
        assert!((adapter.cosine_similarity(&unit_x, &opposite) + 1.0).abs() < 0.01);
    }
}

View File

@@ -0,0 +1,228 @@
//! Attention Coherence Configuration
//!
//! Configuration for attention-weighted residual computation.
use serde::{Deserialize, Serialize};
/// Configuration for attention-weighted coherence.
///
/// Groups the knobs for the cooperating mechanisms: base attention,
/// topology gating, mixture-of-experts routing, and PDE diffusion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttentionCoherenceConfig {
    /// State vector dimension
    pub dimension: usize,
    /// Number of neighbors for coherence graph construction
    pub k_neighbors: usize,
    /// Temperature for attention softmax (must be > 0; see `validate`)
    pub temperature: f32,
    /// Base attention width (full width used in stable mode)
    pub base_width: usize,
    // Topology gating configuration
    /// Coherence threshold at or above which stable mode applies
    pub stable_threshold: f32,
    /// Coherence threshold below which freeze mode applies
    pub freeze_threshold: f32,
    /// Coherence update period (ticks)
    pub coherence_update_period: usize,
    // MoE configuration
    /// Number of MoE experts
    pub num_experts: usize,
    /// Top-k experts to use (must not exceed `num_experts`)
    pub moe_top_k: usize,
    /// Expert capacity factor
    pub expert_capacity: f32,
    // Diffusion configuration
    /// Enable diffusion smoothing
    pub enable_diffusion: bool,
    /// Diffusion time parameter (total time split across the steps)
    pub diffusion_time: f32,
    /// Number of diffusion steps
    pub diffusion_steps: usize,
    /// Sigma for diffusion kernel
    pub diffusion_sigma: f32,
}
impl Default for AttentionCoherenceConfig {
    /// Balanced defaults: 64-wide states, diffusion disabled.
    fn default() -> Self {
        Self {
            // Core attention parameters.
            dimension: 64,
            base_width: 64,
            k_neighbors: 8,
            temperature: 1.0,
            // Topology gating.
            stable_threshold: 0.7,
            freeze_threshold: 0.3,
            coherence_update_period: 16,
            // Mixture of experts.
            num_experts: 4,
            moe_top_k: 2,
            expert_capacity: 1.25,
            // Diffusion smoothing (off by default).
            enable_diffusion: false,
            diffusion_time: 1.0,
            diffusion_steps: 5,
            diffusion_sigma: 1.0,
        }
    }
}
impl AttentionCoherenceConfig {
    /// Create configuration for small collections (narrower states,
    /// fewer neighbors/experts/steps).
    pub fn small() -> Self {
        Self {
            dimension: 32,
            k_neighbors: 4,
            base_width: 32,
            num_experts: 2,
            diffusion_steps: 3,
            ..Default::default()
        }
    }

    /// Create configuration for large collections (wider states,
    /// more neighbors/experts/steps).
    pub fn large() -> Self {
        Self {
            dimension: 128,
            k_neighbors: 16,
            base_width: 128,
            num_experts: 8,
            moe_top_k: 3,
            diffusion_steps: 10,
            ..Default::default()
        }
    }

    /// Validate configuration.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message describing the first violated
    /// invariant.
    pub fn validate(&self) -> Result<(), String> {
        if self.dimension == 0 {
            return Err("dimension must be positive".to_string());
        }
        if self.temperature <= 0.0 {
            return Err("temperature must be positive".to_string());
        }
        if self.stable_threshold <= self.freeze_threshold {
            return Err("stable_threshold must be greater than freeze_threshold".to_string());
        }
        if self.num_experts == 0 {
            return Err("num_experts must be positive".to_string());
        }
        // Robustness fix: a top-k of zero would route residuals to no
        // expert at all, silently producing empty MoE output.
        if self.moe_top_k == 0 {
            return Err("moe_top_k must be positive".to_string());
        }
        if self.moe_top_k > self.num_experts {
            return Err("moe_top_k cannot exceed num_experts".to_string());
        }
        Ok(())
    }

    /// Width multiplier applied in cautious mode (half the base width).
    pub fn cautious_width_factor(&self) -> f32 {
        0.5
    }

    /// Effective attention width for a coherence score: full `base_width`
    /// when stable, half when cautious, and 1 (single element) when frozen.
    pub fn width_for_coherence(&self, coherence: f32) -> usize {
        if coherence >= self.stable_threshold {
            self.base_width
        } else if coherence >= self.freeze_threshold {
            ((self.base_width as f32) * self.cautious_width_factor()) as usize
        } else {
            1 // Freeze mode: single element
        }
    }
}
/// Attention mode based on coherence state.
///
/// Ordered from most to least permissive; see
/// [`AttentionMode::from_coherence`] for the threshold mapping.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AttentionMode {
    /// Full attention, normal updates
    Stable,
    /// Reduced width, increased sparsity
    Cautious,
    /// Retrieval only, no updates
    Freeze,
}
impl AttentionMode {
    /// Classify a coherence score into a mode using the config thresholds:
    /// `>= stable_threshold` is stable, `>= freeze_threshold` is cautious,
    /// anything lower is frozen.
    pub fn from_coherence(coherence: f32, config: &AttentionCoherenceConfig) -> Self {
        match coherence {
            c if c >= config.stable_threshold => Self::Stable,
            c if c >= config.freeze_threshold => Self::Cautious,
            _ => Self::Freeze,
        }
    }

    /// Whether state updates may proceed in this mode (all but freeze).
    pub fn allows_updates(&self) -> bool {
        !matches!(self, Self::Freeze)
    }

    /// Lowercase human-readable name of the mode.
    pub fn name(&self) -> &'static str {
        match self {
            Self::Stable => "stable",
            Self::Cautious => "cautious",
            Self::Freeze => "freeze",
        }
    }
}
impl std::fmt::Display for AttentionMode {
    /// Formats the mode as its lowercase name ("stable"/"cautious"/"freeze").
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.name())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_default_config() {
        assert!(AttentionCoherenceConfig::default().validate().is_ok());
    }

    #[test]
    fn test_mode_from_coherence() {
        let config = AttentionCoherenceConfig::default();
        // Default thresholds: stable >= 0.7, cautious >= 0.3, freeze below.
        let cases = [
            (0.8, AttentionMode::Stable),
            (0.5, AttentionMode::Cautious),
            (0.2, AttentionMode::Freeze),
        ];
        for &(coherence, expected) in &cases {
            assert_eq!(AttentionMode::from_coherence(coherence, &config), expected);
        }
    }

    #[test]
    fn test_width_for_coherence() {
        let config = AttentionCoherenceConfig {
            base_width: 64,
            stable_threshold: 0.7,
            freeze_threshold: 0.3,
            ..Default::default()
        };
        // Stable keeps the full width, cautious halves it, freeze pins to 1.
        assert_eq!(config.width_for_coherence(0.8), 64);
        assert_eq!(config.width_for_coherence(0.5), 32);
        assert_eq!(config.width_for_coherence(0.2), 1);
    }

    #[test]
    fn test_invalid_config() {
        // A freeze threshold above the stable threshold is inconsistent
        // and must be rejected.
        let config = AttentionCoherenceConfig {
            stable_threshold: 0.3,
            freeze_threshold: 0.7,
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }
}

View File

@@ -0,0 +1,336 @@
//! PDE Diffusion-Based Energy Smoothing
//!
//! Applies heat diffusion to smooth energy across the coherence graph.
use super::{AttentionCoherenceConfig, AttentionError, Result};
/// Result of diffusion smoothing.
#[derive(Debug, Clone)]
pub struct SmoothedEnergy {
    /// Node energies after smoothing (one per node)
    pub node_energies: Vec<f32>,
    /// Edge energies after smoothing, as (source, target, energy)
    pub edge_energies: Vec<(usize, usize, f32)>,
    /// Total energy before smoothing
    pub initial_total: f32,
    /// Total energy after smoothing
    pub final_total: f32,
    /// Number of diffusion steps applied
    pub steps_applied: usize,
    /// Convergence achieved (change fell below tolerance before the cap)
    pub converged: bool,
}
impl SmoothedEnergy {
    /// Ratio of final to initial total energy; defined as 1.0 when the
    /// initial total is not positive.
    pub fn energy_ratio(&self) -> f32 {
        if self.initial_total <= 0.0 {
            1.0
        } else {
            self.final_total / self.initial_total
        }
    }

    /// True when smoothing lowered the total energy.
    pub fn energy_reduced(&self) -> bool {
        self.final_total < self.initial_total
    }

    /// Fraction of energy removed by smoothing (negative if energy grew).
    pub fn smoothing_factor(&self) -> f32 {
        1.0 - self.energy_ratio()
    }
}
/// PDE diffusion smoother for energy propagation.
///
/// Uses a discrete heat-diffusion step over the coherence graph to smooth
/// energy, reducing sharp energy gradients while approximately preserving
/// total energy.
#[derive(Debug)]
pub struct DiffusionSmoothing {
    /// Configuration (reads diffusion_time, diffusion_steps, diffusion_sigma)
    config: AttentionCoherenceConfig,
}
impl DiffusionSmoothing {
    /// Create a new diffusion smoother.
    pub fn new(config: AttentionCoherenceConfig) -> Self {
        Self { config }
    }

    /// Apply diffusion smoothing to edge energies.
    ///
    /// Distributes each edge's energy onto its endpoint nodes, runs up to
    /// `steps` explicit Euler steps of graph heat diffusion (capped by
    /// `config.diffusion_steps`), then reconstructs per-edge energies from
    /// the smoothed node energies.
    ///
    /// # Errors
    ///
    /// Returns [`AttentionError::EmptyInput`] when `node_states` is empty
    /// while edges are present.
    pub fn smooth(
        &self,
        edge_energies: &[(usize, usize, f32)],
        node_states: &[&[f32]],
        steps: usize,
    ) -> Result<SmoothedEnergy> {
        if edge_energies.is_empty() {
            // Nothing to smooth: trivially converged with zero energy.
            return Ok(SmoothedEnergy {
                node_energies: vec![],
                edge_energies: vec![],
                initial_total: 0.0,
                final_total: 0.0,
                steps_applied: 0,
                converged: true,
            });
        }
        let n = node_states.len();
        if n == 0 {
            return Err(AttentionError::EmptyInput("node_states".to_string()));
        }
        // Build adjacency and compute initial node energies.
        let (adjacency, mut node_energies) = self.build_graph(edge_energies, n);
        let initial_total: f32 = node_energies.iter().sum();
        // Build the Laplacian-like diffusion kernel.
        let kernel = self.build_diffusion_kernel(&adjacency, node_states, n);
        // Apply diffusion steps.
        let actual_steps = steps.min(self.config.diffusion_steps);
        let dt = self.config.diffusion_time / actual_steps.max(1) as f32;
        let mut converged = false;
        // Bug fix: report the number of steps actually executed instead of
        // the requested maximum when convergence breaks the loop early.
        let mut steps_applied = 0;
        for step in 0..actual_steps {
            let prev_energies = node_energies.clone();
            // Explicit Euler step: e_new = e_old + dt * K * e_old.
            node_energies = self.diffusion_step(&node_energies, &kernel, dt);
            steps_applied = step + 1;
            // Convergence: L1 change between iterations is negligible.
            let change: f32 = node_energies
                .iter()
                .zip(prev_energies.iter())
                .map(|(a, b)| (a - b).abs())
                .sum();
            if change < 1e-6 {
                converged = true;
                break;
            }
            // Early termination once total energy has stabilized near the
            // initial total (relative tolerance, guarded against zero).
            if step > 2 {
                let current_total: f32 = node_energies.iter().sum();
                if (current_total - initial_total).abs() / initial_total.max(1e-10) < 1e-4 {
                    converged = true;
                    break;
                }
            }
        }
        // Reconstruct edge energies from smoothed node energies.
        let smoothed_edges = self.reconstruct_edge_energies(edge_energies, &node_energies);
        let final_total: f32 = node_energies.iter().sum();
        Ok(SmoothedEnergy {
            node_energies,
            edge_energies: smoothed_edges,
            initial_total,
            final_total,
            steps_applied,
            converged,
        })
    }

    /// Build an undirected adjacency list from edge energies and split each
    /// edge's energy evenly onto its two endpoint nodes. Edges referencing
    /// out-of-range nodes are ignored.
    fn build_graph(
        &self,
        edge_energies: &[(usize, usize, f32)],
        n: usize,
    ) -> (Vec<Vec<(usize, f32)>>, Vec<f32>) {
        let mut adjacency: Vec<Vec<(usize, f32)>> = vec![vec![]; n];
        let mut node_energies = vec![0.0f32; n];
        for &(src, dst, energy) in edge_energies {
            if src < n && dst < n {
                adjacency[src].push((dst, energy));
                adjacency[dst].push((src, energy));
                // Distribute edge energy equally to both endpoints.
                node_energies[src] += energy / 2.0;
                node_energies[dst] += energy / 2.0;
            }
        }
        (adjacency, node_energies)
    }

    /// Build the diffusion kernel: off-diagonals are similarity-based
    /// weights, the diagonal is the negative row sum (Laplacian property),
    /// and each row is degree-normalized for numerical stability.
    fn build_diffusion_kernel(
        &self,
        adjacency: &[Vec<(usize, f32)>],
        node_states: &[&[f32]],
        n: usize,
    ) -> Vec<Vec<f32>> {
        let sigma_sq = self.config.diffusion_sigma * self.config.diffusion_sigma;
        let mut kernel = vec![vec![0.0f32; n]; n];
        for i in 0..n {
            let degree = adjacency[i].len() as f32;
            for &(j, _edge_weight) in &adjacency[i] {
                // Similarity-based weight.
                // NOTE(review): exp(sim / sigma^2) grows with similarity; a
                // Gaussian kernel would use exp(-(1 - sim) / sigma^2) —
                // confirm the intended kernel shape.
                let sim = self.cosine_similarity(node_states[i], node_states[j]);
                let weight = (sim / sigma_sq).exp();
                kernel[i][j] = weight;
            }
            // Diagonal: negative sum of off-diagonals (Laplacian property).
            let row_sum: f32 = kernel[i].iter().sum();
            kernel[i][i] = -row_sum;
            // Normalize by degree for stability.
            if degree > 0.0 {
                for k in 0..n {
                    kernel[i][k] /= degree;
                }
            }
        }
        kernel
    }

    /// Perform one explicit Euler diffusion step, clamping energies to be
    /// non-negative.
    fn diffusion_step(&self, energies: &[f32], kernel: &[Vec<f32>], dt: f32) -> Vec<f32> {
        let n = energies.len();
        let mut new_energies = vec![0.0f32; n];
        for i in 0..n {
            // e_new[i] = e[i] + dt * sum_j(K[i][j] * e[j])
            let diffusion: f32 = kernel[i]
                .iter()
                .zip(energies.iter())
                .map(|(&k, &e)| k * e)
                .sum();
            new_energies[i] = (energies[i] + dt * diffusion).max(0.0);
        }
        new_energies
    }

    /// Reconstruct edge energies by blending each original edge energy with
    /// the mean smoothed energy of its endpoints (50/50 blend).
    fn reconstruct_edge_energies(
        &self,
        original_edges: &[(usize, usize, f32)],
        node_energies: &[f32],
    ) -> Vec<(usize, usize, f32)> {
        original_edges
            .iter()
            .map(|&(src, dst, original)| {
                let src_energy = node_energies.get(src).copied().unwrap_or(0.0);
                let dst_energy = node_energies.get(dst).copied().unwrap_or(0.0);
                // Average of the two endpoint node energies.
                let avg_node_energy = (src_energy + dst_energy) / 2.0;
                // Blend original and smoothed energies.
                let alpha = 0.5; // Smoothing blend factor
                let smoothed = alpha * avg_node_energy + (1.0 - alpha) * original;
                (src, dst, smoothed.max(0.0))
            })
            .collect()
    }

    /// Cosine similarity clamped to [-1, 1]; 0 when either norm is near zero.
    fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
        let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
        let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm_a < 1e-10 || norm_b < 1e-10 {
            return 0.0;
        }
        (dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_empty_input() {
        let smoother = DiffusionSmoothing::new(AttentionCoherenceConfig::default());
        // No edges: trivially converged, zero energy in and out.
        let result = smoother.smooth(&[], &[], 5).unwrap();
        assert!(result.converged);
        assert_eq!(result.initial_total, 0.0);
    }

    #[test]
    fn test_basic_smoothing() {
        let config = AttentionCoherenceConfig {
            diffusion_time: 1.0,
            diffusion_steps: 10,
            diffusion_sigma: 1.0,
            ..Default::default()
        };
        let smoother = DiffusionSmoothing::new(config);
        let states: Vec<Vec<f32>> = (0..4).map(|i| vec![0.1 * (i + 1) as f32; 8]).collect();
        let refs: Vec<&[f32]> = states.iter().map(Vec::as_slice).collect();
        let edges = vec![(0, 1, 1.0), (1, 2, 2.0), (2, 3, 0.5)];
        let result = smoother.smooth(&edges, &refs, 5).unwrap();
        // One smoothed energy per input edge; step count bounded by config.
        assert_eq!(result.edge_energies.len(), 3);
        assert!(result.steps_applied <= 10);
    }

    #[test]
    fn test_energy_conservation() {
        let config = AttentionCoherenceConfig {
            diffusion_time: 0.5,
            diffusion_steps: 5,
            diffusion_sigma: 1.0,
            ..Default::default()
        };
        let smoother = DiffusionSmoothing::new(config);
        let states = vec![vec![1.0f32; 4]; 3];
        let refs: Vec<&[f32]> = states.iter().map(Vec::as_slice).collect();
        let edges = vec![(0, 1, 1.0), (1, 2, 1.0)];
        let result = smoother.smooth(&edges, &refs, 3).unwrap();
        // Diffusion should roughly conserve total energy (within tolerance).
        let ratio = result.energy_ratio();
        assert!(
            ratio > 0.5 && ratio < 2.0,
            "Energy ratio {} out of expected range",
            ratio
        );
    }

    #[test]
    fn test_smoothed_energy_methods() {
        // Hand-built result: energy halved from 2.0 to 1.0.
        let smoothed = SmoothedEnergy {
            node_energies: vec![0.5, 0.5],
            edge_energies: vec![(0, 1, 0.8)],
            initial_total: 2.0,
            final_total: 1.0,
            steps_applied: 5,
            converged: true,
        };
        assert_eq!(smoothed.energy_ratio(), 0.5);
        assert!(smoothed.energy_reduced());
        assert_eq!(smoothed.smoothing_factor(), 0.5);
    }
}

View File

@@ -0,0 +1,404 @@
//! Attention-Weighted Residuals Module
//!
//! Computes attention-weighted coherence using multiple mechanisms:
//! - Topology-gated attention (structural coherence as permission signal)
//! - Mixture of Experts (specialized residual processing)
//! - PDE diffusion (smooth energy propagation)
//!
//! Leverages `ruvector-attention` for the underlying attention implementations.
//!
//! # Features
//!
//! - Three attention modes: Stable, Cautious, Freeze
//! - MoE routing for specialized residual experts
//! - Diffusion-based energy smoothing
//! - Attention score computation for residual weighting
mod adapter;
mod config;
mod diffusion;
mod moe;
mod topology;
pub use adapter::AttentionAdapter;
pub use config::AttentionCoherenceConfig;
pub use diffusion::{DiffusionSmoothing, SmoothedEnergy};
pub use moe::{ExpertRouting, MoEResidualProcessor};
pub use topology::{AttentionScore, TopologyGate, TopologyGateResult};
use std::collections::HashMap;
/// Node identifier type
pub type NodeId = u64;
/// Edge identifier type: (source, target) node pair
pub type EdgeId = (NodeId, NodeId);
/// Result type for attention operations, with [`AttentionError`] as the error
pub type Result<T> = std::result::Result<T, AttentionError>;
/// Errors in attention-weighted coherence computation
#[derive(Debug, Clone, thiserror::Error)]
pub enum AttentionError {
    /// Input dimension did not match the expected dimension
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch {
        /// Dimension the operation expected
        expected: usize,
        /// Dimension that was actually provided
        actual: usize,
    },
    /// Empty input; the payload names the offending argument
    #[error("Empty input: {0}")]
    EmptyInput(String),
    /// Invalid configuration
    #[error("Invalid configuration: {0}")]
    InvalidConfig(String),
    /// Computation failed
    #[error("Computation failed: {0}")]
    ComputationFailed(String),
    /// Mode not supported
    #[error("Mode not supported in current state: {0}")]
    ModeNotSupported(String),
}
/// Main attention-weighted coherence engine.
///
/// Combines topology-gated attention, MoE routing, and PDE diffusion
/// to compute attention-weighted residuals for coherence analysis.
/// Each sub-component receives its own clone of the configuration.
#[derive(Debug)]
pub struct AttentionCoherence {
    /// Configuration
    config: AttentionCoherenceConfig,
    /// Adapter to attention implementations
    adapter: AttentionAdapter,
    /// Topology gate (updated as a side effect of score computation)
    topo_gate: TopologyGate,
    /// MoE residual processor
    moe: MoEResidualProcessor,
    /// Diffusion smoother
    diffusion: DiffusionSmoothing,
}
impl AttentionCoherence {
    /// Create a new attention coherence engine.
    ///
    /// The configuration is cloned into each sub-component (adapter,
    /// topology gate, MoE processor, diffusion smoother).
    pub fn new(config: AttentionCoherenceConfig) -> Self {
        let adapter = AttentionAdapter::new(config.clone());
        let topo_gate = TopologyGate::new(config.clone());
        let moe = MoEResidualProcessor::new(config.clone());
        let diffusion = DiffusionSmoothing::new(config.clone());
        Self {
            config,
            adapter,
            topo_gate,
            moe,
            diffusion,
        }
    }

    /// Create with default configuration.
    pub fn default_config() -> Self {
        Self::new(AttentionCoherenceConfig::default())
    }

    /// Compute attention scores for nodes, keyed by node index.
    ///
    /// Also refreshes the topology gate's coherence estimate as a side
    /// effect.
    ///
    /// # Errors
    ///
    /// Returns [`AttentionError::EmptyInput`] when `node_states` is empty.
    pub fn compute_attention_scores(
        &mut self,
        node_states: &[&[f32]],
    ) -> Result<HashMap<usize, f32>> {
        if node_states.is_empty() {
            return Err(AttentionError::EmptyInput("node_states".to_string()));
        }
        // Update topology gate coherence before scoring.
        self.topo_gate.update_coherence(node_states);
        // Compute scores using the adapter; enumerate yields (index, score)
        // pairs that collect directly into the map (the previous identity
        // `.map(|(i, s)| (i, s))` was redundant).
        let scores = self.adapter.compute_scores(node_states)?;
        Ok(scores.into_iter().enumerate().collect())
    }

    /// Compute attention-weighted residuals.
    ///
    /// Each edge residual's squared norm is scaled by the mean attention
    /// score of its two endpoint nodes; unknown node indices default to a
    /// score of 1.0 (no attenuation).
    pub fn weighted_residuals(
        &mut self,
        node_states: &[&[f32]],
        edge_residuals: &[(usize, usize, Vec<f32>)], // (source_idx, target_idx, residual)
    ) -> Result<Vec<WeightedEdgeResidual>> {
        if node_states.is_empty() {
            return Err(AttentionError::EmptyInput("node_states".to_string()));
        }
        // Compute attention scores (also refreshes gate coherence).
        let scores = self.compute_attention_scores(node_states)?;
        // Weight each residual by its endpoints' attention.
        let mut weighted = Vec::with_capacity(edge_residuals.len());
        for (source, target, residual) in edge_residuals {
            let source_score = scores.get(source).copied().unwrap_or(1.0);
            let target_score = scores.get(target).copied().unwrap_or(1.0);
            // Mean attention weight of the two endpoints.
            let attention_weight = (source_score + target_score) / 2.0;
            // Residual energy = squared L2 norm.
            let residual_norm_sq: f32 = residual.iter().map(|x| x * x).sum();
            let weighted_energy = residual_norm_sq * attention_weight;
            weighted.push(WeightedEdgeResidual {
                source_idx: *source,
                target_idx: *target,
                source_attention: source_score,
                target_attention: target_score,
                attention_weight,
                residual_norm_sq,
                weighted_energy,
            });
        }
        Ok(weighted)
    }

    /// Route a residual through the MoE experts.
    ///
    /// Uses specialized experts for different residual characteristics.
    pub fn moe_process_residual(
        &self,
        residual: &[f32],
        context: &[f32],
    ) -> Result<MoEProcessedResidual> {
        self.moe.process(residual, context)
    }

    /// Apply diffusion smoothing to energy values across the graph.
    pub fn smooth_energy(
        &self,
        edge_energies: &[(usize, usize, f32)], // (source, target, energy)
        node_states: &[&[f32]],
        steps: usize,
    ) -> Result<SmoothedEnergy> {
        self.diffusion.smooth(edge_energies, node_states, steps)
    }

    /// Get current topology gate result.
    pub fn gate_result(&self) -> TopologyGateResult {
        self.topo_gate.current_result()
    }

    /// Check if updates are allowed (not in freeze mode).
    pub fn allows_updates(&self) -> bool {
        self.topo_gate.allows_updates()
    }

    /// Get effective attention width based on the current mode.
    pub fn attention_width(&self) -> usize {
        self.topo_gate.attention_width()
    }

    /// Get configuration.
    pub fn config(&self) -> &AttentionCoherenceConfig {
        &self.config
    }

    /// Compute full attention-weighted energy analysis: weighted residuals,
    /// optional diffusion smoothing, and aggregate statistics.
    pub fn full_analysis(
        &mut self,
        node_states: &[&[f32]],
        edge_residuals: &[(usize, usize, Vec<f32>)],
    ) -> Result<AttentionEnergyAnalysis> {
        // Compute weighted residuals first; this refreshes the topology
        // gate's coherence estimate.
        let weighted = self.weighted_residuals(node_states, edge_residuals)?;
        // Bug fix: read the gate result *after* the coherence update so the
        // reported mode reflects the states just analyzed (previously it was
        // captured before the update and could be stale).
        let gate_result = self.topo_gate.current_result();
        // Collect per-edge energies.
        let edge_energies: Vec<(usize, usize, f32)> = weighted
            .iter()
            .map(|w| (w.source_idx, w.target_idx, w.weighted_energy))
            .collect();
        // Apply diffusion only when enabled in the configuration.
        let smoothed = if self.config.enable_diffusion {
            Some(self.smooth_energy(&edge_energies, node_states, self.config.diffusion_steps)?)
        } else {
            None
        };
        // Aggregate statistics (guard against division by zero on no edges).
        let total_energy: f32 = weighted.iter().map(|w| w.weighted_energy).sum();
        let avg_attention: f32 =
            weighted.iter().map(|w| w.attention_weight).sum::<f32>() / weighted.len().max(1) as f32;
        Ok(AttentionEnergyAnalysis {
            weighted_residuals: weighted,
            smoothed_energy: smoothed,
            total_energy,
            avg_attention_weight: avg_attention,
            gate_result,
            num_edges: edge_residuals.len(),
        })
    }
}
/// Result of weighting an edge residual by attention
#[derive(Debug, Clone)]
pub struct WeightedEdgeResidual {
    /// Source node index
    pub source_idx: usize,
    /// Target node index
    pub target_idx: usize,
    /// Attention score of source node
    pub source_attention: f32,
    /// Attention score of target node
    pub target_attention: f32,
    /// Combined attention weight (mean of the two endpoint scores)
    pub attention_weight: f32,
    /// Squared L2 norm of the residual
    pub residual_norm_sq: f32,
    /// Final weighted energy: `residual_norm_sq * attention_weight`
    pub weighted_energy: f32,
}
/// Result of processing a residual through MoE
#[derive(Debug, Clone)]
pub struct MoEProcessedResidual {
    /// Output from the weighted combination of the selected experts
    pub output: Vec<f32>,
    /// Indices of the experts that were selected
    pub expert_indices: Vec<usize>,
    /// Weights for each selected expert (parallel to `expert_indices`)
    pub expert_weights: Vec<f32>,
    /// Load balance loss (for training)
    pub load_balance_loss: f32,
}
/// Complete attention energy analysis
#[derive(Debug, Clone)]
pub struct AttentionEnergyAnalysis {
    /// All weighted residuals, one per analyzed edge
    pub weighted_residuals: Vec<WeightedEdgeResidual>,
    /// Smoothed energy; `Some` only when diffusion is enabled in the config
    pub smoothed_energy: Option<SmoothedEnergy>,
    /// Total weighted energy (sum over all edges)
    pub total_energy: f32,
    /// Average attention weight across edges
    pub avg_attention_weight: f32,
    /// Topology gate result at analysis time
    pub gate_result: TopologyGateResult,
    /// Number of edges analyzed
    pub num_edges: usize,
}
impl AttentionEnergyAnalysis {
    /// Check if coherent: total weighted energy strictly below `threshold`.
    pub fn is_coherent(&self, threshold: f32) -> bool {
        self.total_energy < threshold
    }

    /// Get the edge with the highest weighted energy, or `None` when there
    /// are no edges.
    pub fn highest_energy_edge(&self) -> Option<&WeightedEdgeResidual> {
        self.weighted_residuals.iter().max_by(|a, b| {
            // Bug fix: a NaN energy previously panicked via `unwrap()`;
            // treat incomparable values as equal, matching the comparator
            // used by the sparse-attention sort.
            a.weighted_energy
                .partial_cmp(&b.weighted_energy)
                .unwrap_or(std::cmp::Ordering::Equal)
        })
    }

    /// Get all edges whose weighted energy exceeds `threshold`.
    pub fn edges_above_threshold(&self, threshold: f32) -> Vec<&WeightedEdgeResidual> {
        self.weighted_residuals
            .iter()
            .filter(|r| r.weighted_energy > threshold)
            .collect()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Construct `n` constant vectors of width `dim` with increasing values.
    fn make_states(n: usize, dim: usize) -> Vec<Vec<f32>> {
        (0..n).map(|i| vec![0.1 * (i + 1) as f32; dim]).collect()
    }

    #[test]
    fn test_basic_coherence() {
        let mut engine = AttentionCoherence::new(AttentionCoherenceConfig {
            dimension: 16,
            ..Default::default()
        });
        let states = make_states(5, 16);
        let refs: Vec<&[f32]> = states.iter().map(Vec::as_slice).collect();
        let scores = engine.compute_attention_scores(&refs).unwrap();
        assert_eq!(scores.len(), 5);
        // All scores must be normalized into [0, 1].
        assert!(scores.values().all(|&s| s >= 0.0 && s <= 1.0));
    }

    #[test]
    fn test_weighted_residuals() {
        let mut engine = AttentionCoherence::new(AttentionCoherenceConfig {
            dimension: 8,
            ..Default::default()
        });
        let states = make_states(4, 8);
        let refs: Vec<&[f32]> = states.iter().map(Vec::as_slice).collect();
        // Chain of three edges with growing residual magnitudes.
        let residuals: Vec<(usize, usize, Vec<f32>)> = (0..3)
            .map(|i| (i, i + 1, vec![0.1 * (i + 1) as f32; 8]))
            .collect();
        let weighted = engine.weighted_residuals(&refs, &residuals).unwrap();
        assert_eq!(weighted.len(), 3);
        for w in &weighted {
            assert!(w.weighted_energy >= 0.0);
            assert!(w.attention_weight > 0.0);
        }
    }

    #[test]
    fn test_full_analysis() {
        let mut engine = AttentionCoherence::new(AttentionCoherenceConfig {
            dimension: 8,
            enable_diffusion: false,
            ..Default::default()
        });
        let states = make_states(3, 8);
        let refs: Vec<&[f32]> = states.iter().map(Vec::as_slice).collect();
        let residuals = vec![(0, 1, vec![0.1f32; 8]), (1, 2, vec![0.2f32; 8])];
        let analysis = engine.full_analysis(&refs, &residuals).unwrap();
        assert_eq!(analysis.num_edges, 2);
        assert!(analysis.total_energy >= 0.0);
        assert!(analysis.avg_attention_weight > 0.0);
    }
}

View File

@@ -0,0 +1,360 @@
//! Mixture of Experts Residual Processing
//!
//! Specialized expert routing for different residual characteristics.
use super::{AttentionCoherenceConfig, AttentionError, MoEProcessedResidual, Result};
/// Expert routing decision
#[derive(Debug, Clone)]
pub struct ExpertRouting {
    /// Selected expert indices
    pub expert_indices: Vec<usize>,
    /// Weights for each selected expert (parallel to `expert_indices`)
    pub weights: Vec<f32>,
    /// Router logits (before top-k selection)
    pub router_logits: Vec<f32>,
}
impl ExpertRouting {
    /// Whether expert `idx` was among the selected experts.
    pub fn contains_expert(&self, idx: usize) -> bool {
        self.expert_indices.iter().any(|&i| i == idx)
    }

    /// Routing weight assigned to expert `idx`, or 0.0 when not selected.
    pub fn weight_for(&self, idx: usize) -> f32 {
        for (pos, &expert) in self.expert_indices.iter().enumerate() {
            if expert == idx {
                return self.weights[pos];
            }
        }
        0.0
    }
}
/// Mixture of Experts residual processor.
///
/// Routes residuals to specialized experts based on their characteristics.
/// Each expert specializes in a different type of residual (see
/// `ExpertSpecialization`).
#[derive(Debug)]
pub struct MoEResidualProcessor {
    /// Configuration (reads `num_experts` and `dimension` at construction)
    config: AttentionCoherenceConfig,
    /// Expert parameters (one set of weights per expert)
    experts: Vec<ExpertParams>,
    /// Router parameters used to score experts for a given input
    router: RouterParams,
}
/// Parameters for a single expert
#[derive(Debug, Clone)]
struct ExpertParams {
    /// Linear transformation weights (dim x dim); initialized
    /// identity-like in `MoEResidualProcessor::new`
    weights: Vec<Vec<f32>>,
    /// Bias vector (initialized to zeros)
    bias: Vec<f32>,
    /// Expert specialization (for interpretability)
    specialization: ExpertSpecialization,
}
/// Type of expert specialization
///
/// Assigned cyclically at construction time; currently a descriptive label
/// only — no code path in this module branches on it.
#[derive(Debug, Clone, Copy)]
enum ExpertSpecialization {
    /// High-magnitude residuals
    HighMagnitude,
    /// Low-magnitude residuals
    LowMagnitude,
    /// Sparse residuals
    Sparse,
    /// Dense residuals
    Dense,
}
/// Router parameters
#[derive(Debug, Clone)]
struct RouterParams {
    /// Router weights (num_experts x dim); logit i = weights[i] · input
    weights: Vec<Vec<f32>>,
    /// Noise scale for exploration
    /// NOTE(review): initialized to 0.0 and never read by `route` — appears
    /// reserved for future jittered routing; confirm before removing.
    jitter_noise: f32,
}
impl MoEResidualProcessor {
/// Create a new MoE processor
pub fn new(config: AttentionCoherenceConfig) -> Self {
let num_experts = config.num_experts;
let dim = config.dimension;
// Initialize experts with different specializations
let specializations = [
ExpertSpecialization::HighMagnitude,
ExpertSpecialization::LowMagnitude,
ExpertSpecialization::Sparse,
ExpertSpecialization::Dense,
];
let experts: Vec<ExpertParams> = (0..num_experts)
.map(|i| {
// Initialize with identity-like transformation
let weights: Vec<Vec<f32>> = (0..dim)
.map(|j| {
let mut row = vec![0.0f32; dim];
row[j] = 1.0 + 0.1 * (i as f32 - num_experts as f32 / 2.0);
row
})
.collect();
ExpertParams {
weights,
bias: vec![0.0; dim],
specialization: specializations[i % specializations.len()],
}
})
.collect();
// Initialize router
let router_weights: Vec<Vec<f32>> = (0..num_experts)
.map(|i| {
// Different experts respond to different features
let mut row = vec![0.1f32; dim];
// Make each expert sensitive to different dimensions
let start = (i * dim / num_experts).min(dim - 1);
let end = ((i + 1) * dim / num_experts).min(dim);
for j in start..end {
row[j] = 1.0;
}
row
})
.collect();
let router = RouterParams {
weights: router_weights,
jitter_noise: 0.0,
};
Self {
config,
experts,
router,
}
}
/// Process a residual through MoE
pub fn process(&self, residual: &[f32], context: &[f32]) -> Result<MoEProcessedResidual> {
// Validate dimensions
if residual.len() != self.config.dimension {
return Err(AttentionError::DimensionMismatch {
expected: self.config.dimension,
actual: residual.len(),
});
}
// Route to experts
let routing = self.route(residual, context);
// Process through selected experts
let mut output = vec![0.0f32; self.config.dimension];
for (&expert_idx, &weight) in routing.expert_indices.iter().zip(routing.weights.iter()) {
let expert_output = self.apply_expert(expert_idx, residual);
for (o, e) in output.iter_mut().zip(expert_output.iter()) {
*o += weight * e;
}
}
// Compute load balance loss
let load_balance_loss = self.compute_load_balance_loss(&routing);
Ok(MoEProcessedResidual {
output,
expert_indices: routing.expert_indices,
expert_weights: routing.weights,
load_balance_loss,
})
}
/// Route input to experts
pub fn route(&self, input: &[f32], _context: &[f32]) -> ExpertRouting {
// Compute router logits
let logits: Vec<f32> = self
.router
.weights
.iter()
.map(|w| self.dot_product(input, w))
.collect();
// Top-k selection
let k = self.config.moe_top_k.min(self.config.num_experts);
let mut indexed_logits: Vec<(usize, f32)> =
logits.iter().enumerate().map(|(i, &l)| (i, l)).collect();
indexed_logits.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
let top_k: Vec<(usize, f32)> = indexed_logits.into_iter().take(k).collect();
// Softmax over selected
let max_logit = top_k
.iter()
.map(|(_, l)| *l)
.fold(f32::NEG_INFINITY, f32::max);
let exp_sum: f32 = top_k.iter().map(|(_, l)| (l - max_logit).exp()).sum();
let expert_indices: Vec<usize> = top_k.iter().map(|(i, _)| *i).collect();
let weights: Vec<f32> = top_k
.iter()
.map(|(_, l)| (l - max_logit).exp() / exp_sum)
.collect();
ExpertRouting {
expert_indices,
weights,
router_logits: logits,
}
}
/// Apply a single expert
fn apply_expert(&self, expert_idx: usize, input: &[f32]) -> Vec<f32> {
let expert = &self.experts[expert_idx];
let dim = input.len();
let mut output = expert.bias.clone();
// Matrix-vector multiply
for (i, w_row) in expert.weights.iter().enumerate() {
if i < dim {
for (j, &x) in input.iter().enumerate() {
if j < w_row.len() {
output[i] += w_row[j] * x;
}
}
}
}
output
}
/// Compute load balance loss
fn compute_load_balance_loss(&self, routing: &ExpertRouting) -> f32 {
// Count how many times each expert is used
let mut usage = vec![0.0f32; self.config.num_experts];
for (&idx, &weight) in routing.expert_indices.iter().zip(routing.weights.iter()) {
usage[idx] += weight;
}
// Ideal uniform distribution
let ideal = 1.0 / self.config.num_experts as f32;
// L2 deviation from uniform
usage.iter().map(|&u| (u - ideal).powi(2)).sum::<f32>()
}
fn dot_product(&self, a: &[f32], b: &[f32]) -> f32 {
a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
}
/// Get expert statistics
pub fn expert_usage(&self, routings: &[ExpertRouting]) -> Vec<f32> {
let mut usage = vec![0.0f32; self.config.num_experts];
for routing in routings {
for (&idx, &weight) in routing.expert_indices.iter().zip(routing.weights.iter()) {
usage[idx] += weight;
}
}
// Normalize
let total: f32 = usage.iter().sum();
if total > 0.0 {
for u in usage.iter_mut() {
*u /= total;
}
}
usage
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Config with the given dimension, 4 experts, top-2 routing.
    fn make_config(dim: usize) -> AttentionCoherenceConfig {
        AttentionCoherenceConfig {
            dimension: dim,
            num_experts: 4,
            moe_top_k: 2,
            ..Default::default()
        }
    }

    #[test]
    fn test_moe_creation() {
        let moe = MoEResidualProcessor::new(make_config(16));
        assert_eq!(moe.experts.len(), 4);
    }

    #[test]
    fn test_routing() {
        let moe = MoEResidualProcessor::new(make_config(8));
        let routing = moe.route(&[0.5f32; 8], &[0.1f32; 8]);
        assert_eq!(routing.expert_indices.len(), 2);
        assert_eq!(routing.weights.len(), 2);
        // Softmax weights over the selected experts must sum to ~1.
        let total: f32 = routing.weights.iter().sum();
        assert!((total - 1.0).abs() < 0.01);
    }

    #[test]
    fn test_process() {
        let moe = MoEResidualProcessor::new(make_config(8));
        let processed = moe.process(&[0.1f32; 8], &[0.1f32; 8]).unwrap();
        assert_eq!(processed.output.len(), 8);
        assert_eq!(processed.expert_indices.len(), 2);
        assert!(processed.load_balance_loss >= 0.0);
    }

    #[test]
    fn test_expert_usage() {
        let moe = MoEResidualProcessor::new(make_config(8));
        let context = [0.1f32; 8];
        // Ten inputs of increasing magnitude.
        let routings: Vec<ExpertRouting> = (0..10)
            .map(|i| moe.route(&vec![0.1 * (i + 1) as f32; 8], &context))
            .collect();
        let usage = moe.expert_usage(&routings);
        assert_eq!(usage.len(), 4);
        // Normalized usage must sum to ~1.
        let total: f32 = usage.iter().sum();
        assert!((total - 1.0).abs() < 0.01);
    }
}

// ===== vendored-file boundary (original diff header: new file, @@ -0,0 +1,381 @@) =====
//! Topology-Gated Attention
//!
//! Uses topological coherence as a permission signal for attention behavior.
use super::config::AttentionMode;
use super::{AttentionCoherenceConfig, AttentionError, Result};
/// Score from attention computation
#[derive(Debug, Clone)]
pub struct AttentionScore {
    /// Index of the node this score belongs to
    pub node_idx: usize,
    /// Attention score value
    pub score: f32,
    /// This node's contribution to the overall coherence measure
    pub coherence_contribution: f32,
}
/// Result of topology gate evaluation
///
/// Snapshot of the gate's state: the coherence score, the mode derived from
/// it, and the attention behavior (width, update permission) they imply.
#[derive(Debug, Clone)]
pub struct TopologyGateResult {
    /// Current coherence score in [0, 1]
    pub coherence: f32,
    /// Current mode derived from the coherence score
    pub mode: AttentionMode,
    /// Effective attention width under the current mode
    pub width: usize,
    /// Whether state updates are allowed in the current mode
    pub allows_updates: bool,
    /// Ticks since last coherence update
    pub ticks_since_update: usize,
}
impl TopologyGateResult {
/// Create a default result (stable mode)
pub fn stable(config: &AttentionCoherenceConfig) -> Self {
Self {
coherence: 1.0,
mode: AttentionMode::Stable,
width: config.base_width,
allows_updates: true,
ticks_since_update: 0,
}
}
}
/// Topology-gated attention controller
///
/// Uses structural coherence to control attention behavior:
/// - Stable mode: full attention, normal updates
/// - Cautious mode: reduced width, increased sparsity
/// - Freeze mode: retrieval only, no updates
#[derive(Debug)]
pub struct TopologyGate {
    /// Configuration
    config: AttentionCoherenceConfig,
    /// Current coherence score in [0, 1] (starts at 1.0)
    coherence: f32,
    /// Current mode, derived from `coherence` via `AttentionMode::from_coherence`
    mode: AttentionMode,
    /// Ticks since last coherence update (drives `needs_update`)
    ticks_since_update: usize,
    /// Metrics from the most recent `update_coherence` call, if any
    cached_metrics: Option<CoherenceMetrics>,
}
impl TopologyGate {
    /// Create a new topology gate, starting optimistic: full coherence and
    /// stable mode until the first measurement.
    pub fn new(config: AttentionCoherenceConfig) -> Self {
        Self {
            coherence: 1.0,
            mode: AttentionMode::Stable,
            ticks_since_update: 0,
            cached_metrics: None,
            config,
        }
    }

    /// Recompute coherence from the given key states, refresh the mode, and
    /// reset the tick counter. No-op when `keys` is empty.
    pub fn update_coherence(&mut self, keys: &[&[f32]]) {
        if keys.is_empty() {
            return;
        }
        let metrics = self.compute_coherence_metrics(keys);
        self.coherence = metrics.coherence_score;
        self.mode = AttentionMode::from_coherence(self.coherence, &self.config);
        self.ticks_since_update = 0;
        self.cached_metrics = Some(metrics);
    }

    /// Advance the coherence-staleness counter by one tick.
    pub fn tick(&mut self) {
        self.ticks_since_update += 1;
    }

    /// True when metrics were never computed or the update period elapsed.
    pub fn needs_update(&self) -> bool {
        self.cached_metrics.is_none()
            || self.ticks_since_update >= self.config.coherence_update_period
    }

    /// Current attention mode.
    pub fn current_mode(&self) -> AttentionMode {
        self.mode
    }

    /// Current coherence score.
    pub fn current_coherence(&self) -> f32 {
        self.coherence
    }

    /// Whether the current mode permits state updates.
    pub fn allows_updates(&self) -> bool {
        self.mode.allows_updates()
    }

    /// Effective attention width for the current coherence level.
    pub fn attention_width(&self) -> usize {
        self.config.width_for_coherence(self.coherence)
    }

    /// Snapshot of the gate's current state.
    pub fn current_result(&self) -> TopologyGateResult {
        TopologyGateResult {
            coherence: self.coherence,
            mode: self.mode,
            width: self.attention_width(),
            allows_updates: self.allows_updates(),
            ticks_since_update: self.ticks_since_update,
        }
    }

    /// Derive coherence metrics from pairwise cosine similarities of `keys`.
    ///
    /// The final score blends mean similarity (50%), low similarity spread
    /// (30%), and low boundary mass (20%), clamped to [0, 1].
    fn compute_coherence_metrics(&self, keys: &[&[f32]]) -> CoherenceMetrics {
        if keys.is_empty() {
            return CoherenceMetrics::empty();
        }
        let n = keys.len();
        let k = self.config.k_neighbors.min(n - 1);
        if k == 0 {
            // A single key (or k_neighbors == 0) is trivially coherent.
            return CoherenceMetrics::with_score(1.0);
        }

        // Full pairwise cosine similarity matrix, diagonal fixed at 1.
        let sims: Vec<Vec<f32>> = (0..n)
            .map(|i| {
                (0..n)
                    .map(|j| {
                        if i == j {
                            1.0
                        } else {
                            self.cosine_similarity(keys[i], keys[j])
                        }
                    })
                    .collect()
            })
            .collect();

        // Boundary mass: positive similarity carried by edges that fall
        // outside each node's k nearest neighbours.
        let mut total_boundary_mass = 0.0f32;
        let mut total_edges = 0usize;
        for i in 0..n {
            let mut ranked: Vec<(usize, f32)> = sims[i]
                .iter()
                .enumerate()
                .filter(|&(j, _)| j != i)
                .map(|(j, &s)| (j, s))
                .collect();
            ranked.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
            let neighbours: Vec<usize> = ranked.iter().take(k).map(|&(j, _)| j).collect();
            for j in 0..n {
                if j != i && !neighbours.contains(&j) {
                    total_boundary_mass += sims[i][j].max(0.0);
                    total_edges += 1;
                }
            }
        }

        // Mean and variance over the strict upper triangle (each pair once).
        let upper: Vec<f32> = sims
            .iter()
            .enumerate()
            .flat_map(|(i, row)| {
                row.iter()
                    .enumerate()
                    .filter(move |&(j, _)| j > i)
                    .map(|(_, &s)| s)
            })
            .collect();
        let mean_sim = upper.iter().sum::<f32>() / upper.len().max(1) as f32;
        let variance = upper.iter().map(|s| (s - mean_sim).powi(2)).sum::<f32>()
            / upper.len().max(1) as f32;

        let boundary_ratio = if total_edges > 0 {
            total_boundary_mass / total_edges as f32
        } else {
            0.0
        };

        // High mean similarity, low spread, and low boundary mass all push
        // the score towards 1.
        let score = (mean_sim * 0.5
            + (1.0 - variance.sqrt()) * 0.3
            + (1.0 - boundary_ratio) * 0.2)
            .clamp(0.0, 1.0);

        CoherenceMetrics {
            coherence_score: score,
            mean_similarity: mean_sim,
            similarity_variance: variance,
            boundary_mass: total_boundary_mass,
            num_nodes: n,
        }
    }

    /// Cosine similarity clamped to [-1, 1]; 0 when either vector is ~zero.
    fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
        let dot: f32 = a.iter().zip(b.iter()).map(|(&x, &y)| x * y).sum();
        let norm_a = a.iter().map(|&x| x * x).sum::<f32>().sqrt();
        let norm_b = b.iter().map(|&y| y * y).sum::<f32>().sqrt();
        if norm_a < 1e-10 || norm_b < 1e-10 {
            0.0
        } else {
            (dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
        }
    }
}
/// Coherence metrics computed from key states
#[derive(Debug, Clone)]
struct CoherenceMetrics {
    /// Overall coherence score in [0, 1] (weighted blend of the fields below)
    coherence_score: f32,
    /// Mean pairwise similarity (over the upper triangle of the matrix)
    mean_similarity: f32,
    /// Variance of pairwise similarities
    similarity_variance: f32,
    /// Total boundary mass (positive similarity on edges to non-neighbors)
    boundary_mass: f32,
    /// Number of nodes (keys) the metrics were computed from
    num_nodes: usize,
}
impl CoherenceMetrics {
fn empty() -> Self {
Self {
coherence_score: 1.0,
mean_similarity: 1.0,
similarity_variance: 0.0,
boundary_mass: 0.0,
num_nodes: 0,
}
}
fn with_score(score: f32) -> Self {
Self {
coherence_score: score,
mean_similarity: score,
similarity_variance: 0.0,
boundary_mass: 0.0,
num_nodes: 1,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Borrow a set of owned key vectors as slices.
    fn as_refs(keys: &[Vec<f32>]) -> Vec<&[f32]> {
        keys.iter().map(|k| k.as_slice()).collect()
    }

    #[test]
    fn test_topology_gate_creation() {
        let gate = TopologyGate::new(AttentionCoherenceConfig::default());
        assert_eq!(gate.current_mode(), AttentionMode::Stable);
        assert!(gate.allows_updates());
    }

    #[test]
    fn test_update_coherence_similar_keys() {
        let mut gate = TopologyGate::new(AttentionCoherenceConfig::default());
        // Identical keys => high coherence, gate stays stable.
        let keys = vec![vec![1.0, 0.0, 0.0, 0.0]; 10];
        gate.update_coherence(&as_refs(&keys));
        assert!(gate.current_coherence() > 0.5);
        assert_eq!(gate.current_mode(), AttentionMode::Stable);
    }

    #[test]
    fn test_update_coherence_diverse_keys() {
        let config = AttentionCoherenceConfig {
            stable_threshold: 0.9,
            freeze_threshold: 0.5,
            ..Default::default()
        };
        let mut gate = TopologyGate::new(config);
        // One-hot keys pointing in different directions => low coherence.
        let keys: Vec<Vec<f32>> = (0..10)
            .map(|i| {
                let mut one_hot = vec![0.0f32; 16];
                one_hot[i % 16] = 1.0;
                one_hot
            })
            .collect();
        gate.update_coherence(&as_refs(&keys));
        // Diversity should push the gate out of stable mode.
        assert!(matches!(
            gate.current_mode(),
            AttentionMode::Cautious | AttentionMode::Freeze
        ));
    }

    #[test]
    fn test_tick_and_update_period() {
        let config = AttentionCoherenceConfig {
            coherence_update_period: 4,
            ..Default::default()
        };
        let mut gate = TopologyGate::new(config);
        // A fresh gate has no cached metrics, so an update is required.
        assert!(gate.needs_update());
        let keys = vec![vec![1.0; 8]; 5];
        gate.update_coherence(&as_refs(&keys));
        assert!(!gate.needs_update());
        // After a full period of ticks the gate asks for a refresh again.
        (0..4).for_each(|_| gate.tick());
        assert!(gate.needs_update());
    }

    #[test]
    fn test_attention_width() {
        let config = AttentionCoherenceConfig {
            base_width: 64,
            stable_threshold: 0.7,
            freeze_threshold: 0.3,
            ..Default::default()
        };
        let mut gate = TopologyGate::new(config);
        // (coherence, expected width) across the three regimes.
        for (coherence, expected) in [(0.8f32, 64usize), (0.5, 32), (0.2, 1)] {
            gate.coherence = coherence;
            gate.mode = AttentionMode::from_coherence(coherence, &gate.config);
            assert_eq!(gate.attention_width(), expected);
        }
    }
}