Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
280
vendor/ruvector/crates/prime-radiant/src/attention/adapter.rs
vendored
Normal file
280
vendor/ruvector/crates/prime-radiant/src/attention/adapter.rs
vendored
Normal file
@@ -0,0 +1,280 @@
|
||||
//! Adapter to ruvector-attention
|
||||
//!
|
||||
//! Wraps attention mechanisms for coherence computation.
|
||||
|
||||
use super::{AttentionCoherenceConfig, AttentionError, Result};
|
||||
|
||||
/// Adapter wrapping ruvector-attention functionality.
///
/// Stateless apart from the stored configuration: every method reads
/// only `self.config` (e.g. `temperature`) and its arguments.
#[derive(Debug)]
pub struct AttentionAdapter {
    /// Configuration (temperature is the only field read by this adapter)
    config: AttentionCoherenceConfig,
}
|
||||
|
||||
impl AttentionAdapter {
|
||||
/// Create a new adapter
|
||||
pub fn new(config: AttentionCoherenceConfig) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
|
||||
/// Compute attention scores for node states
|
||||
///
|
||||
/// Returns a vector of attention scores (one per node).
|
||||
pub fn compute_scores(&self, node_states: &[&[f32]]) -> Result<Vec<f32>> {
|
||||
if node_states.is_empty() {
|
||||
return Err(AttentionError::EmptyInput("node_states".to_string()));
|
||||
}
|
||||
|
||||
let n = node_states.len();
|
||||
|
||||
// Validate dimensions
|
||||
let dim = node_states[0].len();
|
||||
for (i, state) in node_states.iter().enumerate() {
|
||||
if state.len() != dim {
|
||||
return Err(AttentionError::DimensionMismatch {
|
||||
expected: dim,
|
||||
actual: state.len(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Compute pairwise similarities
|
||||
let mut similarity_matrix = vec![vec![0.0f32; n]; n];
|
||||
for i in 0..n {
|
||||
for j in 0..n {
|
||||
if i != j {
|
||||
similarity_matrix[i][j] =
|
||||
self.cosine_similarity(node_states[i], node_states[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute attention scores as normalized sum of similarities
|
||||
let mut scores = Vec::with_capacity(n);
|
||||
for i in 0..n {
|
||||
let sum: f32 = similarity_matrix[i].iter().sum();
|
||||
let avg = sum / (n - 1).max(1) as f32;
|
||||
// Normalize to [0, 1]
|
||||
let normalized = (avg + 1.0) / 2.0; // cosine is in [-1, 1]
|
||||
scores.push(normalized.clamp(0.0, 1.0));
|
||||
}
|
||||
|
||||
Ok(scores)
|
||||
}
|
||||
|
||||
/// Compute attention over query and keys
|
||||
pub fn compute_attention(
|
||||
&self,
|
||||
query: &[f32],
|
||||
keys: &[&[f32]],
|
||||
values: &[&[f32]],
|
||||
) -> Result<Vec<f32>> {
|
||||
if keys.is_empty() || values.is_empty() {
|
||||
return Err(AttentionError::EmptyInput("keys/values".to_string()));
|
||||
}
|
||||
|
||||
if keys.len() != values.len() {
|
||||
return Err(AttentionError::InvalidConfig(
|
||||
"keys and values must have same length".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let dim = query.len();
|
||||
|
||||
// Compute scaled dot-product attention
|
||||
let scale = 1.0 / (dim as f32).sqrt();
|
||||
|
||||
let logits: Vec<f32> = keys
|
||||
.iter()
|
||||
.map(|k| self.dot_product(query, k) * scale / self.config.temperature)
|
||||
.collect();
|
||||
|
||||
let weights = self.stable_softmax(&logits);
|
||||
|
||||
// Weighted sum of values
|
||||
self.weighted_sum(&weights, values)
|
||||
}
|
||||
|
||||
/// Compute sparse attention (top-k)
|
||||
pub fn compute_sparse_attention(
|
||||
&self,
|
||||
query: &[f32],
|
||||
keys: &[&[f32]],
|
||||
values: &[&[f32]],
|
||||
k: usize,
|
||||
) -> Result<Vec<f32>> {
|
||||
if keys.is_empty() || values.is_empty() {
|
||||
return Err(AttentionError::EmptyInput("keys/values".to_string()));
|
||||
}
|
||||
|
||||
let k = k.min(keys.len());
|
||||
let dim = query.len();
|
||||
let scale = 1.0 / (dim as f32).sqrt();
|
||||
|
||||
// Get top-k scores
|
||||
let mut scores: Vec<(usize, f32)> = keys
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, k)| (i, self.dot_product(query, k) * scale))
|
||||
.collect();
|
||||
|
||||
scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
let top_k: Vec<(usize, f32)> = scores.into_iter().take(k).collect();
|
||||
|
||||
// Compute attention over selected
|
||||
let logits: Vec<f32> = top_k
|
||||
.iter()
|
||||
.map(|(_, s)| s / self.config.temperature)
|
||||
.collect();
|
||||
|
||||
let weights = self.stable_softmax(&logits);
|
||||
|
||||
let selected_values: Vec<&[f32]> = top_k.iter().map(|(i, _)| values[*i]).collect();
|
||||
|
||||
self.weighted_sum(&weights, &selected_values)
|
||||
}
|
||||
|
||||
// === Helper methods ===
|
||||
|
||||
fn dot_product(&self, a: &[f32], b: &[f32]) -> f32 {
|
||||
let len = a.len().min(b.len());
|
||||
let mut sum = 0.0f32;
|
||||
|
||||
// Unrolled for performance
|
||||
let chunks = len / 4;
|
||||
let remainder = len % 4;
|
||||
|
||||
for i in 0..chunks {
|
||||
let base = i * 4;
|
||||
sum += a[base] * b[base];
|
||||
sum += a[base + 1] * b[base + 1];
|
||||
sum += a[base + 2] * b[base + 2];
|
||||
sum += a[base + 3] * b[base + 3];
|
||||
}
|
||||
|
||||
let base = chunks * 4;
|
||||
for i in 0..remainder {
|
||||
sum += a[base + i] * b[base + i];
|
||||
}
|
||||
|
||||
sum
|
||||
}
|
||||
|
||||
fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
|
||||
let dot = self.dot_product(a, b);
|
||||
let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
|
||||
if norm_a < 1e-10 || norm_b < 1e-10 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
(dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
|
||||
}
|
||||
|
||||
fn stable_softmax(&self, logits: &[f32]) -> Vec<f32> {
|
||||
if logits.is_empty() {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let max_logit = logits.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
|
||||
let exp_logits: Vec<f32> = logits.iter().map(|&l| (l - max_logit).exp()).collect();
|
||||
let sum: f32 = exp_logits.iter().sum();
|
||||
|
||||
if sum > 0.0 {
|
||||
exp_logits.iter().map(|&e| e / sum).collect()
|
||||
} else {
|
||||
// Fallback to uniform
|
||||
vec![1.0 / logits.len() as f32; logits.len()]
|
||||
}
|
||||
}
|
||||
|
||||
fn weighted_sum(&self, weights: &[f32], values: &[&[f32]]) -> Result<Vec<f32>> {
|
||||
if weights.is_empty() || values.is_empty() {
|
||||
return Err(AttentionError::EmptyInput("weights/values".to_string()));
|
||||
}
|
||||
|
||||
let dim = values[0].len();
|
||||
let mut output = vec![0.0f32; dim];
|
||||
|
||||
for (weight, value) in weights.iter().zip(values.iter()) {
|
||||
for (o, &v) in output.iter_mut().zip(value.iter()) {
|
||||
*o += weight * v;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Scores must exist for every node and live in [0, 1].
    #[test]
    fn test_compute_scores() {
        let config = AttentionCoherenceConfig::default();
        let adapter = AttentionAdapter::new(config);

        let states: Vec<Vec<f32>> = (0..5).map(|i| vec![0.1 * (i + 1) as f32; 16]).collect();
        let state_refs: Vec<&[f32]> = states.iter().map(|s| s.as_slice()).collect();

        let scores = adapter.compute_scores(&state_refs).unwrap();

        assert_eq!(scores.len(), 5);
        for score in &scores {
            assert!(*score >= 0.0 && *score <= 1.0);
        }
    }

    // Dense attention output has the value dimension (16), not the key count.
    #[test]
    fn test_compute_attention() {
        let config = AttentionCoherenceConfig::default();
        let adapter = AttentionAdapter::new(config);

        let query = vec![0.5f32; 16];
        let keys: Vec<Vec<f32>> = (0..10).map(|i| vec![0.1 * (i + 1) as f32; 16]).collect();
        let values: Vec<Vec<f32>> = (0..10).map(|i| vec![i as f32; 16]).collect();

        let key_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
        let value_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();

        let output = adapter
            .compute_attention(&query, &key_refs, &value_refs)
            .unwrap();

        assert_eq!(output.len(), 16);
    }

    // Top-k (5 of 20) attention still returns a full-dimension output.
    #[test]
    fn test_sparse_attention() {
        let config = AttentionCoherenceConfig::default();
        let adapter = AttentionAdapter::new(config);

        let query = vec![0.5f32; 16];
        let keys: Vec<Vec<f32>> = (0..20).map(|i| vec![0.1 * (i + 1) as f32; 16]).collect();
        let values: Vec<Vec<f32>> = (0..20).map(|i| vec![i as f32; 16]).collect();

        let key_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
        let value_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();

        let output = adapter
            .compute_sparse_attention(&query, &key_refs, &value_refs, 5)
            .unwrap();

        assert_eq!(output.len(), 16);
    }

    // Identical vectors -> +1, opposite vectors -> -1 (within tolerance).
    #[test]
    fn test_cosine_similarity() {
        let config = AttentionCoherenceConfig::default();
        let adapter = AttentionAdapter::new(config);

        let a = vec![1.0, 0.0, 0.0, 0.0];
        let b = vec![1.0, 0.0, 0.0, 0.0];
        let c = vec![-1.0, 0.0, 0.0, 0.0];

        assert!((adapter.cosine_similarity(&a, &b) - 1.0).abs() < 0.01);
        assert!((adapter.cosine_similarity(&a, &c) + 1.0).abs() < 0.01);
    }
}
|
||||
228
vendor/ruvector/crates/prime-radiant/src/attention/config.rs
vendored
Normal file
228
vendor/ruvector/crates/prime-radiant/src/attention/config.rs
vendored
Normal file
@@ -0,0 +1,228 @@
|
||||
//! Attention Coherence Configuration
|
||||
//!
|
||||
//! Configuration for attention-weighted residual computation.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Configuration for attention-weighted coherence.
///
/// Invariants enforced by `validate`: `dimension > 0`, `temperature > 0`,
/// `stable_threshold > freeze_threshold`, `num_experts > 0`,
/// `moe_top_k <= num_experts`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttentionCoherenceConfig {
    /// State vector dimension
    pub dimension: usize,

    /// Number of neighbors for coherence graph construction
    pub k_neighbors: usize,

    /// Temperature for attention softmax (must be positive; see `validate`)
    pub temperature: f32,

    /// Base attention width (full width used in stable mode)
    pub base_width: usize,

    // Topology gating configuration
    /// Coherence at or above this value selects stable mode
    pub stable_threshold: f32,
    /// Coherence below this value selects freeze mode
    pub freeze_threshold: f32,
    /// Coherence update period (ticks)
    pub coherence_update_period: usize,

    // MoE configuration
    /// Number of MoE experts
    pub num_experts: usize,
    /// Top-k experts to use (must not exceed `num_experts`)
    pub moe_top_k: usize,
    /// Expert capacity factor
    pub expert_capacity: f32,

    // Diffusion configuration
    /// Enable diffusion smoothing
    pub enable_diffusion: bool,
    /// Diffusion time parameter (total integration time)
    pub diffusion_time: f32,
    /// Number of diffusion steps (upper bound on steps performed)
    pub diffusion_steps: usize,
    /// Sigma for diffusion kernel
    pub diffusion_sigma: f32,
}
|
||||
|
||||
impl Default for AttentionCoherenceConfig {
    /// Medium-sized defaults; these values satisfy `validate`.
    fn default() -> Self {
        Self {
            dimension: 64,
            k_neighbors: 8,
            temperature: 1.0,
            base_width: 64,
            stable_threshold: 0.7,
            freeze_threshold: 0.3,
            coherence_update_period: 16,
            num_experts: 4,
            moe_top_k: 2,
            expert_capacity: 1.25,
            enable_diffusion: false,
            diffusion_time: 1.0,
            diffusion_steps: 5,
            diffusion_sigma: 1.0,
        }
    }
}
|
||||
|
||||
impl AttentionCoherenceConfig {
|
||||
/// Create configuration for small collections
|
||||
pub fn small() -> Self {
|
||||
Self {
|
||||
dimension: 32,
|
||||
k_neighbors: 4,
|
||||
base_width: 32,
|
||||
num_experts: 2,
|
||||
diffusion_steps: 3,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Create configuration for large collections
|
||||
pub fn large() -> Self {
|
||||
Self {
|
||||
dimension: 128,
|
||||
k_neighbors: 16,
|
||||
base_width: 128,
|
||||
num_experts: 8,
|
||||
moe_top_k: 3,
|
||||
diffusion_steps: 10,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Validate configuration
|
||||
pub fn validate(&self) -> Result<(), String> {
|
||||
if self.dimension == 0 {
|
||||
return Err("dimension must be positive".to_string());
|
||||
}
|
||||
if self.temperature <= 0.0 {
|
||||
return Err("temperature must be positive".to_string());
|
||||
}
|
||||
if self.stable_threshold <= self.freeze_threshold {
|
||||
return Err("stable_threshold must be greater than freeze_threshold".to_string());
|
||||
}
|
||||
if self.num_experts == 0 {
|
||||
return Err("num_experts must be positive".to_string());
|
||||
}
|
||||
if self.moe_top_k > self.num_experts {
|
||||
return Err("moe_top_k cannot exceed num_experts".to_string());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get width reduction factor for cautious mode
|
||||
pub fn cautious_width_factor(&self) -> f32 {
|
||||
0.5
|
||||
}
|
||||
|
||||
/// Get width for given coherence score
|
||||
pub fn width_for_coherence(&self, coherence: f32) -> usize {
|
||||
if coherence >= self.stable_threshold {
|
||||
self.base_width
|
||||
} else if coherence >= self.freeze_threshold {
|
||||
((self.base_width as f32) * self.cautious_width_factor()) as usize
|
||||
} else {
|
||||
1 // Freeze mode: single element
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Attention mode based on coherence state.
///
/// Selected by comparing a coherence score against the configured
/// `stable_threshold` / `freeze_threshold` (see `from_coherence`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AttentionMode {
    /// Full attention, normal updates (coherence >= stable_threshold)
    Stable,
    /// Reduced width, increased sparsity (between the two thresholds)
    Cautious,
    /// Retrieval only, no updates (coherence < freeze_threshold)
    Freeze,
}
|
||||
|
||||
impl AttentionMode {
|
||||
/// Determine mode from coherence score
|
||||
pub fn from_coherence(coherence: f32, config: &AttentionCoherenceConfig) -> Self {
|
||||
if coherence >= config.stable_threshold {
|
||||
Self::Stable
|
||||
} else if coherence >= config.freeze_threshold {
|
||||
Self::Cautious
|
||||
} else {
|
||||
Self::Freeze
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if updates are allowed
|
||||
pub fn allows_updates(&self) -> bool {
|
||||
matches!(self, Self::Stable | Self::Cautious)
|
||||
}
|
||||
|
||||
/// Get name
|
||||
pub fn name(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Stable => "stable",
|
||||
Self::Cautious => "cautious",
|
||||
Self::Freeze => "freeze",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for AttentionMode {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.name())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // The defaults must satisfy their own invariants.
    #[test]
    fn test_default_config() {
        let config = AttentionCoherenceConfig::default();
        assert!(config.validate().is_ok());
    }

    // Thresholds are 0.7 / 0.3 by default: 0.8 -> Stable, 0.5 -> Cautious,
    // 0.2 -> Freeze.
    #[test]
    fn test_mode_from_coherence() {
        let config = AttentionCoherenceConfig::default();

        assert_eq!(
            AttentionMode::from_coherence(0.8, &config),
            AttentionMode::Stable
        );
        assert_eq!(
            AttentionMode::from_coherence(0.5, &config),
            AttentionMode::Cautious
        );
        assert_eq!(
            AttentionMode::from_coherence(0.2, &config),
            AttentionMode::Freeze
        );
    }

    // Full width when stable, halved when cautious, 1 when frozen.
    #[test]
    fn test_width_for_coherence() {
        let config = AttentionCoherenceConfig {
            base_width: 64,
            stable_threshold: 0.7,
            freeze_threshold: 0.3,
            ..Default::default()
        };

        assert_eq!(config.width_for_coherence(0.8), 64);
        assert_eq!(config.width_for_coherence(0.5), 32);
        assert_eq!(config.width_for_coherence(0.2), 1);
    }

    // Inverted thresholds must be rejected by validate().
    #[test]
    fn test_invalid_config() {
        let config = AttentionCoherenceConfig {
            stable_threshold: 0.3,
            freeze_threshold: 0.7, // Invalid: freeze > stable
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }
}
|
||||
336
vendor/ruvector/crates/prime-radiant/src/attention/diffusion.rs
vendored
Normal file
336
vendor/ruvector/crates/prime-radiant/src/attention/diffusion.rs
vendored
Normal file
@@ -0,0 +1,336 @@
|
||||
//! PDE Diffusion-Based Energy Smoothing
|
||||
//!
|
||||
//! Applies heat diffusion to smooth energy across the coherence graph.
|
||||
|
||||
use super::{AttentionCoherenceConfig, AttentionError, Result};
|
||||
|
||||
/// Result of diffusion smoothing.
///
/// Produced by `DiffusionSmoothing::smooth`; captures per-node and
/// per-edge energies after smoothing plus the totals before/after so
/// callers can judge how much energy was redistributed.
#[derive(Debug, Clone)]
pub struct SmoothedEnergy {
    /// Node energies after smoothing
    pub node_energies: Vec<f32>,
    /// Edge energies after smoothing, as (source, target, energy)
    pub edge_energies: Vec<(usize, usize, f32)>,
    /// Total energy before smoothing
    pub initial_total: f32,
    /// Total energy after smoothing
    pub final_total: f32,
    /// Number of diffusion steps applied
    pub steps_applied: usize,
    /// Convergence achieved (change or total-energy drift below tolerance)
    pub converged: bool,
}
|
||||
|
||||
impl SmoothedEnergy {
|
||||
/// Get energy ratio (final/initial)
|
||||
pub fn energy_ratio(&self) -> f32 {
|
||||
if self.initial_total > 0.0 {
|
||||
self.final_total / self.initial_total
|
||||
} else {
|
||||
1.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if energy was reduced
|
||||
pub fn energy_reduced(&self) -> bool {
|
||||
self.final_total < self.initial_total
|
||||
}
|
||||
|
||||
/// Get smoothing factor
|
||||
pub fn smoothing_factor(&self) -> f32 {
|
||||
1.0 - self.energy_ratio()
|
||||
}
|
||||
}
|
||||
|
||||
/// PDE diffusion smoother for energy propagation
///
/// Uses heat diffusion equation to smooth energy across the graph,
/// reducing sharp energy gradients while preserving total energy.
#[derive(Debug)]
pub struct DiffusionSmoothing {
    /// Configuration (reads diffusion_time, diffusion_steps, diffusion_sigma)
    config: AttentionCoherenceConfig,
}
|
||||
|
||||
impl DiffusionSmoothing {
|
||||
/// Create a new diffusion smoother
|
||||
pub fn new(config: AttentionCoherenceConfig) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
|
||||
/// Apply diffusion smoothing to edge energies
|
||||
///
|
||||
/// Uses the graph Laplacian to diffuse energy from high-energy
|
||||
/// regions to low-energy regions.
|
||||
pub fn smooth(
|
||||
&self,
|
||||
edge_energies: &[(usize, usize, f32)],
|
||||
node_states: &[&[f32]],
|
||||
steps: usize,
|
||||
) -> Result<SmoothedEnergy> {
|
||||
if edge_energies.is_empty() {
|
||||
return Ok(SmoothedEnergy {
|
||||
node_energies: vec![],
|
||||
edge_energies: vec![],
|
||||
initial_total: 0.0,
|
||||
final_total: 0.0,
|
||||
steps_applied: 0,
|
||||
converged: true,
|
||||
});
|
||||
}
|
||||
|
||||
let n = node_states.len();
|
||||
if n == 0 {
|
||||
return Err(AttentionError::EmptyInput("node_states".to_string()));
|
||||
}
|
||||
|
||||
// Build adjacency and compute initial node energies
|
||||
let (adjacency, mut node_energies) = self.build_graph(edge_energies, n);
|
||||
|
||||
let initial_total: f32 = node_energies.iter().sum();
|
||||
|
||||
// Build Laplacian-like diffusion kernel
|
||||
let kernel = self.build_diffusion_kernel(&adjacency, node_states, n);
|
||||
|
||||
// Apply diffusion steps
|
||||
let actual_steps = steps.min(self.config.diffusion_steps);
|
||||
let dt = self.config.diffusion_time / actual_steps.max(1) as f32;
|
||||
|
||||
let mut converged = false;
|
||||
for step in 0..actual_steps {
|
||||
let prev_energies = node_energies.clone();
|
||||
|
||||
// Diffusion step: e_new = e_old + dt * L * e_old
|
||||
node_energies = self.diffusion_step(&node_energies, &kernel, dt);
|
||||
|
||||
// Check convergence
|
||||
let change: f32 = node_energies
|
||||
.iter()
|
||||
.zip(prev_energies.iter())
|
||||
.map(|(a, b)| (a - b).abs())
|
||||
.sum();
|
||||
|
||||
if change < 1e-6 {
|
||||
converged = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// Early termination if energy is stable
|
||||
if step > 2 {
|
||||
let current_total: f32 = node_energies.iter().sum();
|
||||
if (current_total - initial_total).abs() / initial_total.max(1e-10) < 1e-4 {
|
||||
converged = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reconstruct edge energies from smoothed node energies
|
||||
let smoothed_edges = self.reconstruct_edge_energies(edge_energies, &node_energies);
|
||||
|
||||
let final_total: f32 = node_energies.iter().sum();
|
||||
|
||||
Ok(SmoothedEnergy {
|
||||
node_energies,
|
||||
edge_energies: smoothed_edges,
|
||||
initial_total,
|
||||
final_total,
|
||||
steps_applied: actual_steps,
|
||||
converged,
|
||||
})
|
||||
}
|
||||
|
||||
/// Build graph from edge energies
|
||||
fn build_graph(
|
||||
&self,
|
||||
edge_energies: &[(usize, usize, f32)],
|
||||
n: usize,
|
||||
) -> (Vec<Vec<(usize, f32)>>, Vec<f32>) {
|
||||
let mut adjacency: Vec<Vec<(usize, f32)>> = vec![vec![]; n];
|
||||
let mut node_energies = vec![0.0f32; n];
|
||||
|
||||
for &(src, dst, energy) in edge_energies {
|
||||
if src < n && dst < n {
|
||||
adjacency[src].push((dst, energy));
|
||||
adjacency[dst].push((src, energy));
|
||||
|
||||
// Distribute edge energy to nodes
|
||||
node_energies[src] += energy / 2.0;
|
||||
node_energies[dst] += energy / 2.0;
|
||||
}
|
||||
}
|
||||
|
||||
(adjacency, node_energies)
|
||||
}
|
||||
|
||||
/// Build diffusion kernel based on graph structure
|
||||
fn build_diffusion_kernel(
|
||||
&self,
|
||||
adjacency: &[Vec<(usize, f32)>],
|
||||
node_states: &[&[f32]],
|
||||
n: usize,
|
||||
) -> Vec<Vec<f32>> {
|
||||
let sigma_sq = self.config.diffusion_sigma * self.config.diffusion_sigma;
|
||||
|
||||
let mut kernel = vec![vec![0.0f32; n]; n];
|
||||
|
||||
for i in 0..n {
|
||||
let degree = adjacency[i].len() as f32;
|
||||
|
||||
for &(j, _edge_weight) in &adjacency[i] {
|
||||
// Compute similarity-based weight
|
||||
let sim = self.cosine_similarity(node_states[i], node_states[j]);
|
||||
let weight = (sim / sigma_sq).exp();
|
||||
|
||||
kernel[i][j] = weight;
|
||||
}
|
||||
|
||||
// Diagonal: negative sum of off-diagonals (Laplacian property)
|
||||
let row_sum: f32 = kernel[i].iter().sum();
|
||||
kernel[i][i] = -row_sum;
|
||||
|
||||
// Normalize by degree for stability
|
||||
if degree > 0.0 {
|
||||
for k in 0..n {
|
||||
kernel[i][k] /= degree;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kernel
|
||||
}
|
||||
|
||||
/// Perform one diffusion step
|
||||
fn diffusion_step(&self, energies: &[f32], kernel: &[Vec<f32>], dt: f32) -> Vec<f32> {
|
||||
let n = energies.len();
|
||||
let mut new_energies = vec![0.0f32; n];
|
||||
|
||||
for i in 0..n {
|
||||
// e_new[i] = e[i] + dt * sum_j(K[i][j] * e[j])
|
||||
let diffusion: f32 = kernel[i]
|
||||
.iter()
|
||||
.zip(energies.iter())
|
||||
.map(|(&k, &e)| k * e)
|
||||
.sum();
|
||||
|
||||
new_energies[i] = (energies[i] + dt * diffusion).max(0.0);
|
||||
}
|
||||
|
||||
new_energies
|
||||
}
|
||||
|
||||
/// Reconstruct edge energies from smoothed node energies
|
||||
fn reconstruct_edge_energies(
|
||||
&self,
|
||||
original_edges: &[(usize, usize, f32)],
|
||||
node_energies: &[f32],
|
||||
) -> Vec<(usize, usize, f32)> {
|
||||
original_edges
|
||||
.iter()
|
||||
.map(|&(src, dst, original)| {
|
||||
let src_energy = node_energies.get(src).copied().unwrap_or(0.0);
|
||||
let dst_energy = node_energies.get(dst).copied().unwrap_or(0.0);
|
||||
|
||||
// New edge energy is average of endpoint node energies
|
||||
// scaled by original proportion
|
||||
let avg_node_energy = (src_energy + dst_energy) / 2.0;
|
||||
|
||||
// Blend original and smoothed
|
||||
let alpha = 0.5; // Smoothing blend factor
|
||||
let smoothed = alpha * avg_node_energy + (1.0 - alpha) * original;
|
||||
|
||||
(src, dst, smoothed.max(0.0))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
|
||||
let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
|
||||
let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
|
||||
if norm_a < 1e-10 || norm_b < 1e-10 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
(dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // With no edges, smoothing is a no-op that reports convergence.
    #[test]
    fn test_empty_input() {
        let config = AttentionCoherenceConfig::default();
        let smoother = DiffusionSmoothing::new(config);

        let result = smoother.smooth(&[], &[], 5).unwrap();
        assert!(result.converged);
        assert_eq!(result.initial_total, 0.0);
    }

    // A 4-node path graph: one smoothed energy per input edge, and the
    // step count stays within the configured budget.
    #[test]
    fn test_basic_smoothing() {
        let config = AttentionCoherenceConfig {
            diffusion_time: 1.0,
            diffusion_steps: 10,
            diffusion_sigma: 1.0,
            ..Default::default()
        };
        let smoother = DiffusionSmoothing::new(config);

        let states: Vec<Vec<f32>> = (0..4).map(|i| vec![0.1 * (i + 1) as f32; 8]).collect();
        let state_refs: Vec<&[f32]> = states.iter().map(|s| s.as_slice()).collect();

        let edges = vec![(0, 1, 1.0), (1, 2, 2.0), (2, 3, 0.5)];

        let result = smoother.smooth(&edges, &state_refs, 5).unwrap();

        assert_eq!(result.edge_energies.len(), 3);
        assert!(result.steps_applied <= 10);
    }

    // Diffusion redistributes energy but should not wildly change the total.
    #[test]
    fn test_energy_conservation() {
        let config = AttentionCoherenceConfig {
            diffusion_time: 0.5,
            diffusion_steps: 5,
            diffusion_sigma: 1.0,
            ..Default::default()
        };
        let smoother = DiffusionSmoothing::new(config);

        let states: Vec<Vec<f32>> = (0..3).map(|_| vec![1.0; 4]).collect();
        let state_refs: Vec<&[f32]> = states.iter().map(|s| s.as_slice()).collect();

        let edges = vec![(0, 1, 1.0), (1, 2, 1.0)];

        let result = smoother.smooth(&edges, &state_refs, 3).unwrap();

        // Energy should be roughly conserved (within tolerance)
        let ratio = result.energy_ratio();
        assert!(
            ratio > 0.5 && ratio < 2.0,
            "Energy ratio {} out of expected range",
            ratio
        );
    }

    // Accessor arithmetic on a hand-built result: ratio 1.0/2.0 = 0.5.
    #[test]
    fn test_smoothed_energy_methods() {
        let smoothed = SmoothedEnergy {
            node_energies: vec![0.5, 0.5],
            edge_energies: vec![(0, 1, 0.8)],
            initial_total: 2.0,
            final_total: 1.0,
            steps_applied: 5,
            converged: true,
        };

        assert_eq!(smoothed.energy_ratio(), 0.5);
        assert!(smoothed.energy_reduced());
        assert_eq!(smoothed.smoothing_factor(), 0.5);
    }
}
|
||||
404
vendor/ruvector/crates/prime-radiant/src/attention/mod.rs
vendored
Normal file
404
vendor/ruvector/crates/prime-radiant/src/attention/mod.rs
vendored
Normal file
@@ -0,0 +1,404 @@
|
||||
//! Attention-Weighted Residuals Module
|
||||
//!
|
||||
//! Computes attention-weighted coherence using multiple mechanisms:
|
||||
//! - Topology-gated attention (structural coherence as permission signal)
|
||||
//! - Mixture of Experts (specialized residual processing)
|
||||
//! - PDE diffusion (smooth energy propagation)
|
||||
//!
|
||||
//! Leverages `ruvector-attention` for the underlying attention implementations.
|
||||
//!
|
||||
//! # Features
|
||||
//!
|
||||
//! - Three attention modes: Stable, Cautious, Freeze
|
||||
//! - MoE routing for specialized residual experts
|
||||
//! - Diffusion-based energy smoothing
|
||||
//! - Attention score computation for residual weighting
|
||||
|
||||
mod adapter;
|
||||
mod config;
|
||||
mod diffusion;
|
||||
mod moe;
|
||||
mod topology;
|
||||
|
||||
pub use adapter::AttentionAdapter;
|
||||
pub use config::AttentionCoherenceConfig;
|
||||
pub use diffusion::{DiffusionSmoothing, SmoothedEnergy};
|
||||
pub use moe::{ExpertRouting, MoEResidualProcessor};
|
||||
pub use topology::{AttentionScore, TopologyGate, TopologyGateResult};
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Node identifier type
|
||||
pub type NodeId = u64;
|
||||
|
||||
/// Edge identifier type
|
||||
pub type EdgeId = (NodeId, NodeId);
|
||||
|
||||
/// Result type for attention operations
|
||||
pub type Result<T> = std::result::Result<T, AttentionError>;
|
||||
|
||||
/// Errors in attention-weighted coherence computation.
///
/// Display text comes from the `thiserror` `#[error]` attributes.
#[derive(Debug, Clone, thiserror::Error)]
pub enum AttentionError {
    /// Invalid dimension (state vectors of differing lengths)
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch {
        /// Dimension the operation expected
        expected: usize,
        /// Dimension actually received
        actual: usize,
    },

    /// Empty input; the payload names the offending argument
    #[error("Empty input: {0}")]
    EmptyInput(String),

    /// Invalid configuration
    #[error("Invalid configuration: {0}")]
    InvalidConfig(String),

    /// Computation failed
    #[error("Computation failed: {0}")]
    ComputationFailed(String),

    /// Mode not supported
    #[error("Mode not supported in current state: {0}")]
    ModeNotSupported(String),
}
|
||||
|
||||
/// Main attention-weighted coherence engine
///
/// Combines topology-gated attention, MoE routing, and PDE diffusion
/// to compute attention-weighted residuals for coherence analysis.
///
/// Each component receives its own clone of the shared configuration
/// at construction time.
#[derive(Debug)]
pub struct AttentionCoherence {
    /// Configuration
    config: AttentionCoherenceConfig,
    /// Adapter to attention implementations
    adapter: AttentionAdapter,
    /// Topology gate (mutated when coherence is recomputed)
    topo_gate: TopologyGate,
    /// MoE residual processor
    moe: MoEResidualProcessor,
    /// Diffusion smoother
    diffusion: DiffusionSmoothing,
}
|
||||
|
||||
impl AttentionCoherence {
|
||||
/// Create a new attention coherence engine
|
||||
pub fn new(config: AttentionCoherenceConfig) -> Self {
|
||||
let adapter = AttentionAdapter::new(config.clone());
|
||||
let topo_gate = TopologyGate::new(config.clone());
|
||||
let moe = MoEResidualProcessor::new(config.clone());
|
||||
let diffusion = DiffusionSmoothing::new(config.clone());
|
||||
|
||||
Self {
|
||||
config,
|
||||
adapter,
|
||||
topo_gate,
|
||||
moe,
|
||||
diffusion,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create with default configuration
|
||||
pub fn default_config() -> Self {
|
||||
Self::new(AttentionCoherenceConfig::default())
|
||||
}
|
||||
|
||||
/// Compute attention scores for nodes
|
||||
///
|
||||
/// Returns attention scores indicating structural importance.
|
||||
pub fn compute_attention_scores(
|
||||
&mut self,
|
||||
node_states: &[&[f32]],
|
||||
) -> Result<HashMap<usize, f32>> {
|
||||
if node_states.is_empty() {
|
||||
return Err(AttentionError::EmptyInput("node_states".to_string()));
|
||||
}
|
||||
|
||||
// Update topology gate coherence
|
||||
self.topo_gate.update_coherence(node_states);
|
||||
|
||||
// Compute scores using adapter
|
||||
let scores = self.adapter.compute_scores(node_states)?;
|
||||
|
||||
// Convert to hashmap
|
||||
Ok(scores
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(i, s)| (i, s))
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Compute attention-weighted residuals
|
||||
///
|
||||
/// Weights each edge residual by the attention scores of its endpoints.
|
||||
pub fn weighted_residuals(
|
||||
&mut self,
|
||||
node_states: &[&[f32]],
|
||||
edge_residuals: &[(usize, usize, Vec<f32>)], // (source_idx, target_idx, residual)
|
||||
) -> Result<Vec<WeightedEdgeResidual>> {
|
||||
if node_states.is_empty() {
|
||||
return Err(AttentionError::EmptyInput("node_states".to_string()));
|
||||
}
|
||||
|
||||
// Compute attention scores
|
||||
let scores = self.compute_attention_scores(node_states)?;
|
||||
|
||||
// Weight residuals
|
||||
let mut weighted = Vec::with_capacity(edge_residuals.len());
|
||||
|
||||
for (source, target, residual) in edge_residuals {
|
||||
let source_score = scores.get(source).copied().unwrap_or(1.0);
|
||||
let target_score = scores.get(target).copied().unwrap_or(1.0);
|
||||
|
||||
// Average attention weight
|
||||
let attention_weight = (source_score + target_score) / 2.0;
|
||||
|
||||
// Residual norm squared
|
||||
let residual_norm_sq: f32 = residual.iter().map(|x| x * x).sum();
|
||||
|
||||
// Weighted energy
|
||||
let weighted_energy = residual_norm_sq * attention_weight;
|
||||
|
||||
weighted.push(WeightedEdgeResidual {
|
||||
source_idx: *source,
|
||||
target_idx: *target,
|
||||
source_attention: source_score,
|
||||
target_attention: target_score,
|
||||
attention_weight,
|
||||
residual_norm_sq,
|
||||
weighted_energy,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(weighted)
|
||||
}
|
||||
|
||||
/// Route residual through MoE experts
///
/// Uses specialized experts for different residual characteristics.
/// Thin delegation to the internal MoE processor's `process` method.
///
/// # Errors
/// Returns a dimension-mismatch error when `residual` does not match the
/// configured dimension (checked inside the processor).
pub fn moe_process_residual(
    &self,
    residual: &[f32],
    context: &[f32],
) -> Result<MoEProcessedResidual> {
    self.moe.process(residual, context)
}
|
||||
|
||||
/// Apply diffusion smoothing to energy values
///
/// Smooths energy across the graph using PDE diffusion.
/// Thin delegation to the internal diffusion smoother; `steps` controls
/// how many diffusion iterations are applied.
pub fn smooth_energy(
    &self,
    edge_energies: &[(usize, usize, f32)], // (source, target, energy)
    node_states: &[&[f32]],
    steps: usize,
) -> Result<SmoothedEnergy> {
    self.diffusion.smooth(edge_energies, node_states, steps)
}
|
||||
|
||||
/// Get current topology gate result
///
/// Snapshot of the gate's coherence, mode, width, and staleness.
/// Does not recompute coherence.
pub fn gate_result(&self) -> TopologyGateResult {
    self.topo_gate.current_result()
}
|
||||
|
||||
/// Check if updates are allowed (not in freeze mode)
///
/// Delegates to the topology gate's current mode.
pub fn allows_updates(&self) -> bool {
    self.topo_gate.allows_updates()
}
|
||||
|
||||
/// Get effective attention width based on current mode
///
/// The gate shrinks the width as coherence drops.
pub fn attention_width(&self) -> usize {
    self.topo_gate.attention_width()
}
|
||||
|
||||
/// Get configuration
///
/// Borrow of the coherence configuration this instance was built with.
pub fn config(&self) -> &AttentionCoherenceConfig {
    &self.config
}
|
||||
|
||||
/// Compute full attention-weighted energy analysis
|
||||
pub fn full_analysis(
|
||||
&mut self,
|
||||
node_states: &[&[f32]],
|
||||
edge_residuals: &[(usize, usize, Vec<f32>)],
|
||||
) -> Result<AttentionEnergyAnalysis> {
|
||||
// Get gate result
|
||||
let gate_result = self.topo_gate.current_result();
|
||||
|
||||
// Compute weighted residuals
|
||||
let weighted = self.weighted_residuals(node_states, edge_residuals)?;
|
||||
|
||||
// Compute energies
|
||||
let edge_energies: Vec<(usize, usize, f32)> = weighted
|
||||
.iter()
|
||||
.map(|w| (w.source_idx, w.target_idx, w.weighted_energy))
|
||||
.collect();
|
||||
|
||||
// Apply diffusion if enabled
|
||||
let smoothed = if self.config.enable_diffusion {
|
||||
Some(self.smooth_energy(&edge_energies, node_states, self.config.diffusion_steps)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Aggregate
|
||||
let total_energy: f32 = weighted.iter().map(|w| w.weighted_energy).sum();
|
||||
let avg_attention: f32 =
|
||||
weighted.iter().map(|w| w.attention_weight).sum::<f32>() / weighted.len().max(1) as f32;
|
||||
|
||||
Ok(AttentionEnergyAnalysis {
|
||||
weighted_residuals: weighted,
|
||||
smoothed_energy: smoothed,
|
||||
total_energy,
|
||||
avg_attention_weight: avg_attention,
|
||||
gate_result,
|
||||
num_edges: edge_residuals.len(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of weighting an edge residual by attention
///
/// Plain value type — every field is a `Copy` scalar — so it derives
/// `Copy` and `PartialEq` for cheap passing and direct comparison in tests.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct WeightedEdgeResidual {
    /// Source node index
    pub source_idx: usize,
    /// Target node index
    pub target_idx: usize,
    /// Attention score of source node
    pub source_attention: f32,
    /// Attention score of target node
    pub target_attention: f32,
    /// Combined attention weight (mean of the endpoint scores)
    pub attention_weight: f32,
    /// Squared norm of residual
    pub residual_norm_sq: f32,
    /// Final weighted energy (`residual_norm_sq * attention_weight`)
    pub weighted_energy: f32,
}
|
||||
|
||||
/// Result of processing a residual through MoE
///
/// Derives `PartialEq` so results can be compared directly in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct MoEProcessedResidual {
    /// Output from expert combination (weighted sum of expert outputs)
    pub output: Vec<f32>,
    /// Expert indices that were used
    pub expert_indices: Vec<usize>,
    /// Weights for each expert (parallel to `expert_indices`)
    pub expert_weights: Vec<f32>,
    /// Load balance loss (for training)
    pub load_balance_loss: f32,
}
|
||||
|
||||
/// Complete attention energy analysis
///
/// Produced by `full_analysis`: per-edge weighted residuals plus aggregate
/// statistics and the gate snapshot taken before the analysis refreshed
/// coherence.
#[derive(Debug, Clone)]
pub struct AttentionEnergyAnalysis {
    /// All weighted residuals
    pub weighted_residuals: Vec<WeightedEdgeResidual>,
    /// Smoothed energy (if diffusion enabled; `None` otherwise)
    pub smoothed_energy: Option<SmoothedEnergy>,
    /// Total weighted energy (sum over all edges)
    pub total_energy: f32,
    /// Average attention weight (0.0 when no edges were analyzed)
    pub avg_attention_weight: f32,
    /// Current gate result
    pub gate_result: TopologyGateResult,
    /// Number of edges analyzed
    pub num_edges: usize,
}
|
||||
|
||||
impl AttentionEnergyAnalysis {
|
||||
/// Check if coherent (energy below threshold)
|
||||
pub fn is_coherent(&self, threshold: f32) -> bool {
|
||||
self.total_energy < threshold
|
||||
}
|
||||
|
||||
/// Get highest energy edge
|
||||
pub fn highest_energy_edge(&self) -> Option<&WeightedEdgeResidual> {
|
||||
self.weighted_residuals
|
||||
.iter()
|
||||
.max_by(|a, b| a.weighted_energy.partial_cmp(&b.weighted_energy).unwrap())
|
||||
}
|
||||
|
||||
/// Get edges above threshold
|
||||
pub fn edges_above_threshold(&self, threshold: f32) -> Vec<&WeightedEdgeResidual> {
|
||||
self.weighted_residuals
|
||||
.iter()
|
||||
.filter(|r| r.weighted_energy > threshold)
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Build `n` states of width `dim`; state `i` is the constant vector
    /// `0.1 * (i + 1)`, so states differ in magnitude but not direction.
    fn make_states(n: usize, dim: usize) -> Vec<Vec<f32>> {
        (0..n).map(|i| vec![0.1 * (i + 1) as f32; dim]).collect()
    }

    #[test]
    fn test_basic_coherence() {
        let config = AttentionCoherenceConfig {
            dimension: 16,
            ..Default::default()
        };
        let mut coherence = AttentionCoherence::new(config);

        let states = make_states(5, 16);
        let state_refs: Vec<&[f32]> = states.iter().map(|s| s.as_slice()).collect();

        let scores = coherence.compute_attention_scores(&state_refs).unwrap();

        // One score per node, each inside [0, 1].
        assert_eq!(scores.len(), 5);
        for (_, &score) in &scores {
            assert!(score >= 0.0 && score <= 1.0);
        }
    }

    #[test]
    fn test_weighted_residuals() {
        let config = AttentionCoherenceConfig {
            dimension: 8,
            ..Default::default()
        };
        let mut coherence = AttentionCoherence::new(config);

        let states = make_states(4, 8);
        let state_refs: Vec<&[f32]> = states.iter().map(|s| s.as_slice()).collect();

        // A simple 0-1-2-3 chain of edges with growing residual magnitude.
        let residuals = vec![
            (0, 1, vec![0.1f32; 8]),
            (1, 2, vec![0.2f32; 8]),
            (2, 3, vec![0.3f32; 8]),
        ];

        let weighted = coherence
            .weighted_residuals(&state_refs, &residuals)
            .unwrap();

        // Energies are squared norms times positive attention weights.
        assert_eq!(weighted.len(), 3);
        for w in &weighted {
            assert!(w.weighted_energy >= 0.0);
            assert!(w.attention_weight > 0.0);
        }
    }

    #[test]
    fn test_full_analysis() {
        // Diffusion disabled so `smoothed_energy` stays None.
        let config = AttentionCoherenceConfig {
            dimension: 8,
            enable_diffusion: false,
            ..Default::default()
        };
        let mut coherence = AttentionCoherence::new(config);

        let states = make_states(3, 8);
        let state_refs: Vec<&[f32]> = states.iter().map(|s| s.as_slice()).collect();

        let residuals = vec![(0, 1, vec![0.1f32; 8]), (1, 2, vec![0.2f32; 8])];

        let analysis = coherence.full_analysis(&state_refs, &residuals).unwrap();

        assert_eq!(analysis.num_edges, 2);
        assert!(analysis.total_energy >= 0.0);
        assert!(analysis.avg_attention_weight > 0.0);
    }
}
|
||||
360
vendor/ruvector/crates/prime-radiant/src/attention/moe.rs
vendored
Normal file
360
vendor/ruvector/crates/prime-radiant/src/attention/moe.rs
vendored
Normal file
@@ -0,0 +1,360 @@
|
||||
//! Mixture of Experts Residual Processing
|
||||
//!
|
||||
//! Specialized expert routing for different residual characteristics.
|
||||
|
||||
use super::{AttentionCoherenceConfig, AttentionError, MoEProcessedResidual, Result};
|
||||
|
||||
/// Expert routing decision
///
/// Derives `PartialEq` so routings can be compared in tests. The
/// `expert_indices` and `weights` vectors are parallel.
#[derive(Debug, Clone, PartialEq)]
pub struct ExpertRouting {
    /// Selected expert indices
    pub expert_indices: Vec<usize>,
    /// Weights for each selected expert (parallel to `expert_indices`)
    pub weights: Vec<f32>,
    /// Router logits (before top-k selection), one per expert
    pub router_logits: Vec<f32>,
}

impl ExpertRouting {
    /// Check if a specific expert was selected
    pub fn contains_expert(&self, idx: usize) -> bool {
        self.expert_indices.contains(&idx)
    }

    /// Get weight for a specific expert (0 if not selected)
    ///
    /// Pairs indices with weights via `zip`, so a malformed routing with
    /// mismatched vector lengths yields 0.0 instead of panicking (the
    /// previous `position` + direct index could index out of bounds).
    pub fn weight_for(&self, idx: usize) -> f32 {
        self.expert_indices
            .iter()
            .zip(self.weights.iter())
            .find(|(&i, _)| i == idx)
            .map(|(_, &w)| w)
            .unwrap_or(0.0)
    }
}
|
||||
|
||||
/// Mixture of Experts residual processor
///
/// Routes residuals to specialized experts based on their characteristics.
/// Each expert specializes in different types of residuals.
#[derive(Debug)]
pub struct MoEResidualProcessor {
    /// Configuration (dimension, number of experts, top-k)
    config: AttentionCoherenceConfig,
    /// Expert parameters (weights for each expert), `num_experts` entries
    experts: Vec<ExpertParams>,
    /// Router parameters
    router: RouterParams,
}
|
||||
|
||||
/// Parameters for a single expert
///
/// An expert is an affine map `W * x + bias` applied to the residual.
#[derive(Debug, Clone)]
struct ExpertParams {
    /// Linear transformation weights (dim x dim), row-major
    weights: Vec<Vec<f32>>,
    /// Bias vector
    bias: Vec<f32>,
    /// Expert specialization (for interpretability)
    // NOTE(review): assigned at construction but not read by the visible
    // routing/processing code — informational only for now.
    specialization: ExpertSpecialization,
}
|
||||
|
||||
/// Type of expert specialization
///
/// Labels describing what kind of residual each expert is intended to
/// handle; not consulted by the routing logic visible in this module.
#[derive(Debug, Clone, Copy)]
enum ExpertSpecialization {
    /// High-magnitude residuals
    HighMagnitude,
    /// Low-magnitude residuals
    LowMagnitude,
    /// Sparse residuals
    Sparse,
    /// Dense residuals
    Dense,
}
|
||||
|
||||
/// Router parameters
#[derive(Debug, Clone)]
struct RouterParams {
    /// Router weights (num_experts x dim); logit_i = weights[i] . input
    weights: Vec<Vec<f32>>,
    /// Noise scale for exploration
    // Initialized to 0.0 and not consumed by the visible routing code —
    // presumably reserved for jittered routing during training; confirm.
    jitter_noise: f32,
}
|
||||
|
||||
impl MoEResidualProcessor {
    /// Create a new MoE processor
    ///
    /// Experts start as near-identity linear maps with a per-expert gain so
    /// they are distinguishable from the start; the router is initialized so
    /// each expert is most sensitive to a distinct contiguous slice of the
    /// input dimensions.
    pub fn new(config: AttentionCoherenceConfig) -> Self {
        let num_experts = config.num_experts;
        let dim = config.dimension;

        // Initialize experts with different specializations
        // (labels cycle when num_experts exceeds the list length).
        let specializations = [
            ExpertSpecialization::HighMagnitude,
            ExpertSpecialization::LowMagnitude,
            ExpertSpecialization::Sparse,
            ExpertSpecialization::Dense,
        ];

        let experts: Vec<ExpertParams> = (0..num_experts)
            .map(|i| {
                // Initialize with identity-like transformation: a diagonal
                // matrix whose gain varies with the expert index.
                let weights: Vec<Vec<f32>> = (0..dim)
                    .map(|j| {
                        let mut row = vec![0.0f32; dim];
                        row[j] = 1.0 + 0.1 * (i as f32 - num_experts as f32 / 2.0);
                        row
                    })
                    .collect();

                ExpertParams {
                    weights,
                    bias: vec![0.0; dim],
                    specialization: specializations[i % specializations.len()],
                }
            })
            .collect();

        // Initialize router
        let router_weights: Vec<Vec<f32>> = (0..num_experts)
            .map(|i| {
                // Different experts respond to different features
                let mut row = vec![0.1f32; dim];
                // Make each expert sensitive to different dimensions
                // NOTE(review): assumes dim >= 1 — `dim - 1` underflows for a
                // zero-dimension config; confirm validation happens upstream.
                let start = (i * dim / num_experts).min(dim - 1);
                let end = ((i + 1) * dim / num_experts).min(dim);
                for j in start..end {
                    row[j] = 1.0;
                }
                row
            })
            .collect();

        let router = RouterParams {
            weights: router_weights,
            jitter_noise: 0.0,
        };

        Self {
            config,
            experts,
            router,
        }
    }

    /// Process a residual through MoE
    ///
    /// Routes `residual` to the top-k experts, blends their outputs by the
    /// softmax routing weights, and reports a load-balance loss.
    ///
    /// # Errors
    /// Returns `AttentionError::DimensionMismatch` when `residual` does not
    /// match the configured dimension.
    pub fn process(&self, residual: &[f32], context: &[f32]) -> Result<MoEProcessedResidual> {
        // Validate dimensions
        if residual.len() != self.config.dimension {
            return Err(AttentionError::DimensionMismatch {
                expected: self.config.dimension,
                actual: residual.len(),
            });
        }

        // Route to experts
        let routing = self.route(residual, context);

        // Process through selected experts: weighted sum of expert outputs.
        let mut output = vec![0.0f32; self.config.dimension];

        for (&expert_idx, &weight) in routing.expert_indices.iter().zip(routing.weights.iter()) {
            let expert_output = self.apply_expert(expert_idx, residual);
            for (o, e) in output.iter_mut().zip(expert_output.iter()) {
                *o += weight * e;
            }
        }

        // Compute load balance loss
        let load_balance_loss = self.compute_load_balance_loss(&routing);

        Ok(MoEProcessedResidual {
            output,
            expert_indices: routing.expert_indices,
            expert_weights: routing.weights,
            load_balance_loss,
        })
    }

    /// Route input to experts
    ///
    /// Computes one logit per expert, keeps the top-k, and softmax-normalizes
    /// the kept logits so the returned weights sum to 1.
    /// `_context` is currently unused by the routing decision.
    pub fn route(&self, input: &[f32], _context: &[f32]) -> ExpertRouting {
        // Compute router logits
        let logits: Vec<f32> = self
            .router
            .weights
            .iter()
            .map(|w| self.dot_product(input, w))
            .collect();

        // Top-k selection
        let k = self.config.moe_top_k.min(self.config.num_experts);

        let mut indexed_logits: Vec<(usize, f32)> =
            logits.iter().enumerate().map(|(i, &l)| (i, l)).collect();

        // Sort descending by logit; NaN compares as Equal (never panics).
        indexed_logits.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));

        let top_k: Vec<(usize, f32)> = indexed_logits.into_iter().take(k).collect();

        // Softmax over selected; max-logit subtraction for numerical stability.
        let max_logit = top_k
            .iter()
            .map(|(_, l)| *l)
            .fold(f32::NEG_INFINITY, f32::max);
        let exp_sum: f32 = top_k.iter().map(|(_, l)| (l - max_logit).exp()).sum();

        let expert_indices: Vec<usize> = top_k.iter().map(|(i, _)| *i).collect();
        let weights: Vec<f32> = top_k
            .iter()
            .map(|(_, l)| (l - max_logit).exp() / exp_sum)
            .collect();

        ExpertRouting {
            expert_indices,
            weights,
            router_logits: logits,
        }
    }

    /// Apply a single expert's affine transform: `W * input + bias`.
    fn apply_expert(&self, expert_idx: usize, input: &[f32]) -> Vec<f32> {
        let expert = &self.experts[expert_idx];
        let dim = input.len();

        let mut output = expert.bias.clone();

        // Matrix-vector multiply; the bounds guards tolerate an input whose
        // length differs from the expert's stored dimension.
        for (i, w_row) in expert.weights.iter().enumerate() {
            if i < dim {
                for (j, &x) in input.iter().enumerate() {
                    if j < w_row.len() {
                        output[i] += w_row[j] * x;
                    }
                }
            }
        }

        output
    }

    /// Compute load balance loss
    ///
    /// Squared L2 deviation of the per-expert weight mass from the uniform
    /// 1/num_experts distribution; 0 means perfectly balanced.
    fn compute_load_balance_loss(&self, routing: &ExpertRouting) -> f32 {
        // Count how many times each expert is used
        let mut usage = vec![0.0f32; self.config.num_experts];
        for (&idx, &weight) in routing.expert_indices.iter().zip(routing.weights.iter()) {
            usage[idx] += weight;
        }

        // Ideal uniform distribution
        let ideal = 1.0 / self.config.num_experts as f32;

        // L2 deviation from uniform
        usage.iter().map(|&u| (u - ideal).powi(2)).sum::<f32>()
    }

    /// Plain dot product of two equal-length slices.
    fn dot_product(&self, a: &[f32], b: &[f32]) -> f32 {
        a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
    }

    /// Get expert statistics
    ///
    /// Aggregates routing weights over many routings and normalizes them to
    /// a distribution over experts (all zeros when `routings` is empty).
    pub fn expert_usage(&self, routings: &[ExpertRouting]) -> Vec<f32> {
        let mut usage = vec![0.0f32; self.config.num_experts];

        for routing in routings {
            for (&idx, &weight) in routing.expert_indices.iter().zip(routing.weights.iter()) {
                usage[idx] += weight;
            }
        }

        // Normalize
        let total: f32 = usage.iter().sum();
        if total > 0.0 {
            for u in usage.iter_mut() {
                *u /= total;
            }
        }

        usage
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_moe_creation() {
        let config = AttentionCoherenceConfig {
            dimension: 16,
            num_experts: 4,
            moe_top_k: 2,
            ..Default::default()
        };
        let moe = MoEResidualProcessor::new(config);

        // One parameter set per configured expert.
        assert_eq!(moe.experts.len(), 4);
    }

    #[test]
    fn test_routing() {
        let config = AttentionCoherenceConfig {
            dimension: 8,
            num_experts: 4,
            moe_top_k: 2,
            ..Default::default()
        };
        let moe = MoEResidualProcessor::new(config);

        let input = vec![0.5f32; 8];
        let context = vec![0.1f32; 8];

        let routing = moe.route(&input, &context);

        // Top-k selection keeps exactly k experts.
        assert_eq!(routing.expert_indices.len(), 2);
        assert_eq!(routing.weights.len(), 2);

        // Weights should sum to approximately 1
        let sum: f32 = routing.weights.iter().sum();
        assert!((sum - 1.0).abs() < 0.01);
    }

    #[test]
    fn test_process() {
        let config = AttentionCoherenceConfig {
            dimension: 8,
            num_experts: 4,
            moe_top_k: 2,
            ..Default::default()
        };
        let moe = MoEResidualProcessor::new(config);

        let residual = vec![0.1f32; 8];
        let context = vec![0.1f32; 8];

        let result = moe.process(&residual, &context).unwrap();

        // Output preserves the input dimension; k experts contribute.
        assert_eq!(result.output.len(), 8);
        assert_eq!(result.expert_indices.len(), 2);
        assert!(result.load_balance_loss >= 0.0);
    }

    #[test]
    fn test_expert_usage() {
        let config = AttentionCoherenceConfig {
            dimension: 8,
            num_experts: 4,
            moe_top_k: 2,
            ..Default::default()
        };
        let moe = MoEResidualProcessor::new(config);

        // Ten inputs of increasing magnitude, routed independently.
        let inputs: Vec<Vec<f32>> = (0..10).map(|i| vec![0.1 * (i + 1) as f32; 8]).collect();
        let context = vec![0.1f32; 8];

        let routings: Vec<ExpertRouting> =
            inputs.iter().map(|inp| moe.route(inp, &context)).collect();

        let usage = moe.expert_usage(&routings);

        assert_eq!(usage.len(), 4);
        // Should sum to approximately 1
        let sum: f32 = usage.iter().sum();
        assert!((sum - 1.0).abs() < 0.01);
    }
}
|
||||
381
vendor/ruvector/crates/prime-radiant/src/attention/topology.rs
vendored
Normal file
381
vendor/ruvector/crates/prime-radiant/src/attention/topology.rs
vendored
Normal file
@@ -0,0 +1,381 @@
|
||||
//! Topology-Gated Attention
|
||||
//!
|
||||
//! Uses topological coherence as a permission signal for attention behavior.
|
||||
|
||||
use super::config::AttentionMode;
|
||||
use super::{AttentionCoherenceConfig, AttentionError, Result};
|
||||
|
||||
/// Score from attention computation
///
/// Plain value type — every field is a `Copy` scalar — so it derives
/// `Copy` and `PartialEq` for cheap passing and direct comparison.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct AttentionScore {
    /// Node index
    pub node_idx: usize,
    /// Attention score value
    pub score: f32,
    /// Contribution to coherence
    pub coherence_contribution: f32,
}
|
||||
|
||||
/// Result of topology gate evaluation
///
/// Immutable snapshot of the gate's state at the moment it was queried.
#[derive(Debug, Clone)]
pub struct TopologyGateResult {
    /// Current coherence score
    pub coherence: f32,
    /// Current mode
    pub mode: AttentionMode,
    /// Effective attention width
    pub width: usize,
    /// Whether updates are allowed
    pub allows_updates: bool,
    /// Ticks since last coherence update (staleness indicator)
    pub ticks_since_update: usize,
}
|
||||
|
||||
impl TopologyGateResult {
    /// Create a default result (stable mode)
    ///
    /// Full coherence, the configured base attention width, updates allowed.
    pub fn stable(config: &AttentionCoherenceConfig) -> Self {
        Self {
            coherence: 1.0,
            mode: AttentionMode::Stable,
            width: config.base_width,
            allows_updates: true,
            ticks_since_update: 0,
        }
    }
}
|
||||
|
||||
/// Topology-gated attention controller
///
/// Uses structural coherence as a permission signal for attention behavior:
/// - Stable mode: full attention, normal updates
/// - Cautious mode: reduced width, increased sparsity
/// - Freeze mode: retrieval only, no updates
#[derive(Debug)]
pub struct TopologyGate {
    /// Configuration
    config: AttentionCoherenceConfig,
    /// Current coherence score (clamped to [0, 1] when recomputed)
    coherence: f32,
    /// Current mode (derived from `coherence` and the config thresholds)
    mode: AttentionMode,
    /// Ticks since last coherence update
    ticks_since_update: usize,
    /// Cached coherence metrics (`None` until the first update)
    cached_metrics: Option<CoherenceMetrics>,
}
|
||||
|
||||
impl TopologyGate {
    /// Create a new topology gate
    ///
    /// Starts in stable mode with full coherence until evidence says otherwise.
    pub fn new(config: AttentionCoherenceConfig) -> Self {
        Self {
            coherence: 1.0, // Start optimistic
            mode: AttentionMode::Stable,
            ticks_since_update: 0,
            cached_metrics: None,
            config,
        }
    }

    /// Update coherence from key states
    ///
    /// Recomputes the coherence metrics, derives the attention mode from the
    /// new score, and resets the staleness counter. Empty input is a no-op.
    pub fn update_coherence(&mut self, keys: &[&[f32]]) {
        if keys.is_empty() {
            return;
        }

        let metrics = self.compute_coherence_metrics(keys);
        self.coherence = metrics.coherence_score;
        self.mode = AttentionMode::from_coherence(self.coherence, &self.config);
        self.ticks_since_update = 0;
        self.cached_metrics = Some(metrics);
    }

    /// Tick the coherence counter (one tick per attention step)
    pub fn tick(&mut self) {
        self.ticks_since_update += 1;
    }

    /// Check if coherence update is needed
    ///
    /// True when the update period has elapsed or no metrics were ever cached.
    pub fn needs_update(&self) -> bool {
        self.ticks_since_update >= self.config.coherence_update_period
            || self.cached_metrics.is_none()
    }

    /// Get current mode
    pub fn current_mode(&self) -> AttentionMode {
        self.mode
    }

    /// Get current coherence score
    pub fn current_coherence(&self) -> f32 {
        self.coherence
    }

    /// Check if updates are allowed (delegates to the current mode)
    pub fn allows_updates(&self) -> bool {
        self.mode.allows_updates()
    }

    /// Get effective attention width (config policy applied to coherence)
    pub fn attention_width(&self) -> usize {
        self.config.width_for_coherence(self.coherence)
    }

    /// Get current gate result
    ///
    /// Snapshot of coherence, mode, width, and staleness for callers.
    pub fn current_result(&self) -> TopologyGateResult {
        TopologyGateResult {
            coherence: self.coherence,
            mode: self.mode,
            width: self.attention_width(),
            allows_updates: self.allows_updates(),
            ticks_since_update: self.ticks_since_update,
        }
    }

    /// Compute coherence metrics from keys
    ///
    /// Coherence blends three signals: mean pairwise cosine similarity,
    /// similarity spread (standard deviation), and "boundary mass" — the
    /// average similarity carried by edges to non-k-nearest neighbors.
    fn compute_coherence_metrics(&self, keys: &[&[f32]]) -> CoherenceMetrics {
        if keys.is_empty() {
            return CoherenceMetrics::empty();
        }

        let n = keys.len();
        let k = self.config.k_neighbors.min(n - 1);

        if k == 0 {
            // A single key (or k_neighbors == 0) is trivially coherent.
            return CoherenceMetrics::with_score(1.0);
        }

        // Compute pairwise similarities (self-similarity fixed at 1.0)
        let mut similarities: Vec<Vec<f32>> = Vec::with_capacity(n);
        for i in 0..n {
            let mut row = Vec::with_capacity(n);
            for j in 0..n {
                if i == j {
                    row.push(1.0);
                } else {
                    row.push(self.cosine_similarity(keys[i], keys[j]));
                }
            }
            similarities.push(row);
        }

        // Compute boundary mass (proportion of edges to k nearest neighbors)
        let mut total_boundary_mass = 0.0f32;
        let mut total_edges = 0;

        for i in 0..n {
            // Get k nearest neighbors
            let mut neighbor_sims: Vec<(usize, f32)> = similarities[i]
                .iter()
                .enumerate()
                .filter(|(j, _)| *j != i)
                .map(|(j, &s)| (j, s))
                .collect();

            // Descending by similarity; NaN compares as Equal (never panics).
            neighbor_sims
                .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
            let neighbors: Vec<usize> = neighbor_sims.iter().take(k).map(|(j, _)| *j).collect();

            // Boundary mass: edges to non-neighbors. Negative similarities
            // are clamped to zero so they cannot cancel positive mass.
            for j in 0..n {
                if j != i && !neighbors.contains(&j) {
                    total_boundary_mass += similarities[i][j].max(0.0);
                    total_edges += 1;
                }
            }
        }

        // Compute similarity variance over the strict upper triangle
        // (each unordered pair counted exactly once).
        let all_sims: Vec<f32> = similarities
            .iter()
            .enumerate()
            .flat_map(|(i, row)| {
                row.iter()
                    .enumerate()
                    .filter(move |(j, _)| *j > i)
                    .map(|(_, &s)| s)
            })
            .collect();

        // max(1) guards division when no pairs exist (n == 1; unreachable
        // here because the k == 0 branch already returned).
        let mean_sim: f32 = all_sims.iter().sum::<f32>() / all_sims.len().max(1) as f32;
        let variance: f32 = all_sims.iter().map(|s| (s - mean_sim).powi(2)).sum::<f32>()
            / all_sims.len().max(1) as f32;

        // Coherence score: high similarity, low variance, low boundary mass
        let boundary_ratio = if total_edges > 0 {
            total_boundary_mass / total_edges as f32
        } else {
            0.0
        };

        // Combine metrics
        // High mean similarity and low variance = high coherence
        // High boundary mass = low coherence
        let coherence_score =
            (mean_sim * 0.5 + (1.0 - variance.sqrt()) * 0.3 + (1.0 - boundary_ratio) * 0.2)
                .clamp(0.0, 1.0);

        CoherenceMetrics {
            coherence_score,
            mean_similarity: mean_sim,
            similarity_variance: variance,
            boundary_mass: total_boundary_mass,
            num_nodes: n,
        }
    }

    /// Cosine similarity clamped to [-1, 1]; 0.0 for near-zero vectors.
    fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
        let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
        let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();

        if norm_a < 1e-10 || norm_b < 1e-10 {
            // Degenerate (near-zero-length) vectors are treated as orthogonal.
            return 0.0;
        }

        (dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
    }
}
|
||||
|
||||
/// Coherence metrics computed from key states
///
// NOTE(review): only `coherence_score` is read by the visible gate logic;
// the remaining fields appear to be kept for diagnostics — confirm.
#[derive(Debug, Clone)]
struct CoherenceMetrics {
    /// Overall coherence score (blend of similarity, spread, boundary mass)
    coherence_score: f32,
    /// Mean pairwise similarity
    mean_similarity: f32,
    /// Variance of pairwise similarities
    similarity_variance: f32,
    /// Total boundary mass (edges to non-neighbors)
    boundary_mass: f32,
    /// Number of nodes
    num_nodes: usize,
}
|
||||
|
||||
impl CoherenceMetrics {
|
||||
fn empty() -> Self {
|
||||
Self {
|
||||
coherence_score: 1.0,
|
||||
mean_similarity: 1.0,
|
||||
similarity_variance: 0.0,
|
||||
boundary_mass: 0.0,
|
||||
num_nodes: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn with_score(score: f32) -> Self {
|
||||
Self {
|
||||
coherence_score: score,
|
||||
mean_similarity: score,
|
||||
similarity_variance: 0.0,
|
||||
boundary_mass: 0.0,
|
||||
num_nodes: 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_topology_gate_creation() {
        let config = AttentionCoherenceConfig::default();
        let gate = TopologyGate::new(config);

        // A fresh gate starts optimistic: stable mode, updates allowed.
        assert_eq!(gate.current_mode(), AttentionMode::Stable);
        assert!(gate.allows_updates());
    }

    #[test]
    fn test_update_coherence_similar_keys() {
        let config = AttentionCoherenceConfig::default();
        let mut gate = TopologyGate::new(config);

        // All similar keys = high coherence
        let keys: Vec<Vec<f32>> = (0..10).map(|_| vec![1.0, 0.0, 0.0, 0.0]).collect();
        let key_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();

        gate.update_coherence(&key_refs);

        assert!(gate.current_coherence() > 0.5);
        assert_eq!(gate.current_mode(), AttentionMode::Stable);
    }

    #[test]
    fn test_update_coherence_diverse_keys() {
        let config = AttentionCoherenceConfig {
            stable_threshold: 0.9,
            freeze_threshold: 0.5,
            ..Default::default()
        };
        let mut gate = TopologyGate::new(config);

        // Diverse keys = lower coherence (one-hot keys are mutually orthogonal)
        let keys: Vec<Vec<f32>> = (0..10)
            .map(|i| {
                let mut v = vec![0.0f32; 16];
                v[i % 16] = 1.0;
                v
            })
            .collect();
        let key_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();

        gate.update_coherence(&key_refs);

        // Should trigger cautious or freeze mode due to diversity
        assert!(
            gate.current_mode() == AttentionMode::Cautious
                || gate.current_mode() == AttentionMode::Freeze
        );
    }

    #[test]
    fn test_tick_and_update_period() {
        let config = AttentionCoherenceConfig {
            coherence_update_period: 4,
            ..Default::default()
        };
        let mut gate = TopologyGate::new(config);

        // Initially needs update (no cache)
        assert!(gate.needs_update());

        let keys: Vec<Vec<f32>> = vec![vec![1.0; 8]; 5];
        let key_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();

        gate.update_coherence(&key_refs);
        assert!(!gate.needs_update());

        // Tick 4 times — exactly the configured period, so staleness trips.
        for _ in 0..4 {
            gate.tick();
        }
        assert!(gate.needs_update());
    }

    #[test]
    fn test_attention_width() {
        let config = AttentionCoherenceConfig {
            base_width: 64,
            stable_threshold: 0.7,
            freeze_threshold: 0.3,
            ..Default::default()
        };
        let mut gate = TopologyGate::new(config);

        // High coherence = full width
        gate.coherence = 0.8;
        gate.mode = AttentionMode::from_coherence(0.8, &gate.config);
        assert_eq!(gate.attention_width(), 64);

        // Medium coherence = reduced width
        gate.coherence = 0.5;
        gate.mode = AttentionMode::from_coherence(0.5, &gate.config);
        assert_eq!(gate.attention_width(), 32);

        // Low coherence = minimal width
        gate.coherence = 0.2;
        gate.mode = AttentionMode::from_coherence(0.2, &gate.config);
        assert_eq!(gate.attention_width(), 1);
    }
}
|
||||
Reference in New Issue
Block a user