//! Sheaf Neural Network Layers //! //! Neural network layers that respect sheaf structure, enabling //! coherence-aware deep learning. use super::laplacian::{LaplacianConfig, SheafLaplacian}; use super::sheaf::{Sheaf, SheafSection}; use crate::substrate::NodeId; use crate::substrate::SheafGraph; use ndarray::{Array1, Array2}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; /// Activation functions for neural layers #[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub enum Activation { /// No activation (identity) Identity, /// ReLU: max(0, x) ReLU, /// Leaky ReLU: max(alpha * x, x) LeakyReLU(f64), /// Sigmoid: 1 / (1 + exp(-x)) Sigmoid, /// Tanh: tanh(x) Tanh, /// GELU: x * Phi(x) GELU, /// Softmax (applied per-node) Softmax, } impl Activation { /// Apply activation function pub fn apply(&self, x: f64) -> f64 { match self { Activation::Identity => x, Activation::ReLU => x.max(0.0), Activation::LeakyReLU(alpha) => { if x > 0.0 { x } else { alpha * x } } Activation::Sigmoid => 1.0 / (1.0 + (-x).exp()), Activation::Tanh => x.tanh(), Activation::GELU => { // Approximation: x * sigmoid(1.702 * x) let sigmoid = 1.0 / (1.0 + (-1.702 * x).exp()); x * sigmoid } Activation::Softmax => x, // Softmax handled separately } } /// Apply activation to array pub fn apply_array(&self, arr: &Array1) -> Array1 { match self { Activation::Softmax => { let max_val = arr.iter().cloned().fold(f64::NEG_INFINITY, f64::max); let exp_vals: Array1 = arr.mapv(|x| (x - max_val).exp()); let sum: f64 = exp_vals.sum(); exp_vals / sum } _ => arr.mapv(|x| self.apply(x)), } } /// Compute derivative pub fn derivative(&self, x: f64) -> f64 { match self { Activation::Identity => 1.0, Activation::ReLU => { if x > 0.0 { 1.0 } else { 0.0 } } Activation::LeakyReLU(alpha) => { if x > 0.0 { 1.0 } else { *alpha } } Activation::Sigmoid => { let s = self.apply(x); s * (1.0 - s) } Activation::Tanh => { let t = x.tanh(); 1.0 - t * t } Activation::GELU => { // Derivative of GELU approximation let sigmoid = 1.0 / (1.0 + (-1.702 * x).exp()); sigmoid + x * 1.702 * sigmoid * (1.0 - sigmoid) } Activation::Softmax => 1.0, // Jacobian needed for full derivative } } } /// Configuration for sheaf neural layer #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SheafNeuralConfig { /// Input dimension per node pub input_dim: usize, /// Output dimension per node pub output_dim: usize, /// Number of diffusion steps pub diffusion_steps: usize, /// Diffusion coefficient pub diffusion_coeff: f64, /// Activation function pub activation: Activation, /// Dropout rate pub dropout: f64, /// Whether to use residual connection pub use_residual: bool, /// Whether to normalize output pub layer_norm: bool, } impl Default for SheafNeuralConfig { fn default() -> Self { Self { input_dim: 64, output_dim: 64, diffusion_steps: 3, diffusion_coeff: 0.5, activation: Activation::ReLU, dropout: 0.0, layer_norm: true, use_residual: true, } } } /// A sheaf-aware neural network layer /// /// Combines linear transformation with sheaf diffusion to produce /// outputs that respect graph structure. #[derive(Clone)] pub struct SheafNeuralLayer { /// Configuration config: SheafNeuralConfig, /// Weight matrix (output_dim x input_dim) weights: Array2, /// Bias vector (output_dim) bias: Array1, /// Diffusion weight (how much to mix diffusion vs direct) diffusion_weight: f64, } impl SheafNeuralLayer { /// Create a new layer with Xavier initialization pub fn new(config: SheafNeuralConfig) -> Self { let scale = (2.0 / (config.input_dim + config.output_dim) as f64).sqrt(); // Initialize weights with Xavier let weights = Array2::from_shape_fn((config.output_dim, config.input_dim), |_| { rand::random::() * scale - scale / 2.0 }); let bias = Array1::zeros(config.output_dim); Self { config, weights, bias, diffusion_weight: 0.5, } } /// Create with specific weights pub fn with_weights( config: SheafNeuralConfig, weights: Array2, bias: Array1, ) -> Self { assert_eq!(weights.nrows(), config.output_dim); assert_eq!(weights.ncols(), config.input_dim); assert_eq!(bias.len(), config.output_dim); Self { config, weights, bias, diffusion_weight: 0.5, } } /// Set diffusion weight pub fn set_diffusion_weight(&mut self, weight: f64) { self.diffusion_weight = weight.clamp(0.0, 1.0); } /// Forward pass on a section /// /// output = activation(W * diffuse(x) + b) pub fn forward(&self, graph: &SheafGraph, input: &SheafSection) -> SheafSection { let mut output = SheafSection::empty(); // Step 1: Apply linear transformation at each node for (node_id, input_vec) in &input.sections { let transformed = self.weights.dot(input_vec) + &self.bias; output.set(*node_id, transformed); } // Step 2: Apply sheaf diffusion if self.config.diffusion_steps > 0 && self.diffusion_weight > 0.0 { let laplacian_config = LaplacianConfig::default(); let laplacian = SheafLaplacian::from_graph(graph, laplacian_config); for _ in 0..self.config.diffusion_steps { let laplacian_out = laplacian.apply(graph, &output); // Update: x = x - alpha * L * x for (node_id, out_vec) in output.sections.iter_mut() { if let Some(lap_vec) = laplacian_out.sections.get(node_id) { let scale = self.diffusion_weight * self.config.diffusion_coeff; *out_vec = &*out_vec - &(lap_vec * scale); } } } } // Step 3: Apply activation for out_vec in output.sections.values_mut() { *out_vec = self.config.activation.apply_array(out_vec); } // Step 4: Residual connection (if dimensions match and enabled) if self.config.use_residual && self.config.input_dim == self.config.output_dim { for (node_id, out_vec) in output.sections.iter_mut() { if let Some(in_vec) = input.sections.get(node_id) { *out_vec = &*out_vec + in_vec; } } } // Step 5: Layer normalization if self.config.layer_norm { for out_vec in output.sections.values_mut() { let mean: f64 = out_vec.mean().unwrap_or(0.0); let std: f64 = out_vec.std(0.0); if std > 1e-10 { *out_vec = out_vec.mapv(|x| (x - mean) / std); } } } output } /// Get weights pub fn weights(&self) -> &Array2 { &self.weights } /// Get bias pub fn bias(&self) -> &Array1 { &self.bias } /// Set weights (for training) pub fn set_weights(&mut self, weights: Array2) { assert_eq!(weights.shape(), self.weights.shape()); self.weights = weights; } /// Set bias (for training) pub fn set_bias(&mut self, bias: Array1) { assert_eq!(bias.len(), self.bias.len()); self.bias = bias; } } impl std::fmt::Debug for SheafNeuralLayer { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("SheafNeuralLayer") .field("input_dim", &self.config.input_dim) .field("output_dim", &self.config.output_dim) .field("diffusion_steps", &self.config.diffusion_steps) .field("activation", &self.config.activation) .finish() } } /// Sheaf convolution layer /// /// Generalizes graph convolution using sheaf structure #[derive(Clone)] pub struct SheafConvolution { /// Input dimension input_dim: usize, /// Output dimension output_dim: usize, /// Weight for self-features self_weight: Array2, /// Weight for neighbor features neighbor_weight: Array2, /// Bias bias: Array1, /// Activation activation: Activation, } impl SheafConvolution { /// Create a new sheaf convolution layer pub fn new(input_dim: usize, output_dim: usize) -> Self { let scale = (2.0 / (input_dim + output_dim) as f64).sqrt(); let self_weight = Array2::from_shape_fn((output_dim, input_dim), |_| { rand::random::() * scale - scale / 2.0 }); let neighbor_weight = Array2::from_shape_fn((output_dim, input_dim), |_| { rand::random::() * scale - scale / 2.0 }); let bias = Array1::zeros(output_dim); Self { input_dim, output_dim, self_weight, neighbor_weight, bias, activation: Activation::ReLU, } } /// Set activation function pub fn with_activation(mut self, activation: Activation) -> Self { self.activation = activation; self } /// Forward pass /// /// h_v = activation(W_self * x_v + W_neigh * sum_u rho_{u->v}(x_u) / deg(v) + b) pub fn forward(&self, graph: &SheafGraph, input: &SheafSection) -> SheafSection { let mut output = SheafSection::empty(); for node_id in graph.node_ids() { if let Some(self_vec) = input.get(node_id) { // Self contribution let mut h = self.self_weight.dot(self_vec); // Neighbor contribution (average of restricted neighbors) let neighbors: Vec<_> = graph.edges_incident_to(node_id); if !neighbors.is_empty() { let mut neighbor_sum = Array1::zeros(self.input_dim); let mut count = 0; for edge_id in neighbors { if let Some(edge) = graph.get_edge(edge_id) { let neighbor_id = if edge.source == node_id { edge.target } else { edge.source }; if let Some(neighbor_vec) = input.get(neighbor_id) { // For identity restriction, just add neighbor // For general restriction, would apply rho here neighbor_sum = neighbor_sum + neighbor_vec; count += 1; } } } if count > 0 { neighbor_sum /= count as f64; h = h + self.neighbor_weight.dot(&neighbor_sum); } } // Add bias and apply activation h = h + &self.bias; h = self.activation.apply_array(&h); output.set(node_id, h); } } output } } impl std::fmt::Debug for SheafConvolution { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("SheafConvolution") .field("input_dim", &self.input_dim) .field("output_dim", &self.output_dim) .field("activation", &self.activation) .finish() } } /// Cohomology-aware pooling layer /// /// Pools node features while preserving cohomological structure #[derive(Clone)] pub struct CohomologyPooling { /// Pooling method method: PoolingMethod, /// Whether to weight by node importance (from Laplacian spectrum) spectral_weighting: bool, } /// Pooling methods #[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub enum PoolingMethod { /// Mean of all nodes Mean, /// Max over all nodes Max, /// Sum over all nodes Sum, /// Attention-weighted sum Attention, /// Top-k nodes by energy TopK(usize), } impl CohomologyPooling { /// Create a new pooling layer pub fn new(method: PoolingMethod) -> Self { Self { method, spectral_weighting: false, } } /// Enable spectral weighting pub fn with_spectral_weighting(mut self) -> Self { self.spectral_weighting = true; self } /// Pool section to single vector pub fn pool(&self, graph: &SheafGraph, section: &SheafSection) -> Array1 { if section.sections.is_empty() { return Array1::zeros(0); } let dim = section .sections .values() .next() .map(|v| v.len()) .unwrap_or(0); match self.method { PoolingMethod::Mean => { let mut sum = Array1::zeros(dim); let mut count = 0; for vec in section.sections.values() { sum = sum + vec; count += 1; } if count > 0 { sum / count as f64 } else { sum } } PoolingMethod::Max => { let mut max_vec = Array1::from_elem(dim, f64::NEG_INFINITY); for vec in section.sections.values() { for (i, &val) in vec.iter().enumerate() { max_vec[i] = max_vec[i].max(val); } } max_vec } PoolingMethod::Sum => { let mut sum = Array1::zeros(dim); for vec in section.sections.values() { sum = sum + vec; } sum } PoolingMethod::Attention => { // Simple attention: weight by L2 norm let mut sum = Array1::zeros(dim); let mut total_weight = 0.0; for vec in section.sections.values() { let weight = vec.iter().map(|x| x * x).sum::().sqrt(); sum = sum + vec * weight; total_weight += weight; } if total_weight > 0.0 { sum / total_weight } else { sum } } PoolingMethod::TopK(k) => { // Select top k nodes by L2 norm let mut node_norms: Vec<_> = section .sections .iter() .map(|(id, vec)| (*id, vec.iter().map(|x| x * x).sum::())) .collect(); node_norms .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); let mut sum = Array1::zeros(dim); for (node_id, _) in node_norms.into_iter().take(k) { if let Some(vec) = section.get(node_id) { sum = sum + vec; } } sum / k as f64 } } } } impl std::fmt::Debug for CohomologyPooling { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("CohomologyPooling") .field("method", &self.method) .field("spectral_weighting", &self.spectral_weighting) .finish() } } #[cfg(test)] mod tests { use super::*; use crate::substrate::edge::SheafEdgeBuilder; use crate::substrate::node::SheafNodeBuilder; use uuid::Uuid; fn make_node_id() -> NodeId { Uuid::new_v4() } #[test] fn test_activation_functions() { assert!((Activation::ReLU.apply(-1.0) - 0.0).abs() < 1e-10); assert!((Activation::ReLU.apply(1.0) - 1.0).abs() < 1e-10); assert!((Activation::Sigmoid.apply(0.0) - 0.5).abs() < 1e-10); let arr = Array1::from_vec(vec![1.0, 2.0, 3.0]); let softmax = Activation::Softmax.apply_array(&arr); assert!((softmax.sum() - 1.0).abs() < 1e-10); } #[test] fn test_sheaf_neural_layer() { let graph = SheafGraph::new(); let node1 = SheafNodeBuilder::new() .state_from_slice(&[1.0, 0.0, 0.0, 0.0]) .build(); let node2 = SheafNodeBuilder::new() .state_from_slice(&[0.0, 1.0, 0.0, 0.0]) .build(); let id1 = graph.add_node(node1); let id2 = graph.add_node(node2); let edge = SheafEdgeBuilder::new(id1, id2) .identity_restrictions(4) .weight(1.0) .build(); graph.add_edge(edge).unwrap(); let config = SheafNeuralConfig { input_dim: 4, output_dim: 2, diffusion_steps: 1, ..Default::default() }; let layer = SheafNeuralLayer::new(config); // Create input section let mut input = SheafSection::empty(); input.set(id1, Array1::from_vec(vec![1.0, 0.0, 0.0, 0.0])); input.set(id2, Array1::from_vec(vec![0.0, 1.0, 0.0, 0.0])); let output = layer.forward(&graph, &input); assert!(output.contains(id1)); assert!(output.contains(id2)); assert_eq!(output.get(id1).unwrap().len(), 2); } #[test] fn test_sheaf_convolution() { let graph = SheafGraph::new(); let node1 = SheafNodeBuilder::new() .state_from_slice(&[1.0, 0.0]) .build(); let node2 = SheafNodeBuilder::new() .state_from_slice(&[0.0, 1.0]) .build(); let id1 = graph.add_node(node1); let id2 = graph.add_node(node2); let edge = SheafEdgeBuilder::new(id1, id2) .identity_restrictions(2) .build(); graph.add_edge(edge).unwrap(); let conv = SheafConvolution::new(2, 3); let mut input = SheafSection::empty(); input.set(id1, Array1::from_vec(vec![1.0, 0.0])); input.set(id2, Array1::from_vec(vec![0.0, 1.0])); let output = conv.forward(&graph, &input); assert!(output.contains(id1)); assert_eq!(output.get(id1).unwrap().len(), 3); } #[test] fn test_pooling() { let graph = SheafGraph::new(); let node1 = SheafNodeBuilder::new().state_from_slice(&[1.0]).build(); let node2 = SheafNodeBuilder::new().state_from_slice(&[3.0]).build(); let id1 = graph.add_node(node1); let id2 = graph.add_node(node2); let mut section = SheafSection::empty(); section.set(id1, Array1::from_vec(vec![1.0])); section.set(id2, Array1::from_vec(vec![3.0])); let mean_pool = CohomologyPooling::new(PoolingMethod::Mean); let result = mean_pool.pool(&graph, §ion); assert!((result[0] - 2.0).abs() < 1e-10); let max_pool = CohomologyPooling::new(PoolingMethod::Max); let result = max_pool.pool(&graph, §ion); assert!((result[0] - 3.0).abs() < 1e-10); } }