wifi-densepose/vendor/ruvector/crates/prime-radiant/src/cohomology/neural.rs

//! Sheaf Neural Network Layers
//!
//! Neural network layers that respect sheaf structure, enabling
//! coherence-aware deep learning.
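//!
//! A minimal end-to-end sketch (illustrative, marked `ignore`: graph and
//! section construction mirror the unit tests at the bottom of this file):
//!
//! ```ignore
//! let graph = SheafGraph::new();
//! // ... add nodes and edges via the substrate builders ...
//! let layer = SheafNeuralLayer::new(SheafNeuralConfig::default());
//! let mut input = SheafSection::empty();
//! // ... set per-node feature vectors on `input` ...
//! let hidden = layer.forward(&graph, &input);
//! let pooled = CohomologyPooling::new(PoolingMethod::Mean).pool(&graph, &hidden);
//! ```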

use super::laplacian::{LaplacianConfig, SheafLaplacian};
use super::sheaf::SheafSection;
use crate::substrate::NodeId;
use crate::substrate::SheafGraph;
use ndarray::{Array1, Array2};
use serde::{Deserialize, Serialize};
/// Activation functions for neural layers
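///
/// Usage sketch (marked `ignore` rather than run as a doctest):
///
/// ```ignore
/// use ndarray::Array1;
///
/// assert_eq!(Activation::ReLU.apply(-2.0), 0.0);
/// // Softmax is applied at the array level and sums to 1.
/// let probs = Activation::Softmax.apply_array(&Array1::from_vec(vec![1.0, 2.0, 3.0]));
/// assert!((probs.sum() - 1.0).abs() < 1e-10);
/// ```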
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum Activation {
/// No activation (identity)
Identity,
/// ReLU: max(0, x)
ReLU,
/// Leaky ReLU: max(alpha * x, x)
LeakyReLU(f64),
/// Sigmoid: 1 / (1 + exp(-x))
Sigmoid,
/// Tanh: tanh(x)
Tanh,
/// GELU: x * Phi(x)
GELU,
/// Softmax (applied per-node)
Softmax,
}
impl Activation {
/// Apply activation function
pub fn apply(&self, x: f64) -> f64 {
match self {
Activation::Identity => x,
Activation::ReLU => x.max(0.0),
Activation::LeakyReLU(alpha) => {
if x > 0.0 {
x
} else {
alpha * x
}
}
Activation::Sigmoid => 1.0 / (1.0 + (-x).exp()),
Activation::Tanh => x.tanh(),
Activation::GELU => {
// Approximation: x * sigmoid(1.702 * x)
let sigmoid = 1.0 / (1.0 + (-1.702 * x).exp());
x * sigmoid
}
Activation::Softmax => x, // Softmax handled separately
}
}
/// Apply activation to array
pub fn apply_array(&self, arr: &Array1<f64>) -> Array1<f64> {
match self {
Activation::Softmax => {
let max_val = arr.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
let exp_vals: Array1<f64> = arr.mapv(|x| (x - max_val).exp());
let sum: f64 = exp_vals.sum();
exp_vals / sum
}
_ => arr.mapv(|x| self.apply(x)),
}
}
/// Compute derivative
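    ///
    /// For example, since `sigma(0) = 0.5`, the sigmoid derivative at zero is
    /// `0.5 * (1.0 - 0.5) = 0.25`:
    ///
    /// ```ignore
    /// assert!((Activation::Sigmoid.derivative(0.0) - 0.25).abs() < 1e-10);
    /// ```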
pub fn derivative(&self, x: f64) -> f64 {
match self {
Activation::Identity => 1.0,
Activation::ReLU => {
if x > 0.0 {
1.0
} else {
0.0
}
}
Activation::LeakyReLU(alpha) => {
if x > 0.0 {
1.0
} else {
*alpha
}
}
Activation::Sigmoid => {
let s = self.apply(x);
s * (1.0 - s)
}
Activation::Tanh => {
let t = x.tanh();
1.0 - t * t
}
Activation::GELU => {
// Derivative of GELU approximation
let sigmoid = 1.0 / (1.0 + (-1.702 * x).exp());
sigmoid + x * 1.702 * sigmoid * (1.0 - sigmoid)
}
Activation::Softmax => 1.0, // Jacobian needed for full derivative
}
}
}
/// Configuration for sheaf neural layer
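///
/// Unlisted fields fall back to `Default`, the pattern the tests below use:
///
/// ```ignore
/// let config = SheafNeuralConfig {
///     input_dim: 4,
///     output_dim: 2,
///     diffusion_steps: 1,
///     ..Default::default()
/// };
/// ```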
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SheafNeuralConfig {
/// Input dimension per node
pub input_dim: usize,
/// Output dimension per node
pub output_dim: usize,
/// Number of diffusion steps
pub diffusion_steps: usize,
/// Diffusion coefficient
pub diffusion_coeff: f64,
/// Activation function
pub activation: Activation,
/// Dropout rate
pub dropout: f64,
/// Whether to use residual connection
pub use_residual: bool,
/// Whether to normalize output
pub layer_norm: bool,
}
impl Default for SheafNeuralConfig {
fn default() -> Self {
Self {
input_dim: 64,
output_dim: 64,
diffusion_steps: 3,
diffusion_coeff: 0.5,
activation: Activation::ReLU,
dropout: 0.0,
layer_norm: true,
use_residual: true,
}
}
}
/// A sheaf-aware neural network layer
///
/// Combines linear transformation with sheaf diffusion to produce
/// outputs that respect graph structure.
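///
/// Sketch of a forward pass (marked `ignore`; `graph`, `input`, and `id1` are
/// built as in `test_sheaf_neural_layer` below):
///
/// ```ignore
/// let config = SheafNeuralConfig { input_dim: 4, output_dim: 2, ..Default::default() };
/// let layer = SheafNeuralLayer::new(config);
/// let output = layer.forward(&graph, &input);
/// assert_eq!(output.get(id1).unwrap().len(), 2);
/// ```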
#[derive(Clone)]
pub struct SheafNeuralLayer {
/// Configuration
config: SheafNeuralConfig,
/// Weight matrix (output_dim x input_dim)
weights: Array2<f64>,
/// Bias vector (output_dim)
bias: Array1<f64>,
/// Diffusion weight (how much to mix diffusion vs direct)
diffusion_weight: f64,
}
impl SheafNeuralLayer {
    /// Create a new layer with Xavier-style initialization
    pub fn new(config: SheafNeuralConfig) -> Self {
        let scale = (2.0 / (config.input_dim + config.output_dim) as f64).sqrt();
        // Xavier-style init: uniform weights in [-scale/2, scale/2)
        let weights = Array2::from_shape_fn((config.output_dim, config.input_dim), |_| {
            rand::random::<f64>() * scale - scale / 2.0
        });
let bias = Array1::zeros(config.output_dim);
Self {
config,
weights,
bias,
diffusion_weight: 0.5,
}
}
/// Create with specific weights
pub fn with_weights(
config: SheafNeuralConfig,
weights: Array2<f64>,
bias: Array1<f64>,
) -> Self {
assert_eq!(weights.nrows(), config.output_dim);
assert_eq!(weights.ncols(), config.input_dim);
assert_eq!(bias.len(), config.output_dim);
Self {
config,
weights,
bias,
diffusion_weight: 0.5,
}
}
/// Set diffusion weight
pub fn set_diffusion_weight(&mut self, weight: f64) {
self.diffusion_weight = weight.clamp(0.0, 1.0);
}
    /// Forward pass on a section
    ///
    /// Computes `activation(diffuse(W * x + b))`: the linear map and bias are
    /// applied per node first, then sheaf diffusion, then the activation,
    /// followed by the optional residual connection and layer normalization.
pub fn forward(&self, graph: &SheafGraph, input: &SheafSection) -> SheafSection {
let mut output = SheafSection::empty();
// Step 1: Apply linear transformation at each node
for (node_id, input_vec) in &input.sections {
let transformed = self.weights.dot(input_vec) + &self.bias;
output.set(*node_id, transformed);
}
// Step 2: Apply sheaf diffusion
if self.config.diffusion_steps > 0 && self.diffusion_weight > 0.0 {
let laplacian_config = LaplacianConfig::default();
let laplacian = SheafLaplacian::from_graph(graph, laplacian_config);
for _ in 0..self.config.diffusion_steps {
let laplacian_out = laplacian.apply(graph, &output);
// Update: x = x - alpha * L * x
for (node_id, out_vec) in output.sections.iter_mut() {
if let Some(lap_vec) = laplacian_out.sections.get(node_id) {
let scale = self.diffusion_weight * self.config.diffusion_coeff;
*out_vec = &*out_vec - &(lap_vec * scale);
}
}
}
}
// Step 3: Apply activation
for out_vec in output.sections.values_mut() {
*out_vec = self.config.activation.apply_array(out_vec);
}
// Step 4: Residual connection (if dimensions match and enabled)
if self.config.use_residual && self.config.input_dim == self.config.output_dim {
for (node_id, out_vec) in output.sections.iter_mut() {
if let Some(in_vec) = input.sections.get(node_id) {
*out_vec = &*out_vec + in_vec;
}
}
}
// Step 5: Layer normalization
if self.config.layer_norm {
for out_vec in output.sections.values_mut() {
let mean: f64 = out_vec.mean().unwrap_or(0.0);
let std: f64 = out_vec.std(0.0);
if std > 1e-10 {
*out_vec = out_vec.mapv(|x| (x - mean) / std);
}
}
}
output
}
/// Get weights
pub fn weights(&self) -> &Array2<f64> {
&self.weights
}
/// Get bias
pub fn bias(&self) -> &Array1<f64> {
&self.bias
}
/// Set weights (for training)
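    ///
    /// A hypothetical gradient step (`grad_w` and `lr` are placeholders
    /// supplied by an external training loop):
    ///
    /// ```ignore
    /// let updated = layer.weights() - &(grad_w * lr);
    /// layer.set_weights(updated);
    /// ```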
pub fn set_weights(&mut self, weights: Array2<f64>) {
assert_eq!(weights.shape(), self.weights.shape());
self.weights = weights;
}
/// Set bias (for training)
pub fn set_bias(&mut self, bias: Array1<f64>) {
assert_eq!(bias.len(), self.bias.len());
self.bias = bias;
}
}
impl std::fmt::Debug for SheafNeuralLayer {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SheafNeuralLayer")
.field("input_dim", &self.config.input_dim)
.field("output_dim", &self.config.output_dim)
.field("diffusion_steps", &self.config.diffusion_steps)
.field("activation", &self.config.activation)
.finish()
}
}
/// Sheaf convolution layer
///
/// Generalizes graph convolution using sheaf structure
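///
/// Sketch (mirrors `test_sheaf_convolution` below; marked `ignore` since the
/// graph setup depends on the substrate builders):
///
/// ```ignore
/// let conv = SheafConvolution::new(2, 3).with_activation(Activation::Tanh);
/// let output = conv.forward(&graph, &input);
/// assert_eq!(output.get(id1).unwrap().len(), 3);
/// ```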
#[derive(Clone)]
pub struct SheafConvolution {
/// Input dimension
input_dim: usize,
/// Output dimension
output_dim: usize,
/// Weight for self-features
self_weight: Array2<f64>,
/// Weight for neighbor features
neighbor_weight: Array2<f64>,
/// Bias
bias: Array1<f64>,
/// Activation
activation: Activation,
}
impl SheafConvolution {
/// Create a new sheaf convolution layer
pub fn new(input_dim: usize, output_dim: usize) -> Self {
let scale = (2.0 / (input_dim + output_dim) as f64).sqrt();
let self_weight = Array2::from_shape_fn((output_dim, input_dim), |_| {
rand::random::<f64>() * scale - scale / 2.0
});
let neighbor_weight = Array2::from_shape_fn((output_dim, input_dim), |_| {
rand::random::<f64>() * scale - scale / 2.0
});
let bias = Array1::zeros(output_dim);
Self {
input_dim,
output_dim,
self_weight,
neighbor_weight,
bias,
activation: Activation::ReLU,
}
}
/// Set activation function
pub fn with_activation(mut self, activation: Activation) -> Self {
self.activation = activation;
self
}
    /// Forward pass
    ///
    /// `h_v = activation(W_self * x_v + W_neigh * mean_u(rho_{u->v}(x_u)) + b)`
    ///
    /// The mean runs over neighbors that actually carry features. Restriction
    /// maps are currently treated as identity; a general implementation would
    /// apply `rho_{u->v}` to each neighbor vector.
pub fn forward(&self, graph: &SheafGraph, input: &SheafSection) -> SheafSection {
let mut output = SheafSection::empty();
for node_id in graph.node_ids() {
if let Some(self_vec) = input.get(node_id) {
// Self contribution
let mut h = self.self_weight.dot(self_vec);
                // Neighbor contribution: mean over neighbors that have features
                let incident_edges: Vec<_> = graph.edges_incident_to(node_id);
                if !incident_edges.is_empty() {
                    let mut neighbor_sum = Array1::zeros(self.input_dim);
                    let mut count = 0;
                    for edge_id in incident_edges {
if let Some(edge) = graph.get_edge(edge_id) {
let neighbor_id = if edge.source == node_id {
edge.target
} else {
edge.source
};
if let Some(neighbor_vec) = input.get(neighbor_id) {
// For identity restriction, just add neighbor
// For general restriction, would apply rho here
neighbor_sum = neighbor_sum + neighbor_vec;
count += 1;
}
}
}
if count > 0 {
neighbor_sum /= count as f64;
h = h + self.neighbor_weight.dot(&neighbor_sum);
}
}
// Add bias and apply activation
h = h + &self.bias;
h = self.activation.apply_array(&h);
output.set(node_id, h);
}
}
output
}
}
impl std::fmt::Debug for SheafConvolution {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SheafConvolution")
.field("input_dim", &self.input_dim)
.field("output_dim", &self.output_dim)
.field("activation", &self.activation)
.finish()
}
}
/// Cohomology-aware pooling layer
///
/// Pools node features while preserving cohomological structure
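///
/// Example, mirroring `test_pooling` below (marked `ignore`):
///
/// ```ignore
/// let mean_pool = CohomologyPooling::new(PoolingMethod::Mean);
/// let pooled = mean_pool.pool(&graph, &section);
/// ```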
#[derive(Clone)]
pub struct CohomologyPooling {
/// Pooling method
method: PoolingMethod,
/// Whether to weight by node importance (from Laplacian spectrum)
spectral_weighting: bool,
}
/// Pooling methods
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum PoolingMethod {
/// Mean of all nodes
Mean,
/// Max over all nodes
Max,
/// Sum over all nodes
Sum,
/// Attention-weighted sum
Attention,
/// Top-k nodes by energy
TopK(usize),
}
impl CohomologyPooling {
/// Create a new pooling layer
pub fn new(method: PoolingMethod) -> Self {
Self {
method,
spectral_weighting: false,
}
}
    /// Enable spectral weighting
    ///
    /// Note: the flag is stored but not yet consulted by `pool`; spectral
    /// weights would be derived from the sheaf Laplacian spectrum.
pub fn with_spectral_weighting(mut self) -> Self {
self.spectral_weighting = true;
self
}
    /// Pool a section down to a single vector
    ///
    /// The graph argument is currently unused; it is kept so spectral
    /// weighting can be added without changing the signature.
    pub fn pool(&self, _graph: &SheafGraph, section: &SheafSection) -> Array1<f64> {
if section.sections.is_empty() {
return Array1::zeros(0);
}
let dim = section
.sections
.values()
.next()
.map(|v| v.len())
.unwrap_or(0);
match self.method {
PoolingMethod::Mean => {
let mut sum = Array1::zeros(dim);
let mut count = 0;
for vec in section.sections.values() {
sum = sum + vec;
count += 1;
}
if count > 0 {
sum / count as f64
} else {
sum
}
}
PoolingMethod::Max => {
let mut max_vec = Array1::from_elem(dim, f64::NEG_INFINITY);
for vec in section.sections.values() {
for (i, &val) in vec.iter().enumerate() {
max_vec[i] = max_vec[i].max(val);
}
}
max_vec
}
PoolingMethod::Sum => {
let mut sum = Array1::zeros(dim);
for vec in section.sections.values() {
sum = sum + vec;
}
sum
}
PoolingMethod::Attention => {
// Simple attention: weight by L2 norm
let mut sum = Array1::zeros(dim);
let mut total_weight = 0.0;
for vec in section.sections.values() {
let weight = vec.iter().map(|x| x * x).sum::<f64>().sqrt();
sum = sum + vec * weight;
total_weight += weight;
}
if total_weight > 0.0 {
sum / total_weight
} else {
sum
}
}
            PoolingMethod::TopK(k) => {
                // Select the top-k nodes by (squared) L2 norm and average them
                let mut node_norms: Vec<_> = section
                    .sections
                    .iter()
                    .map(|(id, vec)| (*id, vec.iter().map(|x| x * x).sum::<f64>()))
                    .collect();
                node_norms
                    .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
                // Average over the nodes actually taken, not k, so sections
                // with fewer than k nodes are pooled correctly.
                let take_n = k.min(node_norms.len());
                let mut sum = Array1::zeros(dim);
                for (node_id, _) in node_norms.into_iter().take(take_n) {
                    if let Some(vec) = section.get(node_id) {
                        sum = sum + vec;
                    }
                }
                if take_n > 0 {
                    sum / take_n as f64
                } else {
                    sum
                }
}
}
}
}
impl std::fmt::Debug for CohomologyPooling {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("CohomologyPooling")
.field("method", &self.method)
.field("spectral_weighting", &self.spectral_weighting)
.finish()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::substrate::edge::SheafEdgeBuilder;
use crate::substrate::node::SheafNodeBuilder;
use uuid::Uuid;
fn make_node_id() -> NodeId {
Uuid::new_v4()
}
#[test]
fn test_activation_functions() {
assert!((Activation::ReLU.apply(-1.0) - 0.0).abs() < 1e-10);
assert!((Activation::ReLU.apply(1.0) - 1.0).abs() < 1e-10);
assert!((Activation::Sigmoid.apply(0.0) - 0.5).abs() < 1e-10);
let arr = Array1::from_vec(vec![1.0, 2.0, 3.0]);
let softmax = Activation::Softmax.apply_array(&arr);
assert!((softmax.sum() - 1.0).abs() < 1e-10);
}
#[test]
fn test_sheaf_neural_layer() {
let graph = SheafGraph::new();
let node1 = SheafNodeBuilder::new()
.state_from_slice(&[1.0, 0.0, 0.0, 0.0])
.build();
let node2 = SheafNodeBuilder::new()
.state_from_slice(&[0.0, 1.0, 0.0, 0.0])
.build();
let id1 = graph.add_node(node1);
let id2 = graph.add_node(node2);
let edge = SheafEdgeBuilder::new(id1, id2)
.identity_restrictions(4)
.weight(1.0)
.build();
graph.add_edge(edge).unwrap();
let config = SheafNeuralConfig {
input_dim: 4,
output_dim: 2,
diffusion_steps: 1,
..Default::default()
};
let layer = SheafNeuralLayer::new(config);
// Create input section
let mut input = SheafSection::empty();
input.set(id1, Array1::from_vec(vec![1.0, 0.0, 0.0, 0.0]));
input.set(id2, Array1::from_vec(vec![0.0, 1.0, 0.0, 0.0]));
let output = layer.forward(&graph, &input);
assert!(output.contains(id1));
assert!(output.contains(id2));
assert_eq!(output.get(id1).unwrap().len(), 2);
}
#[test]
fn test_sheaf_convolution() {
let graph = SheafGraph::new();
let node1 = SheafNodeBuilder::new()
.state_from_slice(&[1.0, 0.0])
.build();
let node2 = SheafNodeBuilder::new()
.state_from_slice(&[0.0, 1.0])
.build();
let id1 = graph.add_node(node1);
let id2 = graph.add_node(node2);
let edge = SheafEdgeBuilder::new(id1, id2)
.identity_restrictions(2)
.build();
graph.add_edge(edge).unwrap();
let conv = SheafConvolution::new(2, 3);
let mut input = SheafSection::empty();
input.set(id1, Array1::from_vec(vec![1.0, 0.0]));
input.set(id2, Array1::from_vec(vec![0.0, 1.0]));
let output = conv.forward(&graph, &input);
assert!(output.contains(id1));
assert_eq!(output.get(id1).unwrap().len(), 3);
}
#[test]
fn test_pooling() {
let graph = SheafGraph::new();
let node1 = SheafNodeBuilder::new().state_from_slice(&[1.0]).build();
let node2 = SheafNodeBuilder::new().state_from_slice(&[3.0]).build();
let id1 = graph.add_node(node1);
let id2 = graph.add_node(node2);
let mut section = SheafSection::empty();
section.set(id1, Array1::from_vec(vec![1.0]));
section.set(id2, Array1::from_vec(vec![3.0]));
let mean_pool = CohomologyPooling::new(PoolingMethod::Mean);
let result = mean_pool.pool(&graph, &section);
assert!((result[0] - 2.0).abs() < 1e-10);
let max_pool = CohomologyPooling::new(PoolingMethod::Max);
let result = max_pool.pool(&graph, &section);
assert!((result[0] - 3.0).abs() < 1e-10);
}
}