//! Sheaf Neural Network Layers
//!
//! Neural network layers that respect sheaf structure, enabling
//! coherence-aware deep learning.
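//!
//! A minimal sketch of the intended flow, assuming a populated `SheafGraph`
//! (see the tests at the bottom of this module for graph construction;
//! `graph` and `node_id` are assumed to exist):
//!
//! ```ignore
//! let config = SheafNeuralConfig { input_dim: 4, output_dim: 2, ..Default::default() };
//! let layer = SheafNeuralLayer::new(config);
//! let mut input = SheafSection::empty();
//! input.set(node_id, ndarray::Array1::from_vec(vec![1.0, 0.0, 0.0, 0.0]));
//! let output = layer.forward(&graph, &input);
//! ```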

use super::laplacian::{LaplacianConfig, SheafLaplacian};
use super::sheaf::{Sheaf, SheafSection};
use crate::substrate::NodeId;
use crate::substrate::SheafGraph;
use ndarray::{Array1, Array2};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Activation functions for neural layers
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum Activation {
    /// No activation (identity)
    Identity,
    /// ReLU: max(0, x)
    ReLU,
    /// Leaky ReLU: max(alpha * x, x)
    LeakyReLU(f64),
    /// Sigmoid: 1 / (1 + exp(-x))
    Sigmoid,
    /// Tanh: tanh(x)
    Tanh,
    /// GELU: x * Phi(x)
    GELU,
    /// Softmax (applied per-node)
    Softmax,
}

impl Activation {
    /// Apply activation function
    pub fn apply(&self, x: f64) -> f64 {
        match self {
            Activation::Identity => x,
            Activation::ReLU => x.max(0.0),
            Activation::LeakyReLU(alpha) => {
                if x > 0.0 {
                    x
                } else {
                    alpha * x
                }
            }
            Activation::Sigmoid => 1.0 / (1.0 + (-x).exp()),
            Activation::Tanh => x.tanh(),
            Activation::GELU => {
                // Approximation: x * sigmoid(1.702 * x)
                let sigmoid = 1.0 / (1.0 + (-1.702 * x).exp());
                x * sigmoid
            }
            Activation::Softmax => x, // Softmax handled separately
        }
    }

    /// Apply activation to array
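    ///
    /// For `Softmax`, the whole array is exponentiated and normalized
    /// (stabilized by subtracting the max), so the result sums to 1:
    ///
    /// ```ignore
    /// let logits = ndarray::Array1::from_vec(vec![1.0, 2.0, 3.0]);
    /// let probs = Activation::Softmax.apply_array(&logits);
    /// assert!((probs.sum() - 1.0).abs() < 1e-10);
    /// ```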
    pub fn apply_array(&self, arr: &Array1<f64>) -> Array1<f64> {
        match self {
            Activation::Softmax => {
                let max_val = arr.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
                let exp_vals: Array1<f64> = arr.mapv(|x| (x - max_val).exp());
                let sum: f64 = exp_vals.sum();
                exp_vals / sum
            }
            _ => arr.mapv(|x| self.apply(x)),
        }
    }

    /// Compute derivative
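    ///
    /// For example, the sigmoid derivative `s(x) * (1 - s(x))` is 0.25 at
    /// `x = 0`:
    ///
    /// ```ignore
    /// assert!((Activation::Sigmoid.derivative(0.0) - 0.25).abs() < 1e-10);
    /// ```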
    pub fn derivative(&self, x: f64) -> f64 {
        match self {
            Activation::Identity => 1.0,
            Activation::ReLU => {
                if x > 0.0 {
                    1.0
                } else {
                    0.0
                }
            }
            Activation::LeakyReLU(alpha) => {
                if x > 0.0 {
                    1.0
                } else {
                    *alpha
                }
            }
            Activation::Sigmoid => {
                let s = self.apply(x);
                s * (1.0 - s)
            }
            Activation::Tanh => {
                let t = x.tanh();
                1.0 - t * t
            }
            Activation::GELU => {
                // Derivative of GELU approximation
                let sigmoid = 1.0 / (1.0 + (-1.702 * x).exp());
                sigmoid + x * 1.702 * sigmoid * (1.0 - sigmoid)
            }
            Activation::Softmax => 1.0, // Jacobian needed for full derivative
        }
    }
}

/// Configuration for sheaf neural layer
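///
/// Usually built with struct-update syntax over the defaults, as in the
/// tests below:
///
/// ```ignore
/// let config = SheafNeuralConfig {
///     input_dim: 4,
///     output_dim: 2,
///     ..Default::default()
/// };
/// ```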
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SheafNeuralConfig {
    /// Input dimension per node
    pub input_dim: usize,
    /// Output dimension per node
    pub output_dim: usize,
    /// Number of diffusion steps
    pub diffusion_steps: usize,
    /// Diffusion coefficient
    pub diffusion_coeff: f64,
    /// Activation function
    pub activation: Activation,
    /// Dropout rate (not applied in `forward`)
    pub dropout: f64,
    /// Whether to use residual connection
    pub use_residual: bool,
    /// Whether to normalize output
    pub layer_norm: bool,
}

impl Default for SheafNeuralConfig {
    fn default() -> Self {
        Self {
            input_dim: 64,
            output_dim: 64,
            diffusion_steps: 3,
            diffusion_coeff: 0.5,
            activation: Activation::ReLU,
            dropout: 0.0,
            layer_norm: true,
            use_residual: true,
        }
    }
}

/// A sheaf-aware neural network layer
///
/// Combines linear transformation with sheaf diffusion to produce
/// outputs that respect graph structure.
#[derive(Clone)]
pub struct SheafNeuralLayer {
    /// Configuration
    config: SheafNeuralConfig,
    /// Weight matrix (output_dim x input_dim)
    weights: Array2<f64>,
    /// Bias vector (output_dim)
    bias: Array1<f64>,
    /// Diffusion weight (how much to mix diffusion vs direct)
    diffusion_weight: f64,
}

impl SheafNeuralLayer {
    /// Create a new layer with Xavier initialization: weights are drawn
    /// uniformly from `[-scale / 2, scale / 2)` with
    /// `scale = sqrt(2 / (input_dim + output_dim))`, and the bias is zero.
    pub fn new(config: SheafNeuralConfig) -> Self {
        let scale = (2.0 / (config.input_dim + config.output_dim) as f64).sqrt();

        // Initialize weights with Xavier
        let weights = Array2::from_shape_fn((config.output_dim, config.input_dim), |_| {
            rand::random::<f64>() * scale - scale / 2.0
        });

        let bias = Array1::zeros(config.output_dim);

        Self {
            config,
            weights,
            bias,
            diffusion_weight: 0.5,
        }
    }

    /// Create with specific weights
    pub fn with_weights(
        config: SheafNeuralConfig,
        weights: Array2<f64>,
        bias: Array1<f64>,
    ) -> Self {
        assert_eq!(weights.nrows(), config.output_dim);
        assert_eq!(weights.ncols(), config.input_dim);
        assert_eq!(bias.len(), config.output_dim);

        Self {
            config,
            weights,
            bias,
            diffusion_weight: 0.5,
        }
    }

    /// Set diffusion weight (clamped to `[0, 1]`)
    pub fn set_diffusion_weight(&mut self, weight: f64) {
        self.diffusion_weight = weight.clamp(0.0, 1.0);
    }

    /// Forward pass on a section
    ///
    /// Computes `activation(diffuse(W * x + b))` per node, then optionally
    /// adds a residual connection and applies layer normalization.
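    ///
    /// A minimal usage sketch, assuming `graph` and `input` are built as in
    /// the tests at the bottom of this module:
    ///
    /// ```ignore
    /// let layer = SheafNeuralLayer::new(SheafNeuralConfig::default());
    /// let output = layer.forward(&graph, &input);
    /// // Each node's output now has length `output_dim` (64 by default).
    /// ```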
    pub fn forward(&self, graph: &SheafGraph, input: &SheafSection) -> SheafSection {
        let mut output = SheafSection::empty();

        // Step 1: Apply linear transformation at each node
        for (node_id, input_vec) in &input.sections {
            let transformed = self.weights.dot(input_vec) + &self.bias;
            output.set(*node_id, transformed);
        }

        // Step 2: Apply sheaf diffusion
        if self.config.diffusion_steps > 0 && self.diffusion_weight > 0.0 {
            let laplacian_config = LaplacianConfig::default();
            let laplacian = SheafLaplacian::from_graph(graph, laplacian_config);

            for _ in 0..self.config.diffusion_steps {
                let laplacian_out = laplacian.apply(graph, &output);

                // Update: x = x - alpha * L * x
                for (node_id, out_vec) in output.sections.iter_mut() {
                    if let Some(lap_vec) = laplacian_out.sections.get(node_id) {
                        let scale = self.diffusion_weight * self.config.diffusion_coeff;
                        *out_vec = &*out_vec - &(lap_vec * scale);
                    }
                }
            }
        }

        // Step 3: Apply activation
        for out_vec in output.sections.values_mut() {
            *out_vec = self.config.activation.apply_array(out_vec);
        }

        // Step 4: Residual connection (if dimensions match and enabled)
        if self.config.use_residual && self.config.input_dim == self.config.output_dim {
            for (node_id, out_vec) in output.sections.iter_mut() {
                if let Some(in_vec) = input.sections.get(node_id) {
                    *out_vec = &*out_vec + in_vec;
                }
            }
        }

        // Step 5: Layer normalization
        if self.config.layer_norm {
            for out_vec in output.sections.values_mut() {
                let mean: f64 = out_vec.mean().unwrap_or(0.0);
                let std: f64 = out_vec.std(0.0);
                if std > 1e-10 {
                    *out_vec = out_vec.mapv(|x| (x - mean) / std);
                }
            }
        }

        output
    }

    /// Get weights
    pub fn weights(&self) -> &Array2<f64> {
        &self.weights
    }

    /// Get bias
    pub fn bias(&self) -> &Array1<f64> {
        &self.bias
    }

    /// Set weights (for training)
    pub fn set_weights(&mut self, weights: Array2<f64>) {
        assert_eq!(weights.shape(), self.weights.shape());
        self.weights = weights;
    }

    /// Set bias (for training)
    pub fn set_bias(&mut self, bias: Array1<f64>) {
        assert_eq!(bias.len(), self.bias.len());
        self.bias = bias;
    }
}

impl std::fmt::Debug for SheafNeuralLayer {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SheafNeuralLayer")
            .field("input_dim", &self.config.input_dim)
            .field("output_dim", &self.config.output_dim)
            .field("diffusion_steps", &self.config.diffusion_steps)
            .field("activation", &self.config.activation)
            .finish()
    }
}

/// Sheaf convolution layer
///
/// Generalizes graph convolution using sheaf structure
#[derive(Clone)]
pub struct SheafConvolution {
    /// Input dimension
    input_dim: usize,
    /// Output dimension
    output_dim: usize,
    /// Weight for self-features
    self_weight: Array2<f64>,
    /// Weight for neighbor features
    neighbor_weight: Array2<f64>,
    /// Bias
    bias: Array1<f64>,
    /// Activation
    activation: Activation,
}

impl SheafConvolution {
    /// Create a new sheaf convolution layer
    pub fn new(input_dim: usize, output_dim: usize) -> Self {
        let scale = (2.0 / (input_dim + output_dim) as f64).sqrt();

        let self_weight = Array2::from_shape_fn((output_dim, input_dim), |_| {
            rand::random::<f64>() * scale - scale / 2.0
        });
        let neighbor_weight = Array2::from_shape_fn((output_dim, input_dim), |_| {
            rand::random::<f64>() * scale - scale / 2.0
        });
        let bias = Array1::zeros(output_dim);

        Self {
            input_dim,
            output_dim,
            self_weight,
            neighbor_weight,
            bias,
            activation: Activation::ReLU,
        }
    }

    /// Set activation function
    pub fn with_activation(mut self, activation: Activation) -> Self {
        self.activation = activation;
        self
    }

    /// Forward pass
    ///
    /// h_v = activation(W_self * x_v + W_neigh * sum_u rho_{u->v}(x_u) / deg(v) + b)
    ///
    /// The current implementation treats every restriction map rho as the
    /// identity; see the comment in the neighbor loop.
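    ///
    /// A minimal usage sketch (mirroring `test_sheaf_convolution` below);
    /// the `Tanh` choice here is arbitrary:
    ///
    /// ```ignore
    /// let conv = SheafConvolution::new(2, 3).with_activation(Activation::Tanh);
    /// let output = conv.forward(&graph, &input);
    /// assert_eq!(output.get(id1).unwrap().len(), 3);
    /// ```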
    pub fn forward(&self, graph: &SheafGraph, input: &SheafSection) -> SheafSection {
        let mut output = SheafSection::empty();

        for node_id in graph.node_ids() {
            if let Some(self_vec) = input.get(node_id) {
                // Self contribution
                let mut h = self.self_weight.dot(self_vec);

                // Neighbor contribution (average of restricted neighbors)
                let neighbors: Vec<_> = graph.edges_incident_to(node_id);
                if !neighbors.is_empty() {
                    let mut neighbor_sum = Array1::zeros(self.input_dim);
                    let mut count = 0;

                    for edge_id in neighbors {
                        if let Some(edge) = graph.get_edge(edge_id) {
                            let neighbor_id = if edge.source == node_id {
                                edge.target
                            } else {
                                edge.source
                            };

                            if let Some(neighbor_vec) = input.get(neighbor_id) {
                                // For identity restriction, just add neighbor
                                // For general restriction, would apply rho here
                                neighbor_sum = neighbor_sum + neighbor_vec;
                                count += 1;
                            }
                        }
                    }

                    if count > 0 {
                        neighbor_sum /= count as f64;
                        h = h + self.neighbor_weight.dot(&neighbor_sum);
                    }
                }

                // Add bias and apply activation
                h = h + &self.bias;
                h = self.activation.apply_array(&h);

                output.set(node_id, h);
            }
        }

        output
    }
}

impl std::fmt::Debug for SheafConvolution {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SheafConvolution")
            .field("input_dim", &self.input_dim)
            .field("output_dim", &self.output_dim)
            .field("activation", &self.activation)
            .finish()
    }
}

/// Cohomology-aware pooling layer
///
/// Pools node features while preserving cohomological structure
#[derive(Clone)]
pub struct CohomologyPooling {
    /// Pooling method
    method: PoolingMethod,
    /// Whether to weight by node importance (from Laplacian spectrum)
    spectral_weighting: bool,
}

/// Pooling methods
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum PoolingMethod {
    /// Mean of all nodes
    Mean,
    /// Max over all nodes
    Max,
    /// Sum over all nodes
    Sum,
    /// Attention-weighted sum
    Attention,
    /// Top-k nodes by energy
    TopK(usize),
}

impl CohomologyPooling {
    /// Create a new pooling layer
    pub fn new(method: PoolingMethod) -> Self {
        Self {
            method,
            spectral_weighting: false,
        }
    }

    /// Enable spectral weighting
    pub fn with_spectral_weighting(mut self) -> Self {
        self.spectral_weighting = true;
        self
    }

    /// Pool section to single vector
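    ///
    /// A minimal sketch of mean pooling (mirroring `test_pooling` below):
    ///
    /// ```ignore
    /// let pool = CohomologyPooling::new(PoolingMethod::Mean);
    /// let pooled = pool.pool(&graph, &section);
    /// // With per-node values [1.0] and [3.0], `pooled` is [2.0].
    /// ```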
    pub fn pool(&self, graph: &SheafGraph, section: &SheafSection) -> Array1<f64> {
        if section.sections.is_empty() {
            return Array1::zeros(0);
        }

        let dim = section
            .sections
            .values()
            .next()
            .map(|v| v.len())
            .unwrap_or(0);

        match self.method {
            PoolingMethod::Mean => {
                let mut sum = Array1::zeros(dim);
                let mut count = 0;
                for vec in section.sections.values() {
                    sum = sum + vec;
                    count += 1;
                }
                if count > 0 {
                    sum / count as f64
                } else {
                    sum
                }
            }
            PoolingMethod::Max => {
                let mut max_vec = Array1::from_elem(dim, f64::NEG_INFINITY);
                for vec in section.sections.values() {
                    for (i, &val) in vec.iter().enumerate() {
                        max_vec[i] = max_vec[i].max(val);
                    }
                }
                max_vec
            }
            PoolingMethod::Sum => {
                let mut sum = Array1::zeros(dim);
                for vec in section.sections.values() {
                    sum = sum + vec;
                }
                sum
            }
            PoolingMethod::Attention => {
                // Simple attention: weight by L2 norm
                let mut sum = Array1::zeros(dim);
                let mut total_weight = 0.0;
                for vec in section.sections.values() {
                    let weight = vec.iter().map(|x| x * x).sum::<f64>().sqrt();
                    sum = sum + vec * weight;
                    total_weight += weight;
                }
                if total_weight > 0.0 {
                    sum / total_weight
                } else {
                    sum
                }
            }
            PoolingMethod::TopK(k) => {
                // Select top k nodes by L2 norm
                let mut node_norms: Vec<_> = section
                    .sections
                    .iter()
                    .map(|(id, vec)| (*id, vec.iter().map(|x| x * x).sum::<f64>()))
                    .collect();
                node_norms
                    .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));

                let mut sum = Array1::zeros(dim);
                for (node_id, _) in node_norms.into_iter().take(k) {
                    if let Some(vec) = section.get(node_id) {
                        sum = sum + vec;
                    }
                }
                sum / k as f64
            }
        }
    }
}

impl std::fmt::Debug for CohomologyPooling {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("CohomologyPooling")
            .field("method", &self.method)
            .field("spectral_weighting", &self.spectral_weighting)
            .finish()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::substrate::edge::SheafEdgeBuilder;
    use crate::substrate::node::SheafNodeBuilder;
    use uuid::Uuid;

    fn make_node_id() -> NodeId {
        Uuid::new_v4()
    }

    #[test]
    fn test_activation_functions() {
        assert!((Activation::ReLU.apply(-1.0) - 0.0).abs() < 1e-10);
        assert!((Activation::ReLU.apply(1.0) - 1.0).abs() < 1e-10);

        assert!((Activation::Sigmoid.apply(0.0) - 0.5).abs() < 1e-10);

        let arr = Array1::from_vec(vec![1.0, 2.0, 3.0]);
        let softmax = Activation::Softmax.apply_array(&arr);
        assert!((softmax.sum() - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_sheaf_neural_layer() {
        let graph = SheafGraph::new();

        let node1 = SheafNodeBuilder::new()
            .state_from_slice(&[1.0, 0.0, 0.0, 0.0])
            .build();
        let node2 = SheafNodeBuilder::new()
            .state_from_slice(&[0.0, 1.0, 0.0, 0.0])
            .build();

        let id1 = graph.add_node(node1);
        let id2 = graph.add_node(node2);

        let edge = SheafEdgeBuilder::new(id1, id2)
            .identity_restrictions(4)
            .weight(1.0)
            .build();
        graph.add_edge(edge).unwrap();

        let config = SheafNeuralConfig {
            input_dim: 4,
            output_dim: 2,
            diffusion_steps: 1,
            ..Default::default()
        };
        let layer = SheafNeuralLayer::new(config);

        // Create input section
        let mut input = SheafSection::empty();
        input.set(id1, Array1::from_vec(vec![1.0, 0.0, 0.0, 0.0]));
        input.set(id2, Array1::from_vec(vec![0.0, 1.0, 0.0, 0.0]));

        let output = layer.forward(&graph, &input);

        assert!(output.contains(id1));
        assert!(output.contains(id2));
        assert_eq!(output.get(id1).unwrap().len(), 2);
    }

    #[test]
    fn test_sheaf_convolution() {
        let graph = SheafGraph::new();

        let node1 = SheafNodeBuilder::new()
            .state_from_slice(&[1.0, 0.0])
            .build();
        let node2 = SheafNodeBuilder::new()
            .state_from_slice(&[0.0, 1.0])
            .build();

        let id1 = graph.add_node(node1);
        let id2 = graph.add_node(node2);

        let edge = SheafEdgeBuilder::new(id1, id2)
            .identity_restrictions(2)
            .build();
        graph.add_edge(edge).unwrap();

        let conv = SheafConvolution::new(2, 3);

        let mut input = SheafSection::empty();
        input.set(id1, Array1::from_vec(vec![1.0, 0.0]));
        input.set(id2, Array1::from_vec(vec![0.0, 1.0]));

        let output = conv.forward(&graph, &input);

        assert!(output.contains(id1));
        assert_eq!(output.get(id1).unwrap().len(), 3);
    }

    #[test]
    fn test_pooling() {
        let graph = SheafGraph::new();

        let node1 = SheafNodeBuilder::new().state_from_slice(&[1.0]).build();
        let node2 = SheafNodeBuilder::new().state_from_slice(&[3.0]).build();

        let id1 = graph.add_node(node1);
        let id2 = graph.add_node(node2);

        let mut section = SheafSection::empty();
        section.set(id1, Array1::from_vec(vec![1.0]));
        section.set(id2, Array1::from_vec(vec![3.0]));

        let mean_pool = CohomologyPooling::new(PoolingMethod::Mean);
        let result = mean_pool.pool(&graph, &section);
        assert!((result[0] - 2.0).abs() < 1e-10);

        let max_pool = CohomologyPooling::new(PoolingMethod::Max);
        let result = max_pool.pool(&graph, &section);
        assert!((result[0] - 3.0).abs() < 1e-10);
    }
}