Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

2026-02-28 14:39:40 -05:00
parent 7885bf6278 d803bfe2b1
commit cd5943df23
7854 changed files with 3522914 additions and 0 deletions
--- a/vendor/ruvector/crates/ruvector-sparse-inference/src/sparse/ffn.rs
+++ b/vendor/ruvector/crates/ruvector-sparse-inference/src/sparse/ffn.rs
@@ -0,0 +1,341 @@
+//! Sparse Feed-Forward Network implementation.
+
+use ndarray::{Array1, Array2};
+use serde::{Deserialize, Serialize};
+use tracing::{debug, trace};
+
+use crate::backend::{get_backend, Backend};
+use crate::config::ActivationType;
+use crate::error::{InferenceError, Result};
+
+/// Sparse Feed-Forward Network computation.
+///
+/// This implements a two-layer FFN that can compute using only a subset of neurons:
+/// - W1: [hidden_dim, input_dim] - first projection (row-major for neuron access)
+/// - W2_T: [hidden_dim, output_dim] - second projection TRANSPOSED (row-major for contiguous access)
+/// - Activation function applied between layers
+///
+/// The sparse forward pass:
+/// 1. Sparse first layer: only compute active neurons
+/// 2. Apply activation function
+/// 3. Sparse second layer: accumulate only active neuron contributions (now contiguous!)
+///
+/// # Performance Optimization
+///
+/// W2 is stored transposed so that accessing columns (by neuron index) becomes row access,
+/// which is contiguous in memory. This provides 15-25% speedup in the sparse accumulation step.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SparseFfn {
+    /// W1: [hidden_dim, input_dim] - first projection.
+    /// Row-major layout for efficient neuron access.
+    w1: Array2<f32>,
+
+    /// W2_T: [hidden_dim, output_dim] - second projection TRANSPOSED.
+    /// Row-major layout for contiguous neuron weight access.
+    /// Original W2 shape was [output_dim, hidden_dim].
+    #[serde(with = "w2_serde")]
+    w2_t: Array2<f32>,
+
+    /// Bias for first layer.
+    b1: Array1<f32>,
+
+    /// Bias for second layer.
+    b2: Array1<f32>,
+
+    /// Activation function type.
+    activation: ActivationType,
+
+    /// Output dimension (cached for efficiency)
+    output_dim: usize,
+}
+
+// Custom serialization for w2_t - stores as original W2 for compatibility
+mod w2_serde {
+    use super::*;
+    use ndarray::Array2;
+
+    pub fn serialize<S>(w2_t: &Array2<f32>, serializer: S) -> std::result::Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        // Transpose back to original W2 shape for serialization compatibility
+        let w2 = w2_t.t().to_owned();
+        w2.serialize(serializer)
+    }
+
+    pub fn deserialize<'de, D>(deserializer: D) -> std::result::Result<Array2<f32>, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        // Load as original W2 and transpose for optimized storage
+        let w2 = Array2::<f32>::deserialize(deserializer)?;
+        Ok(w2.t().to_owned())
+    }
+}
+
+impl SparseFfn {
+    /// Create a new sparse FFN with given dimensions.
+    pub fn new(
+        input_dim: usize,
+        hidden_dim: usize,
+        output_dim: usize,
+        activation: ActivationType,
+    ) -> Result<Self> {
+        use rand::Rng;
+        let mut rng = rand::thread_rng();
+
+        // Initialize with small random values
+        let w1 = Array2::from_shape_fn((hidden_dim, input_dim), |_| rng.gen::<f32>() * 0.01);
+
+        // Store W2 transposed: [hidden_dim, output_dim] instead of [output_dim, hidden_dim]
+        // This allows contiguous row access when iterating by neuron index
+        let w2_t = Array2::from_shape_fn((hidden_dim, output_dim), |_| rng.gen::<f32>() * 0.01);
+
+        let b1 = Array1::zeros(hidden_dim);
+        let b2 = Array1::zeros(output_dim);
+
+        Ok(Self {
+            w1,
+            w2_t,
+            b1,
+            b2,
+            activation,
+            output_dim,
+        })
+    }
+
+    /// Create from existing weights.
+    pub fn from_weights(
+        w1: Array2<f32>,
+        w2: Array2<f32>,
+        b1: Array1<f32>,
+        b2: Array1<f32>,
+        activation: ActivationType,
+    ) -> Result<Self> {
+        let (hidden_dim, _input_dim) = w1.dim();
+        let (output_dim, w2_hidden) = w2.dim();
+
+        if hidden_dim != w2_hidden {
+            return Err(InferenceError::Failed(format!(
+                "Hidden dimension mismatch: W1 has {}, W2 has {}",
+                hidden_dim, w2_hidden
+            ))
+            .into());
+        }
+
+        if b1.len() != hidden_dim {
+            return Err(InferenceError::Failed(format!(
+                "b1 dimension mismatch: expected {}, got {}",
+                hidden_dim,
+                b1.len()
+            ))
+            .into());
+        }
+
+        if b2.len() != output_dim {
+            return Err(InferenceError::Failed(format!(
+                "b2 dimension mismatch: expected {}, got {}",
+                output_dim,
+                b2.len()
+            ))
+            .into());
+        }
+
+        // Transpose W2 for optimized storage
+        let w2_t = w2.t().to_owned();
+
+        Ok(Self {
+            w1,
+            w2_t,
+            b1,
+            b2,
+            activation,
+            output_dim,
+        })
+    }
+
+    /// Get input dimension.
+    pub fn input_dim(&self) -> usize {
+        self.w1.ncols()
+    }
+
+    /// Get hidden dimension.
+    pub fn hidden_dim(&self) -> usize {
+        self.w1.nrows()
+    }
+
+    /// Get output dimension.
+    pub fn output_dim(&self) -> usize {
+        self.output_dim
+    }
+
+    /// Compute FFN using only active neurons (sparse computation).
+    ///
+    /// This is the main optimization: only compute activations for predicted neurons.
+    pub fn forward_sparse(&self, input: &[f32], active_neurons: &[usize]) -> Result<Vec<f32>> {
+        if input.len() != self.input_dim() {
+            return Err(InferenceError::InputDimensionMismatch {
+                expected: self.input_dim(),
+                actual: input.len(),
+            }
+            .into());
+        }
+
+        if active_neurons.is_empty() {
+            return Err(InferenceError::NoActiveNeurons.into());
+        }
+
+        trace!(
+            "Sparse forward: {} active neurons ({:.1}% sparsity)",
+            active_neurons.len(),
+            100.0 * (1.0 - active_neurons.len() as f32 / self.hidden_dim() as f32)
+        );
+
+        let backend = get_backend();
+
+        // 1. Sparse first layer: only compute active neurons
+        let mut hidden = Vec::with_capacity(active_neurons.len());
+        for &neuron_idx in active_neurons {
+            if neuron_idx >= self.hidden_dim() {
+                return Err(InferenceError::Failed(format!(
+                    "Invalid neuron index: {}",
+                    neuron_idx
+                ))
+                .into());
+            }
+
+            let row = self.w1.row(neuron_idx);
+            let dot = backend.dot_product(row.as_slice().unwrap(), input);
+            hidden.push(dot + self.b1[neuron_idx]);
+        }
+
+        // 2. Apply activation function
+        backend.activation(&mut hidden, self.activation);
+
+        // 3. Sparse second layer: accumulate only active neuron contributions
+        // W2_T is [hidden_dim, output_dim], so row access by neuron_idx is CONTIGUOUS
+        let mut output = self.b2.to_vec();
+        let backend = get_backend();
+
+        for (i, &neuron_idx) in active_neurons.iter().enumerate() {
+            // Row access is contiguous in memory - major optimization!
+            let weights = self.w2_t.row(neuron_idx);
+            let h_val = hidden[i];
+
+            // Use SIMD-optimized axpy: output += h_val * weights
+            backend.axpy(&mut output, weights.as_slice().unwrap(), h_val);
+        }
+
+        Ok(output)
+    }
+
+    /// Compute FFN using all neurons (dense computation).
+    ///
+    /// This is the baseline for comparison and correctness checking.
+    pub fn forward_dense(&self, input: &[f32]) -> Result<Vec<f32>> {
+        if input.len() != self.input_dim() {
+            return Err(InferenceError::InputDimensionMismatch {
+                expected: self.input_dim(),
+                actual: input.len(),
+            }
+            .into());
+        }
+
+        let backend = get_backend();
+        let input_arr = Array1::from_vec(input.to_vec());
+
+        // 1. First layer: hidden = activation(W1 · input + b1)
+        let mut hidden = self.w1.dot(&input_arr) + &self.b1;
+        backend.activation(hidden.as_slice_mut().unwrap(), self.activation);
+
+        // 2. Second layer: output = W2 · hidden + b2
+        // W2_T is [hidden_dim, output_dim], so W2 = W2_T.t()
+        // output = W2_T.t() · hidden = (hidden.t() · W2_T).t() = W2_T.t().dot(hidden)
+        let output = self.w2_t.t().dot(&hidden) + &self.b2;
+
+        Ok(output.to_vec())
+    }
+
+    /// Compute both sparse and dense, returning the difference for validation.
+    #[cfg(test)]
+    pub fn validate_sparse(&self, input: &[f32], active_neurons: &[usize]) -> Result<f32> {
+        let sparse_output = self.forward_sparse(input, active_neurons)?;
+        let dense_output = self.forward_dense(input)?;
+
+        // Compute mean absolute error
+        let mae: f32 = sparse_output
+            .iter()
+            .zip(dense_output.iter())
+            .map(|(s, d)| (s - d).abs())
+            .sum::<f32>()
+            / sparse_output.len() as f32;
+
+        Ok(mae)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_ffn_creation() {
+        let ffn = SparseFfn::new(128, 512, 128, ActivationType::Gelu).unwrap();
+
+        assert_eq!(ffn.input_dim(), 128);
+        assert_eq!(ffn.hidden_dim(), 512);
+        assert_eq!(ffn.output_dim(), 128);
+    }
+
+    #[test]
+    fn test_dense_forward() {
+        let ffn = SparseFfn::new(64, 256, 64, ActivationType::Relu).unwrap();
+        let input = vec![0.1; 64];
+
+        let output = ffn.forward_dense(&input).unwrap();
+        assert_eq!(output.len(), 64);
+    }
+
+    #[test]
+    fn test_sparse_forward() {
+        let ffn = SparseFfn::new(64, 256, 64, ActivationType::Relu).unwrap();
+        let input = vec![0.1; 64];
+        let active_neurons: Vec<usize> = (0..64).collect(); // First 64 neurons
+
+        let output = ffn.forward_sparse(&input, &active_neurons).unwrap();
+        assert_eq!(output.len(), 64);
+    }
+
+    #[test]
+    fn test_sparse_vs_dense() {
+        let ffn = SparseFfn::new(32, 128, 32, ActivationType::Relu).unwrap();
+        let input = vec![0.5; 32];
+
+        // Use all neurons - should match dense computation
+        let all_neurons: Vec<usize> = (0..128).collect();
+        let mae = ffn.validate_sparse(&input, &all_neurons).unwrap();
+
+        // Should be very close (allowing for floating point precision)
+        assert!(mae < 1e-5, "MAE too large: {}", mae);
+    }
+
+    #[test]
+    fn test_empty_active_neurons() {
+        let ffn = SparseFfn::new(32, 128, 32, ActivationType::Relu).unwrap();
+        let input = vec![0.1; 32];
+        let empty: Vec<usize> = vec![];
+
+        let result = ffn.forward_sparse(&input, &empty);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_invalid_neuron_index() {
+        let ffn = SparseFfn::new(32, 128, 32, ActivationType::Relu).unwrap();
+        let input = vec![0.1; 32];
+        let invalid = vec![200]; // Out of bounds
+
+        let result = ffn.forward_sparse(&input, &invalid);
+        assert!(result.is_err());
+    }
+}
--- a/vendor/ruvector/crates/ruvector-sparse-inference/src/sparse/mod.rs
+++ b/vendor/ruvector/crates/ruvector-sparse-inference/src/sparse/mod.rs
@@ -0,0 +1,59 @@
+//! Sparse computation module.
+//!
+//! This module provides sparse implementations of neural network layers.
+
+mod ffn;
+
+pub use crate::config::ActivationType;
+pub use ffn::SparseFfn;
+
+/// Trait for feed-forward network layers.
+pub trait FeedForward: Send + Sync {
+    /// Sparse forward pass using only active neurons.
+    fn forward_sparse(
+        &self,
+        input: &[f32],
+        active_neurons: &[usize],
+    ) -> crate::error::Result<Vec<f32>>;
+
+    /// Dense forward pass using all neurons.
+    fn forward_dense(&self, input: &[f32]) -> crate::error::Result<Vec<f32>>;
+}
+
+impl FeedForward for SparseFfn {
+    fn forward_sparse(
+        &self,
+        input: &[f32],
+        active_neurons: &[usize],
+    ) -> crate::error::Result<Vec<f32>> {
+        SparseFfn::forward_sparse(self, input, active_neurons)
+    }
+
+    fn forward_dense(&self, input: &[f32]) -> crate::error::Result<Vec<f32>> {
+        SparseFfn::forward_dense(self, input)
+    }
+}
+
+/// SwiGLU FFN (placeholder for future implementation).
+pub struct SwiGLUFfn;
+
+impl SwiGLUFfn {
+    /// Create a new SwiGLU FFN.
+    pub fn new(_input_dim: usize, _hidden_dim: usize) -> Self {
+        Self
+    }
+}
+
+impl FeedForward for SwiGLUFfn {
+    fn forward_sparse(
+        &self,
+        _input: &[f32],
+        _active_neurons: &[usize],
+    ) -> crate::error::Result<Vec<f32>> {
+        unimplemented!("SwiGLUFfn not yet implemented")
+    }
+
+    fn forward_dense(&self, _input: &[f32]) -> crate::error::Result<Vec<f32>> {
+        unimplemented!("SwiGLUFfn not yet implemented")
+    }
+}