Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
341
vendor/ruvector/crates/ruvector-sparse-inference/src/sparse/ffn.rs
vendored
Normal file
341
vendor/ruvector/crates/ruvector-sparse-inference/src/sparse/ffn.rs
vendored
Normal file
@@ -0,0 +1,341 @@
|
||||
//! Sparse Feed-Forward Network implementation.
|
||||
|
||||
use ndarray::{Array1, Array2};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::{debug, trace};
|
||||
|
||||
use crate::backend::{get_backend, Backend};
|
||||
use crate::config::ActivationType;
|
||||
use crate::error::{InferenceError, Result};
|
||||
|
||||
/// Sparse Feed-Forward Network computation.
|
||||
///
|
||||
/// This implements a two-layer FFN that can compute using only a subset of neurons:
|
||||
/// - W1: [hidden_dim, input_dim] - first projection (row-major for neuron access)
|
||||
/// - W2_T: [hidden_dim, output_dim] - second projection TRANSPOSED (row-major for contiguous access)
|
||||
/// - Activation function applied between layers
|
||||
///
|
||||
/// The sparse forward pass:
|
||||
/// 1. Sparse first layer: only compute active neurons
|
||||
/// 2. Apply activation function
|
||||
/// 3. Sparse second layer: accumulate only active neuron contributions (now contiguous!)
|
||||
///
|
||||
/// # Performance Optimization
|
||||
///
|
||||
/// W2 is stored transposed so that accessing columns (by neuron index) becomes row access,
|
||||
/// which is contiguous in memory. This provides 15-25% speedup in the sparse accumulation step.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SparseFfn {
|
||||
/// W1: [hidden_dim, input_dim] - first projection.
|
||||
/// Row-major layout for efficient neuron access.
|
||||
w1: Array2<f32>,
|
||||
|
||||
/// W2_T: [hidden_dim, output_dim] - second projection TRANSPOSED.
|
||||
/// Row-major layout for contiguous neuron weight access.
|
||||
/// Original W2 shape was [output_dim, hidden_dim].
|
||||
#[serde(with = "w2_serde")]
|
||||
w2_t: Array2<f32>,
|
||||
|
||||
/// Bias for first layer.
|
||||
b1: Array1<f32>,
|
||||
|
||||
/// Bias for second layer.
|
||||
b2: Array1<f32>,
|
||||
|
||||
/// Activation function type.
|
||||
activation: ActivationType,
|
||||
|
||||
/// Output dimension (cached for efficiency)
|
||||
output_dim: usize,
|
||||
}
|
||||
|
||||
// Custom serialization for w2_t - stores as original W2 for compatibility
|
||||
mod w2_serde {
|
||||
use super::*;
|
||||
use ndarray::Array2;
|
||||
|
||||
pub fn serialize<S>(w2_t: &Array2<f32>, serializer: S) -> std::result::Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
// Transpose back to original W2 shape for serialization compatibility
|
||||
let w2 = w2_t.t().to_owned();
|
||||
w2.serialize(serializer)
|
||||
}
|
||||
|
||||
pub fn deserialize<'de, D>(deserializer: D) -> std::result::Result<Array2<f32>, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
// Load as original W2 and transpose for optimized storage
|
||||
let w2 = Array2::<f32>::deserialize(deserializer)?;
|
||||
Ok(w2.t().to_owned())
|
||||
}
|
||||
}
|
||||
|
||||
impl SparseFfn {
|
||||
/// Create a new sparse FFN with given dimensions.
|
||||
pub fn new(
|
||||
input_dim: usize,
|
||||
hidden_dim: usize,
|
||||
output_dim: usize,
|
||||
activation: ActivationType,
|
||||
) -> Result<Self> {
|
||||
use rand::Rng;
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
// Initialize with small random values
|
||||
let w1 = Array2::from_shape_fn((hidden_dim, input_dim), |_| rng.gen::<f32>() * 0.01);
|
||||
|
||||
// Store W2 transposed: [hidden_dim, output_dim] instead of [output_dim, hidden_dim]
|
||||
// This allows contiguous row access when iterating by neuron index
|
||||
let w2_t = Array2::from_shape_fn((hidden_dim, output_dim), |_| rng.gen::<f32>() * 0.01);
|
||||
|
||||
let b1 = Array1::zeros(hidden_dim);
|
||||
let b2 = Array1::zeros(output_dim);
|
||||
|
||||
Ok(Self {
|
||||
w1,
|
||||
w2_t,
|
||||
b1,
|
||||
b2,
|
||||
activation,
|
||||
output_dim,
|
||||
})
|
||||
}
|
||||
|
||||
/// Create from existing weights.
|
||||
pub fn from_weights(
|
||||
w1: Array2<f32>,
|
||||
w2: Array2<f32>,
|
||||
b1: Array1<f32>,
|
||||
b2: Array1<f32>,
|
||||
activation: ActivationType,
|
||||
) -> Result<Self> {
|
||||
let (hidden_dim, _input_dim) = w1.dim();
|
||||
let (output_dim, w2_hidden) = w2.dim();
|
||||
|
||||
if hidden_dim != w2_hidden {
|
||||
return Err(InferenceError::Failed(format!(
|
||||
"Hidden dimension mismatch: W1 has {}, W2 has {}",
|
||||
hidden_dim, w2_hidden
|
||||
))
|
||||
.into());
|
||||
}
|
||||
|
||||
if b1.len() != hidden_dim {
|
||||
return Err(InferenceError::Failed(format!(
|
||||
"b1 dimension mismatch: expected {}, got {}",
|
||||
hidden_dim,
|
||||
b1.len()
|
||||
))
|
||||
.into());
|
||||
}
|
||||
|
||||
if b2.len() != output_dim {
|
||||
return Err(InferenceError::Failed(format!(
|
||||
"b2 dimension mismatch: expected {}, got {}",
|
||||
output_dim,
|
||||
b2.len()
|
||||
))
|
||||
.into());
|
||||
}
|
||||
|
||||
// Transpose W2 for optimized storage
|
||||
let w2_t = w2.t().to_owned();
|
||||
|
||||
Ok(Self {
|
||||
w1,
|
||||
w2_t,
|
||||
b1,
|
||||
b2,
|
||||
activation,
|
||||
output_dim,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get input dimension.
|
||||
pub fn input_dim(&self) -> usize {
|
||||
self.w1.ncols()
|
||||
}
|
||||
|
||||
/// Get hidden dimension.
|
||||
pub fn hidden_dim(&self) -> usize {
|
||||
self.w1.nrows()
|
||||
}
|
||||
|
||||
/// Get output dimension.
|
||||
pub fn output_dim(&self) -> usize {
|
||||
self.output_dim
|
||||
}
|
||||
|
||||
/// Compute FFN using only active neurons (sparse computation).
|
||||
///
|
||||
/// This is the main optimization: only compute activations for predicted neurons.
|
||||
pub fn forward_sparse(&self, input: &[f32], active_neurons: &[usize]) -> Result<Vec<f32>> {
|
||||
if input.len() != self.input_dim() {
|
||||
return Err(InferenceError::InputDimensionMismatch {
|
||||
expected: self.input_dim(),
|
||||
actual: input.len(),
|
||||
}
|
||||
.into());
|
||||
}
|
||||
|
||||
if active_neurons.is_empty() {
|
||||
return Err(InferenceError::NoActiveNeurons.into());
|
||||
}
|
||||
|
||||
trace!(
|
||||
"Sparse forward: {} active neurons ({:.1}% sparsity)",
|
||||
active_neurons.len(),
|
||||
100.0 * (1.0 - active_neurons.len() as f32 / self.hidden_dim() as f32)
|
||||
);
|
||||
|
||||
let backend = get_backend();
|
||||
|
||||
// 1. Sparse first layer: only compute active neurons
|
||||
let mut hidden = Vec::with_capacity(active_neurons.len());
|
||||
for &neuron_idx in active_neurons {
|
||||
if neuron_idx >= self.hidden_dim() {
|
||||
return Err(InferenceError::Failed(format!(
|
||||
"Invalid neuron index: {}",
|
||||
neuron_idx
|
||||
))
|
||||
.into());
|
||||
}
|
||||
|
||||
let row = self.w1.row(neuron_idx);
|
||||
let dot = backend.dot_product(row.as_slice().unwrap(), input);
|
||||
hidden.push(dot + self.b1[neuron_idx]);
|
||||
}
|
||||
|
||||
// 2. Apply activation function
|
||||
backend.activation(&mut hidden, self.activation);
|
||||
|
||||
// 3. Sparse second layer: accumulate only active neuron contributions
|
||||
// W2_T is [hidden_dim, output_dim], so row access by neuron_idx is CONTIGUOUS
|
||||
let mut output = self.b2.to_vec();
|
||||
let backend = get_backend();
|
||||
|
||||
for (i, &neuron_idx) in active_neurons.iter().enumerate() {
|
||||
// Row access is contiguous in memory - major optimization!
|
||||
let weights = self.w2_t.row(neuron_idx);
|
||||
let h_val = hidden[i];
|
||||
|
||||
// Use SIMD-optimized axpy: output += h_val * weights
|
||||
backend.axpy(&mut output, weights.as_slice().unwrap(), h_val);
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// Compute FFN using all neurons (dense computation).
|
||||
///
|
||||
/// This is the baseline for comparison and correctness checking.
|
||||
pub fn forward_dense(&self, input: &[f32]) -> Result<Vec<f32>> {
|
||||
if input.len() != self.input_dim() {
|
||||
return Err(InferenceError::InputDimensionMismatch {
|
||||
expected: self.input_dim(),
|
||||
actual: input.len(),
|
||||
}
|
||||
.into());
|
||||
}
|
||||
|
||||
let backend = get_backend();
|
||||
let input_arr = Array1::from_vec(input.to_vec());
|
||||
|
||||
// 1. First layer: hidden = activation(W1 · input + b1)
|
||||
let mut hidden = self.w1.dot(&input_arr) + &self.b1;
|
||||
backend.activation(hidden.as_slice_mut().unwrap(), self.activation);
|
||||
|
||||
// 2. Second layer: output = W2 · hidden + b2
|
||||
// W2_T is [hidden_dim, output_dim], so W2 = W2_T.t()
|
||||
// output = W2_T.t() · hidden = (hidden.t() · W2_T).t() = W2_T.t().dot(hidden)
|
||||
let output = self.w2_t.t().dot(&hidden) + &self.b2;
|
||||
|
||||
Ok(output.to_vec())
|
||||
}
|
||||
|
||||
/// Compute both sparse and dense, returning the difference for validation.
|
||||
#[cfg(test)]
|
||||
pub fn validate_sparse(&self, input: &[f32], active_neurons: &[usize]) -> Result<f32> {
|
||||
let sparse_output = self.forward_sparse(input, active_neurons)?;
|
||||
let dense_output = self.forward_dense(input)?;
|
||||
|
||||
// Compute mean absolute error
|
||||
let mae: f32 = sparse_output
|
||||
.iter()
|
||||
.zip(dense_output.iter())
|
||||
.map(|(s, d)| (s - d).abs())
|
||||
.sum::<f32>()
|
||||
/ sparse_output.len() as f32;
|
||||
|
||||
Ok(mae)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_ffn_creation() {
|
||||
let ffn = SparseFfn::new(128, 512, 128, ActivationType::Gelu).unwrap();
|
||||
|
||||
assert_eq!(ffn.input_dim(), 128);
|
||||
assert_eq!(ffn.hidden_dim(), 512);
|
||||
assert_eq!(ffn.output_dim(), 128);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dense_forward() {
|
||||
let ffn = SparseFfn::new(64, 256, 64, ActivationType::Relu).unwrap();
|
||||
let input = vec![0.1; 64];
|
||||
|
||||
let output = ffn.forward_dense(&input).unwrap();
|
||||
assert_eq!(output.len(), 64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sparse_forward() {
|
||||
let ffn = SparseFfn::new(64, 256, 64, ActivationType::Relu).unwrap();
|
||||
let input = vec![0.1; 64];
|
||||
let active_neurons: Vec<usize> = (0..64).collect(); // First 64 neurons
|
||||
|
||||
let output = ffn.forward_sparse(&input, &active_neurons).unwrap();
|
||||
assert_eq!(output.len(), 64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sparse_vs_dense() {
|
||||
let ffn = SparseFfn::new(32, 128, 32, ActivationType::Relu).unwrap();
|
||||
let input = vec![0.5; 32];
|
||||
|
||||
// Use all neurons - should match dense computation
|
||||
let all_neurons: Vec<usize> = (0..128).collect();
|
||||
let mae = ffn.validate_sparse(&input, &all_neurons).unwrap();
|
||||
|
||||
// Should be very close (allowing for floating point precision)
|
||||
assert!(mae < 1e-5, "MAE too large: {}", mae);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_active_neurons() {
|
||||
let ffn = SparseFfn::new(32, 128, 32, ActivationType::Relu).unwrap();
|
||||
let input = vec![0.1; 32];
|
||||
let empty: Vec<usize> = vec![];
|
||||
|
||||
let result = ffn.forward_sparse(&input, &empty);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_neuron_index() {
|
||||
let ffn = SparseFfn::new(32, 128, 32, ActivationType::Relu).unwrap();
|
||||
let input = vec![0.1; 32];
|
||||
let invalid = vec![200]; // Out of bounds
|
||||
|
||||
let result = ffn.forward_sparse(&input, &invalid);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
}
|
||||
59
vendor/ruvector/crates/ruvector-sparse-inference/src/sparse/mod.rs
vendored
Normal file
59
vendor/ruvector/crates/ruvector-sparse-inference/src/sparse/mod.rs
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
//! Sparse computation module.
|
||||
//!
|
||||
//! This module provides sparse implementations of neural network layers.
|
||||
|
||||
mod ffn;
|
||||
|
||||
pub use crate::config::ActivationType;
|
||||
pub use ffn::SparseFfn;
|
||||
|
||||
/// Trait for feed-forward network layers.
|
||||
pub trait FeedForward: Send + Sync {
|
||||
/// Sparse forward pass using only active neurons.
|
||||
fn forward_sparse(
|
||||
&self,
|
||||
input: &[f32],
|
||||
active_neurons: &[usize],
|
||||
) -> crate::error::Result<Vec<f32>>;
|
||||
|
||||
/// Dense forward pass using all neurons.
|
||||
fn forward_dense(&self, input: &[f32]) -> crate::error::Result<Vec<f32>>;
|
||||
}
|
||||
|
||||
impl FeedForward for SparseFfn {
|
||||
fn forward_sparse(
|
||||
&self,
|
||||
input: &[f32],
|
||||
active_neurons: &[usize],
|
||||
) -> crate::error::Result<Vec<f32>> {
|
||||
SparseFfn::forward_sparse(self, input, active_neurons)
|
||||
}
|
||||
|
||||
fn forward_dense(&self, input: &[f32]) -> crate::error::Result<Vec<f32>> {
|
||||
SparseFfn::forward_dense(self, input)
|
||||
}
|
||||
}
|
||||
|
||||
/// SwiGLU FFN (placeholder for future implementation).
|
||||
pub struct SwiGLUFfn;
|
||||
|
||||
impl SwiGLUFfn {
|
||||
/// Create a new SwiGLU FFN.
|
||||
pub fn new(_input_dim: usize, _hidden_dim: usize) -> Self {
|
||||
Self
|
||||
}
|
||||
}
|
||||
|
||||
impl FeedForward for SwiGLUFfn {
|
||||
fn forward_sparse(
|
||||
&self,
|
||||
_input: &[f32],
|
||||
_active_neurons: &[usize],
|
||||
) -> crate::error::Result<Vec<f32>> {
|
||||
unimplemented!("SwiGLUFfn not yet implemented")
|
||||
}
|
||||
|
||||
fn forward_dense(&self, _input: &[f32]) -> crate::error::Result<Vec<f32>> {
|
||||
unimplemented!("SwiGLUFfn not yet implemented")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user