64 KiB
64 KiB
Testing and Benchmarking Specification for ruvector-attention
Version: 1.0 Date: 2025-11-30 Status: Draft
Executive Summary
This document defines comprehensive testing and benchmarking strategies for the ruvector-attention crate, ensuring reliability, performance, and correctness across all attention mechanisms and platforms.
1. Testing Strategy Overview
Testing Pyramid
Testing Distribution:
              /\
             /  \          E2E Tests (5%)
            /----\         - End-to-end workflows
           /      \        - Real-world scenarios
          /--------\       Integration Tests (25%)
         /          \      - Module integration
        /            \     - Cross-platform validation
       /--------------\    Unit Tests (70%)
      /                \   - Component isolation
     /                  \  - Edge cases & correctness
    /--------------------\
Testing Philosophy
- Test-Driven Development (TDD): Write tests before implementation
- Property-Based Testing: Use `proptest` for mathematical properties
- Regression Prevention: Automated performance benchmarks
- Platform Parity: Ensure WASM/NAPI-RS match Rust implementations
- Continuous Validation: Integrate into CI/CD pipeline
Test Coverage Goals
| Component | Target Coverage | Critical Paths |
|---|---|---|
| Core Attention | 95% | 100% |
| Hyperbolic | 90% | 100% |
| Graph Attention | 90% | 100% |
| Optimization (Flash, Linear) | 85% | 100% |
| Platform Bindings | 80% | 95% |
2. Unit Test Specifications
2.1 Scaled Dot-Product Attention
// crates/ruvector-attention/src/scaled_dot_product/tests.rs
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_relative_eq;
use ndarray::{Array1, Array2};
use proptest::prelude::*;
#[test]
fn test_basic_attention_computation() {
let attention = ScaledDotProduct::new(4);
let query = Array1::from_vec(vec![1.0, 0.0, 0.0, 0.0]);
let keys = Array2::from_shape_vec(
(3, 4),
vec![
1.0, 0.0, 0.0, 0.0, // Identical to query
0.0, 1.0, 0.0, 0.0, // Orthogonal
0.5, 0.5, 0.5, 0.5, // Mixed
],
).unwrap();
let values = Array2::from_shape_vec(
(3, 4),
vec![
1.0, 0.0, 0.0, 0.0,
0.0, 1.0, 0.0, 0.0,
0.0, 0.0, 1.0, 0.0,
],
).unwrap();
let output = attention.forward(&query, &keys, &values).unwrap();
// Should weight most similar key highest
assert!(output[0] > 0.5, "First value should dominate");
}
#[test]
fn test_zero_vectors() {
let attention = ScaledDotProduct::new(4);
let query = Array1::zeros(4);
let keys = Array2::zeros((3, 4));
let values = Array2::ones((3, 4));
let output = attention.forward(&query, &keys, &values).unwrap();
// All weights equal, should average values
assert_relative_eq!(output[0], 1.0, epsilon = 1e-5);
}
#[test]
fn test_numerical_stability_large_values() {
let attention = ScaledDotProduct::new(128);
// Large magnitude vectors
let query = Array1::from_elem(128, 100.0);
let keys = Array2::from_elem((100, 128), 100.0);
let values = Array2::from_elem((100, 128), 1.0);
let output = attention.forward(&query, &keys, &values).unwrap();
// Should not overflow/underflow
assert!(output.iter().all(|&x| x.is_finite()));
assert!(output.iter().all(|&x| x >= 0.0 && x <= 2.0));
}
#[test]
fn test_softmax_sums_to_one() {
let attention = ScaledDotProduct::new(64);
let query = Array1::from_vec((0..64).map(|i| i as f32).collect());
let keys = Array2::from_shape_vec(
(50, 64),
(0..50*64).map(|i| (i % 100) as f32).collect(),
).unwrap();
let weights = attention.compute_attention_weights(&query, &keys).unwrap();
let sum: f32 = weights.sum();
assert_relative_eq!(sum, 1.0, epsilon = 1e-5);
}
#[test]
fn test_gradient_correctness() {
// Numerical gradient checking
let attention = ScaledDotProduct::new(8);
let query = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
let keys = Array2::from_elem((5, 8), 0.5);
let values = Array2::eye(8).slice(s![0..5, ..]).to_owned();
let epsilon = 1e-4;
// Compute numerical gradient
let output = attention.forward(&query, &keys, &values).unwrap();
for i in 0..8 {
let mut query_plus = query.clone();
query_plus[i] += epsilon;
let output_plus = attention.forward(&query_plus, &keys, &values).unwrap();
let numerical_grad = (&output_plus - &output) / epsilon;
// Analytical gradient should match
// (Would require backward pass implementation)
assert!(numerical_grad.iter().all(|&x| x.is_finite()));
}
}
#[test]
fn test_serialization_roundtrip() {
let attention = ScaledDotProduct::new(64);
let serialized = serde_json::to_string(&attention).unwrap();
let deserialized: ScaledDotProduct = serde_json::from_str(&serialized).unwrap();
assert_eq!(attention.dim(), deserialized.dim());
}
#[test]
fn test_masking() {
let attention = ScaledDotProduct::new(4);
let query = Array1::ones(4);
let keys = Array2::ones((3, 4));
let values = Array2::from_shape_vec(
(3, 4),
vec![
1.0, 0.0, 0.0, 0.0,
0.0, 2.0, 0.0, 0.0,
0.0, 0.0, 3.0, 0.0,
],
).unwrap();
// Mask out second key
let mask = vec![true, false, true];
let output = attention.forward_with_mask(&query, &keys, &values, &mask).unwrap();
// Second value should not contribute
assert_relative_eq!(output[1], 0.0, epsilon = 1e-5);
}
// Property-based testing
proptest! {
#[test]
fn prop_attention_preserves_dimension(
dim in 4usize..256,
n_keys in 1usize..100,
) {
let attention = ScaledDotProduct::new(dim);
let query = Array1::from_elem(dim, 1.0);
let keys = Array2::from_elem((n_keys, dim), 0.5);
let values = Array2::from_elem((n_keys, dim), 0.5);
let output = attention.forward(&query, &keys, &values).unwrap();
prop_assert_eq!(output.len(), dim);
}
#[test]
fn prop_weights_non_negative(
dim in 4usize..64,
n_keys in 1usize..50,
) {
let attention = ScaledDotProduct::new(dim);
let query = Array1::from_elem(dim, 1.0);
let keys = Array2::from_elem((n_keys, dim), 0.5);
let weights = attention.compute_attention_weights(&query, &keys).unwrap();
prop_assert!(weights.iter().all(|&w| w >= 0.0));
}
}
}
2.2 Hyperbolic Attention Tests
// crates/ruvector-attention/src/hyperbolic/tests.rs
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_relative_eq;
use proptest::prelude::*;
#[test]
fn test_poincare_distance_symmetry() {
let attention = HyperbolicAttention::new(-1.0); // Unit curvature
let x = Array1::from_vec(vec![0.1, 0.2, 0.0, 0.0]);
let y = Array1::from_vec(vec![0.3, 0.1, 0.0, 0.0]);
let d_xy = attention.poincare_distance(&x, &y);
let d_yx = attention.poincare_distance(&y, &x);
assert_relative_eq!(d_xy, d_yx, epsilon = 1e-6);
}
#[test]
fn test_poincare_distance_identity() {
let attention = HyperbolicAttention::new(-1.0);
let x = Array1::from_vec(vec![0.1, 0.2, 0.3, 0.4]);
let d = attention.poincare_distance(&x, &x);
assert_relative_eq!(d, 0.0, epsilon = 1e-6);
}
#[test]
fn test_mobius_addition_identity() {
let attention = HyperbolicAttention::new(-1.0);
let x = Array1::from_vec(vec![0.1, 0.2, 0.0, 0.0]);
let zero = Array1::zeros(4);
let result = attention.mobius_add(&x, &zero);
assert_relative_eq!(result.as_slice().unwrap(), x.as_slice().unwrap(), epsilon = 1e-6);
}
#[test]
fn test_mobius_addition_commutativity() {
let attention = HyperbolicAttention::new(-1.0);
let x = Array1::from_vec(vec![0.1, 0.2, 0.0, 0.0]);
let y = Array1::from_vec(vec![0.05, 0.1, 0.0, 0.0]);
let xy = attention.mobius_add(&x, &y);
let yx = attention.mobius_add(&y, &x);
assert_relative_eq!(xy.as_slice().unwrap(), yx.as_slice().unwrap(), epsilon = 1e-5);
}
#[test]
fn test_boundary_stability() {
let attention = HyperbolicAttention::new(-1.0);
// Vector near boundary (||x|| → 1)
let norm = 0.99;
let x = Array1::from_vec(vec![norm / 2.0_f32.sqrt(), norm / 2.0_f32.sqrt(), 0.0, 0.0]);
let y = Array1::from_vec(vec![0.1, 0.1, 0.0, 0.0]);
let distance = attention.poincare_distance(&x, &y);
// Should remain finite
assert!(distance.is_finite());
assert!(distance >= 0.0);
}
#[test]
fn test_exponential_map() {
let attention = HyperbolicAttention::new(-1.0);
let origin = Array1::zeros(4);
let tangent = Array1::from_vec(vec![0.1, 0.2, 0.0, 0.0]);
let point = attention.exponential_map(&origin, &tangent);
// Point should be in disk
let norm: f32 = point.iter().map(|x| x * x).sum();
assert!(norm < 1.0);
}
#[test]
fn test_logarithmic_map() {
let attention = HyperbolicAttention::new(-1.0);
let origin = Array1::zeros(4);
let point = Array1::from_vec(vec![0.3, 0.4, 0.0, 0.0]);
let tangent = attention.logarithmic_map(&origin, &point);
// Roundtrip should recover point
let recovered = attention.exponential_map(&origin, &tangent);
assert_relative_eq!(recovered.as_slice().unwrap(), point.as_slice().unwrap(), epsilon = 1e-5);
}
#[test]
fn test_curvature_parameter() {
let c_values = vec![-0.1, -0.5, -1.0, -2.0];
for c in c_values {
let attention = HyperbolicAttention::new(c);
let x = Array1::from_vec(vec![0.1, 0.1, 0.0, 0.0]);
let y = Array1::from_vec(vec![0.2, 0.2, 0.0, 0.0]);
let distance = attention.poincare_distance(&x, &y);
// Distance should vary with curvature
assert!(distance > 0.0);
assert!(distance.is_finite());
}
}
#[test]
fn test_gyrovector_parallel_transport() {
let attention = HyperbolicAttention::new(-1.0);
let x = Array1::from_vec(vec![0.1, 0.0, 0.0, 0.0]);
let y = Array1::from_vec(vec![0.0, 0.1, 0.0, 0.0]);
let v = Array1::from_vec(vec![0.05, 0.05, 0.0, 0.0]);
let transported = attention.parallel_transport(&x, &y, &v);
// Transported vector should maintain properties
assert!(transported.iter().all(|&x| x.is_finite()));
}
proptest! {
#[test]
fn prop_poincare_distance_triangle_inequality(
dim in 4usize..16,
) {
let attention = HyperbolicAttention::new(-1.0);
// Generate points in Poincaré disk
let x = Array1::from_elem(dim, 0.1);
let y = Array1::from_elem(dim, 0.2);
let z = Array1::from_elem(dim, 0.15);
let d_xy = attention.poincare_distance(&x, &y);
let d_yz = attention.poincare_distance(&y, &z);
let d_xz = attention.poincare_distance(&x, &z);
// Triangle inequality: d(x,z) ≤ d(x,y) + d(y,z)
prop_assert!(d_xz <= d_xy + d_yz + 1e-5);
}
}
}
2.3 Graph Attention Tests
// crates/ruvector-attention/src/graph/tests.rs
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_edge_featured_attention() {
let attention = EdgeFeaturedAttention::new(4, 2);
let node = Array1::from_vec(vec![1.0, 0.0, 0.0, 0.0]);
let neighbors = Array2::from_shape_vec(
(3, 4),
vec![
0.5, 0.5, 0.0, 0.0,
0.0, 1.0, 0.0, 0.0,
0.0, 0.0, 1.0, 0.0,
],
).unwrap();
// Edge features: distance, weight
let edge_features = Array2::from_shape_vec(
(3, 2),
vec![
0.1, 1.0, // Close, high weight
0.5, 0.5, // Medium distance, medium weight
1.0, 0.1, // Far, low weight
],
).unwrap();
let output = attention.forward(&node, &neighbors, &edge_features).unwrap();
assert_eq!(output.len(), 4);
assert!(output.iter().all(|&x| x.is_finite()));
}
#[test]
fn test_local_global_attention() {
let attention = LocalGlobalAttention::new(4, 2, 8);
let query = Array1::ones(4);
let local_keys = Array2::ones((10, 4));
let global_keys = Array2::ones((100, 4));
let local_values = Array2::ones((10, 4));
let global_values = Array2::ones((100, 4));
let output = attention.forward(
&query,
&local_keys,
&global_keys,
&local_values,
&global_values,
).unwrap();
assert_eq!(output.len(), 4);
}
#[test]
fn test_relational_attention() {
let attention = RelationalAttention::new(4, vec![
"friend".to_string(),
"colleague".to_string(),
"family".to_string(),
]);
let node = Array1::ones(4);
let neighbors = Array2::ones((5, 4));
let relations = vec![0, 1, 0, 2, 1]; // Relation types
let output = attention.forward(&node, &neighbors, &relations).unwrap();
assert_eq!(output.len(), 4);
}
}
2.4 Optimization Tests
// crates/ruvector-attention/src/flash/tests.rs
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_relative_eq;
#[test]
fn test_flash_attention_matches_standard() {
let dim = 64;
let n_neighbors = 1000;
let flash = FlashAttention::new(dim, 256); // Block size 256
let standard = ScaledDotProduct::new(dim);
let query = Array1::ones(dim);
let keys = Array2::ones((n_neighbors, dim));
let values = Array2::ones((n_neighbors, dim));
let flash_output = flash.forward(&query, &keys, &values).unwrap();
let standard_output = standard.forward(&query, &keys, &values).unwrap();
// Should be nearly identical
assert_relative_eq!(
flash_output.as_slice().unwrap(),
standard_output.as_slice().unwrap(),
epsilon = 1e-3
);
}
#[test]
fn test_flash_memory_usage() {
let flash = FlashAttention::new(128, 512);
// Should use O(sqrt(N)) memory for blocks
let query = Array1::ones(128);
let keys = Array2::ones((10000, 128));
let values = Array2::ones((10000, 128));
// Measure memory before and after
let before = get_memory_usage();
let _ = flash.forward(&query, &keys, &values).unwrap();
let after = get_memory_usage();
let memory_used = after - before;
// Should be much less than O(N) memory
assert!(memory_used < 50 * 1024 * 1024); // < 50MB
}
#[test]
fn test_linear_attention_approximation() {
let linear = LinearAttention::new(64);
let standard = ScaledDotProduct::new(64);
let query = Array1::ones(64);
let keys = Array2::ones((100, 64));
let values = Array2::ones((100, 64));
let linear_output = linear.forward(&query, &keys, &values).unwrap();
let standard_output = standard.forward(&query, &keys, &values).unwrap();
// Should approximate standard attention
let diff: f32 = (&linear_output - &standard_output)
.iter()
.map(|x| x.abs())
.sum();
assert!(diff < 5.0); // Reasonable approximation error
}
}
3. Integration Test Specifications
3.1 Integration with ruvector-gnn
// tests/integration/gnn_integration.rs
use ruvector_attention::{HyperbolicAttention, GraphAttention};
use ruvector_gnn::{GNNLayer, GraphConvolution};
use ndarray::{Array1, Array2};
use approx::assert_relative_eq;
#[test]
fn test_hyperbolic_attention_in_gnn_layer() {
let attention = HyperbolicAttention::new(-1.0);
let layer = GNNLayer::with_attention(4, 8, attention);
// Create graph: 5 nodes, 8 edges
let nodes = Array2::ones((5, 4));
let edges = vec![
(0, 1), (0, 2), (1, 2), (1, 3),
(2, 3), (2, 4), (3, 4), (4, 0),
];
let output = layer.forward(&nodes, &edges).unwrap();
assert_eq!(output.shape(), &[5, 8]);
assert!(output.iter().all(|&x| x.is_finite()));
}
#[test]
fn test_multi_layer_gnn_with_attention() {
let attention1 = GraphAttention::new(4);
let attention2 = GraphAttention::new(8);
let layer1 = GNNLayer::with_attention(4, 8, attention1);
let layer2 = GNNLayer::with_attention(8, 16, attention2);
let nodes = Array2::ones((10, 4));
let edges = (0..20).map(|i| (i % 10, (i + 1) % 10)).collect();
let hidden = layer1.forward(&nodes, &edges).unwrap();
let output = layer2.forward(&hidden, &edges).unwrap();
assert_eq!(output.shape(), &[10, 16]);
}
#[test]
fn test_attention_aggregation_methods() {
let attention = GraphAttention::new(4);
let node_features = Array1::ones(4);
let neighbor_features = Array2::ones((10, 4));
// Test different aggregation
let sum_agg = attention.aggregate_neighbors(&node_features, &neighbor_features, "sum").unwrap();
let mean_agg = attention.aggregate_neighbors(&node_features, &neighbor_features, "mean").unwrap();
let max_agg = attention.aggregate_neighbors(&node_features, &neighbor_features, "max").unwrap();
assert_eq!(sum_agg.len(), 4);
assert_eq!(mean_agg.len(), 4);
assert_eq!(max_agg.len(), 4);
// Mean should be sum / count
assert_relative_eq!(mean_agg[0], sum_agg[0] / 10.0, epsilon = 1e-5);
}
3.2 Integration with ruvector-core HNSW
// tests/integration/hnsw_integration.rs
use ruvector_core::{HnswIndex, SearchParams};
use ruvector_attention::{ScaledDotProduct, AttentionGuidedSearch};
use std::collections::HashSet;
#[test]
fn test_attention_guided_search() {
// Build HNSW index
let mut index = HnswIndex::new(128, 16, 200);
for i in 0..1000 {
let vector = generate_random_vector(128, i);
index.add(i, vector);
}
// Create attention mechanism
let attention = ScaledDotProduct::new(128);
let guided_search = AttentionGuidedSearch::new(attention);
let query = generate_random_vector(128, 12345);
// Standard search
let standard_results = index.search(&query, 10).unwrap();
// Attention-guided search
let attention_results = guided_search.search(&index, &query, 10).unwrap();
// Should find similar results but potentially better ranking
assert_eq!(attention_results.len(), 10);
// Check recall
let standard_ids: HashSet<_> = standard_results.iter().map(|r| r.id).collect();
let attention_ids: HashSet<_> = attention_results.iter().map(|r| r.id).collect();
let overlap = standard_ids.intersection(&attention_ids).count();
assert!(overlap >= 7); // At least 70% overlap
}
#[test]
fn test_attention_edge_weighting() {
let mut index = HnswIndex::new(64, 16, 200);
// Add hierarchical clusters
for cluster in 0..10 {
for i in 0..100 {
let vector = generate_cluster_vector(64, cluster, i);
index.add(cluster * 100 + i, vector);
}
}
let attention = GraphAttention::new(64);
// Query from cluster 0
let query = generate_cluster_vector(64, 0, 0);
let results = index.search_with_attention(&query, 20, &attention).unwrap();
// Most results should be from cluster 0
let cluster_0_count = results.iter()
.filter(|r| r.id < 100)
.count();
assert!(cluster_0_count >= 15); // At least 75% from same cluster
}
#[test]
fn test_multi_scale_search() {
let index = build_test_index(1000, 128);
let local_attention = GraphAttention::new(128);
let global_attention = ScaledDotProduct::new(128);
let multi_scale = MultiScaleAttention::new(local_attention, global_attention);
let query = generate_random_vector(128, 999);
let results = index.search_with_multi_scale(&query, 10, &multi_scale).unwrap();
assert_eq!(results.len(), 10);
assert!(results.windows(2).all(|w| w[0].distance <= w[1].distance));
}
3.3 Cross-Platform Consistency Tests
// tests/integration/cross_platform.rs
#[cfg(all(feature = "wasm", target_arch = "wasm32"))]
use wasm_bindgen_test::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
#[cfg_attr(not(target_arch = "wasm32"), test)]
fn test_wasm_rust_attention_consistency() {
let dim = 128;
let n_neighbors = 100;
#[cfg(target_arch = "wasm32")]
let attention = ScaledDotProduct::new_wasm(dim);
#[cfg(not(target_arch = "wasm32"))]
let attention = ScaledDotProduct::new(dim);
let query = Array1::ones(dim);
let keys = Array2::ones((n_neighbors, dim));
let values = Array2::ones((n_neighbors, dim));
let output = attention.forward(&query, &keys, &values).unwrap();
// Expected output (computed offline)
let expected = Array1::ones(dim);
assert_relative_eq!(
output.as_slice().unwrap(),
expected.as_slice().unwrap(),
epsilon = 1e-4
);
}
#[cfg(feature = "napi")]
#[test]
fn test_napi_rust_consistency() {
use napi::threadsafe_function::ThreadsafeFunction;
use ruvector_attention_napi::ScaledDotProductNAPI;
let rust_attention = ScaledDotProduct::new(64);
let napi_attention = ScaledDotProductNAPI::new(64);
let query = vec![1.0f32; 64];
let keys = vec![vec![0.5f32; 64]; 50];
let values = vec![vec![0.75f32; 64]; 50];
let rust_output = rust_attention.forward(
&Array1::from_vec(query.clone()),
&Array2::from_shape_vec((50, 64), keys.clone().into_iter().flatten().collect()).unwrap(),
&Array2::from_shape_vec((50, 64), values.clone().into_iter().flatten().collect()).unwrap(),
).unwrap();
let napi_output = napi_attention.forward_sync(query, keys, values).unwrap();
assert_relative_eq!(
rust_output.as_slice().unwrap(),
&napi_output[..],
epsilon = 1e-5
);
}
#[test]
fn test_serialization_consistency() {
let attention = HyperbolicAttention::new(-1.0);
// Serialize to JSON
let json = serde_json::to_string(&attention).unwrap();
// Deserialize
let deserialized: HyperbolicAttention = serde_json::from_str(&json).unwrap();
// Test equivalence
let query = Array1::from_vec(vec![0.1, 0.2, 0.0, 0.0]);
let keys = Array2::from_elem((10, 4), 0.3);
let values = Array2::from_elem((10, 4), 0.5);
let output1 = attention.forward(&query, &keys, &values).unwrap();
let output2 = deserialized.forward(&query, &keys, &values).unwrap();
assert_relative_eq!(
output1.as_slice().unwrap(),
output2.as_slice().unwrap(),
epsilon = 1e-8
);
}
4. Benchmark Suite
4.1 Criterion Benchmarks
// benches/attention_benchmarks.rs
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use ruvector_attention::*;
use ndarray::{Array1, Array2};
fn benchmark_scaled_dot_product(c: &mut Criterion) {
let dims = [64, 128, 256, 512, 1024];
let neighbors = [10, 50, 100, 500, 1000];
let mut group = c.benchmark_group("scaled_dot_product");
for &d in &dims {
for &n in &neighbors {
let param = format!("d={}/n={}", d, n);
group.throughput(Throughput::Elements((n * d) as u64));
group.bench_with_input(
BenchmarkId::new("forward", &param),
&(d, n),
|b, &(d, n)| {
let attention = ScaledDotProduct::new(d);
let query = Array1::ones(d);
let keys = Array2::ones((n, d));
let values = Array2::ones((n, d));
b.iter(|| {
black_box(attention.forward(
black_box(&query),
black_box(&keys),
black_box(&values),
))
});
},
);
}
}
group.finish();
}
fn benchmark_hyperbolic_attention(c: &mut Criterion) {
let mut group = c.benchmark_group("hyperbolic_attention");
let curvatures = [-0.1, -0.5, -1.0, -2.0];
let dims = [64, 128, 256];
let neighbors = [10, 50, 100];
for &c_val in &curvatures {
for &d in &dims {
for &n in &neighbors {
let param = format!("c={}/d={}/n={}", c_val, d, n);
group.bench_with_input(
BenchmarkId::new("poincare_distance", &param),
&(c_val, d, n),
|b, &(c_val, d, n)| {
let attention = HyperbolicAttention::new(c_val);
let query = Array1::from_elem(d, 0.1);
let keys = Array2::from_elem((n, d), 0.2);
b.iter(|| {
for i in 0..n {
black_box(attention.poincare_distance(
black_box(&query),
black_box(&keys.row(i).to_owned()),
));
}
});
},
);
}
}
}
group.finish();
}
fn benchmark_flash_attention(c: &mut Criterion) {
let mut group = c.benchmark_group("flash_attention");
let block_sizes = [64, 128, 256, 512];
let neighbors = [1000, 5000, 10000];
for &block_size in &block_sizes {
for &n in &neighbors {
let param = format!("block={}/n={}", block_size, n);
group.throughput(Throughput::Elements((n * 128) as u64));
group.bench_with_input(
BenchmarkId::new("flash_vs_standard", &param),
&(block_size, n),
|b, &(block_size, n)| {
let flash = FlashAttention::new(128, block_size);
let query = Array1::ones(128);
let keys = Array2::ones((n, 128));
let values = Array2::ones((n, 128));
b.iter(|| {
black_box(flash.forward(
black_box(&query),
black_box(&keys),
black_box(&values),
))
});
},
);
}
}
group.finish();
}
fn benchmark_memory_usage(c: &mut Criterion) {
let mut group = c.benchmark_group("memory_usage");
let methods = [
("standard", false),
("flash", true),
];
for (name, use_flash) in &methods {
group.bench_function(*name, |b| {
b.iter_custom(|iters| {
let start_memory = get_memory_usage();
let start_time = std::time::Instant::now();
for _ in 0..iters {
if *use_flash {
let attention = FlashAttention::new(128, 256);
let query = Array1::ones(128);
let keys = Array2::ones((10000, 128));
let values = Array2::ones((10000, 128));
black_box(attention.forward(&query, &keys, &values));
} else {
let attention = ScaledDotProduct::new(128);
let query = Array1::ones(128);
let keys = Array2::ones((10000, 128));
let values = Array2::ones((10000, 128));
black_box(attention.forward(&query, &keys, &values));
}
}
let end_memory = get_memory_usage();
let duration = start_time.elapsed();
println!(
"{}: Memory used = {} MB",
name,
(end_memory - start_memory) / 1024 / 1024
);
duration
});
});
}
group.finish();
}
fn benchmark_throughput(c: &mut Criterion) {
let mut group = c.benchmark_group("throughput");
let batch_sizes = [1, 10, 100, 1000];
for &batch_size in &batch_sizes {
group.throughput(Throughput::Elements(batch_size as u64));
group.bench_with_input(
BenchmarkId::new("batch_processing", batch_size),
&batch_size,
|b, &batch_size| {
let attention = ScaledDotProduct::new(128);
let queries: Vec<_> = (0..batch_size)
.map(|_| Array1::ones(128))
.collect();
let keys = Array2::ones((100, 128));
let values = Array2::ones((100, 128));
b.iter(|| {
for query in &queries {
black_box(attention.forward(
black_box(query),
black_box(&keys),
black_box(&values),
));
}
});
},
);
}
group.finish();
}
fn benchmark_multi_head_attention(c: &mut Criterion) {
let mut group = c.benchmark_group("multi_head_attention");
let num_heads = [1, 2, 4, 8, 16];
for &heads in &num_heads {
group.bench_with_input(
BenchmarkId::new("heads", heads),
&heads,
|b, &heads| {
let attention = MultiHeadAttention::new(128, heads);
let query = Array1::ones(128);
let keys = Array2::ones((100, 128));
let values = Array2::ones((100, 128));
b.iter(|| {
black_box(attention.forward(
black_box(&query),
black_box(&keys),
black_box(&values),
))
});
},
);
}
group.finish();
}
fn benchmark_mixture_of_experts(c: &mut Criterion) {
let mut group = c.benchmark_group("mixture_of_experts");
let num_experts = [2, 4, 8, 16];
for &experts in &num_experts {
group.bench_with_input(
BenchmarkId::new("experts", experts),
&experts,
|b, &experts| {
let attention = MixtureOfExpertsAttention::new(128, experts);
let query = Array1::ones(128);
let keys = Array2::ones((100, 128));
let values = Array2::ones((100, 128));
b.iter(|| {
black_box(attention.forward(
black_box(&query),
black_box(&keys),
black_box(&values),
))
});
},
);
}
group.finish();
}
criterion_group!(
benches,
benchmark_scaled_dot_product,
benchmark_hyperbolic_attention,
benchmark_flash_attention,
benchmark_memory_usage,
benchmark_throughput,
benchmark_multi_head_attention,
benchmark_mixture_of_experts,
);
criterion_main!(benches);
// Helper function
fn get_memory_usage() -> usize {
#[cfg(target_os = "linux")]
{
use std::fs;
let status = fs::read_to_string("/proc/self/status").unwrap();
for line in status.lines() {
if line.starts_with("VmRSS:") {
let parts: Vec<&str> = line.split_whitespace().collect();
return parts[1].parse::<usize>().unwrap() * 1024;
}
}
}
0
}
5. Performance Targets
5.1 Latency Targets
| Attention Type | Dims | Neighbors | P50 Latency | P99 Latency | Memory Peak |
|---|---|---|---|---|---|
| Scaled Dot-Product | 64 | 100 | < 30μs | < 50μs | < 500KB |
| Scaled Dot-Product | 128 | 100 | < 40μs | < 70μs | < 1MB |
| Scaled Dot-Product | 256 | 100 | < 60μs | < 100μs | < 2MB |
| Multi-Head (4) | 128 | 100 | < 120μs | < 200μs | < 1.5MB |
| Multi-Head (8) | 128 | 100 | < 200μs | < 350μs | < 2MB |
| Hyperbolic | 64 | 100 | < 60μs | < 100μs | < 750KB |
| Hyperbolic | 128 | 100 | < 90μs | < 150μs | < 1.2MB |
| Graph (Edge) | 128 | 100 | < 80μs | < 130μs | < 1.5MB |
| Local+Global | 128 | 100+500 | < 400μs | < 700μs | < 4MB |
| Local+Global | 128 | 1000+5000 | < 2ms | < 4ms | < 15MB |
| Linear | 128 | 1000 | < 150μs | < 250μs | < 2MB |
| Linear | 128 | 10000 | < 1ms | < 1.8ms | < 8MB |
| Flash | 128 | 1000 | < 120μs | < 200μs | < 1.5MB |
| Flash | 128 | 10000 | < 800μs | < 1.5ms | < 6MB |
| Flash | 128 | 100000 | < 15ms | < 25ms | < 40MB |
| MoE (2 experts) | 128 | 100 | < 180μs | < 300μs | < 4MB |
| MoE (4 experts) | 128 | 100 | < 300μs | < 500μs | < 8MB |
| MoE (8 experts) | 128 | 100 | < 550μs | < 900μs | < 15MB |
5.2 Throughput Targets
| Operation | Batch Size | Target QPS | Notes |
|---|---|---|---|
| Single Query | 1 | > 20,000 | 128D, 100 neighbors |
| Batch Processing | 10 | > 150,000 | Total queries/sec |
| Batch Processing | 100 | > 1,000,000 | Total queries/sec |
| Large Scale | 1000 | > 8,000,000 | Parallel processing |
5.3 Accuracy Targets
| Metric | Target | Measurement Method |
|---|---|---|
| Recall@10 | > 95% | vs brute-force |
| Recall@100 | > 98% | vs brute-force |
| Precision@10 | > 90% | vs brute-force |
| Numerical Stability | < 1e-5 error | Relative to FP64 |
| Cross-Platform Consistency | < 1e-4 error | WASM vs Native |
6. Regression Testing
6.1 Automated Performance Regression
# .github/workflows/performance-regression.yml
name: Performance Regression Tests
on:
pull_request:
branches: [main, develop]
push:
branches: [main]
jobs:
benchmark:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install Rust
uses: actions-rust-lang/setup-rust-toolchain@v1
- name: Run benchmarks (PR)
run: |
cargo bench --bench attention_benchmarks -- --save-baseline pr
- name: Checkout main branch
run: |
git fetch origin main
git checkout origin/main
- name: Run benchmarks (main)
run: |
cargo bench --bench attention_benchmarks -- --save-baseline main
- name: Compare benchmarks
run: |
cargo bench --bench attention_benchmarks -- --load-baseline pr --baseline main
- name: Check for regressions
run: |
python3 scripts/check_regression.py \
--threshold-p50 5 \
--threshold-p99 10 \
--threshold-memory 10
- name: Upload results
uses: actions/upload-artifact@v3
with:
name: benchmark-results
path: target/criterion/
6.2 Regression Detection Script
# scripts/check_regression.py
import json
import sys
import argparse
from pathlib import Path
def load_benchmark_results(path):
"""Load Criterion benchmark results"""
results = {}
for bench_dir in Path(path).glob("*/"):
bench_name = bench_dir.name
# Load estimates.json
estimates_file = bench_dir / "new" / "estimates.json"
if estimates_file.exists():
with open(estimates_file) as f:
data = json.load(f)
results[bench_name] = {
'mean': data['mean']['point_estimate'],
'median': data['median']['point_estimate'],
}
return results
def compare_results(baseline, current, thresholds):
"""Compare benchmark results and detect regressions"""
regressions = []
warnings = []
for bench_name in current:
if bench_name not in baseline:
continue
base_mean = baseline[bench_name]['mean']
curr_mean = current[bench_name]['mean']
percent_change = ((curr_mean - base_mean) / base_mean) * 100
if percent_change > thresholds['p99']:
regressions.append({
'benchmark': bench_name,
'change': percent_change,
'baseline': base_mean,
'current': curr_mean,
})
elif percent_change > thresholds['p50']:
warnings.append({
'benchmark': bench_name,
'change': percent_change,
'baseline': base_mean,
'current': curr_mean,
})
return regressions, warnings
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--threshold-p50', type=float, default=5.0)
parser.add_argument('--threshold-p99', type=float, default=10.0)
parser.add_argument('--threshold-memory', type=float, default=10.0)
parser.add_argument('--baseline-path', default='target/criterion/main')
parser.add_argument('--current-path', default='target/criterion/pr')
args = parser.parse_args()
thresholds = {
'p50': args.threshold_p50,
'p99': args.threshold_p99,
'memory': args.threshold_memory,
}
baseline = load_benchmark_results(args.baseline_path)
current = load_benchmark_results(args.current_path)
regressions, warnings = compare_results(baseline, current, thresholds)
if warnings:
print("⚠️ Performance Warnings:")
for w in warnings:
print(f" {w['benchmark']}: +{w['change']:.2f}% ({w['baseline']:.2f}ns → {w['current']:.2f}ns)")
if regressions:
print("\n❌ Performance Regressions Detected:")
for r in regressions:
print(f" {r['benchmark']}: +{r['change']:.2f}% ({r['baseline']:.2f}ns → {r['current']:.2f}ns)")
sys.exit(1)
else:
print("✅ No performance regressions detected")
sys.exit(0)
if __name__ == '__main__':
main()
6.3 Regression Test Configuration
# regression-tests.toml
[metrics]
[metrics.p50_latency]
threshold = 5.0 # 5% increase triggers warning
action = "warn"
description = "Median latency regression"
[metrics.p99_latency]
threshold = 10.0 # 10% increase blocks PR
action = "block"
description = "99th percentile latency regression"
[metrics.memory_peak]
threshold = 10.0
action = "warn"
description = "Peak memory usage regression"
[metrics.recall_at_10]
threshold = 1.0 # 1% decrease blocks PR
action = "block"
comparison = "decrease"
description = "Recall@10 accuracy regression"
[metrics.throughput]
threshold = 5.0
action = "warn"
comparison = "decrease"
description = "Throughput regression"
[benchmarks]
critical = [
"scaled_dot_product/forward/d=128/n=100",
"hyperbolic_attention/poincare_distance/c=-1.0/d=128/n=100",
"flash_attention/flash_vs_standard/block=256/n=10000",
]
[notifications]
slack_webhook = "${SLACK_WEBHOOK_URL}"
email = ["team@example.com"]
7. Platform-Specific Tests
7.1 WASM Tests
// tests/wasm/attention.test.js
import { describe, it, expect, beforeAll } from 'vitest';
import init, {
ScaledDotProduct,
HyperbolicAttention,
MultiHeadAttention,
} from '../../pkg/ruvector_attention.js';
describe('WASM Attention Module', () => {
beforeAll(async () => {
await init();
});
describe('ScaledDotProduct', () => {
  it('should create instance with correct dimensions', () => {
    const attention = ScaledDotProduct.new(128);
    expect(attention.dim()).toBe(128);
  });

  it('should compute attention for small inputs', () => {
    const attention = ScaledDotProduct.new(4);
    const query = new Float32Array([1, 0, 0, 0]);
    const keys = new Float32Array([
      1, 0, 0, 0,
      0, 1, 0, 0,
      0, 0, 1, 0,
    ]);
    const values = new Float32Array([
      1, 0, 0, 0,
      0, 1, 0, 0,
      0, 0, 1, 0,
    ]);
    const result = attention.forward(query, keys, values, 3);
    expect(result).toBeInstanceOf(Float32Array);
    expect(result.length).toBe(4);
    // With scaling by 1/sqrt(4) the raw scores are [0.5, 0, 0], so the
    // softmax weight on the matching key is exp(0.5) / (exp(0.5) + 2) ≈ 0.452.
    // The original asserted result[0] > 0.5, which a *correct* scaled
    // implementation fails. Assert instead that the matching key dominates
    // the uniform weight (1/3) and the other two components.
    expect(result[0]).toBeGreaterThan(1 / 3);
    expect(result[0]).toBeGreaterThan(result[1]);
    expect(result[0]).toBeGreaterThan(result[2]);
  });

  it('should handle large batches', async () => {
    const attention = ScaledDotProduct.new(128);
    const query = new Float32Array(128).fill(1);
    const n_neighbors = 10000;
    const keys = new Float32Array(n_neighbors * 128).fill(0.5);
    const values = new Float32Array(n_neighbors * 128).fill(0.75);
    const start = performance.now();
    const result = attention.forward(query, keys, values, n_neighbors);
    const duration = performance.now() - start;
    expect(result.length).toBe(128);
    expect(duration).toBeLessThan(100); // < 100ms
  });

  it('should produce consistent results', () => {
    const attention = ScaledDotProduct.new(64);
    const query = new Float32Array(64).fill(1);
    const keys = new Float32Array(50 * 64).fill(0.5);
    const values = new Float32Array(50 * 64).fill(0.75);
    // Two identical forward passes must be bitwise-stable (no hidden state).
    const result1 = attention.forward(query, keys, values, 50);
    const result2 = attention.forward(query, keys, values, 50);
    for (let i = 0; i < 64; i++) {
      expect(Math.abs(result1[i] - result2[i])).toBeLessThan(1e-6);
    }
  });
});
describe('HyperbolicAttention', () => {
  it('should compute Poincaré distance', () => {
    const attention = HyperbolicAttention.new(-1.0, 4);
    const pointA = new Float32Array([0.1, 0.2, 0, 0]);
    const pointB = new Float32Array([0.3, 0.1, 0, 0]);
    const dist = attention.poincare_distance(pointA, pointB);
    // Distinct interior points: distance is positive, finite, and modest.
    expect(dist).toBeGreaterThan(0);
    expect(dist).toBeLessThan(10);
    expect(isFinite(dist)).toBe(true);
  });

  it('should handle boundary cases', () => {
    const attention = HyperbolicAttention.new(-1.0, 4);
    // Near boundary
    const nearEdge = new Float32Array([0.7, 0.7, 0, 0]);
    const interior = new Float32Array([0.1, 0.1, 0, 0]);
    const dist = attention.poincare_distance(nearEdge, interior);
    expect(isFinite(dist)).toBe(true);
  });
});
describe('MultiHeadAttention', () => {
  it('should compute multi-head attention', () => {
    const dim = 128;
    const attention = MultiHeadAttention.new(dim, 8);
    const query = new Float32Array(dim).fill(1);
    const keys = new Float32Array(100 * dim).fill(0.5);
    const values = new Float32Array(100 * dim).fill(0.75);
    const output = attention.forward(query, keys, values, 100);
    // Output keeps the model dimension and contains no NaN/Inf.
    expect(output.length).toBe(dim);
    expect(output.every((v) => isFinite(v))).toBe(true);
  });
});
// Lifecycle tests for the wasm-bindgen handles: explicit free() and
// many simultaneously-live instances must not corrupt the WASM heap.
describe('Memory Management', () => {
  it('should properly free memory', () => {
    const attention = ScaledDotProduct.new(128);
    // Force garbage collection if available
    // (only when Node was started with --expose-gc; otherwise skipped)
    if (global.gc) {
      global.gc();
    }
    // Explicitly release the WASM-side allocation.
    attention.free();
    // Should not crash
    expect(true).toBe(true);
  });
  it('should handle concurrent operations', async () => {
    // Ten live instances sharing the same input buffers.
    const attentions = Array.from({ length: 10 }, () =>
      ScaledDotProduct.new(128)
    );
    const query = new Float32Array(128).fill(1);
    const keys = new Float32Array(100 * 128).fill(0.5);
    const values = new Float32Array(100 * 128).fill(0.75);
    // NOTE(review): WASM calls here are synchronous; Promise.resolve only
    // exercises the API shape, not true parallelism.
    const results = await Promise.all(
      attentions.map(att =>
        Promise.resolve(att.forward(query, keys, values, 100))
      )
    );
    expect(results).toHaveLength(10);
    // Cleanup
    attentions.forEach(att => att.free());
  });
});
// Latency smoke test: p50/p99 over 100 measured iterations after warm-up.
describe('Performance', () => {
  it('should meet latency targets', () => {
    const attention = ScaledDotProduct.new(128);
    const query = new Float32Array(128).fill(1);
    const keys = new Float32Array(100 * 128).fill(0.5);
    const values = new Float32Array(100 * 128).fill(0.75);
    // Warm-up
    // (lets the JIT/WASM engine settle before anything is timed)
    for (let i = 0; i < 10; i++) {
      attention.forward(query, keys, values, 100);
    }
    // Measure
    const times = [];
    for (let i = 0; i < 100; i++) {
      const start = performance.now();
      attention.forward(query, keys, values, 100);
      times.push(performance.now() - start);
    }
    // Percentiles are read off the sorted sample at floor(n*p);
    // with n=100, p99 is index 99 (the slowest sample).
    times.sort((a, b) => a - b);
    const p50 = times[Math.floor(times.length * 0.5)];
    const p99 = times[Math.floor(times.length * 0.99)];
    expect(p50).toBeLessThan(1); // < 1ms for WASM
    expect(p99).toBeLessThan(2); // < 2ms for WASM
  });
});
});
7.2 NAPI-RS Tests
// tests/napi/attention.test.ts
import { describe, it, expect, beforeEach } from '@jest/globals';
import {
ScaledDotProduct,
HyperbolicAttention,
MultiHeadAttention,
FlashAttention,
} from '../../index';
describe('NAPI-RS Attention Module', () => {
describe('ScaledDotProduct', () => {
  let attention: ScaledDotProduct;
  beforeEach(() => {
    attention = new ScaledDotProduct(128);
  });

  it('should create instance', () => {
    expect(attention).toBeDefined();
    expect(attention.dim()).toBe(128);
  });

  it('should compute attention synchronously', () => {
    const query = new Float32Array(128).fill(1);
    const keys = Array.from({ length: 100 }, () =>
      Array.from({ length: 128 }, () => 0.5)
    );
    const values = Array.from({ length: 100 }, () =>
      Array.from({ length: 128 }, () => 0.75)
    );
    const result = attention.forward(query, keys, values);
    expect(result).toHaveLength(128);
    expect(result.every(x => isFinite(x))).toBe(true);
  });

  it('should compute attention asynchronously', async () => {
    const query = new Float32Array(128).fill(1);
    const keys = Array.from({ length: 100 }, () =>
      Array.from({ length: 128 }, () => 0.5)
    );
    const values = Array.from({ length: 100 }, () =>
      Array.from({ length: 128 }, () => 0.75)
    );
    const result = await attention.forwardAsync(query, keys, values);
    expect(result).toHaveLength(128);
  });

  it('should process batch asynchronously', async () => {
    const queries = Array.from({ length: 10 }, () =>
      new Float32Array(128).fill(1)
    );
    const keys = Array.from({ length: 100 }, () =>
      Array.from({ length: 128 }, () => 0.5)
    );
    const values = Array.from({ length: 100 }, () =>
      Array.from({ length: 128 }, () => 0.75)
    );
    const results = await attention.forwardBatchAsync(queries, keys, values);
    expect(results).toHaveLength(10);
    expect(results[0]).toHaveLength(128);
  });

  it('should match Rust implementation', () => {
    // Test data computed in Rust
    const query = new Float32Array([1, 0, 0, 0]);
    const keys = [
      [1, 0, 0, 0],
      [0, 1, 0, 0],
      [0, 0, 1, 0],
    ];
    const values = [
      [1, 0, 0, 0],
      [0, 1, 0, 0],
      [0, 0, 1, 0],
    ];
    const attention4 = new ScaledDotProduct(4);
    const result = attention4.forward(query, keys, values);
    // Scaled dot-product with d=4: scores = [1,0,0] / sqrt(4) = [0.5, 0, 0];
    // softmax -> [e^0.5, 1, 1] / (e^0.5 + 2) = [0.4519, 0.2741, 0.2741].
    // With the identity-like value rows the output equals those weights.
    // (The previous expectation [0.6652, 0.2424, 0.0924, 0] corresponds to
    // scores ~[2,1,0], which these inputs cannot produce.)
    const expected = [0.4519, 0.2741, 0.2741, 0.0];
    for (let i = 0; i < 4; i++) {
      expect(Math.abs(result[i] - expected[i])).toBeLessThan(1e-3);
    }
  });
});
describe('HyperbolicAttention', () => {
  it('should compute hyperbolic attention', () => {
    const dim = 128;
    const attention = new HyperbolicAttention(-1.0, dim);
    const query = new Float32Array(dim).fill(0.1);
    const keys = Array.from({ length: 100 }, () =>
      Array.from({ length: dim }, () => 0.2)
    );
    const values = Array.from({ length: 100 }, () =>
      Array.from({ length: dim }, () => 0.5)
    );
    const output = attention.forward(query, keys, values);
    // Output keeps the embedding dimension and stays numerically sane.
    expect(output).toHaveLength(dim);
    expect(output.every((v) => isFinite(v))).toBe(true);
  });

  it('should compute Poincaré distance', () => {
    const attention = new HyperbolicAttention(-1.0, 4);
    const pointA = new Float32Array([0.1, 0.2, 0, 0]);
    const pointB = new Float32Array([0.3, 0.1, 0, 0]);
    const dist = attention.poincareDistance(pointA, pointB);
    expect(dist).toBeGreaterThan(0);
    expect(isFinite(dist)).toBe(true);
  });
});
describe('MultiHeadAttention', () => {
  it('should compute with multiple heads', () => {
    const dim = 128;
    const heads = 8;
    const attention = new MultiHeadAttention(dim, heads);
    const query = new Float32Array(dim).fill(1);
    const keys = Array.from({ length: 100 }, () =>
      Array.from({ length: dim }, () => 0.5)
    );
    const values = Array.from({ length: 100 }, () =>
      Array.from({ length: dim }, () => 0.75)
    );
    // Head outputs are concatenated back to the model dimension.
    const output = attention.forward(query, keys, values);
    expect(output).toHaveLength(dim);
  });
});
describe('FlashAttention', () => {
  it('should handle large inputs efficiently', async () => {
    const n = 10000;
    const attention = new FlashAttention(128, 256);
    const query = new Float32Array(128).fill(1);
    const keys = Array.from({ length: n }, () =>
      Array.from({ length: 128 }, () => 0.5)
    );
    const values = Array.from({ length: n }, () =>
      Array.from({ length: 128 }, () => 0.75)
    );
    const startedAt = Date.now();
    const result = await attention.forwardAsync(query, keys, values);
    const elapsed = Date.now() - startedAt;
    expect(result).toHaveLength(128);
    expect(elapsed).toBeLessThan(100); // < 100ms
  });
});
// Throughput and concurrency smoke tests for the native bindings.
describe('Performance', () => {
  it('should meet throughput targets', async () => {
    const attention = new ScaledDotProduct(128);
    const query = new Float32Array(128).fill(1);
    const keys = Array.from({ length: 100 }, () =>
      Array.from({ length: 128 }, () => 0.5)
    );
    const values = Array.from({ length: 100 }, () =>
      Array.from({ length: 128 }, () => 0.75)
    );
    // Tight synchronous loop; QPS derived from wall-clock time.
    const iterations = 1000;
    const start = Date.now();
    for (let i = 0; i < iterations; i++) {
      attention.forward(query, keys, values);
    }
    const duration = Date.now() - start;
    const qps = (iterations / duration) * 1000;
    expect(qps).toBeGreaterThan(10000); // > 10k QPS
  });
  it('should support concurrent async operations', async () => {
    const attention = new ScaledDotProduct(128);
    const query = new Float32Array(128).fill(1);
    const keys = Array.from({ length: 100 }, () =>
      Array.from({ length: 128 }, () => 0.5)
    );
    const values = Array.from({ length: 100 }, () =>
      Array.from({ length: 128 }, () => 0.75)
    );
    // 100 in-flight async calls against one shared instance.
    const promises = Array.from({ length: 100 }, () =>
      attention.forwardAsync(query, keys, values)
    );
    const results = await Promise.all(promises);
    expect(results).toHaveLength(100);
    expect(results.every(r => r.length === 128)).toBe(true);
  });
});
});
8. Continuous Benchmarking
8.1 Daily Benchmark Job
# .github/workflows/daily-benchmarks.yml
name: Daily Performance Benchmarks
on:
schedule:
- cron: '0 2 * * *' # 2 AM daily
workflow_dispatch: # Manual trigger
jobs:
benchmark-rust:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
steps:
- uses: actions/checkout@v3
- name: Install Rust
uses: actions-rust-lang/setup-rust-toolchain@v1
with:
toolchain: stable
- name: Run benchmarks
run: |
cargo bench --bench attention_benchmarks -- --save-baseline daily-${{ matrix.os }}
- name: Upload results
uses: actions/upload-artifact@v3
with:
name: benchmark-${{ matrix.os }}
path: target/criterion/
- name: Generate report
run: |
python3 scripts/generate_benchmark_report.py \
--input target/criterion/ \
--output benchmark-report-${{ matrix.os }}.md
- name: Post to dashboard
env:
DASHBOARD_URL: ${{ secrets.BENCHMARK_DASHBOARD_URL }}
run: |
curl -X POST $DASHBOARD_URL/api/benchmarks \
-H "Authorization: Bearer ${{ secrets.BENCHMARK_TOKEN }}" \
-F "os=${{ matrix.os }}" \
-F "data=@target/criterion/results.json"
benchmark-wasm:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install Rust + wasm-pack
run: |
curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
- name: Build WASM
run: |
wasm-pack build --target web
- name: Setup Node.js
uses: actions/setup-node@v3
with:
node-version: '18'
- name: Install dependencies
run: npm ci
- name: Run WASM benchmarks
run: |
npm run bench:wasm
- name: Upload results
uses: actions/upload-artifact@v3
with:
name: benchmark-wasm
path: bench-results/
benchmark-memory:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install Rust + Valgrind
run: |
sudo apt-get update
sudo apt-get install -y valgrind
rustup toolchain install nightly
- name: Build with profiling
run: |
cargo +nightly build --release --features memory-profiling
- name: Run memory profiling
run: |
valgrind --tool=massif \
--massif-out-file=massif.out \
target/release/deps/attention_benchmarks-*
- name: Generate memory report
run: |
ms_print massif.out > memory-report.txt
- name: Upload results
uses: actions/upload-artifact@v3
with:
name: memory-profile
path: |
massif.out
memory-report.txt
generate-dashboard:
needs: [benchmark-rust, benchmark-wasm, benchmark-memory]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Download all artifacts
uses: actions/download-artifact@v3
- name: Generate dashboard
run: |
python3 scripts/generate_dashboard.py \
--output docs/benchmarks/index.html
- name: Deploy to GitHub Pages
uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./docs/benchmarks
8.2 Benchmark Dashboard Generator
# scripts/generate_dashboard.py
import json
import sys
from pathlib import Path
from datetime import datetime
import matplotlib.pyplot as plt
import pandas as pd
def load_criterion_results(criterion_dir):
    """Load every Criterion benchmark estimate found under ``criterion_dir``.

    Walks the immediate subdirectories (one per benchmark), reads each
    ``new/estimates.json``, and converts the point estimates from
    nanoseconds (Criterion's native unit) to milliseconds.

    Args:
        criterion_dir: Path (str or Path) to Criterion's output root,
            typically ``target/criterion``.

    Returns:
        ``pandas.DataFrame`` with columns ``name``, ``mean``, ``median``
        and ``std_dev`` (all latencies in ms); empty if nothing was found.
    """
    results = []
    # sorted() makes row order deterministic across filesystems; the
    # original relied on glob() order, which is platform-dependent.
    for bench_dir in sorted(Path(criterion_dir).glob("*/")):
        estimates_file = bench_dir / "new" / "estimates.json"
        if not estimates_file.exists():
            # Not a benchmark directory (e.g. Criterion's "report" dir).
            continue
        with open(estimates_file) as f:
            data = json.load(f)
        results.append({
            'name': bench_dir.name,
            'mean': data['mean']['point_estimate'] / 1e6,  # ns -> ms
            'median': data['median']['point_estimate'] / 1e6,
            'std_dev': data['std_dev']['point_estimate'] / 1e6,
        })
    return pd.DataFrame(results)
def generate_latency_chart(df, output_path):
    """Render a horizontal-bar chart of median latency per benchmark
    (with std-dev error bars) and save it to ``output_path``."""
    ordered = df.sort_values('median')
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.barh(ordered['name'], ordered['median'], xerr=ordered['std_dev'])
    ax.set_xlabel('Latency (ms)')
    ax.set_title('Attention Mechanism Latency Comparison')
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(output_path)
    plt.close()
def generate_html_dashboard(df, charts_dir, output_file):
    """Generate a self-contained HTML dashboard from benchmark results.

    Writes a single HTML page to ``output_file`` containing summary
    metric cards, the latency chart image, and a row-per-benchmark table
    built from ``df`` (expects columns ``name``, ``median``, ``mean``,
    ``std_dev``, latencies in ms).

    NOTE(review): ``charts_dir`` is currently unused — the chart is
    referenced by the hard-coded relative path ``latency_chart.png``, so
    the image must sit next to ``output_file``. Confirm intent.
    """
    # Page skeleton + inline CSS; literal CSS braces are escaped as {{ }}
    # inside the f-string.
    html = f"""
    <!DOCTYPE html>
    <html>
    <head>
    <title>RuVector Attention Benchmarks</title>
    <style>
    body {{
    font-family: Arial, sans-serif;
    margin: 20px;
    background-color: #f5f5f5;
    }}
    .container {{
    max-width: 1200px;
    margin: 0 auto;
    background-color: white;
    padding: 20px;
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }}
    h1 {{
    color: #333;
    }}
    table {{
    width: 100%;
    border-collapse: collapse;
    margin: 20px 0;
    }}
    th, td {{
    padding: 12px;
    text-align: left;
    border-bottom: 1px solid #ddd;
    }}
    th {{
    background-color: #4CAF50;
    color: white;
    }}
    tr:hover {{
    background-color: #f5f5f5;
    }}
    .chart {{
    margin: 20px 0;
    }}
    .metric {{
    display: inline-block;
    margin: 10px 20px;
    padding: 15px;
    background-color: #e3f2fd;
    border-radius: 4px;
    }}
    .metric-value {{
    font-size: 24px;
    font-weight: bold;
    color: #1976d2;
    }}
    .metric-label {{
    font-size: 14px;
    color: #666;
    }}
    </style>
    </head>
    <body>
    <div class="container">
    <h1>RuVector Attention Benchmarks</h1>
    <p>Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
    <h2>Summary Metrics</h2>
    <div class="metric">
    <div class="metric-value">{df['median'].median():.3f} ms</div>
    <div class="metric-label">Median Latency</div>
    </div>
    <div class="metric">
    <div class="metric-value">{df['median'].min():.3f} ms</div>
    <div class="metric-label">Best Latency</div>
    </div>
    <div class="metric">
    <div class="metric-value">{len(df)}</div>
    <div class="metric-label">Benchmarks</div>
    </div>
    <h2>Latency Chart</h2>
    <div class="chart">
    <img src="latency_chart.png" alt="Latency Chart" style="max-width: 100%;">
    </div>
    <h2>Detailed Results</h2>
    <table>
    <thead>
    <tr>
    <th>Benchmark</th>
    <th>Median (ms)</th>
    <th>Mean (ms)</th>
    <th>Std Dev (ms)</th>
    </tr>
    </thead>
    <tbody>
    """
    # One table row per benchmark, 4-decimal precision.
    for _, row in df.iterrows():
        html += f"""
    <tr>
    <td>{row['name']}</td>
    <td>{row['median']:.4f}</td>
    <td>{row['mean']:.4f}</td>
    <td>{row['std_dev']:.4f}</td>
    </tr>
    """
    # Close the table and the page.
    html += """
    </tbody>
    </table>
    </div>
    </body>
    </html>
    """
    with open(output_file, 'w') as f:
        f.write(html)
def main():
    """CLI entry point: load Criterion results, render the latency chart
    and the HTML dashboard, exiting non-zero when no results exist."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--criterion-dir', default='target/criterion')
    parser.add_argument('--output', default='docs/benchmarks/index.html')
    args = parser.parse_args()

    df = load_criterion_results(args.criterion_dir)
    if df.empty:
        print("No benchmark results found")
        sys.exit(1)

    out_dir = Path(args.output).parent
    out_dir.mkdir(parents=True, exist_ok=True)
    generate_latency_chart(df, out_dir / 'latency_chart.png')
    generate_html_dashboard(df, out_dir, args.output)
    print(f"Dashboard generated: {args.output}")
9. Test Execution Plan
9.1 Development Workflow
# Local development testing
cargo test # Run all tests
cargo test --package ruvector-attention # Package-specific tests
cargo test test_scaled_dot_product # Specific test
# Run benchmarks locally
cargo bench # All benchmarks
cargo bench --bench attention_benchmarks # Specific benchmark
# Run with coverage
cargo tarpaulin --out Html # Generate HTML coverage report
# Platform-specific testing
wasm-pack test --headless --firefox # WASM tests
npm test # NAPI-RS tests
9.2 CI/CD Integration
# .github/workflows/ci.yml (excerpt)
test:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
rust: [stable, nightly]
steps:
- name: Run tests
run: cargo test --all-features
- name: Run benchmarks
if: matrix.rust == 'stable'
run: cargo bench --no-run # Just build, don't run
- name: Coverage
if: matrix.os == 'ubuntu-latest'
run: cargo tarpaulin --out Lcov
- name: Upload coverage
uses: codecov/codecov-action@v3
10. Quality Gates
10.1 Pre-Merge Requirements
All PRs must pass:
- ✅ All unit tests pass
- ✅ All integration tests pass
- ✅ Code coverage ≥ 80% (critical paths ≥ 95%)
- ✅ No performance regressions > 10%
- ✅ Platform-specific tests pass (WASM + NAPI)
- ✅ Benchmarks complete without errors
- ✅ Memory profiling shows no leaks
10.2 Release Requirements
For releases:
- ✅ All pre-merge requirements
- ✅ Full benchmark suite passes
- ✅ Performance targets met (see Section 5)
- ✅ Cross-platform consistency verified
- ✅ Documentation updated
- ✅ Changelog updated
- ✅ Version bumped appropriately
11. Next Steps
-
Implement Core Test Suite
- Start with scaled dot-product tests
- Add hyperbolic attention tests
- Build graph attention tests
-
Set Up Benchmarking
- Configure Criterion
- Define baseline benchmarks
- Create comparison framework
-
Platform Testing
- Set up WASM test environment
- Configure NAPI-RS tests
- Verify cross-platform parity
-
CI/CD Integration
- Add test workflows
- Set up benchmark automation
- Configure performance tracking
-
Documentation
- Document test organization
- Create benchmark interpretation guide
- Write troubleshooting guides
Appendix A: Test Data Generation
// tests/common/test_data.rs
use ndarray::{Array1, Array2};
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
/// Deterministic random vector of length `dim` with entries drawn
/// uniformly from [-1, 1), seeded by `seed` (ChaCha8 for reproducibility).
pub fn generate_random_vector(dim: usize, seed: u64) -> Array1<f32> {
    let mut rng = ChaCha8Rng::seed_from_u64(seed);
    let data: Vec<f32> = (0..dim).map(|_| rng.gen_range(-1.0..1.0)).collect();
    Array1::from_vec(data)
}
/// Deterministic `n x dim` matrix with entries drawn uniformly from
/// [-1, 1), seeded by `seed` (ChaCha8 for reproducibility).
pub fn generate_random_vectors(n: usize, dim: usize, seed: u64) -> Array2<f32> {
    let mut rng = ChaCha8Rng::seed_from_u64(seed);
    let data: Vec<f32> = (0..n * dim).map(|_| rng.gen_range(-1.0..1.0)).collect();
    Array2::from_shape_vec((n, dim), data).unwrap()
}
/// Deterministically generates a `dim`-dimensional point near the center
/// of cluster `cluster` (center = `cluster / 10` on every axis, plus
/// uniform noise in [-0.1, 0.1)).
///
/// The RNG seed is derived from `(cluster, idx)` in `u64` arithmetic so
/// it cannot overflow: the original computed `cluster * 10000 + idx` in
/// `usize` *before* widening, which can overflow (and panic in debug
/// builds) on 32-bit targets for large cluster ids.
pub fn generate_cluster_vector(dim: usize, cluster: usize, idx: usize) -> Array1<f32> {
    let seed = (cluster as u64) * 10_000 + idx as u64;
    let mut rng = ChaCha8Rng::seed_from_u64(seed);
    let center = Array1::from_elem(dim, cluster as f32 / 10.0);
    let noise = Array1::from_vec(
        (0..dim).map(|_| rng.gen_range(-0.1..0.1)).collect()
    );
    center + noise
}
/// Generates `n` approximately orthonormal `dim`-dimensional vectors via
/// Gram–Schmidt, with a fixed seed so the output is deterministic.
///
/// NOTE: for `n > dim` true orthogonality is impossible; once the span is
/// exhausted the residuals collapse toward zero and are left unnormalized.
pub fn generate_orthogonal_vectors(dim: usize, n: usize) -> Array2<f32> {
    let mut vectors = Array2::zeros((n, dim));
    let mut rng = ChaCha8Rng::seed_from_u64(42);
    for i in 0..n {
        let mut v = Array1::from_vec(
            (0..dim).map(|_| rng.gen_range(-1.0..1.0)).collect()
        );
        // Remove the component of `v` along each previously stored vector.
        for j in 0..i {
            let prev = vectors.row(j);
            let denom = prev.dot(&prev);
            // Guard against a degenerate (near-zero) earlier row: the
            // original divided by `denom` unconditionally, turning the
            // whole row into NaN when `denom == 0` (e.g. when n > dim).
            if denom > 1e-12 {
                let projection = v.dot(&prev) / denom;
                v = v - projection * &prev.to_owned();
            }
        }
        // Normalize unless the residual is numerically zero.
        let norm = v.dot(&v).sqrt();
        if norm > 1e-6 {
            v = v / norm;
        }
        vectors.row_mut(i).assign(&v);
    }
    vectors
}
Appendix B: Performance Monitoring
// tests/common/perf_monitor.rs
use std::time::{Duration, Instant};
/// Collects wall-clock timing samples and reports summary statistics
/// (mean, p50, p99) over everything measured so far.
pub struct PerformanceMonitor {
    samples: Vec<Duration>,
}

impl PerformanceMonitor {
    /// Creates an empty monitor with no recorded samples.
    pub fn new() -> Self {
        Self { samples: Vec::new() }
    }

    /// Runs `f`, records its elapsed wall-clock time, and returns its result.
    pub fn measure<F, R>(&mut self, f: F) -> R
    where
        F: FnOnce() -> R,
    {
        let start = Instant::now();
        let result = f();
        self.samples.push(start.elapsed());
        result
    }

    /// Median latency; `Duration::ZERO` when no samples were recorded.
    pub fn p50(&self) -> Duration {
        self.percentile(0.5)
    }

    /// 99th-percentile latency; `Duration::ZERO` when no samples were recorded.
    pub fn p99(&self) -> Duration {
        self.percentile(0.99)
    }

    /// Arithmetic mean latency; `Duration::ZERO` when no samples were
    /// recorded. (The original divided by `len()` unconditionally, which
    /// panics with a divide-by-zero on an empty monitor.)
    pub fn mean(&self) -> Duration {
        if self.samples.is_empty() {
            return Duration::ZERO;
        }
        let total: Duration = self.samples.iter().sum();
        total / self.samples.len() as u32
    }

    /// Returns the `p`-th percentile (`0.0..=1.0`) of the recorded samples,
    /// or `Duration::ZERO` when empty (the original computed `len() - 1`,
    /// which underflows `usize` on an empty sample set).
    fn percentile(&self, p: f64) -> Duration {
        if self.samples.is_empty() {
            return Duration::ZERO;
        }
        let mut sorted = self.samples.clone();
        // Durations are totally ordered; unstable sort avoids an allocation.
        sorted.sort_unstable();
        let idx = ((sorted.len() as f64) * p) as usize;
        sorted[idx.min(sorted.len() - 1)]
    }
}
End of Document