Files
wifi-densepose/docs/research/latent-space/implementation-plans/05-testing-benchmarks.md
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

64 KiB

Testing and Benchmarking Specification for ruvector-attention

Version: 1.0 Date: 2025-11-30 Status: Draft

Executive Summary

This document defines comprehensive testing and benchmarking strategies for the ruvector-attention crate, ensuring reliability, performance, and correctness across all attention mechanisms and platforms.


1. Testing Strategy Overview

Testing Pyramid

Testing Distribution:
                    /\
                   /  \  E2E Tests (5%)
                  /----\  - End-to-end workflows
                 /      \  - Real-world scenarios
                /--------\ Integration Tests (25%)
               /          \ - Module integration
              /            \ - Cross-platform validation
             /--------------\ Unit Tests (70%)
            /                \ - Component isolation
           /                  \ - Edge cases & correctness
          /--------------------\

Testing Philosophy

  1. Test-Driven Development (TDD): Write tests before implementation
  2. Property-Based Testing: Use proptest for mathematical properties
  3. Regression Prevention: Automated performance benchmarks
  4. Platform Parity: Ensure WASM/NAPI-RS match Rust implementations
  5. Continuous Validation: Integrate into CI/CD pipeline

Test Coverage Goals

Component Target Coverage Critical Paths
Core Attention 95% 100%
Hyperbolic 90% 100%
Graph Attention 90% 100%
Optimization (Flash, Linear) 85% 100%
Platform Bindings 80% 95%

2. Unit Test Specifications

2.1 Scaled Dot-Product Attention

// crates/ruvector-attention/src/scaled_dot_product/tests.rs

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use ndarray::{Array1, Array2};
    use proptest::prelude::*;

    #[test]
    fn test_basic_attention_computation() {
        let attention = ScaledDotProduct::new(4);

        let query = Array1::from_vec(vec![1.0, 0.0, 0.0, 0.0]);
        let keys = Array2::from_shape_vec(
            (3, 4),
            vec![
                1.0, 0.0, 0.0, 0.0,  // Identical to query
                0.0, 1.0, 0.0, 0.0,  // Orthogonal
                0.5, 0.5, 0.5, 0.5,  // Mixed
            ],
        ).unwrap();
        let values = Array2::from_shape_vec(
            (3, 4),
            vec![
                1.0, 0.0, 0.0, 0.0,
                0.0, 1.0, 0.0, 0.0,
                0.0, 0.0, 1.0, 0.0,
            ],
        ).unwrap();

        let output = attention.forward(&query, &keys, &values).unwrap();

        // Should weight most similar key highest
        assert!(output[0] > 0.5, "First value should dominate");
    }

    #[test]
    fn test_zero_vectors() {
        let attention = ScaledDotProduct::new(4);

        let query = Array1::zeros(4);
        let keys = Array2::zeros((3, 4));
        let values = Array2::ones((3, 4));

        let output = attention.forward(&query, &keys, &values).unwrap();

        // All weights equal, should average values
        assert_relative_eq!(output[0], 1.0, epsilon = 1e-5);
    }

    #[test]
    fn test_numerical_stability_large_values() {
        let attention = ScaledDotProduct::new(128);

        // Large magnitude vectors
        let query = Array1::from_elem(128, 100.0);
        let keys = Array2::from_elem((100, 128), 100.0);
        let values = Array2::from_elem((100, 128), 1.0);

        let output = attention.forward(&query, &keys, &values).unwrap();

        // Should not overflow/underflow
        assert!(output.iter().all(|&x| x.is_finite()));
        assert!(output.iter().all(|&x| x >= 0.0 && x <= 2.0));
    }

    #[test]
    fn test_softmax_sums_to_one() {
        let attention = ScaledDotProduct::new(64);

        let query = Array1::from_vec((0..64).map(|i| i as f32).collect());
        let keys = Array2::from_shape_vec(
            (50, 64),
            (0..50*64).map(|i| (i % 100) as f32).collect(),
        ).unwrap();

        let weights = attention.compute_attention_weights(&query, &keys).unwrap();
        let sum: f32 = weights.sum();

        assert_relative_eq!(sum, 1.0, epsilon = 1e-5);
    }

    #[test]
    fn test_gradient_correctness() {
        // Numerical gradient checking
        let attention = ScaledDotProduct::new(8);

        let query = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let keys = Array2::from_elem((5, 8), 0.5);
        let values = Array2::eye(8).slice(s![0..5, ..]).to_owned();

        let epsilon = 1e-4;

        // Compute numerical gradient
        let output = attention.forward(&query, &keys, &values).unwrap();

        for i in 0..8 {
            let mut query_plus = query.clone();
            query_plus[i] += epsilon;
            let output_plus = attention.forward(&query_plus, &keys, &values).unwrap();

            let numerical_grad = (&output_plus - &output) / epsilon;

            // Analytical gradient should match
            // (Would require backward pass implementation)
            assert!(numerical_grad.iter().all(|&x| x.is_finite()));
        }
    }

    #[test]
    fn test_serialization_roundtrip() {
        let attention = ScaledDotProduct::new(64);

        let serialized = serde_json::to_string(&attention).unwrap();
        let deserialized: ScaledDotProduct = serde_json::from_str(&serialized).unwrap();

        assert_eq!(attention.dim(), deserialized.dim());
    }

    #[test]
    fn test_masking() {
        let attention = ScaledDotProduct::new(4);

        let query = Array1::ones(4);
        let keys = Array2::ones((3, 4));
        let values = Array2::from_shape_vec(
            (3, 4),
            vec![
                1.0, 0.0, 0.0, 0.0,
                0.0, 2.0, 0.0, 0.0,
                0.0, 0.0, 3.0, 0.0,
            ],
        ).unwrap();

        // Mask out second key
        let mask = vec![true, false, true];

        let output = attention.forward_with_mask(&query, &keys, &values, &mask).unwrap();

        // Second value should not contribute
        assert_relative_eq!(output[1], 0.0, epsilon = 1e-5);
    }

    // Property-based testing
    proptest! {
        #[test]
        fn prop_attention_preserves_dimension(
            dim in 4usize..256,
            n_keys in 1usize..100,
        ) {
            let attention = ScaledDotProduct::new(dim);

            let query = Array1::from_elem(dim, 1.0);
            let keys = Array2::from_elem((n_keys, dim), 0.5);
            let values = Array2::from_elem((n_keys, dim), 0.5);

            let output = attention.forward(&query, &keys, &values).unwrap();

            prop_assert_eq!(output.len(), dim);
        }

        #[test]
        fn prop_weights_non_negative(
            dim in 4usize..64,
            n_keys in 1usize..50,
        ) {
            let attention = ScaledDotProduct::new(dim);

            let query = Array1::from_elem(dim, 1.0);
            let keys = Array2::from_elem((n_keys, dim), 0.5);

            let weights = attention.compute_attention_weights(&query, &keys).unwrap();

            prop_assert!(weights.iter().all(|&w| w >= 0.0));
        }
    }
}

2.2 Hyperbolic Attention Tests

// crates/ruvector-attention/src/hyperbolic/tests.rs

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;

    #[test]
    fn test_poincare_distance_symmetry() {
        let attention = HyperbolicAttention::new(-1.0); // Unit curvature

        let x = Array1::from_vec(vec![0.1, 0.2, 0.0, 0.0]);
        let y = Array1::from_vec(vec![0.3, 0.1, 0.0, 0.0]);

        let d_xy = attention.poincare_distance(&x, &y);
        let d_yx = attention.poincare_distance(&y, &x);

        assert_relative_eq!(d_xy, d_yx, epsilon = 1e-6);
    }

    #[test]
    fn test_poincare_distance_identity() {
        let attention = HyperbolicAttention::new(-1.0);

        let x = Array1::from_vec(vec![0.1, 0.2, 0.3, 0.4]);

        let d = attention.poincare_distance(&x, &x);

        assert_relative_eq!(d, 0.0, epsilon = 1e-6);
    }

    #[test]
    fn test_mobius_addition_identity() {
        let attention = HyperbolicAttention::new(-1.0);

        let x = Array1::from_vec(vec![0.1, 0.2, 0.0, 0.0]);
        let zero = Array1::zeros(4);

        let result = attention.mobius_add(&x, &zero);

        assert_relative_eq!(result.as_slice().unwrap(), x.as_slice().unwrap(), epsilon = 1e-6);
    }

    #[test]
    fn test_mobius_addition_commutativity() {
        let attention = HyperbolicAttention::new(-1.0);

        let x = Array1::from_vec(vec![0.1, 0.2, 0.0, 0.0]);
        let y = Array1::from_vec(vec![0.05, 0.1, 0.0, 0.0]);

        let xy = attention.mobius_add(&x, &y);
        let yx = attention.mobius_add(&y, &x);

        assert_relative_eq!(xy.as_slice().unwrap(), yx.as_slice().unwrap(), epsilon = 1e-5);
    }

    #[test]
    fn test_boundary_stability() {
        let attention = HyperbolicAttention::new(-1.0);

        // Vector near boundary (||x|| → 1)
        let norm = 0.99;
        let x = Array1::from_vec(vec![norm / 2.0_f32.sqrt(), norm / 2.0_f32.sqrt(), 0.0, 0.0]);
        let y = Array1::from_vec(vec![0.1, 0.1, 0.0, 0.0]);

        let distance = attention.poincare_distance(&x, &y);

        // Should remain finite
        assert!(distance.is_finite());
        assert!(distance >= 0.0);
    }

    #[test]
    fn test_exponential_map() {
        let attention = HyperbolicAttention::new(-1.0);

        let origin = Array1::zeros(4);
        let tangent = Array1::from_vec(vec![0.1, 0.2, 0.0, 0.0]);

        let point = attention.exponential_map(&origin, &tangent);

        // Point should be in disk
        let norm: f32 = point.iter().map(|x| x * x).sum();
        assert!(norm < 1.0);
    }

    #[test]
    fn test_logarithmic_map() {
        let attention = HyperbolicAttention::new(-1.0);

        let origin = Array1::zeros(4);
        let point = Array1::from_vec(vec![0.3, 0.4, 0.0, 0.0]);

        let tangent = attention.logarithmic_map(&origin, &point);

        // Roundtrip should recover point
        let recovered = attention.exponential_map(&origin, &tangent);

        assert_relative_eq!(recovered.as_slice().unwrap(), point.as_slice().unwrap(), epsilon = 1e-5);
    }

    #[test]
    fn test_curvature_parameter() {
        let c_values = vec![-0.1, -0.5, -1.0, -2.0];

        for c in c_values {
            let attention = HyperbolicAttention::new(c);

            let x = Array1::from_vec(vec![0.1, 0.1, 0.0, 0.0]);
            let y = Array1::from_vec(vec![0.2, 0.2, 0.0, 0.0]);

            let distance = attention.poincare_distance(&x, &y);

            // Distance should vary with curvature
            assert!(distance > 0.0);
            assert!(distance.is_finite());
        }
    }

    #[test]
    fn test_gyrovector_parallel_transport() {
        let attention = HyperbolicAttention::new(-1.0);

        let x = Array1::from_vec(vec![0.1, 0.0, 0.0, 0.0]);
        let y = Array1::from_vec(vec![0.0, 0.1, 0.0, 0.0]);
        let v = Array1::from_vec(vec![0.05, 0.05, 0.0, 0.0]);

        let transported = attention.parallel_transport(&x, &y, &v);

        // Transported vector should maintain properties
        assert!(transported.iter().all(|&x| x.is_finite()));
    }

    proptest! {
        #[test]
        fn prop_poincare_distance_triangle_inequality(
            dim in 4usize..16,
        ) {
            let attention = HyperbolicAttention::new(-1.0);

            // Generate points in Poincaré disk
            let x = Array1::from_elem(dim, 0.1);
            let y = Array1::from_elem(dim, 0.2);
            let z = Array1::from_elem(dim, 0.15);

            let d_xy = attention.poincare_distance(&x, &y);
            let d_yz = attention.poincare_distance(&y, &z);
            let d_xz = attention.poincare_distance(&x, &z);

            // Triangle inequality: d(x,z) ≤ d(x,y) + d(y,z)
            prop_assert!(d_xz <= d_xy + d_yz + 1e-5);
        }
    }
}

2.3 Graph Attention Tests

// crates/ruvector-attention/src/graph/tests.rs

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_edge_featured_attention() {
        let attention = EdgeFeaturedAttention::new(4, 2);

        let node = Array1::from_vec(vec![1.0, 0.0, 0.0, 0.0]);
        let neighbors = Array2::from_shape_vec(
            (3, 4),
            vec![
                0.5, 0.5, 0.0, 0.0,
                0.0, 1.0, 0.0, 0.0,
                0.0, 0.0, 1.0, 0.0,
            ],
        ).unwrap();

        // Edge features: distance, weight
        let edge_features = Array2::from_shape_vec(
            (3, 2),
            vec![
                0.1, 1.0,  // Close, high weight
                0.5, 0.5,  // Medium distance, medium weight
                1.0, 0.1,  // Far, low weight
            ],
        ).unwrap();

        let output = attention.forward(&node, &neighbors, &edge_features).unwrap();

        assert_eq!(output.len(), 4);
        assert!(output.iter().all(|&x| x.is_finite()));
    }

    #[test]
    fn test_local_global_attention() {
        let attention = LocalGlobalAttention::new(4, 2, 8);

        let query = Array1::ones(4);
        let local_keys = Array2::ones((10, 4));
        let global_keys = Array2::ones((100, 4));
        let local_values = Array2::ones((10, 4));
        let global_values = Array2::ones((100, 4));

        let output = attention.forward(
            &query,
            &local_keys,
            &global_keys,
            &local_values,
            &global_values,
        ).unwrap();

        assert_eq!(output.len(), 4);
    }

    #[test]
    fn test_relational_attention() {
        let attention = RelationalAttention::new(4, vec![
            "friend".to_string(),
            "colleague".to_string(),
            "family".to_string(),
        ]);

        let node = Array1::ones(4);
        let neighbors = Array2::ones((5, 4));
        let relations = vec![0, 1, 0, 2, 1]; // Relation types

        let output = attention.forward(&node, &neighbors, &relations).unwrap();

        assert_eq!(output.len(), 4);
    }
}

2.4 Optimization Tests

// crates/ruvector-attention/src/flash/tests.rs

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_flash_attention_matches_standard() {
        let dim = 64;
        let n_neighbors = 1000;

        let flash = FlashAttention::new(dim, 256); // Block size 256
        let standard = ScaledDotProduct::new(dim);

        let query = Array1::ones(dim);
        let keys = Array2::ones((n_neighbors, dim));
        let values = Array2::ones((n_neighbors, dim));

        let flash_output = flash.forward(&query, &keys, &values).unwrap();
        let standard_output = standard.forward(&query, &keys, &values).unwrap();

        // Should be nearly identical
        assert_relative_eq!(
            flash_output.as_slice().unwrap(),
            standard_output.as_slice().unwrap(),
            epsilon = 1e-3
        );
    }

    #[test]
    fn test_flash_memory_usage() {
        let flash = FlashAttention::new(128, 512);

        // Should use O(sqrt(N)) memory for blocks
        let query = Array1::ones(128);
        let keys = Array2::ones((10000, 128));
        let values = Array2::ones((10000, 128));

        // Measure memory before and after
        let before = get_memory_usage();
        let _ = flash.forward(&query, &keys, &values).unwrap();
        let after = get_memory_usage();

        let memory_used = after - before;

        // Should be much less than O(N) memory
        assert!(memory_used < 50 * 1024 * 1024); // < 50MB
    }

    #[test]
    fn test_linear_attention_approximation() {
        let linear = LinearAttention::new(64);
        let standard = ScaledDotProduct::new(64);

        let query = Array1::ones(64);
        let keys = Array2::ones((100, 64));
        let values = Array2::ones((100, 64));

        let linear_output = linear.forward(&query, &keys, &values).unwrap();
        let standard_output = standard.forward(&query, &keys, &values).unwrap();

        // Should approximate standard attention
        let diff: f32 = (&linear_output - &standard_output)
            .iter()
            .map(|x| x.abs())
            .sum();

        assert!(diff < 5.0); // Reasonable approximation error
    }
}

3. Integration Test Specifications

3.1 Integration with ruvector-gnn

// tests/integration/gnn_integration.rs

use ruvector_attention::{HyperbolicAttention, GraphAttention};
use ruvector_gnn::{GNNLayer, GraphConvolution};

#[test]
fn test_hyperbolic_attention_in_gnn_layer() {
    let attention = HyperbolicAttention::new(-1.0);
    let layer = GNNLayer::with_attention(4, 8, attention);

    // Create graph: 5 nodes, 8 edges
    let nodes = Array2::ones((5, 4));
    let edges = vec![
        (0, 1), (0, 2), (1, 2), (1, 3),
        (2, 3), (2, 4), (3, 4), (4, 0),
    ];

    let output = layer.forward(&nodes, &edges).unwrap();

    assert_eq!(output.shape(), &[5, 8]);
    assert!(output.iter().all(|&x| x.is_finite()));
}

#[test]
fn test_multi_layer_gnn_with_attention() {
    let attention1 = GraphAttention::new(4);
    let attention2 = GraphAttention::new(8);

    let layer1 = GNNLayer::with_attention(4, 8, attention1);
    let layer2 = GNNLayer::with_attention(8, 16, attention2);

    let nodes = Array2::ones((10, 4));
    let edges = (0..20).map(|i| (i % 10, (i + 1) % 10)).collect();

    let hidden = layer1.forward(&nodes, &edges).unwrap();
    let output = layer2.forward(&hidden, &edges).unwrap();

    assert_eq!(output.shape(), &[10, 16]);
}

#[test]
fn test_attention_aggregation_methods() {
    let attention = GraphAttention::new(4);

    let node_features = Array1::ones(4);
    let neighbor_features = Array2::ones((10, 4));

    // Test different aggregation
    let sum_agg = attention.aggregate_neighbors(&node_features, &neighbor_features, "sum").unwrap();
    let mean_agg = attention.aggregate_neighbors(&node_features, &neighbor_features, "mean").unwrap();
    let max_agg = attention.aggregate_neighbors(&node_features, &neighbor_features, "max").unwrap();

    assert_eq!(sum_agg.len(), 4);
    assert_eq!(mean_agg.len(), 4);
    assert_eq!(max_agg.len(), 4);

    // Mean should be sum / count
    assert_relative_eq!(mean_agg[0], sum_agg[0] / 10.0, epsilon = 1e-5);
}

3.2 Integration with ruvector-core HNSW

// tests/integration/hnsw_integration.rs

use ruvector_core::{HnswIndex, SearchParams};
use ruvector_attention::{ScaledDotProduct, AttentionGuidedSearch};

#[test]
fn test_attention_guided_search() {
    // Build HNSW index
    let mut index = HnswIndex::new(128, 16, 200);

    for i in 0..1000 {
        let vector = generate_random_vector(128, i);
        index.add(i, vector);
    }

    // Create attention mechanism
    let attention = ScaledDotProduct::new(128);
    let guided_search = AttentionGuidedSearch::new(attention);

    let query = generate_random_vector(128, 12345);

    // Standard search
    let standard_results = index.search(&query, 10).unwrap();

    // Attention-guided search
    let attention_results = guided_search.search(&index, &query, 10).unwrap();

    // Should find similar results but potentially better ranking
    assert_eq!(attention_results.len(), 10);

    // Check recall
    let standard_ids: HashSet<_> = standard_results.iter().map(|r| r.id).collect();
    let attention_ids: HashSet<_> = attention_results.iter().map(|r| r.id).collect();
    let overlap = standard_ids.intersection(&attention_ids).count();

    assert!(overlap >= 7); // At least 70% overlap
}

#[test]
fn test_attention_edge_weighting() {
    let mut index = HnswIndex::new(64, 16, 200);

    // Add hierarchical clusters
    for cluster in 0..10 {
        for i in 0..100 {
            let vector = generate_cluster_vector(64, cluster, i);
            index.add(cluster * 100 + i, vector);
        }
    }

    let attention = GraphAttention::new(64);

    // Query from cluster 0
    let query = generate_cluster_vector(64, 0, 0);

    let results = index.search_with_attention(&query, 20, &attention).unwrap();

    // Most results should be from cluster 0
    let cluster_0_count = results.iter()
        .filter(|r| r.id < 100)
        .count();

    assert!(cluster_0_count >= 15); // At least 75% from same cluster
}

#[test]
fn test_multi_scale_search() {
    let index = build_test_index(1000, 128);

    let local_attention = GraphAttention::new(128);
    let global_attention = ScaledDotProduct::new(128);

    let multi_scale = MultiScaleAttention::new(local_attention, global_attention);

    let query = generate_random_vector(128, 999);
    let results = index.search_with_multi_scale(&query, 10, &multi_scale).unwrap();

    assert_eq!(results.len(), 10);
    assert!(results.windows(2).all(|w| w[0].distance <= w[1].distance));
}

3.3 Cross-Platform Consistency Tests

// tests/integration/cross_platform.rs

#[cfg(all(feature = "wasm", target_arch = "wasm32"))]
use wasm_bindgen_test::*;

#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
#[cfg_attr(not(target_arch = "wasm32"), test)]
fn test_wasm_rust_attention_consistency() {
    let dim = 128;
    let n_neighbors = 100;

    #[cfg(target_arch = "wasm32")]
    let attention = ScaledDotProduct::new_wasm(dim);

    #[cfg(not(target_arch = "wasm32"))]
    let attention = ScaledDotProduct::new(dim);

    let query = Array1::ones(dim);
    let keys = Array2::ones((n_neighbors, dim));
    let values = Array2::ones((n_neighbors, dim));

    let output = attention.forward(&query, &keys, &values).unwrap();

    // Expected output (computed offline)
    let expected = Array1::ones(dim);

    assert_relative_eq!(
        output.as_slice().unwrap(),
        expected.as_slice().unwrap(),
        epsilon = 1e-4
    );
}

#[cfg(feature = "napi")]
#[test]
fn test_napi_rust_consistency() {
    use napi::threadsafe_function::ThreadsafeFunction;
    use ruvector_attention_napi::ScaledDotProductNAPI;

    let rust_attention = ScaledDotProduct::new(64);
    let napi_attention = ScaledDotProductNAPI::new(64);

    let query = vec![1.0f32; 64];
    let keys = vec![vec![0.5f32; 64]; 50];
    let values = vec![vec![0.75f32; 64]; 50];

    let rust_output = rust_attention.forward(
        &Array1::from_vec(query.clone()),
        &Array2::from_shape_vec((50, 64), keys.clone().into_iter().flatten().collect()).unwrap(),
        &Array2::from_shape_vec((50, 64), values.clone().into_iter().flatten().collect()).unwrap(),
    ).unwrap();

    let napi_output = napi_attention.forward_sync(query, keys, values).unwrap();

    assert_relative_eq!(
        rust_output.as_slice().unwrap(),
        &napi_output[..],
        epsilon = 1e-5
    );
}

#[test]
fn test_serialization_consistency() {
    let attention = HyperbolicAttention::new(-1.0);

    // Serialize to JSON
    let json = serde_json::to_string(&attention).unwrap();

    // Deserialize
    let deserialized: HyperbolicAttention = serde_json::from_str(&json).unwrap();

    // Test equivalence
    let query = Array1::from_vec(vec![0.1, 0.2, 0.0, 0.0]);
    let keys = Array2::from_elem((10, 4), 0.3);
    let values = Array2::from_elem((10, 4), 0.5);

    let output1 = attention.forward(&query, &keys, &values).unwrap();
    let output2 = deserialized.forward(&query, &keys, &values).unwrap();

    assert_relative_eq!(
        output1.as_slice().unwrap(),
        output2.as_slice().unwrap(),
        epsilon = 1e-8
    );
}

4. Benchmark Suite

4.1 Criterion Benchmarks

// benches/attention_benchmarks.rs

use criterion::{
    black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use ruvector_attention::*;
use ndarray::{Array1, Array2};

fn benchmark_scaled_dot_product(c: &mut Criterion) {
    let dims = [64, 128, 256, 512, 1024];
    let neighbors = [10, 50, 100, 500, 1000];

    let mut group = c.benchmark_group("scaled_dot_product");

    for &d in &dims {
        for &n in &neighbors {
            let param = format!("d={}/n={}", d, n);

            group.throughput(Throughput::Elements((n * d) as u64));

            group.bench_with_input(
                BenchmarkId::new("forward", &param),
                &(d, n),
                |b, &(d, n)| {
                    let attention = ScaledDotProduct::new(d);
                    let query = Array1::ones(d);
                    let keys = Array2::ones((n, d));
                    let values = Array2::ones((n, d));

                    b.iter(|| {
                        black_box(attention.forward(
                            black_box(&query),
                            black_box(&keys),
                            black_box(&values),
                        ))
                    });
                },
            );
        }
    }

    group.finish();
}

fn benchmark_hyperbolic_attention(c: &mut Criterion) {
    let mut group = c.benchmark_group("hyperbolic_attention");

    let curvatures = [-0.1, -0.5, -1.0, -2.0];
    let dims = [64, 128, 256];
    let neighbors = [10, 50, 100];

    for &c_val in &curvatures {
        for &d in &dims {
            for &n in &neighbors {
                let param = format!("c={}/d={}/n={}", c_val, d, n);

                group.bench_with_input(
                    BenchmarkId::new("poincare_distance", &param),
                    &(c_val, d, n),
                    |b, &(c_val, d, n)| {
                        let attention = HyperbolicAttention::new(c_val);
                        let query = Array1::from_elem(d, 0.1);
                        let keys = Array2::from_elem((n, d), 0.2);

                        b.iter(|| {
                            for i in 0..n {
                                black_box(attention.poincare_distance(
                                    black_box(&query),
                                    black_box(&keys.row(i).to_owned()),
                                ));
                            }
                        });
                    },
                );
            }
        }
    }

    group.finish();
}

fn benchmark_flash_attention(c: &mut Criterion) {
    let mut group = c.benchmark_group("flash_attention");

    let block_sizes = [64, 128, 256, 512];
    let neighbors = [1000, 5000, 10000];

    for &block_size in &block_sizes {
        for &n in &neighbors {
            let param = format!("block={}/n={}", block_size, n);

            group.throughput(Throughput::Elements((n * 128) as u64));

            group.bench_with_input(
                BenchmarkId::new("flash_vs_standard", &param),
                &(block_size, n),
                |b, &(block_size, n)| {
                    let flash = FlashAttention::new(128, block_size);
                    let query = Array1::ones(128);
                    let keys = Array2::ones((n, 128));
                    let values = Array2::ones((n, 128));

                    b.iter(|| {
                        black_box(flash.forward(
                            black_box(&query),
                            black_box(&keys),
                            black_box(&values),
                        ))
                    });
                },
            );
        }
    }

    group.finish();
}

fn benchmark_memory_usage(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_usage");

    let methods = [
        ("standard", false),
        ("flash", true),
    ];

    for (name, use_flash) in &methods {
        group.bench_function(*name, |b| {
            b.iter_custom(|iters| {
                let start_memory = get_memory_usage();
                let start_time = std::time::Instant::now();

                for _ in 0..iters {
                    if *use_flash {
                        let attention = FlashAttention::new(128, 256);
                        let query = Array1::ones(128);
                        let keys = Array2::ones((10000, 128));
                        let values = Array2::ones((10000, 128));
                        black_box(attention.forward(&query, &keys, &values));
                    } else {
                        let attention = ScaledDotProduct::new(128);
                        let query = Array1::ones(128);
                        let keys = Array2::ones((10000, 128));
                        let values = Array2::ones((10000, 128));
                        black_box(attention.forward(&query, &keys, &values));
                    }
                }

                let end_memory = get_memory_usage();
                let duration = start_time.elapsed();

                println!(
                    "{}: Memory used = {} MB",
                    name,
                    (end_memory - start_memory) / 1024 / 1024
                );

                duration
            });
        });
    }

    group.finish();
}

fn benchmark_throughput(c: &mut Criterion) {
    let mut group = c.benchmark_group("throughput");

    let batch_sizes = [1, 10, 100, 1000];

    for &batch_size in &batch_sizes {
        group.throughput(Throughput::Elements(batch_size as u64));

        group.bench_with_input(
            BenchmarkId::new("batch_processing", batch_size),
            &batch_size,
            |b, &batch_size| {
                let attention = ScaledDotProduct::new(128);
                let queries: Vec<_> = (0..batch_size)
                    .map(|_| Array1::ones(128))
                    .collect();
                let keys = Array2::ones((100, 128));
                let values = Array2::ones((100, 128));

                b.iter(|| {
                    for query in &queries {
                        black_box(attention.forward(
                            black_box(query),
                            black_box(&keys),
                            black_box(&values),
                        ));
                    }
                });
            },
        );
    }

    group.finish();
}

fn benchmark_multi_head_attention(c: &mut Criterion) {
    let mut group = c.benchmark_group("multi_head_attention");

    let num_heads = [1, 2, 4, 8, 16];

    for &heads in &num_heads {
        group.bench_with_input(
            BenchmarkId::new("heads", heads),
            &heads,
            |b, &heads| {
                let attention = MultiHeadAttention::new(128, heads);
                let query = Array1::ones(128);
                let keys = Array2::ones((100, 128));
                let values = Array2::ones((100, 128));

                b.iter(|| {
                    black_box(attention.forward(
                        black_box(&query),
                        black_box(&keys),
                        black_box(&values),
                    ))
                });
            },
        );
    }

    group.finish();
}

fn benchmark_mixture_of_experts(c: &mut Criterion) {
    let mut group = c.benchmark_group("mixture_of_experts");

    let num_experts = [2, 4, 8, 16];

    for &experts in &num_experts {
        group.bench_with_input(
            BenchmarkId::new("experts", experts),
            &experts,
            |b, &experts| {
                let attention = MixtureOfExpertsAttention::new(128, experts);
                let query = Array1::ones(128);
                let keys = Array2::ones((100, 128));
                let values = Array2::ones((100, 128));

                b.iter(|| {
                    black_box(attention.forward(
                        black_box(&query),
                        black_box(&keys),
                        black_box(&values),
                    ))
                });
            },
        );
    }

    group.finish();
}

criterion_group!(
    benches,
    benchmark_scaled_dot_product,
    benchmark_hyperbolic_attention,
    benchmark_flash_attention,
    benchmark_memory_usage,
    benchmark_throughput,
    benchmark_multi_head_attention,
    benchmark_mixture_of_experts,
);

criterion_main!(benches);

// Helper function
fn get_memory_usage() -> usize {
    #[cfg(target_os = "linux")]
    {
        use std::fs;
        let status = fs::read_to_string("/proc/self/status").unwrap();
        for line in status.lines() {
            if line.starts_with("VmRSS:") {
                let parts: Vec<&str> = line.split_whitespace().collect();
                return parts[1].parse::<usize>().unwrap() * 1024;
            }
        }
    }
    0
}

5. Performance Targets

5.1 Latency Targets

Attention Type Dims Neighbors P50 Latency P99 Latency Memory Peak
Scaled Dot-Product 64 100 < 30μs < 50μs < 500KB
Scaled Dot-Product 128 100 < 40μs < 70μs < 1MB
Scaled Dot-Product 256 100 < 60μs < 100μs < 2MB
Multi-Head (4) 128 100 < 120μs < 200μs < 1.5MB
Multi-Head (8) 128 100 < 200μs < 350μs < 2MB
Hyperbolic 64 100 < 60μs < 100μs < 750KB
Hyperbolic 128 100 < 90μs < 150μs < 1.2MB
Graph (Edge) 128 100 < 80μs < 130μs < 1.5MB
Local+Global 128 100+500 < 400μs < 700μs < 4MB
Local+Global 128 1000+5000 < 2ms < 4ms < 15MB
Linear 128 1000 < 150μs < 250μs < 2MB
Linear 128 10000 < 1ms < 1.8ms < 8MB
Flash 128 1000 < 120μs < 200μs < 1.5MB
Flash 128 10000 < 800μs < 1.5ms < 6MB
Flash 128 100000 < 15ms < 25ms < 40MB
MoE (2 experts) 128 100 < 180μs < 300μs < 4MB
MoE (4 experts) 128 100 < 300μs < 500μs < 8MB
MoE (8 experts) 128 100 < 550μs < 900μs < 15MB

5.2 Throughput Targets

Operation Batch Size Target QPS Notes
Single Query 1 > 20,000 128D, 100 neighbors
Batch Processing 10 > 150,000 Total queries/sec
Batch Processing 100 > 1,000,000 Total queries/sec
Large Scale 1000 > 8,000,000 Parallel processing

5.3 Accuracy Targets

Metric Target Measurement Method
Recall@10 > 95% vs brute-force
Recall@100 > 98% vs brute-force
Precision@10 > 90% vs brute-force
Numerical Stability < 1e-5 error Relative to FP64
Cross-Platform Consistency < 1e-4 error WASM vs Native

6. Regression Testing

6.1 Automated Performance Regression

# .github/workflows/performance-regression.yml

name: Performance Regression Tests

on:
  pull_request:
    branches: [main, develop]
  push:
    branches: [main]

jobs:
  benchmark:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      - name: Install Rust
        uses: actions-rust-lang/setup-rust-toolchain@v1

      - name: Run benchmarks (PR)
        run: |
          cargo bench --bench attention_benchmarks -- --save-baseline pr

      - name: Checkout main branch
        run: |
          git fetch origin main
          git checkout origin/main

      - name: Run benchmarks (main)
        run: |
          cargo bench --bench attention_benchmarks -- --save-baseline main

      - name: Compare benchmarks
        run: |
          cargo bench --bench attention_benchmarks -- --baseline main --compare pr

      - name: Check for regressions
        run: |
          python3 scripts/check_regression.py \
            --threshold-p50 5 \
            --threshold-p99 10 \
            --threshold-memory 10

      - name: Upload results
        uses: actions/upload-artifact@v3
        with:
          name: benchmark-results
          path: target/criterion/

6.2 Regression Detection Script

# scripts/check_regression.py

import json
import sys
import argparse
from pathlib import Path

def load_benchmark_results(path):
    """Load Criterion benchmark results"""
    results = {}

    for bench_dir in Path(path).glob("*/"):
        bench_name = bench_dir.name

        # Load estimates.json
        estimates_file = bench_dir / "new" / "estimates.json"
        if estimates_file.exists():
            with open(estimates_file) as f:
                data = json.load(f)
                results[bench_name] = {
                    'mean': data['mean']['point_estimate'],
                    'median': data['median']['point_estimate'],
                }

    return results

def compare_results(baseline, current, thresholds):
    """Compare benchmark results and detect regressions"""
    regressions = []
    warnings = []

    for bench_name in current:
        if bench_name not in baseline:
            continue

        base_mean = baseline[bench_name]['mean']
        curr_mean = current[bench_name]['mean']

        percent_change = ((curr_mean - base_mean) / base_mean) * 100

        if percent_change > thresholds['p99']:
            regressions.append({
                'benchmark': bench_name,
                'change': percent_change,
                'baseline': base_mean,
                'current': curr_mean,
            })
        elif percent_change > thresholds['p50']:
            warnings.append({
                'benchmark': bench_name,
                'change': percent_change,
                'baseline': base_mean,
                'current': curr_mean,
            })

    return regressions, warnings

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--threshold-p50', type=float, default=5.0)
    parser.add_argument('--threshold-p99', type=float, default=10.0)
    parser.add_argument('--threshold-memory', type=float, default=10.0)
    parser.add_argument('--baseline-path', default='target/criterion/main')
    parser.add_argument('--current-path', default='target/criterion/pr')

    args = parser.parse_args()

    thresholds = {
        'p50': args.threshold_p50,
        'p99': args.threshold_p99,
        'memory': args.threshold_memory,
    }

    baseline = load_benchmark_results(args.baseline_path)
    current = load_benchmark_results(args.current_path)

    regressions, warnings = compare_results(baseline, current, thresholds)

    if warnings:
        print("⚠️  Performance Warnings:")
        for w in warnings:
            print(f"  {w['benchmark']}: +{w['change']:.2f}% ({w['baseline']:.2f}ns → {w['current']:.2f}ns)")

    if regressions:
        print("\n❌ Performance Regressions Detected:")
        for r in regressions:
            print(f"  {r['benchmark']}: +{r['change']:.2f}% ({r['baseline']:.2f}ns → {r['current']:.2f}ns)")
        sys.exit(1)
    else:
        print("✅ No performance regressions detected")
        sys.exit(0)

if __name__ == '__main__':
    main()

6.3 Regression Test Configuration

# regression-tests.toml

[metrics]

[metrics.p50_latency]
threshold = 5.0  # 5% increase triggers warning
action = "warn"
description = "Median latency regression"

[metrics.p99_latency]
threshold = 10.0  # 10% increase blocks PR
action = "block"
description = "99th percentile latency regression"

[metrics.memory_peak]
threshold = 10.0
action = "warn"
description = "Peak memory usage regression"

[metrics.recall_at_10]
threshold = 1.0  # 1% decrease blocks PR
action = "block"
comparison = "decrease"
description = "Recall@10 accuracy regression"

[metrics.throughput]
threshold = 5.0
action = "warn"
comparison = "decrease"
description = "Throughput regression"

[benchmarks]
critical = [
    "scaled_dot_product/forward/d=128/n=100",
    "hyperbolic_attention/poincare_distance/c=-1.0/d=128/n=100",
    "flash_attention/flash_vs_standard/block=256/n=10000",
]

[notifications]
slack_webhook = "${SLACK_WEBHOOK_URL}"
email = ["team@example.com"]

7. Platform-Specific Tests

7.1 WASM Tests

// tests/wasm/attention.test.js

import { describe, it, expect, beforeAll } from 'vitest';
import init, {
  ScaledDotProduct,
  HyperbolicAttention,
  MultiHeadAttention,
} from '../../pkg/ruvector_attention.js';

describe('WASM Attention Module', () => {
  beforeAll(async () => {
    await init();
  });

  describe('ScaledDotProduct', () => {
    it('should create instance with correct dimensions', () => {
      const attention = ScaledDotProduct.new(128);
      expect(attention.dim()).toBe(128);
    });

    it('should compute attention for small inputs', () => {
      const attention = ScaledDotProduct.new(4);

      const query = new Float32Array([1, 0, 0, 0]);
      const keys = new Float32Array([
        1, 0, 0, 0,
        0, 1, 0, 0,
        0, 0, 1, 0,
      ]);
      const values = new Float32Array([
        1, 0, 0, 0,
        0, 1, 0, 0,
        0, 0, 1, 0,
      ]);

      const result = attention.forward(query, keys, values, 3);

      expect(result).toBeInstanceOf(Float32Array);
      expect(result.length).toBe(4);
      expect(result[0]).toBeGreaterThan(0.5);
    });

    it('should handle large batches', async () => {
      const attention = ScaledDotProduct.new(128);

      const query = new Float32Array(128).fill(1);
      const n_neighbors = 10000;
      const keys = new Float32Array(n_neighbors * 128).fill(0.5);
      const values = new Float32Array(n_neighbors * 128).fill(0.75);

      const start = performance.now();
      const result = attention.forward(query, keys, values, n_neighbors);
      const duration = performance.now() - start;

      expect(result.length).toBe(128);
      expect(duration).toBeLessThan(100); // < 100ms
    });

    it('should produce consistent results', () => {
      const attention = ScaledDotProduct.new(64);

      const query = new Float32Array(64).fill(1);
      const keys = new Float32Array(50 * 64).fill(0.5);
      const values = new Float32Array(50 * 64).fill(0.75);

      const result1 = attention.forward(query, keys, values, 50);
      const result2 = attention.forward(query, keys, values, 50);

      for (let i = 0; i < 64; i++) {
        expect(Math.abs(result1[i] - result2[i])).toBeLessThan(1e-6);
      }
    });
  });

  describe('HyperbolicAttention', () => {
    it('should compute Poincaré distance', () => {
      const attention = HyperbolicAttention.new(-1.0, 4);

      const x = new Float32Array([0.1, 0.2, 0, 0]);
      const y = new Float32Array([0.3, 0.1, 0, 0]);

      const distance = attention.poincare_distance(x, y);

      expect(distance).toBeGreaterThan(0);
      expect(distance).toBeLessThan(10);
      expect(isFinite(distance)).toBe(true);
    });

    it('should handle boundary cases', () => {
      const attention = HyperbolicAttention.new(-1.0, 4);

      // Near boundary
      const x = new Float32Array([0.7, 0.7, 0, 0]);
      const y = new Float32Array([0.1, 0.1, 0, 0]);

      const distance = attention.poincare_distance(x, y);
      expect(isFinite(distance)).toBe(true);
    });
  });

  describe('MultiHeadAttention', () => {
    it('should compute multi-head attention', () => {
      const attention = MultiHeadAttention.new(128, 8);

      const query = new Float32Array(128).fill(1);
      const keys = new Float32Array(100 * 128).fill(0.5);
      const values = new Float32Array(100 * 128).fill(0.75);

      const result = attention.forward(query, keys, values, 100);

      expect(result.length).toBe(128);
      expect(result.every(x => isFinite(x))).toBe(true);
    });
  });

  describe('Memory Management', () => {
    it('should properly free memory', () => {
      const attention = ScaledDotProduct.new(128);

      // Force garbage collection if available
      if (global.gc) {
        global.gc();
      }

      attention.free();

      // Should not crash
      expect(true).toBe(true);
    });

    it('should handle concurrent operations', async () => {
      const attentions = Array.from({ length: 10 }, () =>
        ScaledDotProduct.new(128)
      );

      const query = new Float32Array(128).fill(1);
      const keys = new Float32Array(100 * 128).fill(0.5);
      const values = new Float32Array(100 * 128).fill(0.75);

      const results = await Promise.all(
        attentions.map(att =>
          Promise.resolve(att.forward(query, keys, values, 100))
        )
      );

      expect(results).toHaveLength(10);

      // Cleanup
      attentions.forEach(att => att.free());
    });
  });

  describe('Performance', () => {
    it('should meet latency targets', () => {
      const attention = ScaledDotProduct.new(128);

      const query = new Float32Array(128).fill(1);
      const keys = new Float32Array(100 * 128).fill(0.5);
      const values = new Float32Array(100 * 128).fill(0.75);

      // Warm-up
      for (let i = 0; i < 10; i++) {
        attention.forward(query, keys, values, 100);
      }

      // Measure
      const times = [];
      for (let i = 0; i < 100; i++) {
        const start = performance.now();
        attention.forward(query, keys, values, 100);
        times.push(performance.now() - start);
      }

      times.sort((a, b) => a - b);
      const p50 = times[Math.floor(times.length * 0.5)];
      const p99 = times[Math.floor(times.length * 0.99)];

      expect(p50).toBeLessThan(1); // < 1ms for WASM
      expect(p99).toBeLessThan(2); // < 2ms for WASM
    });
  });
});

7.2 NAPI-RS Tests

// tests/napi/attention.test.ts

import { describe, it, expect, beforeEach } from '@jest/globals';
import {
  ScaledDotProduct,
  HyperbolicAttention,
  MultiHeadAttention,
  FlashAttention,
} from '../../index';

describe('NAPI-RS Attention Module', () => {
  describe('ScaledDotProduct', () => {
    let attention: ScaledDotProduct;

    beforeEach(() => {
      attention = new ScaledDotProduct(128);
    });

    it('should create instance', () => {
      expect(attention).toBeDefined();
      expect(attention.dim()).toBe(128);
    });

    it('should compute attention synchronously', () => {
      const query = new Float32Array(128).fill(1);
      const keys = Array.from({ length: 100 }, () =>
        Array.from({ length: 128 }, () => 0.5)
      );
      const values = Array.from({ length: 100 }, () =>
        Array.from({ length: 128 }, () => 0.75)
      );

      const result = attention.forward(query, keys, values);

      expect(result).toHaveLength(128);
      expect(result.every(x => isFinite(x))).toBe(true);
    });

    it('should compute attention asynchronously', async () => {
      const query = new Float32Array(128).fill(1);
      const keys = Array.from({ length: 100 }, () =>
        Array.from({ length: 128 }, () => 0.5)
      );
      const values = Array.from({ length: 100 }, () =>
        Array.from({ length: 128 }, () => 0.75)
      );

      const result = await attention.forwardAsync(query, keys, values);

      expect(result).toHaveLength(128);
    });

    it('should process batch asynchronously', async () => {
      const queries = Array.from({ length: 10 }, () =>
        new Float32Array(128).fill(1)
      );
      const keys = Array.from({ length: 100 }, () =>
        Array.from({ length: 128 }, () => 0.5)
      );
      const values = Array.from({ length: 100 }, () =>
        Array.from({ length: 128 }, () => 0.75)
      );

      const results = await attention.forwardBatchAsync(queries, keys, values);

      expect(results).toHaveLength(10);
      expect(results[0]).toHaveLength(128);
    });

    it('should match Rust implementation', () => {
      // Test data computed in Rust
      const query = new Float32Array([1, 0, 0, 0]);
      const keys = [
        [1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, 0],
      ];
      const values = [
        [1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, 0],
      ];

      const attention4 = new ScaledDotProduct(4);
      const result = attention4.forward(query, keys, values);

      // Expected result from Rust
      const expected = [0.6652, 0.2424, 0.0924, 0.0];

      for (let i = 0; i < 4; i++) {
        expect(Math.abs(result[i] - expected[i])).toBeLessThan(1e-3);
      }
    });
  });

  describe('HyperbolicAttention', () => {
    it('should compute hyperbolic attention', () => {
      const attention = new HyperbolicAttention(-1.0, 128);

      const query = new Float32Array(128).fill(0.1);
      const keys = Array.from({ length: 100 }, () =>
        Array.from({ length: 128 }, () => 0.2)
      );
      const values = Array.from({ length: 100 }, () =>
        Array.from({ length: 128 }, () => 0.5)
      );

      const result = attention.forward(query, keys, values);

      expect(result).toHaveLength(128);
      expect(result.every(x => isFinite(x))).toBe(true);
    });

    it('should compute Poincaré distance', () => {
      const attention = new HyperbolicAttention(-1.0, 4);

      const x = new Float32Array([0.1, 0.2, 0, 0]);
      const y = new Float32Array([0.3, 0.1, 0, 0]);

      const distance = attention.poincareDistance(x, y);

      expect(distance).toBeGreaterThan(0);
      expect(isFinite(distance)).toBe(true);
    });
  });

  describe('MultiHeadAttention', () => {
    it('should compute with multiple heads', () => {
      const attention = new MultiHeadAttention(128, 8);

      const query = new Float32Array(128).fill(1);
      const keys = Array.from({ length: 100 }, () =>
        Array.from({ length: 128 }, () => 0.5)
      );
      const values = Array.from({ length: 100 }, () =>
        Array.from({ length: 128 }, () => 0.75)
      );

      const result = attention.forward(query, keys, values);

      expect(result).toHaveLength(128);
    });
  });

  describe('FlashAttention', () => {
    it('should handle large inputs efficiently', async () => {
      const attention = new FlashAttention(128, 256);

      const query = new Float32Array(128).fill(1);
      const keys = Array.from({ length: 10000 }, () =>
        Array.from({ length: 128 }, () => 0.5)
      );
      const values = Array.from({ length: 10000 }, () =>
        Array.from({ length: 128 }, () => 0.75)
      );

      const start = Date.now();
      const result = await attention.forwardAsync(query, keys, values);
      const duration = Date.now() - start;

      expect(result).toHaveLength(128);
      expect(duration).toBeLessThan(100); // < 100ms
    });
  });

  describe('Performance', () => {
    it('should meet throughput targets', async () => {
      const attention = new ScaledDotProduct(128);

      const query = new Float32Array(128).fill(1);
      const keys = Array.from({ length: 100 }, () =>
        Array.from({ length: 128 }, () => 0.5)
      );
      const values = Array.from({ length: 100 }, () =>
        Array.from({ length: 128 }, () => 0.75)
      );

      const iterations = 1000;
      const start = Date.now();

      for (let i = 0; i < iterations; i++) {
        attention.forward(query, keys, values);
      }

      const duration = Date.now() - start;
      const qps = (iterations / duration) * 1000;

      expect(qps).toBeGreaterThan(10000); // > 10k QPS
    });

    it('should support concurrent async operations', async () => {
      const attention = new ScaledDotProduct(128);

      const query = new Float32Array(128).fill(1);
      const keys = Array.from({ length: 100 }, () =>
        Array.from({ length: 128 }, () => 0.5)
      );
      const values = Array.from({ length: 100 }, () =>
        Array.from({ length: 128 }, () => 0.75)
      );

      const promises = Array.from({ length: 100 }, () =>
        attention.forwardAsync(query, keys, values)
      );

      const results = await Promise.all(promises);

      expect(results).toHaveLength(100);
      expect(results.every(r => r.length === 128)).toBe(true);
    });
  });
});

8. Continuous Benchmarking

8.1 Daily Benchmark Job

# .github/workflows/daily-benchmarks.yml

name: Daily Performance Benchmarks

on:
  schedule:
    - cron: '0 2 * * *'  # 2 AM daily
  workflow_dispatch:  # Manual trigger

jobs:
  benchmark-rust:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]

    steps:
      - uses: actions/checkout@v3

      - name: Install Rust
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: stable

      - name: Run benchmarks
        run: |
          cargo bench --bench attention_benchmarks -- --save-baseline daily-${{ matrix.os }}

      - name: Upload results
        uses: actions/upload-artifact@v3
        with:
          name: benchmark-${{ matrix.os }}
          path: target/criterion/

      - name: Generate report
        run: |
          python3 scripts/generate_benchmark_report.py \
            --input target/criterion/ \
            --output benchmark-report-${{ matrix.os }}.md

      - name: Post to dashboard
        env:
          DASHBOARD_URL: ${{ secrets.BENCHMARK_DASHBOARD_URL }}
        run: |
          curl -X POST $DASHBOARD_URL/api/benchmarks \
            -H "Authorization: Bearer ${{ secrets.BENCHMARK_TOKEN }}" \
            -F "os=${{ matrix.os }}" \
            -F "data=@target/criterion/results.json"

  benchmark-wasm:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      - name: Install Rust + wasm-pack
        run: |
          curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh

      - name: Build WASM
        run: |
          wasm-pack build --target web

      - name: Setup Node.js
        uses: actions/setup-node@v3
        with:
          node-version: '18'

      - name: Install dependencies
        run: npm ci

      - name: Run WASM benchmarks
        run: |
          npm run bench:wasm

      - name: Upload results
        uses: actions/upload-artifact@v3
        with:
          name: benchmark-wasm
          path: bench-results/

  benchmark-memory:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      - name: Install Rust + Valgrind
        run: |
          sudo apt-get update
          sudo apt-get install -y valgrind
          rustup toolchain install nightly

      - name: Build with profiling
        run: |
          cargo +nightly build --release --features memory-profiling

      - name: Run memory profiling
        run: |
          valgrind --tool=massif \
            --massif-out-file=massif.out \
            target/release/deps/attention_benchmarks-*

      - name: Generate memory report
        run: |
          ms_print massif.out > memory-report.txt

      - name: Upload results
        uses: actions/upload-artifact@v3
        with:
          name: memory-profile
          path: |
            massif.out
            memory-report.txt

  generate-dashboard:
    needs: [benchmark-rust, benchmark-wasm, benchmark-memory]
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      - name: Download all artifacts
        uses: actions/download-artifact@v3

      - name: Generate dashboard
        run: |
          python3 scripts/generate_dashboard.py \
            --output docs/benchmarks/index.html

      - name: Deploy to GitHub Pages
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./docs/benchmarks

8.2 Benchmark Dashboard Generator

# scripts/generate_dashboard.py

import json
import sys
from pathlib import Path
from datetime import datetime
import matplotlib.pyplot as plt
import pandas as pd

def load_criterion_results(criterion_dir):
    """Load all Criterion benchmark results"""
    results = []

    for bench_dir in Path(criterion_dir).glob("*/"):
        bench_name = bench_dir.name

        estimates_file = bench_dir / "new" / "estimates.json"
        if estimates_file.exists():
            with open(estimates_file) as f:
                data = json.load(f)
                results.append({
                    'name': bench_name,
                    'mean': data['mean']['point_estimate'] / 1e6,  # Convert to ms
                    'median': data['median']['point_estimate'] / 1e6,
                    'std_dev': data['std_dev']['point_estimate'] / 1e6,
                })

    return pd.DataFrame(results)

def generate_latency_chart(df, output_path):
    """Generate latency comparison chart"""
    fig, ax = plt.subplots(figsize=(12, 6))

    # Filter and sort
    df_sorted = df.sort_values('median')

    ax.barh(df_sorted['name'], df_sorted['median'], xerr=df_sorted['std_dev'])
    ax.set_xlabel('Latency (ms)')
    ax.set_title('Attention Mechanism Latency Comparison')
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(output_path)
    plt.close()

def generate_html_dashboard(df, charts_dir, output_file):
    """Generate HTML dashboard"""
    html = f"""
<!DOCTYPE html>
<html>
<head>
    <title>RuVector Attention Benchmarks</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            margin: 20px;
            background-color: #f5f5f5;
        }}
        .container {{
            max-width: 1200px;
            margin: 0 auto;
            background-color: white;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }}
        h1 {{
            color: #333;
        }}
        table {{
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
        }}
        th, td {{
            padding: 12px;
            text-align: left;
            border-bottom: 1px solid #ddd;
        }}
        th {{
            background-color: #4CAF50;
            color: white;
        }}
        tr:hover {{
            background-color: #f5f5f5;
        }}
        .chart {{
            margin: 20px 0;
        }}
        .metric {{
            display: inline-block;
            margin: 10px 20px;
            padding: 15px;
            background-color: #e3f2fd;
            border-radius: 4px;
        }}
        .metric-value {{
            font-size: 24px;
            font-weight: bold;
            color: #1976d2;
        }}
        .metric-label {{
            font-size: 14px;
            color: #666;
        }}
    </style>
</head>
<body>
    <div class="container">
        <h1>RuVector Attention Benchmarks</h1>
        <p>Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>

        <h2>Summary Metrics</h2>
        <div class="metric">
            <div class="metric-value">{df['median'].median():.3f} ms</div>
            <div class="metric-label">Median Latency</div>
        </div>
        <div class="metric">
            <div class="metric-value">{df['median'].min():.3f} ms</div>
            <div class="metric-label">Best Latency</div>
        </div>
        <div class="metric">
            <div class="metric-value">{len(df)}</div>
            <div class="metric-label">Benchmarks</div>
        </div>

        <h2>Latency Chart</h2>
        <div class="chart">
            <img src="latency_chart.png" alt="Latency Chart" style="max-width: 100%;">
        </div>

        <h2>Detailed Results</h2>
        <table>
            <thead>
                <tr>
                    <th>Benchmark</th>
                    <th>Median (ms)</th>
                    <th>Mean (ms)</th>
                    <th>Std Dev (ms)</th>
                </tr>
            </thead>
            <tbody>
"""

    for _, row in df.iterrows():
        html += f"""
                <tr>
                    <td>{row['name']}</td>
                    <td>{row['median']:.4f}</td>
                    <td>{row['mean']:.4f}</td>
                    <td>{row['std_dev']:.4f}</td>
                </tr>
"""

    html += """
            </tbody>
        </table>
    </div>
</body>
</html>
"""

    with open(output_file, 'w') as f:
        f.write(html)

def main():
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--criterion-dir', default='target/criterion')
    parser.add_argument('--output', default='docs/benchmarks/index.html')

    args = parser.parse_args()

    # Load results
    df = load_criterion_results(args.criterion_dir)

    if df.empty:
        print("No benchmark results found")
        sys.exit(1)

    # Generate charts
    output_dir = Path(args.output).parent
    output_dir.mkdir(parents=True, exist_ok=True)

    generate_latency_chart(df, output_dir / 'latency_chart.png')

    # Generate dashboard
    generate_html_dashboard(df, output_dir, args.output)

    print(f"Dashboard generated: {args.output}")

if __name__ == '__main__':
    main()

9. Test Execution Plan

9.1 Development Workflow

# Local development testing
cargo test                          # Run all tests
cargo test --package ruvector-attention  # Package-specific tests
cargo test test_scaled_dot_product  # Specific test

# Run benchmarks locally
cargo bench                         # All benchmarks
cargo bench --bench attention_benchmarks  # Specific benchmark

# Run with coverage
cargo tarpaulin --out Html          # Generate HTML coverage report

# Platform-specific testing
wasm-pack test --headless --firefox  # WASM tests
npm test                             # NAPI-RS tests

9.2 CI/CD Integration

# .github/workflows/ci.yml (excerpt)

test:
  strategy:
    matrix:
      os: [ubuntu-latest, windows-latest, macos-latest]
      rust: [stable, nightly]

  steps:
    - name: Run tests
      run: cargo test --all-features

    - name: Run benchmarks
      if: matrix.rust == 'stable'
      run: cargo bench --no-run  # Just build, don't run

    - name: Coverage
      if: matrix.os == 'ubuntu-latest'
      run: cargo tarpaulin --out Lcov

    - name: Upload coverage
      uses: codecov/codecov-action@v3

10. Quality Gates

10.1 Pre-Merge Requirements

All PRs must pass:

  1. All unit tests pass
  2. All integration tests pass
  3. Code coverage ≥ 80% (critical paths ≥ 95%)
  4. No performance regressions > 10%
  5. Platform-specific tests pass (WASM + NAPI)
  6. Benchmarks complete without errors
  7. Memory profiling shows no leaks

10.2 Release Requirements

For releases:

  1. All pre-merge requirements
  2. Full benchmark suite passes
  3. Performance targets met (see Section 5)
  4. Cross-platform consistency verified
  5. Documentation updated
  6. Changelog updated
  7. Version bumped appropriately

11. Next Steps

  1. Implement Core Test Suite

    • Start with scaled dot-product tests
    • Add hyperbolic attention tests
    • Build graph attention tests
  2. Set Up Benchmarking

    • Configure Criterion
    • Define baseline benchmarks
    • Create comparison framework
  3. Platform Testing

    • Set up WASM test environment
    • Configure NAPI-RS tests
    • Verify cross-platform parity
  4. CI/CD Integration

    • Add test workflows
    • Set up benchmark automation
    • Configure performance tracking
  5. Documentation

    • Document test organization
    • Create benchmark interpretation guide
    • Write troubleshooting guides

Appendix A: Test Data Generation

// tests/common/test_data.rs

use ndarray::{Array1, Array2};
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;

pub fn generate_random_vector(dim: usize, seed: u64) -> Array1<f32> {
    let mut rng = ChaCha8Rng::seed_from_u64(seed);
    Array1::from_vec(
        (0..dim).map(|_| rng.gen_range(-1.0..1.0)).collect()
    )
}

pub fn generate_random_vectors(n: usize, dim: usize, seed: u64) -> Array2<f32> {
    let mut rng = ChaCha8Rng::seed_from_u64(seed);
    Array2::from_shape_vec(
        (n, dim),
        (0..n * dim).map(|_| rng.gen_range(-1.0..1.0)).collect(),
    ).unwrap()
}

pub fn generate_cluster_vector(dim: usize, cluster: usize, idx: usize) -> Array1<f32> {
    let mut rng = ChaCha8Rng::seed_from_u64((cluster * 10000 + idx) as u64);
    let center = Array1::from_elem(dim, cluster as f32 / 10.0);
    let noise = Array1::from_vec(
        (0..dim).map(|_| rng.gen_range(-0.1..0.1)).collect()
    );
    center + noise
}

pub fn generate_orthogonal_vectors(dim: usize, n: usize) -> Array2<f32> {
    // Generate approximately orthogonal vectors using Gram-Schmidt
    let mut vectors = Array2::zeros((n, dim));
    let mut rng = ChaCha8Rng::seed_from_u64(42);

    for i in 0..n {
        let mut v = Array1::from_vec(
            (0..dim).map(|_| rng.gen_range(-1.0..1.0)).collect()
        );

        // Orthogonalize against previous vectors
        for j in 0..i {
            let prev = vectors.row(j);
            let projection = v.dot(&prev) / prev.dot(&prev);
            v = v - projection * &prev.to_owned();
        }

        // Normalize
        let norm = v.dot(&v).sqrt();
        if norm > 1e-6 {
            v = v / norm;
        }

        vectors.row_mut(i).assign(&v);
    }

    vectors
}

Appendix B: Performance Monitoring

// tests/common/perf_monitor.rs

use std::time::{Duration, Instant};

pub struct PerformanceMonitor {
    samples: Vec<Duration>,
}

impl PerformanceMonitor {
    pub fn new() -> Self {
        Self { samples: Vec::new() }
    }

    pub fn measure<F, R>(&mut self, f: F) -> R
    where
        F: FnOnce() -> R,
    {
        let start = Instant::now();
        let result = f();
        let duration = start.elapsed();
        self.samples.push(duration);
        result
    }

    pub fn p50(&self) -> Duration {
        self.percentile(0.5)
    }

    pub fn p99(&self) -> Duration {
        self.percentile(0.99)
    }

    pub fn mean(&self) -> Duration {
        let total: Duration = self.samples.iter().sum();
        total / self.samples.len() as u32
    }

    fn percentile(&self, p: f64) -> Duration {
        let mut sorted = self.samples.clone();
        sorted.sort();
        let idx = ((sorted.len() as f64) * p) as usize;
        sorted[idx.min(sorted.len() - 1)]
    }
}

End of Document