Files
wifi-densepose/docs/research/latent-space/implementation-plans/agents/12-integration-tests.md
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

50 KiB

# Agent 12: Integration Tests

## Overview

Comprehensive integration test suite for GNN latent space attention mechanisms, covering integration with ruvector-gnn, ruvector-core, cross-platform consistency, and end-to-end workflows.

## 1. Integration with ruvector-gnn

### 1.1 Attention as GNN Layer

```rust
// tests/integration/gnn_attention_layer.rs

use ruvector_gnn::{GNNModel, GraphBatch, LayerType};
use ruvector_latent::attention::{AttentionConfig, AttentionLayer, AttentionType};
use ndarray::{Array2, Array3};

#[test]
fn test_attention_as_gnn_message_passing() {
    // Attention layer acting as a GNN message-passing step: each node
    // attends over its neighbors and aggregates their features.
    // Setup: Create graph with 10 nodes, 5 neighbors each
    let num_nodes = 10;
    let num_neighbors = 5;
    let embed_dim = 64;

    let graph = create_test_graph(num_nodes, num_neighbors);

    // Create attention layer as GNN message passing
    let config = AttentionConfig {
        num_heads: 4,
        embed_dim,
        dropout: 0.1,
        attention_type: AttentionType::GraphAttention,
    };
    // Read this out before `config` is moved into `AttentionLayer::new`;
    // the original accessed `config.num_heads` after the move, which does
    // not compile (unless AttentionConfig happens to be Copy).
    let num_heads = config.num_heads;

    let mut attention_layer = AttentionLayer::new(config);

    // Node features, uniform in [-1, 1)
    let node_features = Array2::<f32>::random((num_nodes, embed_dim), rand::distributions::Uniform::new(-1.0, 1.0));

    // Edge indices for graph connectivity
    let edge_index = graph.edge_index();

    // Forward pass: Attention over neighbors
    let output = attention_layer.forward_graph(&node_features, &edge_index);

    // Output keeps the [num_nodes, embed_dim] shape and contains no NaNs.
    assert_eq!(output.shape(), &[num_nodes, embed_dim]);
    assert!(!output.iter().any(|&x| x.is_nan()));

    // Verify message aggregation: output should differ from input
    let difference = (&output - &node_features).mapv(|x| x.abs()).sum();
    assert!(difference > 0.01, "Attention should modify node features");

    // Per-head, per-node neighbor weights come out of a softmax, so each
    // row should sum to 1.
    // NOTE(review): the `s!` macro needs `use ndarray::s;` — confirm it is
    // brought into scope by this test module's imports.
    let attention_weights = attention_layer.get_attention_weights();
    for head in 0..num_heads {
        for node in 0..num_nodes {
            let weights_sum: f32 = attention_weights
                .slice(s![head, node, ..num_neighbors])
                .sum();
            assert!((weights_sum - 1.0).abs() < 1e-5,
                "Attention weights should sum to 1, got {}", weights_sum);
        }
    }
}

#[test]
fn test_attention_gnn_multi_layer_stack() {
    // Push node features through a stack of graph-attention layers and
    // check shape/finiteness after every layer.
    let num_nodes = 20;
    let num_neighbors = 8;
    let dims = vec![64, 128, 256, 128, 64];

    let graph = create_test_graph(num_nodes, num_neighbors);
    let edge_index = graph.edge_index();

    // First two layers use 4 heads, deeper layers use 8.
    let mut stack: Vec<_> = dims
        .iter()
        .enumerate()
        .map(|(idx, &dim)| {
            AttentionLayer::new(AttentionConfig {
                num_heads: if idx < 2 { 4 } else { 8 },
                embed_dim: dim,
                dropout: 0.1,
                attention_type: AttentionType::GraphAttention,
            })
        })
        .collect();

    // Initial features, uniform in [-1, 1).
    let mut features = Array2::<f32>::random(
        (num_nodes, dims[0]),
        rand::distributions::Uniform::new(-1.0, 1.0)
    );

    // Forward pass through the whole stack.
    for (idx, layer) in stack.iter_mut().enumerate() {
        features = layer.forward_graph(&features, &edge_index);

        // Each layer must emit [num_nodes, dims[idx]] with no NaNs.
        assert_eq!(features.shape(), &[num_nodes, dims[idx]]);
        assert!(!features.iter().any(|&x| x.is_nan()));

        let lo = features.iter().cloned().fold(f32::INFINITY, f32::min);
        let hi = features.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
        println!("Layer {} output range: [{:.4}, {:.4}]", idx, lo, hi);
    }
}

#[test]
fn test_attention_heterogeneous_graph() {
    // Test attention on heterogeneous graphs (multiple node/edge types)
    let num_items = 50;
    let num_users = 30;
    let num_categories = 10;

    // Create bipartite user-item graph
    let graph = create_heterogeneous_graph(num_users, num_items, num_categories);

    let config = AttentionConfig {
        num_heads: 4,
        embed_dim: 64,
        dropout: 0.1,
        attention_type: AttentionType::GraphAttention,
    };

    let mut attention = AttentionLayer::new(config);

    // Separate embeddings for each node type
    let user_features = Array2::<f32>::random((num_users, 64), rand::distributions::Uniform::new(-1.0, 1.0));
    let item_features = Array2::<f32>::random((num_items, 64), rand::distributions::Uniform::new(-1.0, 1.0));

    // Process user-item interactions: users attend over connected items
    // along the bipartite edges.
    let user_embeddings = attention.forward_heterogeneous(
        &user_features,
        &item_features,
        &graph.user_item_edges()
    );

    // Cross-type attention yields one embedding per *user* node; result
    // must be NaN-free.
    assert_eq!(user_embeddings.shape(), &[num_users, 64]);
    assert!(!user_embeddings.iter().any(|&x| x.is_nan()));
}

```

### 1.2 Training Loop Integration

```rust
// tests/integration/gnn_training.rs

use ruvector_gnn::{GNNTrainer, TrainingConfig, Loss};
use ruvector_latent::attention::{AttentionConfig, AttentionLayer};

#[test]
fn test_attention_backward_pass_integration() {
    // End-to-end training loop: forward, MSE loss, backward, Adam step.
    let config = AttentionConfig {
        num_heads: 4,
        embed_dim: 64,
        dropout: 0.1,
        attention_type: AttentionType::MultiHead,
    };

    let mut attention = AttentionLayer::new(config);
    // Adam keeps per-parameter moment buffers, so `step` mutates the
    // optimizer; the original bound it immutably, which fails to compile
    // if `step` takes `&mut self`.
    let mut optimizer = Adam::new(0.001);

    let batch_size = 32;
    let seq_len = 50;
    let embed_dim = 64;

    // Create synthetic training data, uniform in [-1, 1)
    let input = Array3::<f32>::random(
        (batch_size, seq_len, embed_dim),
        rand::distributions::Uniform::new(-1.0, 1.0)
    );
    let target = Array3::<f32>::random(
        (batch_size, seq_len, embed_dim),
        rand::distributions::Uniform::new(-1.0, 1.0)
    );

    // Training loop
    let mut losses = Vec::new();
    for epoch in 0..100 {
        // Forward pass
        let output = attention.forward(&input);

        // Compute loss
        let loss = mse_loss(&output, &target);
        losses.push(loss);

        // Backward pass
        let grad_output = compute_gradient(&output, &target);
        let grad_input = attention.backward(&grad_output);

        // Gradient w.r.t. the input mirrors the input's shape and is finite.
        assert_eq!(grad_input.shape(), input.shape());
        assert!(!grad_input.iter().any(|&x| x.is_nan()));

        // Update weights.
        // NOTE(review): this mutably borrows `attention.parameters()` while
        // also calling `attention.gradients()` — confirm the API allows
        // this (e.g. `gradients()` returning an owned snapshot).
        optimizer.step(&mut attention.parameters(), &attention.gradients());

        if epoch % 20 == 0 {
            println!("Epoch {}: Loss = {:.6}", epoch, loss);
        }
    }

    // Overfitting a fixed batch should at least halve the loss.
    let initial_loss = losses[0];
    let final_loss = losses[losses.len() - 1];
    assert!(final_loss < initial_loss * 0.5,
        "Training should reduce loss by at least 50%: {:.6} -> {:.6}",
        initial_loss, final_loss);
}

#[test]
fn test_attention_gradient_flow() {
    // Test gradient flow through attention mechanism: gradients must reach
    // every parameter group and stay in a sane magnitude range.
    let config = AttentionConfig {
        num_heads: 8,
        embed_dim: 128,
        dropout: 0.0, // No dropout for gradient testing
        attention_type: AttentionType::MultiHead,
    };

    let mut attention = AttentionLayer::new(config);

    let batch_size = 16;
    let seq_len = 32;
    let embed_dim = 128;

    let input = Array3::<f32>::random(
        (batch_size, seq_len, embed_dim),
        rand::distributions::Uniform::new(-0.5, 0.5)
    );

    // Forward pass
    let output = attention.forward(&input);

    // Create synthetic gradient of ones, shaped from the actual output.
    // (The original rebuilt the shape by hand and left `output` unused;
    // deriving it from `output.raw_dim()` ties the two together.)
    let grad_output = Array3::<f32>::ones(output.raw_dim());

    // Backward pass
    let grad_input = attention.backward(&grad_output);

    // Verify gradients exist (are non-trivially non-zero) for all four
    // projection matrices.
    let param_grads = attention.gradients();
    assert!(param_grads.query_weights.iter().any(|&x| x.abs() > 1e-8));
    assert!(param_grads.key_weights.iter().any(|&x| x.abs() > 1e-8));
    assert!(param_grads.value_weights.iter().any(|&x| x.abs() > 1e-8));
    assert!(param_grads.output_weights.iter().any(|&x| x.abs() > 1e-8));

    // Check for exploding/vanishing gradients via the L2-norm ratio of
    // input gradient to input.
    let grad_norm = grad_input.mapv(|x| x * x).sum().sqrt();
    let input_norm = input.mapv(|x| x * x).sum().sqrt();
    let grad_ratio = grad_norm / input_norm;

    assert!(grad_ratio > 0.01 && grad_ratio < 100.0,
        "Gradient ratio should be reasonable: {:.4}", grad_ratio);
}

#[test]
fn test_attention_with_graph_convolution() {
    // Integration test: Attention sandwiched between two GCN layers.
    let num_nodes = 100;
    let num_edges = 500;
    let embed_dim = 64;

    let graph = create_random_graph(num_nodes, num_edges);

    // Build hybrid model: GCN -> Attention -> GCN.
    // (`mut` removed: the model is moved into the trainer and never
    // mutated locally, so the original binding triggered an unused-mut
    // warning.)
    let model = GNNModel::new(vec![
        LayerType::GraphConv { in_dim: embed_dim, out_dim: 64 },
        LayerType::Attention(AttentionConfig {
            num_heads: 4,
            embed_dim: 64,
            dropout: 0.1,
            attention_type: AttentionType::GraphAttention,
        }),
        LayerType::GraphConv { in_dim: 64, out_dim: 32 },
    ]);

    let optimizer = Adam::new(0.001);
    let trainer = GNNTrainer::new(model, optimizer);

    // Training data: random features and random 5-class labels.
    let node_features = Array2::<f32>::random(
        (num_nodes, embed_dim),
        rand::distributions::Uniform::new(-1.0, 1.0)
    );
    let labels = Array1::<usize>::from_vec(
        (0..num_nodes).map(|_| rand::random::<usize>() % 5).collect()
    );

    // Train for 50 epochs.
    // NOTE(review): if `fit` takes `&mut self`, `trainer` also needs `mut`
    // — confirm against the GNNTrainer API.
    let history = trainer.fit(
        &graph,
        &node_features,
        &labels,
        TrainingConfig {
            epochs: 50,
            batch_size: 32,
            validation_split: 0.2,
        }
    );

    // Verify training progress: loss decreased and accuracy beats 60%.
    assert!(history.train_loss[0] > history.train_loss[49]);
    assert!(history.train_accuracy[49] > 0.6);

    println!("Final accuracy: {:.2}%", history.train_accuracy[49] * 100.0);
}

```

### 1.3 Backward Pass Verification

```rust
// tests/integration/gradient_verification.rs

use approx::assert_relative_eq;

#[test]
fn test_attention_numerical_gradients() {
    // Verify analytical gradients against numerical gradients
    // (central finite differences, O(epsilon^2) accurate). Dropout must be
    // 0.0 or the two forward passes per perturbation would not be
    // comparable.
    let config = AttentionConfig {
        num_heads: 2,
        embed_dim: 32,
        dropout: 0.0,
        attention_type: AttentionType::MultiHead,
    };

    let mut attention = AttentionLayer::new(config);

    // Deliberately tiny problem: the numerical check below runs two full
    // forward passes per input element (batch * seq * dim of them).
    let batch_size = 4;
    let seq_len = 8;
    let embed_dim = 32;

    let input = Array3::<f32>::random(
        (batch_size, seq_len, embed_dim),
        rand::distributions::Uniform::new(-0.5, 0.5)
    );

    // Analytical gradients of loss = sum(output) w.r.t. the input
    // (grad_output of all ones corresponds to that loss).
    let output = attention.forward(&input);
    let grad_output = Array3::<f32>::ones(output.raw_dim());
    let analytical_grad = attention.backward(&grad_output);

    // Numerical gradients (finite differences)
    let epsilon = 1e-4;
    let mut numerical_grad = Array3::<f32>::zeros(input.raw_dim());

    for i in 0..batch_size {
        for j in 0..seq_len {
            for k in 0..embed_dim {
                // f(x + epsilon)
                let mut input_plus = input.clone();
                input_plus[[i, j, k]] += epsilon;
                let output_plus = attention.forward(&input_plus);
                let loss_plus = output_plus.sum();

                // f(x - epsilon)
                let mut input_minus = input.clone();
                input_minus[[i, j, k]] -= epsilon;
                let output_minus = attention.forward(&input_minus);
                let loss_minus = output_minus.sum();

                // Numerical gradient (central difference)
                numerical_grad[[i, j, k]] = (loss_plus - loss_minus) / (2.0 * epsilon);
            }
        }
    }

    // Compare analytical and numerical gradients element-wise, with both
    // an absolute floor (for near-zero gradients) and a 1% relative bound.
    for i in 0..batch_size {
        for j in 0..seq_len {
            for k in 0..embed_dim {
                assert_relative_eq!(
                    analytical_grad[[i, j, k]],
                    numerical_grad[[i, j, k]],
                    epsilon = 1e-3,
                    max_relative = 0.01
                );
            }
        }
    }
}

#[test]
fn test_attention_gradient_accumulation() {
    // Test gradient accumulation across mini-batches
    let config = AttentionConfig {
        num_heads: 4,
        embed_dim: 64,
        dropout: 0.1,
        attention_type: AttentionType::MultiHead,
    };

    let mut attention = AttentionLayer::new(config);
    let accumulation_steps = 4;

    for step in 0..accumulation_steps {
        // Fresh random mini-batch per accumulation step.
        let input = Array3::<f32>::random(
            (8, 16, 64),
            rand::distributions::Uniform::new(-1.0, 1.0)
        );

        let output = attention.forward(&input);
        let grad_output = Array3::<f32>::ones(output.raw_dim());
        let _ = attention.backward(&grad_output);

        // Don't zero gradients until accumulation is done
        // NOTE(review): `accumulate_gradients` is invoked after every step
        // *except the last* — confirm this matches the layer's contract
        // (i.e. whether `backward` overwrites or adds to stored gradients).
        if step < accumulation_steps - 1 {
            attention.accumulate_gradients();
        }
    }

    // Verify accumulated gradients are non-zero via the query-weight
    // gradient's L2 norm.
    let grads = attention.gradients();
    let total_grad_norm = grads.query_weights.mapv(|x| x * x).sum().sqrt();

    assert!(total_grad_norm > 0.0);
    println!("Accumulated gradient norm: {:.6}", total_grad_norm);
}

```

## 2. Integration with ruvector-core

### 2.1 HNSW with Attention-Guided Search

```rust
// tests/integration/hnsw_attention.rs

use ruvector_core::{HNSWIndex, IndexConfig, SearchConfig};
use ruvector_latent::attention::{AttentionConfig, AttentionGuidedSearch};

#[test]
fn test_hnsw_attention_search_integration() {
    // Compare plain HNSW search against attention-guided search on the
    // same index, and check attention does not degrade result quality.
    let dim = 128;
    let num_vectors = 10000;

    // Build HNSW index
    // NOTE(review): `DistanceType` is not in this snippet's `use` list —
    // confirm it is re-exported by ruvector_core.
    let index_config = IndexConfig {
        m: 16,
        ef_construction: 200,
        max_elements: num_vectors,
        distance_type: DistanceType::Cosine,
    };

    let mut index = HNSWIndex::new(dim, index_config);

    // Add vectors drawn from 20 synthetic clusters.
    let vectors = generate_clustered_vectors(num_vectors, dim, 20);
    for (i, vec) in vectors.iter().enumerate() {
        index.add_vector(i, vec);
    }

    // Create attention-guided search
    let attention_config = AttentionConfig {
        num_heads: 8,
        embed_dim: dim,
        dropout: 0.0,
        attention_type: AttentionType::CrossAttention,
    };

    let attention_search = AttentionGuidedSearch::new(attention_config);

    // Query vector
    let query = Array1::<f32>::random(dim, rand::distributions::Uniform::new(-1.0, 1.0));

    // Standard HNSW search
    let search_config = SearchConfig { ef: 50, k: 10 };
    let standard_results = index.search(&query, search_config);

    // Attention-guided search (needs the raw vectors to attend over)
    let attention_results = attention_search.search(
        &index,
        &query,
        search_config,
        &vectors
    );

    // Compare results
    println!("Standard HNSW results:");
    for (i, (id, dist)) in standard_results.iter().enumerate() {
        println!("  {}: ID={}, dist={:.6}", i, id, dist);
    }

    println!("\nAttention-guided results:");
    for (i, (id, dist)) in attention_results.iter().enumerate() {
        println!("  {}: ID={}, dist={:.6}", i, id, dist);
    }

    // Verify attention improves relevance: its mean distance may be at
    // most 10% worse than plain HNSW.
    let attention_avg_dist: f32 = attention_results.iter()
        .map(|(_, d)| d)
        .sum::<f32>() / attention_results.len() as f32;

    let standard_avg_dist: f32 = standard_results.iter()
        .map(|(_, d)| d)
        .sum::<f32>() / standard_results.len() as f32;

    assert!(attention_avg_dist <= standard_avg_dist * 1.1,
        "Attention should not significantly degrade distance metrics");
}

#[test]
fn test_attention_reranking() {
    // Test using attention for result reranking: two-stage retrieval
    // (fast approximate HNSW, then precise attention rerank).
    let dim = 64;
    let num_vectors = 5000;
    let k = 100; // Retrieve more candidates
    let top_k = 10; // Final top results

    let mut index = HNSWIndex::new(dim, IndexConfig::default());
    let vectors = generate_semantic_vectors(num_vectors, dim);

    for (i, vec) in vectors.iter().enumerate() {
        index.add_vector(i, vec);
    }

    let query = Array1::<f32>::random(dim, rand::distributions::Uniform::new(-1.0, 1.0));

    // Stage 1: HNSW retrieval (fast, approximate)
    let candidates = index.search(&query, SearchConfig { ef: 100, k });

    // Stage 2: Attention reranking (precise, contextual)
    let attention_config = AttentionConfig {
        num_heads: 4,
        embed_dim: dim,
        dropout: 0.0,
        attention_type: AttentionType::CrossAttention,
    };

    // NOTE(review): `AttentionReranker` is not in this snippet's `use`
    // list (only AttentionGuidedSearch is imported) — verify the path.
    let reranker = AttentionReranker::new(attention_config);

    // Materialize candidate vectors in candidate order for the reranker.
    let candidate_vectors: Vec<_> = candidates.iter()
        .map(|(id, _)| vectors[*id].clone())
        .collect();

    let reranked = reranker.rerank(&query, &candidate_vectors, top_k);

    // Verify reranking changes order.
    // NOTE(review): this can flake if the reranker happens to agree with
    // HNSW on the top-k order for this random query.
    let order_changed = reranked.iter()
        .zip(candidates.iter().take(top_k))
        .any(|(r, c)| r.0 != c.0);

    assert!(order_changed, "Reranking should change result order");

    println!("Top-{} after reranking:", top_k);
    for (i, (id, score)) in reranked.iter().enumerate() {
        println!("  {}: ID={}, score={:.6}", i, id, score);
    }
}

#[test]
fn test_attention_guided_graph_traversal() {
    // Attention directing the HNSW graph traversal: the guided search
    // should visit no more nodes than the standard search.
    let dim = 128;
    let num_vectors = 20000;

    let mut index = HNSWIndex::new(dim, IndexConfig {
        m: 32,
        ef_construction: 200,
        max_elements: num_vectors,
        distance_type: DistanceType::Euclidean,
    });

    let vectors = generate_hierarchical_vectors(num_vectors, dim);
    for (i, vec) in vectors.iter().enumerate() {
        index.add_vector(i, vec);
    }

    let attention_config = AttentionConfig {
        num_heads: 8,
        embed_dim: dim,
        dropout: 0.0,
        attention_type: AttentionType::GraphAttention,
    };

    let mut guided_search = AttentionGuidedHNSW::new(attention_config);

    // Query with a vector that is known to be in the index.
    let query = vectors[0].clone();

    // Track the traversal path; only the stats are asserted on, so the
    // result list is intentionally discarded (`_results` silences the
    // unused-variable warning the original produced).
    let (_results, traversal_stats) = guided_search.search_with_stats(
        &index,
        &query,
        SearchConfig { ef: 50, k: 10 },
        &vectors
    );

    println!("Traversal statistics:");
    println!("  Nodes visited: {}", traversal_stats.nodes_visited);
    println!("  Distance computations: {}", traversal_stats.distance_computations);
    println!("  Attention computations: {}", traversal_stats.attention_computations);

    // Attention should reduce (or at least not increase) nodes visited.
    let standard_stats = index.search_with_stats(&query, SearchConfig { ef: 50, k: 10 });

    assert!(traversal_stats.nodes_visited <= standard_stats.nodes_visited,
        "Attention-guided search should visit fewer nodes");
}

```

### 2.2 Learned Distance Metrics

```rust
// tests/integration/learned_metrics.rs

use ruvector_core::{DistanceMetric, HNSWIndex};
use ruvector_latent::metrics::{LearnedMetric, MetricTrainer};

#[test]
fn test_learned_metric_integration() {
    // Train a small MLP distance metric on labeled pairs, plug it into an
    // HNSW index, and inspect how search results align with the labels.
    let dim = 64;
    let num_vectors = 5000;

    // Generate training data with semantic relationships
    let (vectors, similarity_labels) = generate_labeled_vectors(num_vectors, dim);

    // Train learned metric
    let metric_config = LearnedMetricConfig {
        input_dim: dim,
        hidden_dims: vec![128, 64, 32],
        output_dim: 1,
        learning_rate: 0.001,
    };

    let mut trainer = MetricTrainer::new(metric_config);

    // Training loop
    for epoch in 0..100 {
        let loss = trainer.train_epoch(&vectors, &similarity_labels);

        if epoch % 20 == 0 {
            println!("Epoch {}: Loss = {:.6}", epoch, loss);
        }
    }

    let learned_metric = trainer.get_metric();

    // Create HNSW index with learned metric (boxed trait object).
    let mut index = HNSWIndex::new_with_metric(
        dim,
        IndexConfig::default(),
        Box::new(learned_metric)
    );

    for (i, vec) in vectors.iter().enumerate() {
        index.add_vector(i, vec);
    }

    // Test search with learned metric
    let query = vectors[0].clone();
    let results = index.search(&query, SearchConfig { ef: 50, k: 10 });

    // Verify results respect learned similarity.
    // NOTE(review): this only prints — no assertion; `(0, *id)` assumes
    // similarity labels are keyed by (query_id, result_id) with query 0.
    for (rank, (id, dist)) in results.iter().enumerate() {
        let true_similarity = similarity_labels.get(&(0, *id)).unwrap_or(&0.0);
        println!("Rank {}: ID={}, dist={:.6}, true_sim={:.4}",
            rank, id, dist, true_similarity);
    }
}

#[test]
fn test_metric_learning_with_triplet_loss() {
    // Train metric using triplet loss (anchor, positive, negative) and
    // verify the learned metric satisfies most triplet constraints.
    let dim = 128;
    let num_triplets = 10000;

    let triplets = generate_triplets(num_triplets, dim);

    let config = LearnedMetricConfig {
        input_dim: dim,
        hidden_dims: vec![256, 128, 64],
        output_dim: dim,
        learning_rate: 0.0001,
    };

    let mut trainer = MetricTrainer::new(config);

    for epoch in 0..200 {
        // Margin of 1.0. The original wrote `margin = 1.0`, which is
        // Python keyword-argument syntax — Rust has no named arguments,
        // so the value must be passed positionally.
        let loss = trainer.train_triplet_epoch(&triplets, 1.0);

        if epoch % 20 == 0 {
            println!("Epoch {}: Triplet Loss = {:.6}", epoch, loss);
        }
    }

    let metric = trainer.get_metric();

    // Verify triplet constraints on a 100-triplet sample: the anchor must
    // be closer to the positive than to the negative.
    let mut violations = 0;
    for (anchor, positive, negative) in triplets.iter().take(100) {
        let dist_pos = metric.distance(anchor, positive);
        let dist_neg = metric.distance(anchor, negative);

        if dist_pos >= dist_neg {
            violations += 1;
        }
    }

    let violation_rate = violations as f32 / 100.0;
    assert!(violation_rate < 0.1,
        "Triplet violation rate should be < 10%, got {:.2}%",
        violation_rate * 100.0);
}

#[test]
fn test_adaptive_metric_during_search() {
    // Test metric that adapts during search based on query
    let dim = 64;
    let num_vectors = 8000;

    let vectors = generate_multi_modal_vectors(num_vectors, dim);

    let adaptive_config = AdaptiveMetricConfig {
        base_dim: dim,
        num_modes: 5,
        attention_heads: 4,
    };

    let adaptive_metric = AdaptiveLearnedMetric::new(adaptive_config);

    // NOTE(review): the index receives a *clone* of the metric. Calling
    // `adapt_to_query` on the local `adaptive_metric` below only affects
    // the index if the clone shares internal state (e.g. via Arc) —
    // confirm, otherwise the adaptation is silently ignored by search.
    let mut index = HNSWIndex::new_with_metric(
        dim,
        IndexConfig::default(),
        Box::new(adaptive_metric.clone())
    );

    for (i, vec) in vectors.iter().enumerate() {
        index.add_vector(i, vec);
    }

    // Different queries should use different metric adaptations
    let query1 = vectors[0].clone(); // Mode 1
    let query2 = vectors[num_vectors / 2].clone(); // Mode 2

    // Search adapts metric based on query
    adaptive_metric.adapt_to_query(&query1);
    let results1 = index.search(&query1, SearchConfig { ef: 50, k: 10 });

    adaptive_metric.adapt_to_query(&query2);
    let results2 = index.search(&query2, SearchConfig { ef: 50, k: 10 });

    // Verify different adaptations produce different results: fewer than
    // 8 of the 10 result ids may overlap.
    let overlap = results1.iter()
        .filter(|(id1, _)| results2.iter().any(|(id2, _)| id1 == id2))
        .count();

    assert!(overlap < 8, "Different queries should produce different results");
}

```

## 3. Cross-Platform Consistency

### 3.1 Rust vs WASM Results

```rust
// tests/integration/cross_platform.rs

// Cross-platform parity tests: the WASM build must reproduce the native
// Rust attention outputs within tolerance, and stay numerically stable.
#[cfg(test)]
mod cross_platform_tests {
    use super::*;

    #[test]
    fn test_rust_wasm_attention_consistency() {
        let config = AttentionConfig {
            num_heads: 4,
            embed_dim: 64,
            dropout: 0.0, // Disable dropout for deterministic testing
            attention_type: AttentionType::MultiHead,
        };

        // Create identical models in Rust and WASM
        let mut rust_attention = AttentionLayer::new(config.clone());

        #[cfg(target_arch = "wasm32")]
        let mut wasm_attention = wasm::AttentionLayer::new(config);

        // Deterministic test input: a fixed sine pattern over the flat index,
        // so both implementations see byte-identical data.
        let input = Array3::<f32>::from_shape_fn((4, 16, 64), |(i, j, k)| {
            ((i * 16 * 64 + j * 64 + k) as f32 * 0.01).sin()
        });

        // Rust forward pass
        // NOTE(review): on non-wasm32 builds nothing consumes
        // `rust_output`, so this test is a no-op there (and warns).
        let rust_output = rust_attention.forward(&input);

        #[cfg(target_arch = "wasm32")]
        let wasm_output = {
            // Convert to WASM-compatible format
            let wasm_input = wasm::Array3::from_rust(&input);
            let output = wasm_attention.forward(&wasm_input);
            output.to_rust()
        };

        #[cfg(target_arch = "wasm32")]
        {
            // Compare outputs element-wise via max absolute difference.
            let max_diff = rust_output.iter()
                .zip(wasm_output.iter())
                .map(|(r, w)| (r - w).abs())
                .fold(0.0f32, f32::max);

            assert!(max_diff < 1e-5,
                "Rust and WASM outputs should match within tolerance, max diff: {}",
                max_diff);

            println!("Rust vs WASM max difference: {:.8}", max_diff);
        }
    }

    #[test]
    fn test_wasm_numerical_stability() {
        // Test WASM implementation for numerical stability
        // (whole body is wasm32-only; compiles to nothing elsewhere).
        #[cfg(target_arch = "wasm32")]
        {
            let config = AttentionConfig {
                num_heads: 8,
                embed_dim: 128,
                dropout: 0.0,
                attention_type: AttentionType::MultiHead,
            };

            let mut attention = wasm::AttentionLayer::new(config);

            // Test with extreme values: uniformly large, uniformly tiny,
            // and alternating large/tiny per feature dimension.
            let large_input = wasm::Array3::from_fn((2, 8, 128), |_| 100.0);
            let small_input = wasm::Array3::from_fn((2, 8, 128), |_| 0.001);
            let mixed_input = wasm::Array3::from_fn((2, 8, 128), |(i, j, k)| {
                if k % 2 == 0 { 100.0 } else { 0.001 }
            });

            let outputs = vec![
                attention.forward(&large_input),
                attention.forward(&small_input),
                attention.forward(&mixed_input),
            ];

            // Softmax over extreme logits must not overflow to NaN/Inf.
            for (i, output) in outputs.iter().enumerate() {
                assert!(!output.iter().any(|&x| x.is_nan()),
                    "Output {} contains NaN", i);
                assert!(!output.iter().any(|&x| x.is_infinite()),
                    "Output {} contains infinity", i);
            }
        }
    }
}

```

### 3.2 Rust vs NAPI Results

```typescript
// tests/integration/napi_consistency.test.ts

import { describe, it, expect } from 'vitest';
import { AttentionLayer, AttentionConfig, AttentionType } from '../../napi';

// Parity tests for the Node.js (NAPI) binding: outputs must match a
// precomputed Rust reference, stay consistent under concurrency, and
// survive Float32Array <-> native conversions without precision loss.
describe('NAPI Consistency Tests', () => {
  it('should match Rust implementation outputs', async () => {
    const config: AttentionConfig = {
      numHeads: 4,
      embedDim: 64,
      dropout: 0.0,
      attentionType: AttentionType.MultiHead,
    };

    const attention = new AttentionLayer(config);

    // Generate deterministic input (sine over the flat index, matching
    // the pattern used by the Rust reference generator)
    const batchSize = 4;
    const seqLen = 16;
    const embedDim = 64;

    const input = new Float32Array(batchSize * seqLen * embedDim);
    for (let i = 0; i < input.length; i++) {
      input[i] = Math.sin(i * 0.01);
    }

    // Forward pass through NAPI
    const napiOutput = attention.forward(input, [batchSize, seqLen, embedDim]);

    // Load reference Rust output (generated offline)
    // NOTE(review): `loadRustReference` is not imported in this file —
    // confirm where that helper lives.
    const rustOutput = await loadRustReference('attention_reference.bin');

    // Compare element-wise: track max and mean absolute difference
    let maxDiff = 0;
    let totalDiff = 0;

    for (let i = 0; i < napiOutput.length; i++) {
      const diff = Math.abs(napiOutput[i] - rustOutput[i]);
      maxDiff = Math.max(maxDiff, diff);
      totalDiff += diff;
    }

    const avgDiff = totalDiff / napiOutput.length;

    expect(maxDiff).toBeLessThan(1e-5);
    expect(avgDiff).toBeLessThan(1e-6);

    console.log(`NAPI vs Rust - Max diff: ${maxDiff}, Avg diff: ${avgDiff}`);
  });

  it('should handle concurrent requests consistently', async () => {
    const config: AttentionConfig = {
      numHeads: 8,
      embedDim: 128,
      dropout: 0.0,
      attentionType: AttentionType.MultiHead,
    };

    const attention = new AttentionLayer(config);

    // Create 100 deterministic pseudo-random inputs (sine-seeded per index)
    const inputs = Array.from({ length: 100 }, (_, i) => {
      const data = new Float32Array(4 * 16 * 128);
      for (let j = 0; j < data.length; j++) {
        data[j] = Math.sin((i * 1000 + j) * 0.01);
      }
      return data;
    });

    // Process sequentially first
    const sequentialOutputs = inputs.map(input =>
      attention.forward(input, [4, 16, 128])
    );

    // Process concurrently
    // NOTE(review): `forward` is synchronous here, so Promise.resolve
    // still executes the calls one at a time on the JS thread — this is
    // not true concurrency unless forward is async/offloaded.
    const concurrentOutputs = await Promise.all(
      inputs.map(input =>
        Promise.resolve(attention.forward(input, [4, 16, 128]))
      )
    );

    // Verify consistency: same input must yield bit-near-identical output
    // regardless of scheduling
    for (let i = 0; i < 100; i++) {
      const sequential = sequentialOutputs[i];
      const concurrent = concurrentOutputs[i];

      let maxDiff = 0;
      for (let j = 0; j < sequential.length; j++) {
        maxDiff = Math.max(maxDiff, Math.abs(sequential[j] - concurrent[j]));
      }

      expect(maxDiff).toBeLessThan(1e-7);
    }
  });

  it('should maintain precision across type conversions', () => {
    const config: AttentionConfig = {
      numHeads: 4,
      embedDim: 64,
      dropout: 0.0,
      attentionType: AttentionType.MultiHead,
    };

    const attention = new AttentionLayer(config);

    // Test with various numeric ranges
    const testCases = [
      { range: [0, 1], name: 'normalized' },
      { range: [-1, 1], name: 'centered' },
      { range: [-100, 100], name: 'large' },
      { range: [-0.001, 0.001], name: 'small' },
    ];

    for (const testCase of testCases) {
      const input = new Float32Array(4 * 16 * 64);
      const [min, max] = testCase.range;

      for (let i = 0; i < input.length; i++) {
        input[i] = min + (max - min) * Math.random();
      }

      const output = attention.forward(input, [4, 16, 64]);

      // Verify no precision loss symptoms: NaN, Inf, or a fully-zeroed
      // output would indicate a broken conversion path
      const hasNaN = output.some(x => isNaN(x));
      const hasInf = output.some(x => !isFinite(x));
      const allZeros = output.every(x => x === 0);

      expect(hasNaN).toBe(false);
      expect(hasInf).toBe(false);
      expect(allZeros).toBe(false);

      console.log(`${testCase.name}: output range [${Math.min(...output)}, ${Math.max(...output)}]`);
    }
  });
});

```

### 3.3 Numerical Tolerance Checks

```rust
// tests/integration/numerical_tolerance.rs

use approx::assert_relative_eq;

#[test]
fn test_cross_platform_numerical_tolerance() {
    // Define acceptable tolerances for each platform; WASM/NAPI are given
    // looser bounds than native builds.
    // NOTE(review): `HashMap` needs `use std::collections::HashMap;` —
    // confirm it is in this test module's imports.
    struct PlatformTolerance {
        absolute: f32,
        relative: f32,
    }

    let tolerances = HashMap::from([
        ("rust_x86_64", PlatformTolerance { absolute: 1e-7, relative: 1e-5 }),
        ("rust_aarch64", PlatformTolerance { absolute: 1e-6, relative: 1e-5 }),
        ("wasm32", PlatformTolerance { absolute: 1e-5, relative: 1e-4 }),
        ("napi", PlatformTolerance { absolute: 1e-5, relative: 1e-4 }),
    ]);

    let config = AttentionConfig {
        num_heads: 4,
        embed_dim: 64,
        dropout: 0.0,
        attention_type: AttentionType::MultiHead,
    };

    // Generate reference output on current platform from a deterministic
    // sine-pattern input.
    let mut attention = AttentionLayer::new(config.clone());
    let input = Array3::<f32>::from_shape_fn((4, 16, 64), |(i, j, k)| {
        ((i * 16 * 64 + j * 64 + k) as f32 * 0.01).sin()
    });

    let reference_output = attention.forward(&input);

    // Save reference for cross-platform comparison
    save_reference("attention_output_reference.bin", &reference_output);

    // Load outputs from other platforms (if available)
    let platform_outputs = load_platform_outputs("attention_output_*.bin");

    for (platform, output) in platform_outputs {
        // NOTE(review): `unwrap()` panics if a file for an unlisted
        // platform is present — consider skipping unknown platforms.
        let tolerance = tolerances.get(platform.as_str()).unwrap();

        let mut max_abs_diff = 0.0f32;
        let mut max_rel_diff = 0.0f32;

        for (ref_val, plat_val) in reference_output.iter().zip(output.iter()) {
            let abs_diff = (ref_val - plat_val).abs();
            // Fall back to absolute difference for near-zero reference
            // values to avoid dividing by ~0.
            let rel_diff = if ref_val.abs() > 1e-8 {
                abs_diff / ref_val.abs()
            } else {
                abs_diff
            };

            max_abs_diff = max_abs_diff.max(abs_diff);
            max_rel_diff = max_rel_diff.max(rel_diff);
        }

        assert!(max_abs_diff < tolerance.absolute,
            "{}: Absolute difference {} exceeds tolerance {}",
            platform, max_abs_diff, tolerance.absolute);

        assert!(max_rel_diff < tolerance.relative,
            "{}: Relative difference {} exceeds tolerance {}",
            platform, max_rel_diff, tolerance.relative);

        println!("{}: abs_diff={:.8}, rel_diff={:.8}",
            platform, max_abs_diff, max_rel_diff);
    }
}

#[test]
fn test_deterministic_execution() {
    // Verify same input produces same (bit-identical) output across runs.
    let config = AttentionConfig {
        num_heads: 4,
        embed_dim: 64,
        dropout: 0.0, // Critical: no dropout for determinism
        attention_type: AttentionType::MultiHead,
    };

    // Deterministic sine-pattern input shared by all runs.
    let input = Array3::<f32>::from_shape_fn((4, 16, 64), |(i, j, k)| {
        ((i * 16 * 64 + j * 64 + k) as f32 * 0.01).sin()
    });

    let mut outputs = Vec::new();

    // Run 10 times. (`_run` — the counter is unused inside the loop, and
    // the original binding produced an unused-variable warning.)
    // NOTE(review): bit-identical outputs across runs also require
    // AttentionLayer::new to initialize weights deterministically
    // (fixed seed) — confirm.
    for _run in 0..10 {
        let mut attention = AttentionLayer::new(config.clone());
        let output = attention.forward(&input);
        outputs.push(output);
    }

    // All outputs should be identical to the first run, element by element.
    let reference = &outputs[0];

    for (run, output) in outputs.iter().enumerate().skip(1) {
        for (i, (ref_val, out_val)) in reference.iter().zip(output.iter()).enumerate() {
            assert_eq!(ref_val, out_val,
                "Run {} differs at index {}: {} != {}",
                run, i, ref_val, out_val);
        }
    }
}

// Feed the attention layer a battery of special-valued tensors and verify
// the output never contains NaN or Inf.
#[test]
fn test_floating_point_edge_cases() {
    let config = AttentionConfig {
        num_heads: 2,
        embed_dim: 32,
        dropout: 0.0,
        attention_type: AttentionType::MultiHead,
    };

    let mut attention = AttentionLayer::new(config);

    // (label, input) pairs covering degenerate and extreme magnitudes.
    let cases = vec![
        ("zeros", Array3::<f32>::zeros((2, 8, 32))),
        ("ones", Array3::<f32>::ones((2, 8, 32))),
        ("large", Array3::<f32>::from_elem((2, 8, 32), 1e6)),
        ("small", Array3::<f32>::from_elem((2, 8, 32), 1e-6)),
        ("negative", Array3::<f32>::from_elem((2, 8, 32), -1.0)),
    ];

    for (name, input) in cases {
        let output = attention.forward(&input);

        // Check NaN and Inf separately so the failure message names the culprit.
        assert!(output.iter().all(|&x| !x.is_nan()),
            "{}: Output contains NaN", name);
        assert!(output.iter().all(|&x| !x.is_infinite()),
            "{}: Output contains Inf", name);

        // Report the observed output range for each case.
        let (mut lo, mut hi) = (f32::INFINITY, f32::NEG_INFINITY);
        for &x in output.iter() {
            lo = lo.min(x);
            hi = hi.max(x);
        }
        println!("{}: output range [{:.6}, {:.6}]", name, lo, hi);
    }
}

```

## 4. End-to-End Workflows

### 4.1 Index Building with Attention

```rust
// tests/integration/e2e_index_building.rs

// End-to-end pipeline: load embeddings, self-supervised-train an attention
// model on them, build an HNSW index over the attention-transformed vectors,
// then verify search recall against a brute-force baseline exceeds 0.9.
// NOTE(review): recall compares search over TRANSFORMED vectors against brute
// force on the ORIGINAL vectors — assumes the transform preserves
// neighborhoods; confirm this is the intended baseline.
#[test]
fn test_e2e_build_attention_enhanced_index() {
    // Complete workflow: data loading -> training -> index building

    println!("Step 1: Load dataset");
    let dataset = load_dataset("datasets/wikipedia_embeddings.bin");
    let num_vectors = dataset.len();
    let dim = dataset[0].len();
    println!("  Loaded {} vectors of dimension {}", num_vectors, dim);

    println!("\nStep 2: Train attention model");
    let attention_config = AttentionConfig {
        num_heads: 8,
        embed_dim: dim,
        dropout: 0.1,
        attention_type: AttentionType::GraphAttention,
    };

    let mut attention_model = AttentionLayer::new(attention_config);

    // Train on random batches
    let num_epochs = 50;
    let batch_size = 128;

    for epoch in 0..num_epochs {
        let mut epoch_loss = 0.0;
        // Integer division: any remainder vectors beyond num_batches * batch_size
        // are never visited, so the .min() clamp below can never actually fire.
        let num_batches = num_vectors / batch_size;

        for batch_idx in 0..num_batches {
            let start = batch_idx * batch_size;
            let end = (start + batch_size).min(num_vectors);

            let batch = stack_vectors(&dataset[start..end]);
            let output = attention_model.forward(&batch);

            // Self-supervised loss
            let loss = contrastive_loss(&output, &batch);
            epoch_loss += loss;

            // Manual backward + SGD-style update with fixed learning rate 0.001.
            let grad = compute_gradient(&output, &batch);
            attention_model.backward(&grad);
            attention_model.update_weights(0.001);
        }

        // Log mean batch loss every 10 epochs.
        if epoch % 10 == 0 {
            println!("  Epoch {}: Loss = {:.6}", epoch, epoch_loss / num_batches as f32);
        }
    }

    println!("\nStep 3: Build HNSW index with learned attention");

    // Transform vectors using trained attention (one vector per forward pass,
    // reshaped to a (1, dim) batch).
    let transformed_vectors: Vec<_> = dataset.iter()
        .map(|vec| {
            let input = Array2::from_shape_vec((1, dim), vec.clone()).unwrap();
            let output = attention_model.forward_single(&input);
            output.to_vec()
        })
        .collect();

    let mut index = HNSWIndex::new(dim, IndexConfig {
        m: 32,
        ef_construction: 200,
        max_elements: num_vectors,
        distance_type: DistanceType::Cosine,
    });

    for (i, vec) in transformed_vectors.iter().enumerate() {
        index.add_vector(i, vec);

        // Progress logging every 10k insertions.
        if i % 10000 == 0 {
            println!("  Indexed {} / {} vectors", i, num_vectors);
        }
    }

    println!("\nStep 4: Evaluate search quality");

    // Use the first 100 (transformed) dataset vectors as queries.
    let num_queries = 100;
    let k = 10;

    let mut recall_scores = Vec::new();

    for query_idx in 0..num_queries {
        let query = &transformed_vectors[query_idx];

        // Search in attention-enhanced index
        let results = index.search(query, SearchConfig { ef: 100, k });

        // Compare to ground truth (brute force on original vectors)
        let ground_truth = brute_force_search(&dataset[query_idx], &dataset, k);

        // Calculate recall
        let recall = calculate_recall(&results, &ground_truth);
        recall_scores.push(recall);
    }

    let avg_recall = recall_scores.iter().sum::<f32>() / recall_scores.len() as f32;

    println!("  Average Recall@{}: {:.4}", k, avg_recall);
    assert!(avg_recall > 0.9, "Recall should be > 0.9");

    println!("\nStep 5: Save index and model");
    index.save("output/attention_enhanced_index.hnsw");
    attention_model.save("output/attention_model.bin");

    println!("\n✓ End-to-end index building complete");
}

```

### 4.2 Search with Various Attention Types

```rust
// tests/integration/e2e_search.rs

// Compare search quality (recall, NDCG) and latency across attention variants
// against a no-attention HNSW baseline on a prebuilt index.
#[test]
fn test_e2e_multi_attention_search() {
    println!("=== Multi-Attention Search Comparison ===\n");

    // Load pre-built index and the vectors it was built from.
    let index = HNSWIndex::load("output/wikipedia_index.hnsw");
    let vectors = load_vectors("datasets/wikipedia_embeddings.bin");

    // Single probe query: the first dataset vector.
    let query = vectors[0].clone();
    let k = 20;

    // Define attention variants to test; None = plain HNSW baseline.
    let attention_types = vec![
        ("No Attention", None),
        ("Multi-Head", Some(AttentionType::MultiHead)),
        ("Graph", Some(AttentionType::GraphAttention)),
        ("Cross", Some(AttentionType::CrossAttention)),
        ("Sparse", Some(AttentionType::SparseAttention)),
        ("Hyperbolic", Some(AttentionType::Hyperbolic)),
        ("MoE", Some(AttentionType::MixtureOfExperts)),
    ];

    // Collected (name, recall, ndcg, latency) rows for the summary table.
    let mut results_comparison = Vec::new();

    for (name, attention_type) in attention_types {
        println!("Testing: {}", name);

        let start_time = Instant::now();

        let results = if let Some(attn_type) = attention_type {
            let config = AttentionConfig {
                num_heads: 8,
                embed_dim: vectors[0].len(),
                dropout: 0.0,
                attention_type: attn_type,
            };

            // Attention-guided search wraps the index traversal.
            let attention_search = AttentionGuidedSearch::new(config);
            attention_search.search(&index, &query, SearchConfig { ef: 100, k }, &vectors)
        } else {
            index.search(&query, SearchConfig { ef: 100, k })
        };

        let search_time = start_time.elapsed();

        // Calculate metrics against exhaustive ground truth.
        let ground_truth = brute_force_search(&query, &vectors, k);
        let recall = calculate_recall(&results, &ground_truth);
        let ndcg = calculate_ndcg(&results, &ground_truth);

        println!("  Recall@{}: {:.4}", k, recall);
        println!("  NDCG@{}: {:.4}", k, ndcg);
        println!("  Search time: {:.2}ms", search_time.as_millis());
        println!();

        results_comparison.push((name, recall, ndcg, search_time));
    }

    // Summary comparison table.
    println!("=== Summary ===");
    println!("{:<20} {:>10} {:>10} {:>12}", "Type", "Recall", "NDCG", "Time (ms)");
    println!("{:-<54}", "");

    for (name, recall, ndcg, time) in results_comparison {
        println!("{:<20} {:>10.4} {:>10.4} {:>12.2}",
            name, recall, ndcg, time.as_millis());
    }
}

// Three-stage hybrid retrieval: wide HNSW candidate fetch (500) ->
// cross-attention rerank (to 100) -> diversity filter (to 20).
#[test]
fn test_e2e_hybrid_search_pipeline() {
    // Multi-stage search pipeline
    println!("=== Hybrid Search Pipeline ===\n");

    let index = HNSWIndex::load("output/large_index.hnsw");
    let vectors = load_vectors("datasets/embeddings.bin");
    let query = vectors[0].clone();

    // Stage 1: recall-oriented retrieval with a generous ef/k budget.
    println!("Stage 1: Fast HNSW retrieval");
    let start = Instant::now();
    let candidates = index.search(&query, SearchConfig { ef: 500, k: 500 });
    println!("  Retrieved {} candidates in {:.2}ms",
        candidates.len(), start.elapsed().as_millis());

    // Stage 2: precision-oriented reranking over the candidate set only.
    println!("\nStage 2: Attention-based reranking");
    let rerank_config = AttentionConfig {
        num_heads: 16,
        embed_dim: vectors[0].len(),
        dropout: 0.0,
        attention_type: AttentionType::CrossAttention,
    };

    let start = Instant::now();
    let reranker = AttentionReranker::new(rerank_config);

    // Materialize candidate vectors by id for the reranker.
    let candidate_vectors: Vec<_> = candidates.iter()
        .map(|(id, _)| vectors[*id].clone())
        .collect();

    let reranked = reranker.rerank(&query, &candidate_vectors, 100);
    println!("  Reranked to {} results in {:.2}ms",
        reranked.len(), start.elapsed().as_millis());

    // Stage 3: keep 20 results with pairwise similarity capped at 0.7.
    // NOTE(review): assumes 0.7 is a similarity threshold — confirm against
    // diversity_filter's signature.
    println!("\nStage 3: Diversity filtering");
    let start = Instant::now();
    let diverse_results = diversity_filter(&reranked, &candidate_vectors, 20, 0.7);
    println!("  Filtered to {} diverse results in {:.2}ms",
        diverse_results.len(), start.elapsed().as_millis());

    // Print the top 10 final results (1-based rank).
    println!("\nFinal Results:");
    for (rank, (id, score)) in diverse_results.iter().take(10).enumerate() {
        println!("  {}: ID={}, score={:.6}", rank + 1, id, score);
    }
}

```

### 4.3 Training and Inference

```rust
// tests/integration/e2e_training_inference.rs

// Full model lifecycle: train with early stopping, export (ONNX / TorchScript /
// INT8), benchmark FP32 vs INT8 inference, then smoke-test a simulated
// production deployment against its latency SLA.
#[test]
fn test_e2e_attention_training_deployment() {
    println!("=== Attention Training & Deployment Pipeline ===\n");

    // Phase 1: Training
    println!("Phase 1: Training");

    let train_data = load_dataset("datasets/train.bin");
    let val_data = load_dataset("datasets/val.bin");

    let config = AttentionConfig {
        num_heads: 8,
        embed_dim: 256,
        dropout: 0.1,
        attention_type: AttentionType::MultiHead,
    };

    let mut model = AttentionLayer::new(config);
    // NOTE(review): optimizer is passed by shared reference to train_epoch —
    // assumes Adam keeps its moment state behind interior mutability; confirm.
    let optimizer = Adam::new(0.0001);

    let num_epochs = 100;
    let batch_size = 64;

    // Early-stopping bookkeeping: best validation loss so far, and how many
    // consecutive epochs have passed without improvement.
    let mut best_val_loss = f32::INFINITY;
    let mut patience = 0;
    let max_patience = 10;

    for epoch in 0..num_epochs {
        // Training
        let train_loss = train_epoch(&mut model, &train_data, batch_size, &optimizer);

        // Validation
        let val_loss = validate(&model, &val_data, batch_size);

        println!("Epoch {}: train_loss={:.6}, val_loss={:.6}",
            epoch, train_loss, val_loss);

        // Early stopping: checkpoint on improvement, otherwise burn patience.
        if val_loss < best_val_loss {
            best_val_loss = val_loss;
            patience = 0;
            model.save("checkpoints/best_model.bin");
        } else {
            patience += 1;
            if patience >= max_patience {
                println!("Early stopping at epoch {}", epoch);
                break;
            }
        }
    }

    // Phase 2: Export for deployment
    println!("\nPhase 2: Export");

    // Reload the best checkpoint — the in-memory model may be past it.
    model = AttentionLayer::load("checkpoints/best_model.bin");

    // Export to different formats
    println!("  Exporting to ONNX...");
    model.export_onnx("deploy/model.onnx");

    println!("  Exporting to TorchScript...");
    model.export_torchscript("deploy/model.pt");

    println!("  Exporting quantized version...");
    let quantized = model.quantize_int8();
    quantized.save("deploy/model_int8.bin");

    // Phase 3: Inference benchmarking
    println!("\nPhase 3: Inference Benchmarking");

    let test_data = load_dataset("datasets/test.bin");
    let test_queries = &test_data[..1000];

    // Benchmark full precision.
    let start = Instant::now();
    for query in test_queries {
        let _ = model.forward_single(query);
    }
    let fp32_time = start.elapsed();
    // Fix: average per-query latency in milliseconds. The previous
    // `as_micros() / 1000.0` yielded TOTAL milliseconds mislabeled as
    // per-query (it only coincided because there were exactly 1000 queries).
    let fp32_latency = fp32_time.as_secs_f32() * 1000.0 / test_queries.len() as f32;

    println!("  FP32 latency: {:.2}ms per query", fp32_latency);

    // Benchmark quantized.
    let start = Instant::now();
    for query in test_queries {
        let _ = quantized.forward_single(query);
    }
    let int8_time = start.elapsed();
    let int8_latency = int8_time.as_secs_f32() * 1000.0 / test_queries.len() as f32;

    println!("  INT8 latency: {:.2}ms per query", int8_latency);
    println!("  Speedup: {:.2}x", fp32_latency / int8_latency);

    // Accuracy comparison: cosine similarity between FP32 and INT8 outputs on
    // a 100-query sample (1.0 = directions identical).
    let mut accuracy_diffs = Vec::new();

    for query in test_queries.iter().take(100) {
        let fp32_output = model.forward_single(query);
        let int8_output = quantized.forward_single(query);

        let diff = compute_cosine_similarity(&fp32_output, &int8_output);
        accuracy_diffs.push(diff);
    }

    let avg_similarity = accuracy_diffs.iter().sum::<f32>() / accuracy_diffs.len() as f32;
    println!("  FP32 vs INT8 similarity: {:.6}", avg_similarity);

    // Phase 4: Production deployment
    println!("\nPhase 4: Production Deployment");

    println!("  Creating production config...");
    let prod_config = ProductionConfig {
        model_path: "deploy/model_int8.bin",
        batch_size: 128,
        num_threads: 8,
        use_gpu: false,
        max_latency_ms: 10.0,
    };
    prod_config.save("deploy/config.json");

    // Fix: capture the SLA bound before prod_config is moved into the server —
    // the original read prod_config.max_latency_ms after the move below.
    let max_latency_ms = prod_config.max_latency_ms;

    println!("  Setting up serving endpoint...");
    let server = AttentionServer::new(prod_config);

    // Simulate production load
    let num_requests = 10000;
    let concurrency = 100;

    println!("  Simulating {} requests with concurrency {}",
        num_requests, concurrency);

    let start = Instant::now();
    let latencies = simulate_production_load(&server, num_requests, concurrency);
    let total_time = start.elapsed();

    let throughput = num_requests as f32 / total_time.as_secs_f32();
    let p50 = percentile(&latencies, 0.5);
    let p95 = percentile(&latencies, 0.95);
    let p99 = percentile(&latencies, 0.99);

    println!("  Throughput: {:.2} req/s", throughput);
    println!("  Latency P50: {:.2}ms", p50);
    println!("  Latency P95: {:.2}ms", p95);
    println!("  Latency P99: {:.2}ms", p99);

    assert!(p99 < max_latency_ms,
        "P99 latency exceeds SLA: {:.2}ms", p99);

    println!("\n✓ Training and deployment pipeline complete");
}

```

## Test Utilities

```rust
// tests/integration/utils.rs

// Helper functions for integration tests

/// Build a small deterministic test graph: node `i` is connected to the next
/// `num_neighbors` nodes in ring order, i.e. `(i + 1) % n` .. `(i + k) % n`.
pub fn create_test_graph(num_nodes: usize, num_neighbors: usize) -> Graph {
    let edges: Vec<(usize, usize)> = (0..num_nodes)
        .flat_map(|src| {
            (0..num_neighbors).map(move |offset| (src, (src + offset + 1) % num_nodes))
        })
        .collect();

    Graph::from_edges(num_nodes, &edges)
}

/// Generate `num_vectors` points of dimension `dim`, grouped around
/// `num_clusters` uniformly random centers in [-1, 1]^dim with small
/// per-vector uniform noise in [-0.1, 0.1].
pub fn generate_clustered_vectors(
    num_vectors: usize,
    dim: usize,
    num_clusters: usize
) -> Vec<Array1<f32>> {
    let mut rng = rand::thread_rng();

    // Draw the cluster centers first.
    let centers: Vec<Array1<f32>> = (0..num_clusters)
        .map(|_| Array1::random(dim, Uniform::new(-1.0, 1.0)))
        .collect();

    // Each vector = a randomly chosen center plus noise.
    (0..num_vectors)
        .map(|_| {
            let center = &centers[rng.gen_range(0..num_clusters)];
            let noise: Array1<f32> = Array1::random(dim, Uniform::new(-0.1, 0.1));
            center + &noise
        })
        .collect()
}

/// Recall@k: the fraction of ground-truth ids that also appear in `results`.
/// Scores are ignored; membership is by id only (duplicates deduplicated).
pub fn calculate_recall(results: &[(usize, f32)], ground_truth: &[(usize, f32)]) -> f32 {
    let truth_ids: HashSet<usize> = ground_truth.iter().map(|&(id, _)| id).collect();

    let hits = results.iter()
        .map(|&(id, _)| id)
        .collect::<HashSet<usize>>()
        .intersection(&truth_ids)
        .count();

    hits as f32 / ground_truth.len() as f32
}

/// Simplified NDCG: ground-truth relevance decays as 1/(rank+1); result gains
/// are discounted by log2(rank+2) and normalized by the ideal (all-relevant)
/// discount sum.
///
/// Fixes a compile error in the original: `(rank + 2) as f32.log2()` does not
/// parse in Rust (the cast must be parenthesized before calling `.log2()`).
/// Returns 0.0 for empty `results` instead of 0/0 = NaN.
pub fn calculate_ndcg(results: &[(usize, f32)], ground_truth: &[(usize, f32)]) -> f32 {
    if results.is_empty() {
        return 0.0;
    }

    // id -> graded relevance, 1.0 for the top ground-truth hit, decaying by rank.
    let truth_map: HashMap<usize, f32> = ground_truth.iter()
        .enumerate()
        .map(|(rank, (id, _))| (*id, 1.0 / (rank + 1) as f32))
        .collect();

    // Discounted cumulative gain over the returned ranking.
    let dcg: f32 = results.iter()
        .enumerate()
        .map(|(rank, (id, _))| {
            let relevance = *truth_map.get(id).unwrap_or(&0.0);
            relevance / ((rank + 2) as f32).log2()
        })
        .sum();

    // Ideal DCG: maximum possible discount sum with relevance 1.0 everywhere.
    let idcg: f32 = (0..results.len())
        .map(|rank| 1.0 / ((rank + 2) as f32).log2())
        .sum();

    dcg / idcg
}

/// Mean squared error between two equally-shaped tensors.
///
/// Returns 0.0 for empty tensors instead of panicking — `mean()` yields
/// `None` on an array with no elements.
pub fn mse_loss(output: &Array3<f32>, target: &Array3<f32>) -> f32 {
    let diff = output - target;
    diff.mapv(|x| x * x).mean().unwrap_or(0.0)
}

/// Simplified batch contrastive loss: each sample's output is rewarded for
/// similarity to its own target (positive) and penalized for similarity to
/// every other sample's target (negatives), hinged at zero, averaged over
/// all (i, j != i) pairs.
///
/// Returns 0.0 for batches of fewer than two samples: the original divisor
/// `batch_size * (batch_size - 1)` underflows (usize) for an empty batch and
/// is zero for a single sample.
pub fn contrastive_loss(output: &Array3<f32>, target: &Array3<f32>) -> f32 {
    // Simplified contrastive loss
    let batch_size = output.shape()[0];

    // Guard: no negative pairs exist below batch size 2.
    if batch_size < 2 {
        return 0.0;
    }

    let mut loss = 0.0;

    for i in 0..batch_size {
        let out_i = output.slice(s![i, .., ..]);
        let target_i = target.slice(s![i, .., ..]);

        // Positive similarity: output vs its own target.
        let pos_sim = cosine_similarity(&out_i, &target_i);

        for j in 0..batch_size {
            if i != j {
                // Negative similarity: output vs another sample's target.
                let target_j = target.slice(s![j, .., ..]);
                let neg_sim = cosine_similarity(&out_i, &target_j);
                // Hinge: only accumulate when the margin is violated.
                loss += (1.0 - pos_sim + neg_sim).max(0.0);
            }
        }
    }

    // Average over the number of compared pairs.
    loss / (batch_size * (batch_size - 1)) as f32
}

```

## Summary

This integration test suite provides comprehensive coverage of:

1. **GNN Integration**: Attention as graph layers, training loops, gradient verification
2. **Core Integration**: HNSW search enhancement, learned metrics
3. **Platform Consistency**: Cross-platform numerical verification with defined tolerances
4. **E2E Workflows**: Complete pipelines from training to production deployment

**Testing Strategy**:
- Unit-level integration: Component interactions
- System-level integration: Full workflow testing
- Platform verification: Cross-platform consistency
- Performance validation: Benchmarking and SLA verification

**Key Metrics**:
- Recall@K > 0.9
- NDCG@K > 0.85
- Cross-platform tolerance < 1e-5
- P99 latency < 10ms (production)
- Training convergence within 100 epochs

**Execution**:
```bash
# Run all integration tests
cargo test --test integration -- --test-threads=1

# Run specific integration suites
cargo test --test integration::gnn_attention
cargo test --test integration::hnsw_attention
cargo test --test integration::cross_platform
cargo test --test integration::e2e_workflows

# Run with detailed output
cargo test --test integration -- --nocapture