Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions
--- a/crates/prime-radiant/tests/gpu_coherence_tests.rs
+++ b/crates/prime-radiant/tests/gpu_coherence_tests.rs
@@ -0,0 +1,520 @@
+//! GPU Coherence Engine Tests
+//!
+//! Comprehensive tests verifying GPU computation results match CPU results
+//! within floating-point tolerance. These tests ensure correctness of:
+//!
+//! - GPU buffer management and data transfer
+//! - Parallel residual computation
+//! - Energy aggregation with tree reduction
+//! - CPU fallback mechanism
+//!
+//! Run with: cargo test --features gpu
+
+#![cfg(feature = "gpu")]
+
+use prime_radiant::gpu::{
+    BufferUsage, GpuBuffer, GpuBufferManager, GpuCoherenceEngine, GpuConfig, GpuEdge, GpuError,
+    GpuParams, GpuRestrictionMap, GpuResult,
+};
+use prime_radiant::substrate::{
+    EdgeId, NodeId, SheafEdge, SheafEdgeBuilder, SheafGraph, SheafNode, SheafNodeBuilder,
+};
+use std::collections::HashMap;
+use uuid::Uuid;
+
+/// Absolute `f32` tolerance for comparing energy values (GPU vs CPU, or repeated GPU runs)
+const TOLERANCE: f32 = 1e-5;
+
+/// Build a three-node test graph with edges joining nodes (1, 2), (2, 3) and (3, 1).
+///
+/// Node states are `[1, 0, 0]`, `[0, 1, 0]` and `[0, 0, 1]`; every edge uses identity
+/// restrictions of dimension 3 with weight 1.0. Nodes and edges use namespace "test".
+///
+/// Panics if the graph rejects any of the three edges.
+fn create_triangle_graph() -> SheafGraph {
+    let graph = SheafGraph::new();
+
+    // All nodes are built the same way and differ only in their state vector.
+    let make_node = |state: &[f32]| {
+        SheafNodeBuilder::new()
+            .state_from_slice(state)
+            .namespace("test")
+            .build()
+    };
+
+    // Build all three nodes first, then insert them in the same order.
+    let node_x = make_node(&[1.0, 0.0, 0.0]);
+    let node_y = make_node(&[0.0, 1.0, 0.0]);
+    let node_z = make_node(&[0.0, 0.0, 1.0]);
+
+    let id_x = graph.add_node(node_x);
+    let id_y = graph.add_node(node_y);
+    let id_z = graph.add_node(node_z);
+
+    // All edges share the same restriction, weight and namespace settings.
+    let make_edge = |first, second| {
+        SheafEdgeBuilder::new(first, second)
+            .identity_restrictions(3)
+            .weight(1.0)
+            .namespace("test")
+            .build()
+    };
+
+    let edge_xy = make_edge(id_x, id_y);
+    let edge_yz = make_edge(id_y, id_z);
+    let edge_zx = make_edge(id_z, id_x);
+
+    graph.add_edge(edge_xy).unwrap();
+    graph.add_edge(edge_yz).unwrap();
+    graph.add_edge(edge_zx).unwrap();
+
+    graph
+}
+
+/// Build a two-node graph in which both endpoints of the single edge share one state.
+///
+/// Panics if the graph rejects the edge.
+fn create_coherent_graph() -> SheafGraph {
+    let graph = SheafGraph::new();
+    let shared_state = [1.0, 1.0, 1.0];
+
+    // Build both nodes before inserting either, then insert them in order.
+    let make_node = || SheafNodeBuilder::new().state_from_slice(&shared_state).build();
+    let (first, second) = (make_node(), make_node());
+    let first_id = graph.add_node(first);
+    let second_id = graph.add_node(second);
+
+    // Identity restrictions of dimension 3 match the length of the shared state.
+    let edge = SheafEdgeBuilder::new(first_id, second_id)
+        .identity_restrictions(3)
+        .weight(1.0)
+        .build();
+    graph.add_edge(edge).unwrap();
+    graph
+}
+
+/// Build a graph of `num_nodes` nodes with 64-dimensional states for larger-scale tests.
+///
+/// States are deterministic: component `j` of node `i` is `sin((i * 64 + j) * 0.01)`.
+/// Node `i` is linked to nodes `i + 1 ..= i + edges_per_node` (indices wrap around),
+/// self-links are skipped, and any edge the graph refuses to add is silently dropped.
+fn create_large_graph(num_nodes: usize, edges_per_node: usize) -> SheafGraph {
+    let graph = SheafGraph::new();
+    let state_dim = 64;
+
+    // One node per index; ids are kept in insertion order for the edge wiring below.
+    let node_ids: Vec<_> = (0..num_nodes)
+        .map(|node_idx| {
+            let state: Vec<f32> = (0..state_dim)
+                .map(|component| ((node_idx * state_dim + component) as f32 * 0.01).sin())
+                .collect();
+            graph.add_node(SheafNodeBuilder::new().state_from_slice(&state).build())
+        })
+        .collect();
+
+    for source_idx in 0..num_nodes {
+        for offset in 1..=edges_per_node {
+            let target_idx = (source_idx + offset) % num_nodes;
+            if source_idx == target_idx {
+                continue;
+            }
+            let edge = SheafEdgeBuilder::new(node_ids[source_idx], node_ids[target_idx])
+                .identity_restrictions(state_dim)
+                .weight(1.0)
+                .build();
+            // Best effort: a rejected edge (e.g. a duplicate) must not abort construction.
+            let _ = graph.add_edge(edge);
+        }
+    }
+    graph
+}
+
+// ============================================================================
+// GPU Configuration Tests
+// ============================================================================
+
+/// Default config: fallback on, `beta == 1.0`, increasing lane thresholds, positive timeout.
+#[test]
+fn test_gpu_config_default() {
+    let defaults = GpuConfig::default();
+    assert!(defaults.enable_fallback);
+    assert_eq!(defaults.beta, 1.0);
+    assert!(defaults.threshold_lane0 < defaults.threshold_lane1);
+    assert!(defaults.threshold_lane1 < defaults.threshold_lane2);
+    assert!(defaults.timeout_ms > 0);
+}
+
+/// Explicitly set fields must keep their values when the rest come from the defaults.
+#[test]
+fn test_gpu_config_custom() {
+    let custom = GpuConfig {
+        enable_fallback: false,
+        beta: 2.0,
+        threshold_lane0: 0.05,
+        threshold_lane1: 0.5,
+        threshold_lane2: 5.0,
+        ..GpuConfig::default()
+    };
+    assert!(!custom.enable_fallback);
+    assert_eq!(custom.beta, 2.0);
+    assert_eq!(custom.threshold_lane0, 0.05);
+}
+
+// ============================================================================
+// GPU Buffer Tests
+// ============================================================================
+
+#[test]
+fn test_gpu_params_alignment() {
+    // GPU struct layout is critical for correct computation: pin 32 bytes, 4-byte aligned
+    assert_eq!(std::mem::size_of::<GpuParams>(), 32);
+    assert_eq!(std::mem::align_of::<GpuParams>(), 4);
+}
+
+#[test]
+fn test_gpu_edge_alignment() {
+    assert_eq!(std::mem::size_of::<GpuEdge>(), 32); // expected size in bytes
+    assert_eq!(std::mem::align_of::<GpuEdge>(), 4); // expected alignment in bytes
+}
+
+#[test]
+fn test_gpu_restriction_map_alignment() {
+    assert_eq!(std::mem::size_of::<GpuRestrictionMap>(), 32); // expected size in bytes
+    assert_eq!(std::mem::align_of::<GpuRestrictionMap>(), 4); // expected alignment in bytes
+}
+
+// ============================================================================
+// CPU vs GPU Comparison Tests
+// ============================================================================
+
+/// The GPU total energy of the triangle graph must match the CPU total within `TOLERANCE`.
+///
+/// When no GPU engine can be created the comparison is skipped with a note on stderr.
+#[tokio::test]
+async fn test_gpu_cpu_energy_match_triangle() {
+    let graph = create_triangle_graph();
+
+    // Reference value computed on the CPU.
+    let cpu_energy = graph.compute_energy();
+
+    // `None` here means no GPU engine is available.
+    let maybe_engine = GpuCoherenceEngine::try_new(GpuConfig::default()).await;
+    if let Some(mut engine) = maybe_engine {
+        engine.upload_graph(&graph).unwrap();
+        let gpu_energy = engine.compute_energy().await.unwrap();
+
+        let cpu_total = cpu_energy.total_energy;
+        let gpu_total = gpu_energy.total_energy;
+        let diff = (cpu_total - gpu_total).abs();
+        assert!(
+            diff < TOLERANCE,
+            "Energy mismatch: CPU={}, GPU={}, diff={}",
+            cpu_total,
+            gpu_total,
+            diff
+        );
+
+        // Verify the GPU was actually used for this result.
+        assert!(gpu_energy.used_gpu);
+    } else {
+        // GPU not available, skip test
+        eprintln!("GPU not available, skipping GPU comparison test");
+    }
+}
+
+/// A graph whose nodes all share one state must have (near-)zero energy on CPU and GPU.
+#[tokio::test]
+async fn test_gpu_coherent_graph() {
+    let graph = create_coherent_graph();
+
+    // CPU reference: the bound is much tighter than the one applied to the GPU result.
+    let cpu_total = graph.compute_energy().total_energy;
+    assert!(
+        cpu_total < 1e-10,
+        "CPU energy for coherent graph should be near zero: {}",
+        cpu_total
+    );
+
+    // The GPU half of the test only runs when an engine can be created.
+    let maybe_engine = GpuCoherenceEngine::try_new(GpuConfig::default()).await;
+    if let Some(mut engine) = maybe_engine {
+        engine.upload_graph(&graph).unwrap();
+        let gpu_total = engine.compute_energy().await.unwrap().total_energy;
+
+        assert!(
+            gpu_total < 1e-5,
+            "GPU energy for coherent graph should be near zero: {}",
+            gpu_total
+        );
+    }
+}
+
+/// The GPU must report as many edge energies as the CPU, and their sums must match.
+///
+/// Does nothing when no GPU engine can be created.
+#[tokio::test]
+async fn test_gpu_per_edge_energies() {
+    let graph = create_triangle_graph();
+
+    // CPU reference values.
+    let cpu_energy = graph.compute_energy();
+
+    let maybe_engine = GpuCoherenceEngine::try_new(GpuConfig::default()).await;
+    if let Some(mut engine) = maybe_engine {
+        engine.upload_graph(&graph).unwrap();
+        let gpu_energy = engine.compute_energy().await.unwrap();
+
+        // Both sides must report the same number of per-edge energies.
+        let cpu_edges = cpu_energy.edge_energies.len();
+        let gpu_edges = gpu_energy.edge_energies.len();
+        assert_eq!(cpu_edges, gpu_edges, "Edge count mismatch");
+
+        // Edge order may differ between the two results, so only the sums are compared.
+        let cpu_sum: f32 = cpu_energy.edge_energies.values().sum();
+        let gpu_sum: f32 = gpu_energy.edge_energies.iter().sum();
+
+        let diff = (cpu_sum - gpu_sum).abs();
+        assert!(
+            diff < TOLERANCE,
+            "Sum of edge energies mismatch: CPU={}, GPU={}, diff={}",
+            cpu_sum,
+            gpu_sum,
+            diff
+        );
+    }
+}
+
+/// CPU and GPU totals for `create_large_graph(100, 5)` must agree within 1%.
+#[tokio::test]
+async fn test_gpu_large_graph() {
+    let graph = create_large_graph(100, 5);
+    let cpu_total = graph.compute_energy().total_energy;
+
+    let maybe_engine = GpuCoherenceEngine::try_new(GpuConfig::default()).await;
+    if let Some(mut engine) = maybe_engine {
+        engine.upload_graph(&graph).unwrap();
+        let gpu_total = engine.compute_energy().await.unwrap().total_energy;
+
+        // Floating point accumulation over many edges calls for a relative bound; the
+        // denominator is clamped to 1.0 so small totals are compared absolutely instead.
+        let diff = (cpu_total - gpu_total).abs();
+        let relative_diff = diff / cpu_total.max(1.0);
+
+        assert!(
+            relative_diff < 0.01, // 1% relative error
+            "Large graph energy mismatch: CPU={}, GPU={}, relative_diff={:.2}%",
+            cpu_total,
+            gpu_total,
+            relative_diff * 100.0
+        );
+    }
+}
+
+// ============================================================================
+// Error Handling Tests
+// ============================================================================
+
+/// Uploading a freshly created, empty graph must fail with `GpuError::EmptyGraph`.
+#[tokio::test]
+async fn test_gpu_empty_graph_error() {
+    let graph = SheafGraph::new();
+    let config = GpuConfig::default();
+    if let Some(mut engine) = GpuCoherenceEngine::try_new(config).await {
+        // Match directly on the result: a preceding `assert!(is_err())` would make the
+        // `Ok` arm unreachable and hide its more descriptive failure message.
+        match engine.upload_graph(&graph) {
+            Err(GpuError::EmptyGraph) => {}
+            Err(e) => panic!("Expected EmptyGraph error, got: {:?}", e),
+            Ok(_) => panic!("Expected error for empty graph"),
+        }
+    }
+}
+
+#[test]
+fn test_gpu_error_fallback_detection() {
+    // These variants must report `should_fallback()` as true
+    assert!(GpuError::NoAdapter.should_fallback());
+    assert!(GpuError::NoDevice("test".into()).should_fallback());
+    assert!(GpuError::DeviceCreation("test".into()).should_fallback());
+    assert!(GpuError::AdapterRequest("test".into()).should_fallback());
+    assert!(GpuError::UnsupportedFeature("test".into()).should_fallback());
+
+    // These variants must report `should_fallback()` as false
+    assert!(!GpuError::Timeout(100).should_fallback());
+    assert!(!GpuError::EmptyGraph.should_fallback());
+    assert!(!GpuError::BufferRead("test".into()).should_fallback());
+}
+
+#[test]
+fn test_gpu_error_recoverable() {
+    assert!(GpuError::Timeout(100).is_recoverable());
+    assert!(GpuError::BufferRead("test".into()).is_recoverable());
+    assert!(GpuError::ExecutionFailed("test".into()).is_recoverable());
+    // The variants above must be recoverable; the ones below must not be
+    assert!(!GpuError::NoAdapter.is_recoverable());
+    assert!(!GpuError::EmptyGraph.is_recoverable());
+}
+
+// ============================================================================
+// GPU Capabilities Tests
+// ============================================================================
+
+/// A created engine must report non-empty names, positive limits and `supported == true`.
+#[tokio::test]
+async fn test_gpu_capabilities() {
+    let maybe_engine = GpuCoherenceEngine::try_new(GpuConfig::default()).await;
+    if let Some(engine) = maybe_engine {
+        let capabilities = engine.capabilities();
+        // Device name and backend must both be non-empty.
+        assert!(!capabilities.device_name.is_empty());
+        assert!(!capabilities.backend.is_empty());
+
+        // Limits must be strictly positive (only index 0 of `max_workgroups` is checked).
+        assert!(capabilities.max_buffer_size > 0);
+        assert!(capabilities.max_workgroup_size > 0);
+        assert!(capabilities.max_workgroups[0] > 0);
+
+        // The device must be flagged as supported.
+        assert!(capabilities.supported);
+    }
+}
+
+// ============================================================================
+// Synchronous API Tests
+// ============================================================================
+
+/// The `sync` module API must yield a positive, GPU-computed energy for the triangle graph.
+#[test]
+fn test_sync_api() {
+    use prime_radiant::gpu::sync::{compute_energy, try_create_engine};
+
+    // Does nothing when no engine can be created.
+    if let Some(mut engine) = try_create_engine(GpuConfig::default()) {
+        let triangle = create_triangle_graph();
+        engine.upload_graph(&triangle).unwrap();
+
+        let result = compute_energy(&mut engine).unwrap();
+        assert!(result.total_energy > 0.0);
+        assert!(result.used_gpu);
+    }
+}
+
+// ============================================================================
+// Resource Management Tests
+// ============================================================================
+
+/// After `release()`, the same engine must accept a fresh upload and compute again.
+/// Does nothing when no GPU engine can be created.
+#[tokio::test]
+async fn test_gpu_resource_release() {
+    let maybe_engine = GpuCoherenceEngine::try_new(GpuConfig::default()).await;
+    if let Some(mut engine) = maybe_engine {
+        let triangle = create_triangle_graph();
+
+        // First cycle: upload, compute (result discarded), then release.
+        engine.upload_graph(&triangle).unwrap();
+        let _ = engine.compute_energy().await.unwrap();
+        engine.release();
+
+        // Second cycle on the same engine must succeed and yield positive energy.
+        engine.upload_graph(&triangle).unwrap();
+        let second_total = engine.compute_energy().await.unwrap().total_energy;
+        assert!(second_total > 0.0);
+    }
+}
+
+/// Three consecutive computations on one upload must agree pairwise within `TOLERANCE`.
+#[tokio::test]
+async fn test_gpu_multiple_computations() {
+    let maybe_engine = GpuCoherenceEngine::try_new(GpuConfig::default()).await;
+    if let Some(mut engine) = maybe_engine {
+        let triangle = create_triangle_graph();
+        engine.upload_graph(&triangle).unwrap();
+        // Run the computation three times without re-uploading the graph.
+        let mut totals = Vec::with_capacity(3);
+        for _ in 0..3 {
+            totals.push(engine.compute_energy().await.unwrap().total_energy);
+        }
+
+        // Compare run 1 with run 2, then run 2 with run 3.
+        for pair in totals.windows(2) {
+            assert!(
+                (pair[0] - pair[1]).abs() < TOLERANCE,
+                "Inconsistent results between computations"
+            );
+        }
+    }
+}
+
+// ============================================================================
+// Performance Tests (disabled by default)
+// ============================================================================
+
+/// Benchmark: time GPU vs CPU on `create_large_graph(1000, 10)`; totals must agree within 1%.
+#[tokio::test]
+#[ignore] // Run with: cargo test --features gpu -- --ignored
+async fn test_gpu_performance_1k_nodes() {
+    let graph = create_large_graph(1000, 10);
+    let edge_count = graph.edge_count();
+
+    let config = GpuConfig::default();
+    if let Some(mut engine) = GpuCoherenceEngine::try_new(config).await {
+        engine.upload_graph(&graph).unwrap();
+
+        // Warm up
+        let _ = engine.compute_energy().await.unwrap();
+
+        // Benchmark
+        let start = std::time::Instant::now();
+        let energy = engine.compute_energy().await.unwrap();
+        let gpu_time = start.elapsed();
+
+        // Compare with CPU
+        let start = std::time::Instant::now();
+        let cpu_energy = graph.compute_energy();
+        let cpu_time = start.elapsed();
+
+        println!("Performance test ({} edges):", edge_count);
+        println!(
+            "  GPU: {}us ({} edges/ms)",
+            energy.compute_time_us,
+            edge_count as u64 * 1000 / energy.compute_time_us.max(1)
+        );
+        println!("  CPU: {}us", cpu_time.as_micros());
+        // Fractional seconds: whole microseconds can truncate to 0 and divide by zero.
+        let speedup = cpu_time.as_secs_f64() / gpu_time.as_secs_f64().max(f64::MIN_POSITIVE);
+        println!("  Speedup: {:.2}x", speedup);
+
+        // Verify correctness
+        let diff = (cpu_energy.total_energy - energy.total_energy).abs();
+        let relative_diff = diff / cpu_energy.total_energy.max(1.0);
+        assert!(relative_diff < 0.01, "Performance test: energy mismatch");
+    }
+}
+
+/// Large-scale GPU run on `create_large_graph(10000, 10)`: print throughput, expect energy > 0.
+#[tokio::test]
+#[ignore]
+async fn test_gpu_performance_10k_nodes() {
+    let graph = create_large_graph(10000, 10);
+    let edge_count = graph.edge_count();
+
+    let maybe_engine = GpuCoherenceEngine::try_new(GpuConfig::default()).await;
+    if let Some(mut engine) = maybe_engine {
+        engine.upload_graph(&graph).unwrap();
+        // The first computation is a warm-up; its result is discarded.
+        let _ = engine.compute_energy().await.unwrap();
+
+        // The second computation is the one that gets reported.
+        let energy = engine.compute_energy().await.unwrap();
+        let elapsed_us = energy.compute_time_us;
+        let edges_per_ms = edge_count as u64 * 1000 / elapsed_us.max(1);
+        println!(
+            "Large scale test ({} edges): {}us, {} edges/ms",
+            edge_count,
+            elapsed_us,
+            edges_per_ms
+        );
+        assert!(energy.total_energy > 0.0);
+    }
+}