Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,520 @@
//! GPU Coherence Engine Tests
//!
//! Comprehensive tests verifying GPU computation results match CPU results
//! within floating-point tolerance. These tests ensure correctness of:
//!
//! - GPU buffer management and data transfer
//! - Parallel residual computation
//! - Energy aggregation with tree reduction
//! - CPU fallback mechanism
//!
//! Run with: cargo test --features gpu
#![cfg(feature = "gpu")]
use prime_radiant::gpu::{
BufferUsage, GpuBuffer, GpuBufferManager, GpuCoherenceEngine, GpuConfig, GpuEdge, GpuError,
GpuParams, GpuRestrictionMap, GpuResult,
};
use prime_radiant::substrate::{
EdgeId, NodeId, SheafEdge, SheafEdgeBuilder, SheafGraph, SheafNode, SheafNodeBuilder,
};
use std::collections::HashMap;
use uuid::Uuid;
/// Absolute tolerance for GPU-vs-CPU energy comparisons.
///
/// The tests in this file that use it compare `|a - b|` against this bound.
/// The large-graph and performance tests do not use it; they apply a 1%
/// relative bound instead.
const TOLERANCE: f32 = 1e-5;
/// Build a three-node cycle whose node states are the unit basis vectors
/// `[1,0,0]`, `[0,1,0]` and `[0,0,1]`.
///
/// Every edge uses 3-dimensional identity restrictions and weight `1.0`.
/// All nodes and edges are placed in the `"test"` namespace.
///
/// # Panics
///
/// Panics if the graph rejects any of the three edges.
fn create_triangle_graph() -> SheafGraph {
    let graph = SheafGraph::new();

    // One basis vector per node.
    let basis: [[f32; 3]; 3] = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]];

    // Build every node first, then register them in the same order.
    let nodes = basis.map(|state| {
        SheafNodeBuilder::new()
            .state_from_slice(&state)
            .namespace("test")
            .build()
    });
    let [a, b, c] = nodes.map(|node| graph.add_node(node));

    // Close the cycle a -> b -> c -> a. All edges are built before any is added.
    let edges = [(a, b), (b, c), (c, a)].map(|(source, target)| {
        SheafEdgeBuilder::new(source, target)
            .identity_restrictions(3)
            .weight(1.0)
            .namespace("test")
            .build()
    });
    for edge in edges {
        graph.add_edge(edge).unwrap();
    }

    graph
}
/// Build a two-node graph in which both nodes hold the state `[1.0, 1.0, 1.0]`.
///
/// The nodes are joined by one edge with 3-dimensional identity restrictions
/// and weight `1.0`.
///
/// # Panics
///
/// Panics if the graph rejects the edge.
fn create_coherent_graph() -> SheafGraph {
    let graph = SheafGraph::new();

    // Both endpoints share this exact state.
    let shared_state: [f32; 3] = [1.0, 1.0, 1.0];
    let make_node = || {
        SheafNodeBuilder::new()
            .state_from_slice(&shared_state)
            .build()
    };

    // Build both nodes before registering either one.
    let (first, second) = (make_node(), make_node());
    let source = graph.add_node(first);
    let target = graph.add_node(second);

    graph
        .add_edge(
            SheafEdgeBuilder::new(source, target)
                .identity_restrictions(3)
                .weight(1.0)
                .build(),
        )
        .unwrap();

    graph
}
/// Build a deterministic graph for scale and performance tests.
///
/// Creates `num_nodes` nodes, each with a 64-dimensional state. Element `j`
/// of node `i` is `sin((i * 64 + j) * 0.01)`, so the states are reproducible
/// rather than random. Node `i` is then linked to nodes `(i + 1) % num_nodes`
/// through `(i + edges_per_node) % num_nodes` with unit-weight identity edges.
/// A link that would wrap back onto node `i` is skipped.
///
/// Errors returned by `add_edge` are deliberately discarded so that duplicate
/// edges do not abort construction.
fn create_large_graph(num_nodes: usize, edges_per_node: usize) -> SheafGraph {
    const STATE_DIM: usize = 64;

    let graph = SheafGraph::new();

    // Register the nodes, remembering their ids in creation order.
    let node_ids: Vec<_> = (0..num_nodes)
        .map(|node_idx| {
            let offset = node_idx * STATE_DIM;
            let state: Vec<f32> = (0..STATE_DIM)
                .map(|component| ((offset + component) as f32 * 0.01).sin())
                .collect();
            graph.add_node(SheafNodeBuilder::new().state_from_slice(&state).build())
        })
        .collect();

    // Connect each node to its next `edges_per_node` neighbours, wrapping around.
    for (source_idx, &source) in node_ids.iter().enumerate() {
        for hop in 1..=edges_per_node {
            let target_idx = (source_idx + hop) % num_nodes;
            if target_idx == source_idx {
                continue;
            }
            let edge = SheafEdgeBuilder::new(source, node_ids[target_idx])
                .identity_restrictions(STATE_DIM)
                .weight(1.0)
                .build();
            // Ignore duplicate edges
            let _ = graph.add_edge(edge);
        }
    }

    graph
}
// ============================================================================
// GPU Configuration Tests
// ============================================================================
/// Checks the invariants expected of `GpuConfig::default()`.
///
/// Fallback must be enabled, `beta` must be exactly `1.0`, the three lane
/// thresholds must be strictly increasing, and the timeout must be non-zero.
#[test]
fn test_gpu_config_default() {
let config = GpuConfig::default();
assert!(config.enable_fallback);
// Exact float comparison: the default is expected to be the literal 1.0.
assert_eq!(config.beta, 1.0);
// Lane thresholds must be strictly ordered: lane0 < lane1 < lane2.
assert!(config.threshold_lane0 < config.threshold_lane1);
assert!(config.threshold_lane1 < config.threshold_lane2);
assert!(config.timeout_ms > 0);
}
/// Fields overridden in a `GpuConfig` literal must read back with the
/// overridden values; all other fields come from the default.
#[test]
fn test_gpu_config_custom() {
    let config = GpuConfig {
        threshold_lane0: 0.05,
        threshold_lane1: 0.5,
        threshold_lane2: 5.0,
        beta: 2.0,
        enable_fallback: false,
        ..GpuConfig::default()
    };

    assert!(!config.enable_fallback);
    assert_eq!(config.beta, 2.0);
    assert_eq!(config.threshold_lane0, 0.05);
}
// ============================================================================
// GPU Buffer Tests
// ============================================================================
/// Pins the host-side layout of `GpuParams` to 32 bytes with 4-byte alignment.
#[test]
fn test_gpu_params_alignment() {
    use std::mem::{align_of, size_of};

    // GPU struct alignment is critical for correct computation.
    let (size, align) = (size_of::<GpuParams>(), align_of::<GpuParams>());
    assert_eq!(size, 32);
    assert_eq!(align, 4);
}
/// Pins the host-side layout of `GpuEdge` to 32 bytes with 4-byte alignment.
#[test]
fn test_gpu_edge_alignment() {
    use std::mem::{align_of, size_of};

    let (size, align) = (size_of::<GpuEdge>(), align_of::<GpuEdge>());
    assert_eq!(size, 32);
    assert_eq!(align, 4);
}
/// Pins the host-side layout of `GpuRestrictionMap` to 32 bytes with 4-byte
/// alignment.
#[test]
fn test_gpu_restriction_map_alignment() {
    use std::mem::{align_of, size_of};

    let (size, align) = (
        size_of::<GpuRestrictionMap>(),
        align_of::<GpuRestrictionMap>(),
    );
    assert_eq!(size, 32);
    assert_eq!(align, 4);
}
// ============================================================================
// CPU vs GPU Comparison Tests
// ============================================================================
/// The GPU and CPU must agree on the total energy of the triangle graph to
/// within `TOLERANCE`, and the GPU result must report `used_gpu`.
///
/// If no GPU engine can be created, the test logs a notice to stderr and
/// passes without comparing anything.
#[tokio::test]
async fn test_gpu_cpu_energy_match_triangle() {
    let graph = create_triangle_graph();

    // CPU reference value, computed before the GPU is touched.
    let cpu_energy = graph.compute_energy();

    let mut engine = match GpuCoherenceEngine::try_new(GpuConfig::default()).await {
        Some(engine) => engine,
        None => {
            // GPU not available, skip test
            eprintln!("GPU not available, skipping GPU comparison test");
            return;
        }
    };

    engine.upload_graph(&graph).unwrap();
    let gpu_energy = engine.compute_energy().await.unwrap();

    // Totals must agree within the absolute tolerance.
    let diff = (cpu_energy.total_energy - gpu_energy.total_energy).abs();
    assert!(
        diff < TOLERANCE,
        "Energy mismatch: CPU={}, GPU={}, diff={}",
        cpu_energy.total_energy,
        gpu_energy.total_energy,
        diff
    );

    // The result must be flagged as having been produced on the GPU.
    assert!(gpu_energy.used_gpu);
}
/// A graph whose nodes all share one state must have near-zero energy on both
/// the CPU (below `1e-10`) and the GPU (below `1e-5`).
///
/// The CPU check always runs. The GPU check is skipped silently when no
/// engine can be created.
#[tokio::test]
async fn test_gpu_coherent_graph() {
    let graph = create_coherent_graph();

    // CPU side first: this part does not depend on GPU availability.
    let cpu_energy = graph.compute_energy();
    assert!(
        cpu_energy.total_energy < 1e-10,
        "CPU energy for coherent graph should be near zero: {}",
        cpu_energy.total_energy
    );

    let mut engine = match GpuCoherenceEngine::try_new(GpuConfig::default()).await {
        Some(engine) => engine,
        None => return,
    };

    engine.upload_graph(&graph).unwrap();
    let gpu_energy = engine.compute_energy().await.unwrap();
    assert!(
        gpu_energy.total_energy < 1e-5,
        "GPU energy for coherent graph should be near zero: {}",
        gpu_energy.total_energy
    );
}
/// Per-edge energies from the GPU must match the CPU values edge by edge.
///
/// The CPU result exposes its energies through `.values()` (a keyed
/// collection) while the GPU result is a flat sequence, so the two iteration
/// orders need not line up. Both sides are therefore sorted before the
/// pairwise comparison. Comparing only the two sums, as this test used to do,
/// lets per-edge errors that cancel out go unnoticed.
///
/// If no GPU engine can be created, the test logs a notice to stderr and
/// passes without comparing anything.
#[tokio::test]
async fn test_gpu_per_edge_energies() {
    let graph = create_triangle_graph();

    // CPU reference values.
    let cpu_energy = graph.compute_energy();

    let config = GpuConfig::default();
    let mut engine = match GpuCoherenceEngine::try_new(config).await {
        Some(engine) => engine,
        None => {
            eprintln!("GPU not available, skipping per-edge energy test");
            return;
        }
    };

    engine.upload_graph(&graph).unwrap();
    let gpu_energy = engine.compute_energy().await.unwrap();

    // Same number of edge energies on both sides.
    assert_eq!(
        cpu_energy.edge_energies.len(),
        gpu_energy.edge_energies.len(),
        "Edge count mismatch"
    );

    // Order may differ between the two results, so compare as sorted multisets.
    // `total_cmp` gives a total order on f32, so sorting cannot panic on NaN.
    let mut cpu_sorted: Vec<f32> = cpu_energy.edge_energies.values().copied().collect();
    let mut gpu_sorted: Vec<f32> = gpu_energy.edge_energies.iter().copied().collect();
    cpu_sorted.sort_unstable_by(f32::total_cmp);
    gpu_sorted.sort_unstable_by(f32::total_cmp);

    for (rank, (cpu, gpu)) in cpu_sorted.iter().zip(&gpu_sorted).enumerate() {
        let diff = (cpu - gpu).abs();
        assert!(
            diff < TOLERANCE,
            "Edge energy mismatch at sorted position {}: CPU={}, GPU={}, diff={}",
            rank,
            cpu,
            gpu,
            diff
        );
    }

    // The aggregate must agree as well.
    let cpu_sum: f32 = cpu_sorted.iter().sum();
    let gpu_sum: f32 = gpu_sorted.iter().sum();
    let diff = (cpu_sum - gpu_sum).abs();
    assert!(
        diff < TOLERANCE,
        "Sum of edge energies mismatch: CPU={}, GPU={}, diff={}",
        cpu_sum,
        gpu_sum,
        diff
    );
}
/// On a 100-node graph with up to 5 outgoing edges per node, the GPU total
/// energy must be within 1% of the CPU total.
///
/// The error is measured relative to `max(cpu_total, 1.0)`. The test is
/// skipped silently when no GPU engine can be created.
#[tokio::test]
async fn test_gpu_large_graph() {
    let graph = create_large_graph(100, 5);
    let cpu_energy = graph.compute_energy();

    let mut engine = match GpuCoherenceEngine::try_new(GpuConfig::default()).await {
        Some(engine) => engine,
        None => return,
    };

    engine.upload_graph(&graph).unwrap();
    let gpu_energy = engine.compute_energy().await.unwrap();

    // Allow a looser, relative tolerance here because of floating point
    // accumulation across many edges. The denominator is clamped to at least
    // 1.0 so a tiny CPU total cannot inflate the ratio.
    let diff = (cpu_energy.total_energy - gpu_energy.total_energy).abs();
    let relative_diff = diff / cpu_energy.total_energy.max(1.0);
    assert!(
        relative_diff < 0.01, // 1% relative error
        "Large graph energy mismatch: CPU={}, GPU={}, relative_diff={:.2}%",
        cpu_energy.total_energy,
        gpu_energy.total_energy,
        relative_diff * 100.0
    );
}
// ============================================================================
// Error Handling Tests
// ============================================================================
/// Uploading an empty graph must fail with `GpuError::EmptyGraph`.
///
/// Skipped silently when no GPU engine can be created.
#[tokio::test]
async fn test_gpu_empty_graph_error() {
    let graph = SheafGraph::new();

    let mut engine = match GpuCoherenceEngine::try_new(GpuConfig::default()).await {
        Some(engine) => engine,
        None => return,
    };

    let result = engine.upload_graph(&graph);
    assert!(result.is_err());

    // Narrow the failure down to the specific variant.
    match result {
        Ok(_) => panic!("Expected error for empty graph"),
        Err(GpuError::EmptyGraph) => {}
        Err(other) => panic!("Expected EmptyGraph error, got: {:?}", other),
    }
}
/// Checks which `GpuError` variants report `should_fallback()`.
///
/// The adapter, device and feature errors are expected to return `true`.
/// `Timeout`, `EmptyGraph` and `BufferRead` are expected to return `false`.
#[test]
fn test_gpu_error_fallback_detection() {
// These variants must trigger fallback.
assert!(GpuError::NoAdapter.should_fallback());
assert!(GpuError::NoDevice("test".into()).should_fallback());
assert!(GpuError::DeviceCreation("test".into()).should_fallback());
assert!(GpuError::AdapterRequest("test".into()).should_fallback());
assert!(GpuError::UnsupportedFeature("test".into()).should_fallback());
// These should not trigger fallback
assert!(!GpuError::Timeout(100).should_fallback());
assert!(!GpuError::EmptyGraph.should_fallback());
assert!(!GpuError::BufferRead("test".into()).should_fallback());
}
/// Checks which `GpuError` variants report `is_recoverable()`.
///
/// `Timeout`, `BufferRead` and `ExecutionFailed` are expected to return
/// `true`. `NoAdapter` and `EmptyGraph` are expected to return `false`.
#[test]
fn test_gpu_error_recoverable() {
// Expected to be recoverable.
assert!(GpuError::Timeout(100).is_recoverable());
assert!(GpuError::BufferRead("test".into()).is_recoverable());
assert!(GpuError::ExecutionFailed("test".into()).is_recoverable());
// Expected to be non-recoverable.
assert!(!GpuError::NoAdapter.is_recoverable());
assert!(!GpuError::EmptyGraph.is_recoverable());
}
// ============================================================================
// GPU Capabilities Tests
// ============================================================================
/// A successfully created engine must report populated capabilities.
///
/// That means non-empty device and backend names, positive buffer and
/// workgroup limits, and the `supported` flag set. Skipped silently when no
/// GPU engine can be created.
#[tokio::test]
async fn test_gpu_capabilities() {
    let engine = match GpuCoherenceEngine::try_new(GpuConfig::default()).await {
        Some(engine) => engine,
        None => return,
    };
    let caps = engine.capabilities();

    // Device identification must be filled in.
    assert!(!caps.device_name.is_empty());
    assert!(!caps.backend.is_empty());

    // Limits must be positive.
    assert!(caps.max_buffer_size > 0);
    assert!(caps.max_workgroup_size > 0);
    assert!(caps.max_workgroups[0] > 0);

    // A live engine must be marked as supported.
    assert!(caps.supported);
}
// ============================================================================
// Synchronous API Tests
// ============================================================================
/// Exercises the blocking wrappers in `prime_radiant::gpu::sync`.
///
/// Creates an engine, uploads the triangle graph and computes its energy.
/// The energy must be positive and flagged as GPU-computed. Skipped silently
/// when no engine can be created.
#[test]
fn test_sync_api() {
    use prime_radiant::gpu::sync;

    let mut engine = match sync::try_create_engine(GpuConfig::default()) {
        Some(engine) => engine,
        None => return,
    };

    let graph = create_triangle_graph();
    engine.upload_graph(&graph).unwrap();

    let energy = sync::compute_energy(&mut engine).unwrap();
    assert!(energy.total_energy > 0.0);
    assert!(energy.used_gpu);
}
// ============================================================================
// Resource Management Tests
// ============================================================================
/// After `release()`, the same engine must accept a fresh upload and still
/// produce a positive energy.
///
/// Skipped silently when no GPU engine can be created.
#[tokio::test]
async fn test_gpu_resource_release() {
    let mut engine = match GpuCoherenceEngine::try_new(GpuConfig::default()).await {
        Some(engine) => engine,
        None => return,
    };
    let graph = create_triangle_graph();

    // First cycle: upload and compute. The result itself is not inspected.
    engine.upload_graph(&graph).unwrap();
    let _ = engine.compute_energy().await.unwrap();

    // Drop whatever the engine holds for the uploaded graph.
    engine.release();

    // Second cycle on the same engine must work.
    engine.upload_graph(&graph).unwrap();
    let energy = engine.compute_energy().await.unwrap();
    assert!(energy.total_energy > 0.0);
}
/// Three back-to-back computations on the same uploaded graph must yield
/// total energies that agree pairwise within `TOLERANCE`.
///
/// Skipped silently when no GPU engine can be created.
#[tokio::test]
async fn test_gpu_multiple_computations() {
    let mut engine = match GpuCoherenceEngine::try_new(GpuConfig::default()).await {
        Some(engine) => engine,
        None => return,
    };
    let graph = create_triangle_graph();
    engine.upload_graph(&graph).unwrap();

    // Run all three computations before checking anything.
    let mut runs = Vec::with_capacity(3);
    for _ in 0..3 {
        runs.push(engine.compute_energy().await.unwrap());
    }

    // Compare consecutive runs: (1st, 2nd) and then (2nd, 3rd).
    for pair in runs.windows(2) {
        assert!(
            (pair[0].total_energy - pair[1].total_energy).abs() < TOLERANCE,
            "Inconsistent results between computations"
        );
    }
}
// ============================================================================
// Performance Tests (disabled by default)
// ============================================================================
/// Benchmark on a 1000-node graph with up to 10 outgoing edges per node.
///
/// After one warm-up run, a single GPU computation and a single CPU
/// computation are timed and printed. The GPU total energy must be within 1%
/// of the CPU total, relative to `max(cpu_total, 1.0)`. Ignored by default
/// and skipped silently when no GPU engine can be created.
#[tokio::test]
#[ignore] // Run with: cargo test --features gpu -- --ignored
async fn test_gpu_performance_1k_nodes() {
    use std::time::Instant;

    let graph = create_large_graph(1000, 10);
    let edge_count = graph.edge_count();

    let mut engine = match GpuCoherenceEngine::try_new(GpuConfig::default()).await {
        Some(engine) => engine,
        None => return,
    };
    engine.upload_graph(&graph).unwrap();

    // Warm-up run; its result is discarded.
    let _ = engine.compute_energy().await.unwrap();

    // Timed GPU run (wall clock around the awaited call).
    let gpu_started = Instant::now();
    let energy = engine.compute_energy().await.unwrap();
    let gpu_time = gpu_started.elapsed();

    // Timed CPU run for comparison.
    let cpu_started = Instant::now();
    let cpu_energy = graph.compute_energy();
    let cpu_time = cpu_started.elapsed();

    // Throughput uses the engine-reported time, clamped to 1us to avoid
    // dividing by zero. The speedup uses the wall-clock measurements.
    let gpu_edges_per_ms = edge_count as u64 * 1000 / energy.compute_time_us.max(1);
    let speedup = cpu_time.as_micros() as f64 / gpu_time.as_micros() as f64;

    println!("Performance test ({} edges):", edge_count);
    println!(
        " GPU: {}us ({} edges/ms)",
        energy.compute_time_us, gpu_edges_per_ms
    );
    println!(" CPU: {}us", cpu_time.as_micros());
    println!(" Speedup: {:.2}x", speedup);

    // The benchmark still has to produce the right answer.
    let diff = (cpu_energy.total_energy - energy.total_energy).abs();
    let relative_diff = diff / cpu_energy.total_energy.max(1.0);
    assert!(relative_diff < 0.01, "Performance test: energy mismatch");
}
/// Scale run on a 10000-node graph with up to 10 outgoing edges per node.
///
/// After one warm-up run, a single GPU computation is performed. Its
/// engine-reported time and throughput are printed, and the total energy
/// must be positive. Ignored by default and skipped silently when no GPU
/// engine can be created.
#[tokio::test]
#[ignore]
async fn test_gpu_performance_10k_nodes() {
    let graph = create_large_graph(10000, 10);
    let edge_count = graph.edge_count();

    let mut engine = match GpuCoherenceEngine::try_new(GpuConfig::default()).await {
        Some(engine) => engine,
        None => return,
    };
    engine.upload_graph(&graph).unwrap();

    // Warm-up run; its result is discarded.
    let _ = engine.compute_energy().await.unwrap();

    // Measured run.
    let energy = engine.compute_energy().await.unwrap();

    // Clamp the reported time to 1us to avoid dividing by zero.
    let edges_per_ms = edge_count as u64 * 1000 / energy.compute_time_us.max(1);
    println!(
        "Large scale test ({} edges): {}us, {} edges/ms",
        edge_count, energy.compute_time_us, edges_per_ms
    );

    assert!(energy.total_energy > 0.0);
}