// Files
// wifi-densepose/tests/integration/distributed/cluster_integration_tests.rs
// ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
// git-subtree-dir: vendor/ruvector
// git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
// 2026-02-28 14:39:40 -05:00
//
// 391 lines
// 12 KiB
// Rust

//! Cluster Integration Tests
//!
//! End-to-end tests combining Raft, Replication, and Sharding
use ruvector_cluster::{
ClusterManager, ClusterConfig, ClusterNode, NodeStatus,
ConsistentHashRing, ShardRouter,
discovery::StaticDiscovery,
};
use ruvector_raft::{RaftNode, RaftNodeConfig, RaftState};
use ruvector_replication::{
ReplicaSet, ReplicaRole, SyncManager, SyncMode, ReplicationLog,
};
use std::net::{SocketAddr, IpAddr, Ipv4Addr};
use std::sync::Arc;
use std::time::{Duration, Instant};
/// Test full cluster initialization
#[tokio::test]
async fn test_full_cluster_initialization() {
    // Fully explicit configuration (no defaults) so the test pins every knob.
    let cluster_config = ClusterConfig {
        replication_factor: 3,
        shard_count: 16,
        heartbeat_interval: Duration::from_secs(5),
        node_timeout: Duration::from_secs(30),
        enable_consensus: true,
        min_quorum_size: 2,
    };
    let static_discovery = Box::new(StaticDiscovery::new(vec![]));
    let manager =
        ClusterManager::new(cluster_config.clone(), "coordinator".to_string(), static_discovery)
            .unwrap();

    // Register five members, node0..node4, on consecutive 192.168.1.x addresses.
    for idx in 0u8..5 {
        let member = ClusterNode::new(
            format!("node{}", idx),
            SocketAddr::new(IpAddr::V4(Ipv4Addr::new(192, 168, 1, idx + 1)), 9000),
        );
        manager.add_node(member).await.unwrap();
    }

    // Every registered member is counted and reported healthy.
    let stats = manager.get_stats();
    assert_eq!(stats.total_nodes, 5);
    assert_eq!(stats.healthy_nodes, 5);

    // Routing must map an arbitrary key into the configured shard range.
    let shard = manager.router().get_shard("test-vector-id");
    assert!(shard < cluster_config.shard_count);
}
/// Test combined Raft + Cluster coordination
#[tokio::test]
async fn test_raft_cluster_coordination() {
    // Three-member Raft group; every node shares the same peer list.
    let cluster_members: Vec<String> = (1..=3).map(|n| format!("raft-node-{}", n)).collect();
    let raft_nodes: Vec<_> = cluster_members
        .iter()
        .map(|id| RaftNode::new(RaftNodeConfig::new(id.clone(), cluster_members.clone())))
        .collect();

    // Cluster manager coordinated by the first Raft member.
    let cluster = ClusterManager::new(
        ClusterConfig {
            shard_count: 8,
            replication_factor: 3,
            min_quorum_size: 2,
            ..Default::default()
        },
        "raft-node-1".to_string(),
        Box::new(StaticDiscovery::new(vec![])),
    )
    .unwrap();

    // Register each Raft member as a cluster node on 10.0.0.x:7000.
    for (i, member) in cluster_members.iter().enumerate() {
        let node = ClusterNode::new(
            member.clone(),
            SocketAddr::new(IpAddr::V4(Ipv4Addr::new(10, 0, 0, i as u8 + 1)), 7000),
        );
        cluster.add_node(node).await.unwrap();
    }

    // All Raft members are visible in the cluster.
    assert_eq!(cluster.list_nodes().len(), 3);

    // Raft nodes always boot as followers until an election happens.
    for node in &raft_nodes {
        assert_eq!(node.current_state(), RaftState::Follower);
    }
}
/// Test replication across cluster
#[tokio::test]
async fn test_cluster_replication() {
    // One primary plus two secondaries in a single replica set.
    let mut replicas = ReplicaSet::new("distributed-cluster");
    replicas.add_replica("primary", "10.0.0.1:9001", ReplicaRole::Primary).unwrap();
    replicas.add_replica("secondary-1", "10.0.0.2:9001", ReplicaRole::Secondary).unwrap();
    replicas.add_replica("secondary-2", "10.0.0.3:9001", ReplicaRole::Secondary).unwrap();

    // Cluster containing the same three hosts (cluster port differs from
    // the replication port above).
    let cluster = ClusterManager::new(
        ClusterConfig {
            replication_factor: 3,
            shard_count: 16,
            ..Default::default()
        },
        "primary".to_string(),
        Box::new(StaticDiscovery::new(vec![])),
    )
    .unwrap();

    let endpoints = [
        ("primary", "10.0.0.1:9000"),
        ("secondary-1", "10.0.0.2:9000"),
        ("secondary-2", "10.0.0.3:9000"),
    ];
    for (node_id, endpoint) in endpoints {
        cluster
            .add_node(ClusterNode::new(node_id.to_string(), endpoint.parse().unwrap()))
            .await
            .unwrap();
    }

    // Semi-sync mode: a write acks once at least one replica confirms.
    let replication_log = Arc::new(ReplicationLog::new("primary"));
    let sync = SyncManager::new(Arc::new(replicas), replication_log);
    sync.set_sync_mode(SyncMode::SemiSync { min_replicas: 1 });

    // The very first replicated entry takes sequence/position 1.
    let entry = sync.replicate(b"vector-data".to_vec()).await.unwrap();
    assert_eq!(entry.sequence, 1);
    assert_eq!(sync.current_position(), 1);
}
/// Test sharded data distribution
#[tokio::test]
async fn test_sharded_data_distribution() {
    let config = ClusterConfig {
        shard_count: 32,
        replication_factor: 3,
        ..Default::default()
    };
    let discovery = Box::new(StaticDiscovery::new(vec![]));
    let cluster =
        ClusterManager::new(config.clone(), "coordinator".to_string(), discovery).unwrap();

    // Five storage nodes on 172.16.0.x:8000.
    for n in 0u8..5 {
        let node = ClusterNode::new(
            format!("data-node-{}", n),
            SocketAddr::new(IpAddr::V4(Ipv4Addr::new(172, 16, 0, n + 1)), 8000),
        );
        cluster.add_node(node).await.unwrap();
    }

    // Route 10k synthetic vector ids and tally how many land on each shard.
    let router = cluster.router();
    let mut per_shard = std::collections::HashMap::new();
    for n in 0..10000 {
        let vector_id = format!("vec-{:08}", n);
        *per_shard.entry(router.get_shard(&vector_id)).or_insert(0) += 1;
    }

    // Each shard should hold roughly its fair share (within ±50%), and the
    // per-shard counts must account for all 10k ids.
    let expected_per_shard = 10000 / config.shard_count;
    let min_expected = (expected_per_shard as f64 * 0.5) as usize;
    let max_expected = (expected_per_shard as f64 * 1.5) as usize;
    let mut total = 0;
    for shard in 0..config.shard_count {
        let count = per_shard.get(&shard).copied().unwrap_or(0);
        total += count;
        assert!(
            count >= min_expected && count <= max_expected,
            "Shard {} has {} vectors, expected {}-{}",
            shard, count, min_expected, max_expected
        );
    }
    assert_eq!(total, 10000);
}
/// Test node failure handling
#[tokio::test]
async fn test_node_failure_handling() {
    let cluster = ClusterManager::new(
        ClusterConfig {
            shard_count: 8,
            replication_factor: 3,
            node_timeout: Duration::from_secs(5),
            ..Default::default()
        },
        "coordinator".to_string(),
        Box::new(StaticDiscovery::new(vec![])),
    )
    .unwrap();

    // Five nodes on 192.168.0.x; node-2 joins already marked offline.
    for n in 0u8..5 {
        let mut node = ClusterNode::new(
            format!("node-{}", n),
            SocketAddr::new(IpAddr::V4(Ipv4Addr::new(192, 168, 0, n + 1)), 9000),
        );
        if n == 2 {
            node.status = NodeStatus::Offline;
        }
        cluster.add_node(node).await.unwrap();
    }

    // All five are listed; the offline one may or may not appear in the
    // healthy set depending on health-check timing, so only a lower bound
    // is asserted.
    assert_eq!(cluster.list_nodes().len(), 5);
    assert!(cluster.healthy_nodes().len() >= 4);
}
/// Test consistent hashing stability
///
/// Verifies that adding a node to the ring only reassigns roughly 1/N of
/// the keys, then reports churn after a node removal.
#[tokio::test]
async fn test_consistent_hashing_stability() {
    // Ring with 3 virtual nodes per physical node.
    let mut ring = ConsistentHashRing::new(3);
    // Initial cluster
    ring.add_node("node-a".to_string());
    ring.add_node("node-b".to_string());
    ring.add_node("node-c".to_string());

    // Record the primary assignment of 1000 keys on the initial ring.
    let mut assignments = std::collections::HashMap::new();
    for i in 0..1000 {
        let key = format!("stable-key-{}", i);
        if let Some(node) = ring.get_primary_node(&key) {
            assignments.insert(key, node);
        }
    }

    // Adding a fourth node should move only about a quarter of the keys.
    ring.add_node("node-d".to_string());
    let mut reassigned = 0;
    for (key, original_node) in &assignments {
        if let Some(new_node) = ring.get_primary_node(key) {
            if new_node != *original_node {
                reassigned += 1;
            }
        }
    }
    let reassignment_rate = reassigned as f64 / assignments.len() as f64;
    println!("Reassignment rate after adding node: {:.1}%", reassignment_rate * 100.0);
    // With 4 nodes, ~25% of keys should be reassigned (1/4)
    assert!(reassignment_rate < 0.35, "Too many reassignments: {:.1}%", reassignment_rate * 100.0);

    // Remove a node and report churn relative to the ORIGINAL (3-node)
    // baseline. Keys originally on node-b must move; the count also
    // includes keys already relocated by the node-d addition above.
    // (Uses the map's own value binding — no second lookup needed.)
    ring.remove_node("node-b");
    let mut reassigned_after_removal = 0;
    for (key, original_node) in &assignments {
        if let Some(new_node) = ring.get_primary_node(key) {
            if new_node != *original_node {
                reassigned_after_removal += 1;
            }
        }
    }
    println!("Reassignments after removing node: {}", reassigned_after_removal);
}
/// Test cross-shard query routing
///
/// A multi-key query is routed key-by-key; the set of distinct target
/// shards bounds the scatter-gather fan-out.
#[tokio::test]
async fn test_cross_shard_query_routing() {
    let router = ShardRouter::new(16);
    // Simulate a range query that spans multiple shards
    let query_keys = vec![
        "query-key-1",
        "query-key-2",
        "query-key-3",
        "query-key-4",
        "query-key-5",
    ];
    // Collect the distinct shards the keys hash to.
    let mut target_shards = std::collections::HashSet::new();
    for key in &query_keys {
        target_shards.insert(router.get_shard(key));
    }
    println!("Query spans {} shards: {:?}", target_shards.len(), target_shards);
    // For scatter-gather we must hit at least one shard, and never more
    // shards than there are keys.
    assert!(!target_shards.is_empty());
    assert!(target_shards.len() <= query_keys.len());
}
/// Test cluster startup sequence
#[tokio::test]
async fn test_cluster_startup_sequence() {
    let start = Instant::now();

    // Step 1: Create cluster manager
    let config = ClusterConfig {
        shard_count: 32,
        replication_factor: 3,
        enable_consensus: true,
        min_quorum_size: 2,
        ..Default::default()
    };
    let cluster = ClusterManager::new(
        config.clone(),
        "bootstrap".to_string(),
        Box::new(StaticDiscovery::new(vec![])),
    )
    .unwrap();

    // Step 2: Add initial nodes
    for n in 0u8..3 {
        let node = ClusterNode::new(
            format!("init-node-{}", n),
            SocketAddr::new(IpAddr::V4(Ipv4Addr::new(10, 0, 0, n + 1)), 9000),
        );
        cluster.add_node(node).await.unwrap();
    }

    // Step 3: Assign every shard and verify each received a primary node.
    for shard_id in 0..config.shard_count {
        let shard = cluster.assign_shard(shard_id).unwrap();
        assert!(!shard.primary_node.is_empty());
    }

    let startup_time = start.elapsed();
    println!("Cluster startup completed in {:?}", startup_time);
    // Startup should be fast
    assert!(startup_time < Duration::from_secs(1), "Startup too slow");

    // Final state: all nodes registered, all shards created.
    let stats = cluster.get_stats();
    assert_eq!(stats.total_nodes, 3);
    assert_eq!(stats.total_shards, 32);
}
/// Load test for cluster operations
#[tokio::test]
async fn test_cluster_load() {
    let cluster = ClusterManager::new(
        ClusterConfig {
            shard_count: 64,
            replication_factor: 3,
            ..Default::default()
        },
        "load-test".to_string(),
        Box::new(StaticDiscovery::new(vec![])),
    )
    .unwrap();

    // Ten nodes spread across 10.0.x.1 subnets.
    for n in 0u8..10 {
        let node = ClusterNode::new(
            format!("load-node-{}", n),
            SocketAddr::new(IpAddr::V4(Ipv4Addr::new(10, 0, n, 1)), 9000),
        );
        cluster.add_node(node).await.unwrap();
    }

    // Hammer the router with 100k lookups and measure throughput.
    let router = cluster.router();
    let iterations = 100000;
    let start = Instant::now();
    for n in 0..iterations {
        let key = format!("load-key-{}", n);
        let _ = router.get_shard(&key);
    }
    let elapsed = start.elapsed();

    let ops_per_sec = iterations as f64 / elapsed.as_secs_f64();
    println!("Cluster routing: {:.0} ops/sec", ops_per_sec);
    // Should handle high throughput
    assert!(ops_per_sec > 100000.0, "Throughput too low: {:.0} ops/sec", ops_per_sec);
}