Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
535
vendor/ruvector/crates/ruvector-graph/src/distributed/coordinator.rs
vendored
Normal file
535
vendor/ruvector/crates/ruvector-graph/src/distributed/coordinator.rs
vendored
Normal file
@@ -0,0 +1,535 @@
|
||||
//! Query coordinator for distributed graph execution
|
||||
//!
|
||||
//! Coordinates distributed query execution across multiple shards:
|
||||
//! - Query planning and optimization
|
||||
//! - Query routing to relevant shards
|
||||
//! - Result aggregation and merging
|
||||
//! - Transaction coordination across shards
|
||||
//! - Query caching and optimization
|
||||
|
||||
use crate::distributed::shard::{EdgeData, GraphShard, NodeData, NodeId, ShardId};
|
||||
use crate::{GraphError, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use dashmap::DashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::{debug, info, warn};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Query execution plan produced by [`ShardCoordinator::plan_query`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryPlan {
    /// Unique query ID (UUID v4 generated at planning time)
    pub query_id: String,
    /// Original query text (Cypher-like syntax)
    pub query: String,
    /// Shards involved in this query
    pub target_shards: Vec<ShardId>,
    /// Ordered execution steps
    pub steps: Vec<QueryStep>,
    /// Estimated cost in abstract units (step weights × shard count)
    pub estimated_cost: f64,
    /// Whether this is a distributed query (the current planner always sets true)
    pub is_distributed: bool,
    /// Creation timestamp
    pub created_at: DateTime<Utc>,
}
|
||||
|
||||
/// Individual step in query execution.
///
/// Steps are executed in order by [`ShardCoordinator::execute_query`];
/// scan steps are bound to a specific shard, while aggregate/filter/
/// sort/limit steps operate on the accumulated result set.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum QueryStep {
    /// Scan nodes on one shard with optional label and property filter
    NodeScan {
        shard_id: ShardId,
        label: Option<String>,
        filter: Option<String>,
    },
    /// Scan edges on one shard, optionally restricted to one edge type
    EdgeScan {
        shard_id: ShardId,
        edge_type: Option<String>,
    },
    /// Join results from two shards on a common key
    Join {
        left_shard: ShardId,
        right_shard: ShardId,
        join_key: String,
    },
    /// Aggregate results, optionally grouped by a property
    Aggregate {
        operation: AggregateOp,
        group_by: Option<String>,
    },
    /// Filter results by a predicate expression
    Filter { predicate: String },
    /// Sort results by a key
    Sort { key: String, ascending: bool },
    /// Limit the number of results
    Limit { count: usize },
}
|
||||
|
||||
/// Aggregate operations supported in [`QueryStep::Aggregate`].
///
/// The `String` payload names the property to aggregate over.
/// NOTE(review): only `Count` is implemented by the executor so far.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AggregateOp {
    /// Count of result nodes
    Count,
    /// Sum of a numeric property
    Sum(String),
    /// Average of a numeric property
    Avg(String),
    /// Minimum of a property
    Min(String),
    /// Maximum of a property
    Max(String),
}
|
||||
|
||||
/// Query result returned by [`ShardCoordinator::execute_query`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryResult {
    /// ID of the query that produced this result
    pub query_id: String,
    /// Result nodes (after any label filtering and limiting)
    pub nodes: Vec<NodeData>,
    /// Result edges (after any type filtering)
    pub edges: Vec<EdgeData>,
    /// Aggregate results keyed by aggregate name (e.g. "count")
    pub aggregates: HashMap<String, serde_json::Value>,
    /// Execution statistics
    pub stats: QueryStats,
}
|
||||
|
||||
/// Query execution statistics attached to each [`QueryResult`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryStats {
    /// Wall-clock execution time in milliseconds
    pub execution_time_ms: u64,
    /// Number of shards targeted by the plan
    pub shards_queried: usize,
    /// Total nodes scanned (before filtering)
    pub nodes_scanned: usize,
    /// Total edges scanned (before filtering)
    pub edges_scanned: usize,
    /// Whether the result was served from the query cache
    pub cached: bool,
}
|
||||
|
||||
/// Shard coordinator for managing distributed queries.
///
/// All state lives in concurrent maps (`DashMap`) behind `Arc`, so a
/// single coordinator can be shared across tasks without external locking.
pub struct ShardCoordinator {
    /// Map of shard_id to its registered `GraphShard`
    shards: Arc<DashMap<ShardId, Arc<GraphShard>>>,
    /// Query cache keyed by the raw query string
    query_cache: Arc<DashMap<String, QueryResult>>,
    /// Active transactions keyed by transaction ID
    transactions: Arc<DashMap<String, Transaction>>,
}
|
||||
|
||||
impl ShardCoordinator {
|
||||
/// Create a new shard coordinator
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
shards: Arc::new(DashMap::new()),
|
||||
query_cache: Arc::new(DashMap::new()),
|
||||
transactions: Arc::new(DashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Register a shard with the coordinator
|
||||
pub fn register_shard(&self, shard_id: ShardId, shard: Arc<GraphShard>) {
|
||||
info!("Registering shard {} with coordinator", shard_id);
|
||||
self.shards.insert(shard_id, shard);
|
||||
}
|
||||
|
||||
/// Unregister a shard
|
||||
pub fn unregister_shard(&self, shard_id: ShardId) -> Result<()> {
|
||||
info!("Unregistering shard {}", shard_id);
|
||||
self.shards
|
||||
.remove(&shard_id)
|
||||
.ok_or_else(|| GraphError::ShardError(format!("Shard {} not found", shard_id)))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get a shard by ID
|
||||
pub fn get_shard(&self, shard_id: ShardId) -> Option<Arc<GraphShard>> {
|
||||
self.shards.get(&shard_id).map(|s| Arc::clone(s.value()))
|
||||
}
|
||||
|
||||
/// List all registered shards
|
||||
pub fn list_shards(&self) -> Vec<ShardId> {
|
||||
self.shards.iter().map(|e| *e.key()).collect()
|
||||
}
|
||||
|
||||
/// Create a query plan from a Cypher-like query
|
||||
pub fn plan_query(&self, query: &str) -> Result<QueryPlan> {
|
||||
let query_id = Uuid::new_v4().to_string();
|
||||
|
||||
// Parse query and determine target shards
|
||||
// For now, simple heuristic: query all shards for distributed queries
|
||||
let target_shards: Vec<ShardId> = self.list_shards();
|
||||
|
||||
let steps = self.parse_query_steps(query)?;
|
||||
|
||||
let estimated_cost = self.estimate_cost(&steps, &target_shards);
|
||||
|
||||
Ok(QueryPlan {
|
||||
query_id,
|
||||
query: query.to_string(),
|
||||
target_shards,
|
||||
steps,
|
||||
estimated_cost,
|
||||
is_distributed: true,
|
||||
created_at: Utc::now(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse query into execution steps
|
||||
fn parse_query_steps(&self, query: &str) -> Result<Vec<QueryStep>> {
|
||||
// Simplified query parsing
|
||||
// In production, use a proper Cypher parser
|
||||
let mut steps = Vec::new();
|
||||
|
||||
// Example: "MATCH (n:Person) RETURN n"
|
||||
if query.to_lowercase().contains("match") {
|
||||
// Add node scan for each shard
|
||||
for shard_id in self.list_shards() {
|
||||
steps.push(QueryStep::NodeScan {
|
||||
shard_id,
|
||||
label: None,
|
||||
filter: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Add aggregation if needed
|
||||
if query.to_lowercase().contains("count") {
|
||||
steps.push(QueryStep::Aggregate {
|
||||
operation: AggregateOp::Count,
|
||||
group_by: None,
|
||||
});
|
||||
}
|
||||
|
||||
// Add limit if specified
|
||||
if let Some(limit_pos) = query.to_lowercase().find("limit") {
|
||||
if let Some(count_str) = query[limit_pos..].split_whitespace().nth(1) {
|
||||
if let Ok(count) = count_str.parse::<usize>() {
|
||||
steps.push(QueryStep::Limit { count });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(steps)
|
||||
}
|
||||
|
||||
/// Estimate query execution cost
|
||||
fn estimate_cost(&self, steps: &[QueryStep], target_shards: &[ShardId]) -> f64 {
|
||||
let mut cost = 0.0;
|
||||
|
||||
for step in steps {
|
||||
match step {
|
||||
QueryStep::NodeScan { .. } => cost += 10.0,
|
||||
QueryStep::EdgeScan { .. } => cost += 15.0,
|
||||
QueryStep::Join { .. } => cost += 50.0,
|
||||
QueryStep::Aggregate { .. } => cost += 20.0,
|
||||
QueryStep::Filter { .. } => cost += 5.0,
|
||||
QueryStep::Sort { .. } => cost += 30.0,
|
||||
QueryStep::Limit { .. } => cost += 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
// Multiply by number of shards for distributed queries
|
||||
cost * target_shards.len() as f64
|
||||
}
|
||||
|
||||
/// Execute a query plan
|
||||
pub async fn execute_query(&self, plan: QueryPlan) -> Result<QueryResult> {
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
info!(
|
||||
"Executing query {} across {} shards",
|
||||
plan.query_id,
|
||||
plan.target_shards.len()
|
||||
);
|
||||
|
||||
// Check cache first
|
||||
if let Some(cached) = self.query_cache.get(&plan.query) {
|
||||
debug!("Query cache hit for: {}", plan.query);
|
||||
return Ok(cached.value().clone());
|
||||
}
|
||||
|
||||
let mut nodes = Vec::new();
|
||||
let mut edges = Vec::new();
|
||||
let mut aggregates = HashMap::new();
|
||||
let mut nodes_scanned = 0;
|
||||
let mut edges_scanned = 0;
|
||||
|
||||
// Execute steps
|
||||
for step in &plan.steps {
|
||||
match step {
|
||||
QueryStep::NodeScan {
|
||||
shard_id,
|
||||
label,
|
||||
filter,
|
||||
} => {
|
||||
if let Some(shard) = self.get_shard(*shard_id) {
|
||||
let shard_nodes = shard.list_nodes();
|
||||
nodes_scanned += shard_nodes.len();
|
||||
|
||||
// Apply label filter
|
||||
let filtered: Vec<_> = if let Some(label_filter) = label {
|
||||
shard_nodes
|
||||
.into_iter()
|
||||
.filter(|n| n.labels.contains(label_filter))
|
||||
.collect()
|
||||
} else {
|
||||
shard_nodes
|
||||
};
|
||||
|
||||
nodes.extend(filtered);
|
||||
}
|
||||
}
|
||||
QueryStep::EdgeScan {
|
||||
shard_id,
|
||||
edge_type,
|
||||
} => {
|
||||
if let Some(shard) = self.get_shard(*shard_id) {
|
||||
let shard_edges = shard.list_edges();
|
||||
edges_scanned += shard_edges.len();
|
||||
|
||||
// Apply edge type filter
|
||||
let filtered: Vec<_> = if let Some(type_filter) = edge_type {
|
||||
shard_edges
|
||||
.into_iter()
|
||||
.filter(|e| &e.edge_type == type_filter)
|
||||
.collect()
|
||||
} else {
|
||||
shard_edges
|
||||
};
|
||||
|
||||
edges.extend(filtered);
|
||||
}
|
||||
}
|
||||
QueryStep::Aggregate {
|
||||
operation,
|
||||
group_by,
|
||||
} => {
|
||||
match operation {
|
||||
AggregateOp::Count => {
|
||||
aggregates.insert(
|
||||
"count".to_string(),
|
||||
serde_json::Value::Number(nodes.len().into()),
|
||||
);
|
||||
}
|
||||
_ => {
|
||||
// Implement other aggregations
|
||||
}
|
||||
}
|
||||
}
|
||||
QueryStep::Limit { count } => {
|
||||
nodes.truncate(*count);
|
||||
}
|
||||
_ => {
|
||||
// Implement other steps
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let execution_time_ms = start.elapsed().as_millis() as u64;
|
||||
|
||||
let result = QueryResult {
|
||||
query_id: plan.query_id.clone(),
|
||||
nodes,
|
||||
edges,
|
||||
aggregates,
|
||||
stats: QueryStats {
|
||||
execution_time_ms,
|
||||
shards_queried: plan.target_shards.len(),
|
||||
nodes_scanned,
|
||||
edges_scanned,
|
||||
cached: false,
|
||||
},
|
||||
};
|
||||
|
||||
// Cache the result
|
||||
self.query_cache.insert(plan.query.clone(), result.clone());
|
||||
|
||||
info!(
|
||||
"Query {} completed in {}ms",
|
||||
plan.query_id, execution_time_ms
|
||||
);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Begin a distributed transaction
|
||||
pub fn begin_transaction(&self) -> String {
|
||||
let tx_id = Uuid::new_v4().to_string();
|
||||
let transaction = Transaction::new(tx_id.clone());
|
||||
self.transactions.insert(tx_id.clone(), transaction);
|
||||
info!("Started transaction: {}", tx_id);
|
||||
tx_id
|
||||
}
|
||||
|
||||
/// Commit a transaction
|
||||
pub async fn commit_transaction(&self, tx_id: &str) -> Result<()> {
|
||||
if let Some((_, tx)) = self.transactions.remove(tx_id) {
|
||||
// In production, implement 2PC (Two-Phase Commit)
|
||||
info!("Committing transaction: {}", tx_id);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(GraphError::CoordinatorError(format!(
|
||||
"Transaction not found: {}",
|
||||
tx_id
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
/// Rollback a transaction
|
||||
pub async fn rollback_transaction(&self, tx_id: &str) -> Result<()> {
|
||||
if let Some((_, tx)) = self.transactions.remove(tx_id) {
|
||||
warn!("Rolling back transaction: {}", tx_id);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(GraphError::CoordinatorError(format!(
|
||||
"Transaction not found: {}",
|
||||
tx_id
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear query cache
|
||||
pub fn clear_cache(&self) {
|
||||
self.query_cache.clear();
|
||||
info!("Query cache cleared");
|
||||
}
|
||||
}
|
||||
|
||||
/// Distributed transaction tracked by the coordinator.
///
/// NOTE(review): `id`, `shards`, and `state` are currently written but
/// never read — they exist for the future 2PC implementation.
#[derive(Debug, Clone)]
struct Transaction {
    /// Transaction ID (UUID string, also the map key in the coordinator)
    id: String,
    /// Participating shards (empty until shards enlist)
    shards: HashSet<ShardId>,
    /// Transaction state (starts as `Active`)
    state: TransactionState,
    /// Created timestamp
    created_at: DateTime<Utc>,
}
|
||||
|
||||
impl Transaction {
|
||||
fn new(id: String) -> Self {
|
||||
Self {
|
||||
id,
|
||||
shards: HashSet::new(),
|
||||
state: TransactionState::Active,
|
||||
created_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Transaction state for the (planned) two-phase-commit lifecycle.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TransactionState {
    /// Transaction is open and accepting work
    Active,
    /// Prepare phase in progress (2PC phase one)
    Preparing,
    /// Transaction committed successfully
    Committed,
    /// Transaction rolled back
    Aborted,
}
|
||||
|
||||
/// Main coordinator for the entire distributed graph system.
///
/// Thin wrapper that owns a [`ShardCoordinator`] plus configuration.
pub struct Coordinator {
    /// Shard coordinator doing the actual planning/execution
    shard_coordinator: Arc<ShardCoordinator>,
    /// Coordinator configuration
    /// NOTE(review): config is stored and exposed but not yet consulted
    /// by the execution path (e.g. `enable_cache` is not enforced).
    config: CoordinatorConfig,
}
|
||||
|
||||
impl Coordinator {
|
||||
/// Create a new coordinator
|
||||
pub fn new(config: CoordinatorConfig) -> Self {
|
||||
Self {
|
||||
shard_coordinator: Arc::new(ShardCoordinator::new()),
|
||||
config,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the shard coordinator
|
||||
pub fn shard_coordinator(&self) -> Arc<ShardCoordinator> {
|
||||
Arc::clone(&self.shard_coordinator)
|
||||
}
|
||||
|
||||
/// Execute a query
|
||||
pub async fn execute(&self, query: &str) -> Result<QueryResult> {
|
||||
let plan = self.shard_coordinator.plan_query(query)?;
|
||||
self.shard_coordinator.execute_query(plan).await
|
||||
}
|
||||
|
||||
/// Get configuration
|
||||
pub fn config(&self) -> &CoordinatorConfig {
|
||||
&self.config
|
||||
}
|
||||
}
|
||||
|
||||
/// Coordinator configuration.
///
/// NOTE(review): these knobs are not yet wired into `ShardCoordinator`
/// (caching is unconditional, no TTL or time limit is enforced).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoordinatorConfig {
    /// Enable query caching
    pub enable_cache: bool,
    /// Cache TTL in seconds
    pub cache_ttl_seconds: u64,
    /// Maximum query execution time in seconds
    pub max_query_time_seconds: u64,
    /// Enable query optimization
    pub enable_optimization: bool,
}
|
||||
|
||||
impl Default for CoordinatorConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
enable_cache: true,
|
||||
cache_ttl_seconds: 300,
|
||||
max_query_time_seconds: 60,
|
||||
enable_optimization: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::distributed::shard::ShardMetadata;
    use crate::distributed::shard::ShardStrategy;

    /// Registering a shard makes it visible via list_shards/get_shard.
    #[tokio::test]
    async fn test_shard_coordinator() {
        let coordinator = ShardCoordinator::new();

        let metadata = ShardMetadata::new(0, "node-1".to_string(), ShardStrategy::Hash);
        let shard = Arc::new(GraphShard::new(metadata));

        coordinator.register_shard(0, shard);

        assert_eq!(coordinator.list_shards().len(), 1);
        assert!(coordinator.get_shard(0).is_some());
    }

    /// Planning a MATCH query against one registered shard produces a
    /// plan with an ID and at least one step.
    #[tokio::test]
    async fn test_query_planning() {
        let coordinator = ShardCoordinator::new();

        let metadata = ShardMetadata::new(0, "node-1".to_string(), ShardStrategy::Hash);
        let shard = Arc::new(GraphShard::new(metadata));
        coordinator.register_shard(0, shard);

        let plan = coordinator.plan_query("MATCH (n:Person) RETURN n").unwrap();

        assert!(!plan.query_id.is_empty());
        assert!(!plan.steps.is_empty());
    }

    /// A begin/commit round-trip succeeds for a known transaction ID.
    #[tokio::test]
    async fn test_transaction() {
        let coordinator = ShardCoordinator::new();

        let tx_id = coordinator.begin_transaction();
        assert!(!tx_id.is_empty());

        coordinator.commit_transaction(&tx_id).await.unwrap();
    }
}
|
||||
582
vendor/ruvector/crates/ruvector-graph/src/distributed/federation.rs
vendored
Normal file
582
vendor/ruvector/crates/ruvector-graph/src/distributed/federation.rs
vendored
Normal file
@@ -0,0 +1,582 @@
|
||||
//! Cross-cluster federation for distributed graph queries
|
||||
//!
|
||||
//! Enables querying across independent RuVector graph clusters:
|
||||
//! - Cluster discovery and registration
|
||||
//! - Remote query execution
|
||||
//! - Result merging from multiple clusters
|
||||
//! - Cross-cluster authentication and authorization
|
||||
|
||||
use crate::distributed::coordinator::{QueryPlan, QueryResult};
|
||||
use crate::distributed::shard::ShardId;
|
||||
use crate::{GraphError, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use dashmap::DashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::{debug, info, warn};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Unique identifier for a cluster (an opaque string; callers supply it
/// when registering, e.g. "cluster-1").
pub type ClusterId = String;
|
||||
|
||||
/// Remote cluster information as tracked by the [`ClusterRegistry`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RemoteCluster {
    /// Unique cluster ID
    pub cluster_id: ClusterId,
    /// Human-readable cluster name
    pub name: String,
    /// Cluster endpoint URL
    pub endpoint: String,
    /// Last known cluster status (starts as `Unknown`)
    pub status: ClusterStatus,
    /// Authentication token, if required by the remote cluster
    pub auth_token: Option<String>,
    /// Last health check timestamp
    pub last_health_check: DateTime<Utc>,
    /// Free-form cluster metadata
    pub metadata: HashMap<String, String>,
    /// Number of shards in this cluster
    pub shard_count: u32,
    /// Cluster region/datacenter, if known
    pub region: Option<String>,
}
|
||||
|
||||
impl RemoteCluster {
|
||||
/// Create a new remote cluster
|
||||
pub fn new(cluster_id: ClusterId, name: String, endpoint: String) -> Self {
|
||||
Self {
|
||||
cluster_id,
|
||||
name,
|
||||
endpoint,
|
||||
status: ClusterStatus::Unknown,
|
||||
auth_token: None,
|
||||
last_health_check: Utc::now(),
|
||||
metadata: HashMap::new(),
|
||||
shard_count: 0,
|
||||
region: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if cluster is healthy
|
||||
pub fn is_healthy(&self) -> bool {
|
||||
matches!(self.status, ClusterStatus::Healthy)
|
||||
}
|
||||
}
|
||||
|
||||
/// Cluster status as recorded by health checks.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ClusterStatus {
    /// Cluster is healthy and available
    Healthy,
    /// Cluster is degraded but operational
    Degraded,
    /// Cluster is unreachable
    Unreachable,
    /// Cluster status unknown (no health check yet)
    Unknown,
}
|
||||
|
||||
/// Cluster registry for managing federated clusters.
pub struct ClusterRegistry {
    /// Registered clusters keyed by cluster ID
    clusters: Arc<DashMap<ClusterId, RemoteCluster>>,
    /// Cluster discovery configuration (only `auto_discovery` is
    /// consulted so far)
    discovery_config: DiscoveryConfig,
}
|
||||
|
||||
impl ClusterRegistry {
|
||||
/// Create a new cluster registry
|
||||
pub fn new(discovery_config: DiscoveryConfig) -> Self {
|
||||
Self {
|
||||
clusters: Arc::new(DashMap::new()),
|
||||
discovery_config,
|
||||
}
|
||||
}
|
||||
|
||||
/// Register a remote cluster
|
||||
pub fn register_cluster(&self, cluster: RemoteCluster) -> Result<()> {
|
||||
info!(
|
||||
"Registering cluster: {} ({})",
|
||||
cluster.name, cluster.cluster_id
|
||||
);
|
||||
self.clusters.insert(cluster.cluster_id.clone(), cluster);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Unregister a cluster
|
||||
pub fn unregister_cluster(&self, cluster_id: &ClusterId) -> Result<()> {
|
||||
info!("Unregistering cluster: {}", cluster_id);
|
||||
self.clusters.remove(cluster_id).ok_or_else(|| {
|
||||
GraphError::FederationError(format!("Cluster not found: {}", cluster_id))
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get a cluster by ID
|
||||
pub fn get_cluster(&self, cluster_id: &ClusterId) -> Option<RemoteCluster> {
|
||||
self.clusters.get(cluster_id).map(|c| c.value().clone())
|
||||
}
|
||||
|
||||
/// List all registered clusters
|
||||
pub fn list_clusters(&self) -> Vec<RemoteCluster> {
|
||||
self.clusters.iter().map(|e| e.value().clone()).collect()
|
||||
}
|
||||
|
||||
/// List healthy clusters only
|
||||
pub fn healthy_clusters(&self) -> Vec<RemoteCluster> {
|
||||
self.clusters
|
||||
.iter()
|
||||
.filter(|e| e.value().is_healthy())
|
||||
.map(|e| e.value().clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Perform health check on a cluster
|
||||
pub async fn health_check(&self, cluster_id: &ClusterId) -> Result<ClusterStatus> {
|
||||
let cluster = self.get_cluster(cluster_id).ok_or_else(|| {
|
||||
GraphError::FederationError(format!("Cluster not found: {}", cluster_id))
|
||||
})?;
|
||||
|
||||
// In production, make actual HTTP/gRPC health check request
|
||||
// For now, simulate health check
|
||||
let status = ClusterStatus::Healthy;
|
||||
|
||||
// Update cluster status
|
||||
if let Some(mut entry) = self.clusters.get_mut(cluster_id) {
|
||||
entry.status = status;
|
||||
entry.last_health_check = Utc::now();
|
||||
}
|
||||
|
||||
debug!("Health check for cluster {}: {:?}", cluster_id, status);
|
||||
Ok(status)
|
||||
}
|
||||
|
||||
/// Perform health checks on all clusters
|
||||
pub async fn health_check_all(&self) -> HashMap<ClusterId, ClusterStatus> {
|
||||
let mut results = HashMap::new();
|
||||
|
||||
for cluster in self.list_clusters() {
|
||||
match self.health_check(&cluster.cluster_id).await {
|
||||
Ok(status) => {
|
||||
results.insert(cluster.cluster_id, status);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"Health check failed for cluster {}: {}",
|
||||
cluster.cluster_id, e
|
||||
);
|
||||
results.insert(cluster.cluster_id, ClusterStatus::Unreachable);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
|
||||
/// Discover clusters automatically (if enabled)
|
||||
pub async fn discover_clusters(&self) -> Result<Vec<RemoteCluster>> {
|
||||
if !self.discovery_config.auto_discovery {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
info!("Discovering clusters...");
|
||||
|
||||
// In production, implement actual cluster discovery:
|
||||
// - mDNS/DNS-SD for local network
|
||||
// - Consul/etcd for service discovery
|
||||
// - Static configuration file
|
||||
// - Cloud provider APIs (AWS, GCP, Azure)
|
||||
|
||||
// For now, return empty list
|
||||
Ok(Vec::new())
|
||||
}
|
||||
}
|
||||
|
||||
/// Cluster discovery configuration.
///
/// NOTE(review): only `auto_discovery` is currently read
/// (in `ClusterRegistry::discover_clusters`); the other fields are
/// reserved for the real discovery/health-check loops.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscoveryConfig {
    /// Enable automatic cluster discovery
    pub auto_discovery: bool,
    /// Discovery method
    pub discovery_method: DiscoveryMethod,
    /// Discovery interval in seconds
    pub discovery_interval_seconds: u64,
    /// Health check interval in seconds
    pub health_check_interval_seconds: u64,
}
|
||||
|
||||
impl Default for DiscoveryConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
auto_discovery: false,
|
||||
discovery_method: DiscoveryMethod::Static,
|
||||
discovery_interval_seconds: 60,
|
||||
health_check_interval_seconds: 30,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Cluster discovery method.
///
/// NOTE(review): only `Static` behavior exists today; the others are
/// placeholders for future discovery backends.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum DiscoveryMethod {
    /// Static configuration
    Static,
    /// DNS-based discovery
    Dns,
    /// Consul service discovery
    Consul,
    /// etcd service discovery
    Etcd,
    /// Kubernetes service discovery
    Kubernetes,
}
|
||||
|
||||
/// Federated query spanning multiple clusters, tracked while in flight.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FederatedQuery {
    /// Query ID (UUID generated per federated execution)
    pub query_id: String,
    /// Original query text
    pub query: String,
    /// Target clusters for this execution
    pub target_clusters: Vec<ClusterId>,
    /// Query execution strategy (copied from the federation config)
    pub strategy: FederationStrategy,
    /// Created timestamp
    pub created_at: DateTime<Utc>,
}
|
||||
|
||||
/// Federation strategy controlling how target clusters are queried.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum FederationStrategy {
    /// Execute on all clusters in parallel
    Parallel,
    /// Execute on clusters sequentially
    Sequential,
    /// Execute on the primary cluster, fall back to others on failure
    PrimaryWithFallback,
    /// Execute on the nearest/fastest cluster only
    Nearest,
}
|
||||
|
||||
/// Federation engine for cross-cluster queries.
pub struct Federation {
    /// Cluster registry (shared; also handed out via `registry()`)
    registry: Arc<ClusterRegistry>,
    /// Federation configuration
    config: FederationConfig,
    /// Federated queries currently executing, keyed by query ID
    active_queries: Arc<DashMap<String, FederatedQuery>>,
}
|
||||
|
||||
impl Federation {
|
||||
/// Create a new federation engine
|
||||
pub fn new(config: FederationConfig) -> Self {
|
||||
let discovery_config = DiscoveryConfig::default();
|
||||
Self {
|
||||
registry: Arc::new(ClusterRegistry::new(discovery_config)),
|
||||
config,
|
||||
active_queries: Arc::new(DashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the cluster registry
|
||||
pub fn registry(&self) -> Arc<ClusterRegistry> {
|
||||
Arc::clone(&self.registry)
|
||||
}
|
||||
|
||||
/// Execute a federated query across multiple clusters
|
||||
pub async fn execute_federated(
|
||||
&self,
|
||||
query: &str,
|
||||
target_clusters: Option<Vec<ClusterId>>,
|
||||
) -> Result<FederatedQueryResult> {
|
||||
let query_id = Uuid::new_v4().to_string();
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
// Determine target clusters
|
||||
let clusters = if let Some(targets) = target_clusters {
|
||||
targets
|
||||
.into_iter()
|
||||
.filter_map(|id| self.registry.get_cluster(&id))
|
||||
.collect()
|
||||
} else {
|
||||
self.registry.healthy_clusters()
|
||||
};
|
||||
|
||||
if clusters.is_empty() {
|
||||
return Err(GraphError::FederationError(
|
||||
"No healthy clusters available".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
info!(
|
||||
"Executing federated query {} across {} clusters",
|
||||
query_id,
|
||||
clusters.len()
|
||||
);
|
||||
|
||||
let federated_query = FederatedQuery {
|
||||
query_id: query_id.clone(),
|
||||
query: query.to_string(),
|
||||
target_clusters: clusters.iter().map(|c| c.cluster_id.clone()).collect(),
|
||||
strategy: self.config.default_strategy,
|
||||
created_at: Utc::now(),
|
||||
};
|
||||
|
||||
self.active_queries
|
||||
.insert(query_id.clone(), federated_query.clone());
|
||||
|
||||
// Execute query on each cluster based on strategy
|
||||
let mut cluster_results = HashMap::new();
|
||||
|
||||
match self.config.default_strategy {
|
||||
FederationStrategy::Parallel => {
|
||||
// Execute on all clusters in parallel
|
||||
let mut handles = Vec::new();
|
||||
|
||||
for cluster in &clusters {
|
||||
let cluster_id = cluster.cluster_id.clone();
|
||||
let query_str = query.to_string();
|
||||
let cluster_clone = cluster.clone();
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
Self::execute_on_cluster(&cluster_clone, &query_str).await
|
||||
});
|
||||
|
||||
handles.push((cluster_id, handle));
|
||||
}
|
||||
|
||||
// Collect results
|
||||
for (cluster_id, handle) in handles {
|
||||
match handle.await {
|
||||
Ok(Ok(result)) => {
|
||||
cluster_results.insert(cluster_id, result);
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
warn!("Query failed on cluster {}: {}", cluster_id, e);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Task failed for cluster {}: {}", cluster_id, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
FederationStrategy::Sequential => {
|
||||
// Execute on clusters sequentially
|
||||
for cluster in &clusters {
|
||||
match Self::execute_on_cluster(cluster, query).await {
|
||||
Ok(result) => {
|
||||
cluster_results.insert(cluster.cluster_id.clone(), result);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Query failed on cluster {}: {}", cluster.cluster_id, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
FederationStrategy::Nearest | FederationStrategy::PrimaryWithFallback => {
|
||||
// Execute on first healthy cluster
|
||||
if let Some(cluster) = clusters.first() {
|
||||
match Self::execute_on_cluster(cluster, query).await {
|
||||
Ok(result) => {
|
||||
cluster_results.insert(cluster.cluster_id.clone(), result);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Query failed on cluster {}: {}", cluster.cluster_id, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Merge results from all clusters
|
||||
let merged_result = self.merge_results(cluster_results)?;
|
||||
|
||||
let execution_time_ms = start.elapsed().as_millis() as u64;
|
||||
|
||||
// Remove from active queries
|
||||
self.active_queries.remove(&query_id);
|
||||
|
||||
Ok(FederatedQueryResult {
|
||||
query_id,
|
||||
merged_result,
|
||||
clusters_queried: clusters.len(),
|
||||
execution_time_ms,
|
||||
})
|
||||
}
|
||||
|
||||
/// Execute query on a single remote cluster
|
||||
async fn execute_on_cluster(cluster: &RemoteCluster, query: &str) -> Result<QueryResult> {
|
||||
debug!("Executing query on cluster: {}", cluster.cluster_id);
|
||||
|
||||
// In production, make actual HTTP/gRPC call to remote cluster
|
||||
// For now, return empty result
|
||||
Ok(QueryResult {
|
||||
query_id: Uuid::new_v4().to_string(),
|
||||
nodes: Vec::new(),
|
||||
edges: Vec::new(),
|
||||
aggregates: HashMap::new(),
|
||||
stats: crate::distributed::coordinator::QueryStats {
|
||||
execution_time_ms: 0,
|
||||
shards_queried: 0,
|
||||
nodes_scanned: 0,
|
||||
edges_scanned: 0,
|
||||
cached: false,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
/// Merge results from multiple clusters
|
||||
fn merge_results(&self, results: HashMap<ClusterId, QueryResult>) -> Result<QueryResult> {
|
||||
if results.is_empty() {
|
||||
return Err(GraphError::FederationError(
|
||||
"No results to merge".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let mut merged = QueryResult {
|
||||
query_id: Uuid::new_v4().to_string(),
|
||||
nodes: Vec::new(),
|
||||
edges: Vec::new(),
|
||||
aggregates: HashMap::new(),
|
||||
stats: crate::distributed::coordinator::QueryStats {
|
||||
execution_time_ms: 0,
|
||||
shards_queried: 0,
|
||||
nodes_scanned: 0,
|
||||
edges_scanned: 0,
|
||||
cached: false,
|
||||
},
|
||||
};
|
||||
|
||||
for (cluster_id, result) in results {
|
||||
debug!("Merging results from cluster: {}", cluster_id);
|
||||
|
||||
// Merge nodes (deduplicating by ID)
|
||||
for node in result.nodes {
|
||||
if !merged.nodes.iter().any(|n| n.id == node.id) {
|
||||
merged.nodes.push(node);
|
||||
}
|
||||
}
|
||||
|
||||
// Merge edges (deduplicating by ID)
|
||||
for edge in result.edges {
|
||||
if !merged.edges.iter().any(|e| e.id == edge.id) {
|
||||
merged.edges.push(edge);
|
||||
}
|
||||
}
|
||||
|
||||
// Merge aggregates
|
||||
for (key, value) in result.aggregates {
|
||||
merged
|
||||
.aggregates
|
||||
.insert(format!("{}_{}", cluster_id, key), value);
|
||||
}
|
||||
|
||||
// Aggregate stats
|
||||
merged.stats.execution_time_ms = merged
|
||||
.stats
|
||||
.execution_time_ms
|
||||
.max(result.stats.execution_time_ms);
|
||||
merged.stats.shards_queried += result.stats.shards_queried;
|
||||
merged.stats.nodes_scanned += result.stats.nodes_scanned;
|
||||
merged.stats.edges_scanned += result.stats.edges_scanned;
|
||||
}
|
||||
|
||||
Ok(merged)
|
||||
}
|
||||
|
||||
/// Get configuration
|
||||
pub fn config(&self) -> &FederationConfig {
|
||||
&self.config
|
||||
}
|
||||
}
|
||||
|
||||
/// Federation configuration.
///
/// NOTE(review): `query_timeout_seconds` and `enable_caching` are not
/// yet consulted by `Federation::execute_federated`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FederationConfig {
    /// Default federation strategy
    pub default_strategy: FederationStrategy,
    /// Maximum number of clusters to query
    pub max_clusters: usize,
    /// Query timeout in seconds
    pub query_timeout_seconds: u64,
    /// Enable result caching
    pub enable_caching: bool,
}
|
||||
|
||||
impl Default for FederationConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
default_strategy: FederationStrategy::Parallel,
|
||||
max_clusters: 10,
|
||||
query_timeout_seconds: 30,
|
||||
enable_caching: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Federated query result
///
/// The outcome of running one query across several clusters: the merged
/// payload plus federation-level bookkeeping.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FederatedQueryResult {
    /// Query ID
    pub query_id: String,
    /// Merged result from all clusters (deduplicated; see `merge_results`)
    pub merged_result: QueryResult,
    /// Number of clusters queried
    pub clusters_queried: usize,
    /// Total execution time in milliseconds
    pub execution_time_ms: u64,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Registering a cluster makes it visible via both list and lookup.
    #[test]
    fn test_cluster_registry() {
        let registry = ClusterRegistry::new(DiscoveryConfig::default());

        let remote = RemoteCluster::new(
            "cluster-1".to_string(),
            "Test Cluster".to_string(),
            "http://localhost:8080".to_string(),
        );
        registry.register_cluster(remote.clone()).unwrap();

        assert_eq!(registry.list_clusters().len(), 1);
        assert!(registry.get_cluster(&"cluster-1".to_string()).is_some());
    }

    /// A federation accepts cluster registrations through its registry.
    #[tokio::test]
    async fn test_federation() {
        let federation = Federation::new(FederationConfig::default());

        let remote = RemoteCluster::new(
            "cluster-1".to_string(),
            "Test Cluster".to_string(),
            "http://localhost:8080".to_string(),
        );
        federation.registry().register_cluster(remote).unwrap();

        // Test would execute federated query in production
    }

    /// A freshly created remote cluster reports unhealthy.
    #[test]
    fn test_remote_cluster() {
        let remote = RemoteCluster::new(
            "test".to_string(),
            "Test".to_string(),
            "http://localhost".to_string(),
        );

        assert!(!remote.is_healthy());
    }
}
|
||||
623
vendor/ruvector/crates/ruvector-graph/src/distributed/gossip.rs
vendored
Normal file
623
vendor/ruvector/crates/ruvector-graph/src/distributed/gossip.rs
vendored
Normal file
@@ -0,0 +1,623 @@
|
||||
//! Gossip protocol for cluster membership and health monitoring
|
||||
//!
|
||||
//! Implements SWIM (Scalable Weakly-consistent Infection-style Membership) protocol:
|
||||
//! - Fast failure detection
|
||||
//! - Efficient membership propagation
|
||||
//! - Low network overhead
|
||||
//! - Automatic node discovery
|
||||
|
||||
use crate::{GraphError, Result};
|
||||
use chrono::{DateTime, Duration as ChronoDuration, Utc};
|
||||
use dashmap::DashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::{debug, info, warn};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Node identifier in the cluster
///
/// NOTE(review): this alias is local to the gossip module and is a
/// separate name from `crate::distributed::shard::NodeId` — confirm the
/// two are intended to stay independent.
pub type NodeId = String;
|
||||
|
||||
/// Gossip message types
///
/// Wire-level protocol messages exchanged between cluster members.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum GossipMessage {
    /// Ping message for health check
    Ping {
        from: NodeId,
        sequence: u64,
        timestamp: DateTime<Utc>,
    },
    /// Ack response to ping, echoing the ping's sequence number
    Ack {
        from: NodeId,
        to: NodeId,
        sequence: u64,
        timestamp: DateTime<Utc>,
    },
    /// Indirect ping through intermediary
    ///
    /// NOTE(review): currently accepted but ignored by
    /// `GossipMembership::handle_message`.
    IndirectPing {
        from: NodeId,
        target: NodeId,
        intermediary: NodeId,
        sequence: u64,
    },
    /// Membership update carrying a batch of events plus the sender's
    /// membership version
    MembershipUpdate {
        from: NodeId,
        updates: Vec<MembershipEvent>,
        version: u64,
    },
    /// Join request announcing a new node's address and metadata
    Join {
        node_id: NodeId,
        address: SocketAddr,
        metadata: HashMap<String, String>,
    },
    /// Leave notification for a graceful departure
    Leave { node_id: NodeId },
}
|
||||
|
||||
/// Membership event types
///
/// State transitions in the cluster membership, propagated via
/// `GossipMessage::MembershipUpdate` and emitted to local listeners.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MembershipEvent {
    /// Node joined the cluster
    Join {
        node_id: NodeId,
        address: SocketAddr,
        timestamp: DateTime<Utc>,
    },
    /// Node left the cluster (graceful departure)
    Leave {
        node_id: NodeId,
        timestamp: DateTime<Utc>,
    },
    /// Node suspected to be failed (unresponsive but not yet declared dead)
    Suspect {
        node_id: NodeId,
        timestamp: DateTime<Utc>,
    },
    /// Node confirmed alive (refutes a prior suspicion)
    Alive {
        node_id: NodeId,
        timestamp: DateTime<Utc>,
    },
    /// Node confirmed dead (suspicion timeout expired)
    Dead {
        node_id: NodeId,
        timestamp: DateTime<Utc>,
    },
}
|
||||
|
||||
/// Node health status
///
/// Lifecycle of a member as tracked locally. `mark_seen` restores any
/// state except `Left` back to `Alive`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum NodeHealth {
    /// Node is healthy and responsive
    Alive,
    /// Node is suspected to be failed
    Suspect,
    /// Node is confirmed dead
    Dead,
    /// Node explicitly left
    Left,
}
|
||||
|
||||
/// Member information in the gossip protocol
///
/// Local view of one cluster node, updated as pings/acks arrive.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Member {
    /// Node identifier
    pub node_id: NodeId,
    /// Network address
    pub address: SocketAddr,
    /// Current health status
    pub health: NodeHealth,
    /// Last time we heard from this node
    pub last_seen: DateTime<Utc>,
    /// Incarnation number (for conflict resolution)
    ///
    /// NOTE(review): initialized to 0 and never incremented anywhere in
    /// this module yet.
    pub incarnation: u64,
    /// Node metadata
    pub metadata: HashMap<String, String>,
    /// Number of consecutive ping failures
    pub failure_count: u32,
}
|
||||
|
||||
impl Member {
|
||||
/// Create a new member
|
||||
pub fn new(node_id: NodeId, address: SocketAddr) -> Self {
|
||||
Self {
|
||||
node_id,
|
||||
address,
|
||||
health: NodeHealth::Alive,
|
||||
last_seen: Utc::now(),
|
||||
incarnation: 0,
|
||||
metadata: HashMap::new(),
|
||||
failure_count: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if member is healthy
|
||||
pub fn is_healthy(&self) -> bool {
|
||||
matches!(self.health, NodeHealth::Alive)
|
||||
}
|
||||
|
||||
/// Mark as seen
|
||||
pub fn mark_seen(&mut self) {
|
||||
self.last_seen = Utc::now();
|
||||
self.failure_count = 0;
|
||||
if self.health != NodeHealth::Left {
|
||||
self.health = NodeHealth::Alive;
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment failure count
|
||||
pub fn increment_failures(&mut self) {
|
||||
self.failure_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Gossip configuration
///
/// Timing and fan-out knobs for the gossip loop and failure detector.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GossipConfig {
    /// Gossip interval in milliseconds
    pub gossip_interval_ms: u64,
    /// Number of nodes to gossip with per interval
    pub gossip_fanout: usize,
    /// Ping timeout in milliseconds
    ///
    /// NOTE(review): not consulted anywhere in this module yet.
    pub ping_timeout_ms: u64,
    /// Number of ping failures before suspecting node
    ///
    /// NOTE(review): not consulted anywhere in this module yet.
    pub suspect_threshold: u32,
    /// Number of indirect ping nodes
    ///
    /// NOTE(review): not consulted anywhere in this module yet.
    pub indirect_ping_nodes: usize,
    /// Suspicion timeout in seconds before a `Suspect` node is declared
    /// `Dead` by the failure detector
    pub suspicion_timeout_seconds: u64,
}
|
||||
|
||||
impl Default for GossipConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
gossip_interval_ms: 1000,
|
||||
gossip_fanout: 3,
|
||||
ping_timeout_ms: 500,
|
||||
suspect_threshold: 3,
|
||||
indirect_ping_nodes: 3,
|
||||
suspicion_timeout_seconds: 30,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Gossip-based membership protocol
///
/// All shared state lives behind `Arc`s so that cloned handles (see the
/// manual `Clone` impl) observe the same membership data from spawned
/// background tasks.
pub struct GossipMembership {
    /// Local node ID
    local_node_id: NodeId,
    /// Local node address
    local_address: SocketAddr,
    /// Configuration
    config: GossipConfig,
    /// Cluster members (always contains the local node)
    members: Arc<DashMap<NodeId, Member>>,
    /// Membership version (incremented on changes)
    version: Arc<RwLock<u64>>,
    /// Pending acks, keyed by ping sequence number
    pending_acks: Arc<DashMap<u64, PendingAck>>,
    /// Sequence number for messages
    sequence: Arc<RwLock<u64>>,
    /// Event listeners
    ///
    /// NOTE(review): listeners are registered via `add_listener` but
    /// `emit_event` never invokes them — see that method.
    event_listeners: Arc<RwLock<Vec<Box<dyn Fn(MembershipEvent) + Send + Sync>>>>,
}
|
||||
|
||||
/// Pending acknowledgment
///
/// Bookkeeping entry recorded by `send_ping` and removed by `handle_ack`.
///
/// NOTE(review): both fields are currently write-only (no code reads
/// them), so dead-code warnings are likely until ack timeout handling
/// is implemented.
struct PendingAck {
    target: NodeId,
    sent_at: DateTime<Utc>,
}
|
||||
|
||||
impl GossipMembership {
|
||||
/// Create a new gossip membership
|
||||
pub fn new(node_id: NodeId, address: SocketAddr, config: GossipConfig) -> Self {
|
||||
let members = Arc::new(DashMap::new());
|
||||
|
||||
// Add self to members
|
||||
let local_member = Member::new(node_id.clone(), address);
|
||||
members.insert(node_id.clone(), local_member);
|
||||
|
||||
Self {
|
||||
local_node_id: node_id,
|
||||
local_address: address,
|
||||
config,
|
||||
members,
|
||||
version: Arc::new(RwLock::new(0)),
|
||||
pending_acks: Arc::new(DashMap::new()),
|
||||
sequence: Arc::new(RwLock::new(0)),
|
||||
event_listeners: Arc::new(RwLock::new(Vec::new())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Start the gossip protocol
|
||||
pub async fn start(&self) -> Result<()> {
|
||||
info!("Starting gossip protocol for node: {}", self.local_node_id);
|
||||
|
||||
// Start periodic gossip
|
||||
let gossip_self = self.clone();
|
||||
tokio::spawn(async move {
|
||||
gossip_self.run_gossip_loop().await;
|
||||
});
|
||||
|
||||
// Start failure detection
|
||||
let detection_self = self.clone();
|
||||
tokio::spawn(async move {
|
||||
detection_self.run_failure_detection().await;
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a seed node to join cluster
|
||||
pub async fn join(&self, seed_address: SocketAddr) -> Result<()> {
|
||||
info!("Joining cluster via seed: {}", seed_address);
|
||||
|
||||
// Send join message
|
||||
let join_msg = GossipMessage::Join {
|
||||
node_id: self.local_node_id.clone(),
|
||||
address: self.local_address,
|
||||
metadata: HashMap::new(),
|
||||
};
|
||||
|
||||
// In production, send actual network message
|
||||
// For now, just log
|
||||
debug!("Would send join message to {}", seed_address);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Leave the cluster gracefully
|
||||
pub async fn leave(&self) -> Result<()> {
|
||||
info!("Leaving cluster: {}", self.local_node_id);
|
||||
|
||||
// Update own status
|
||||
if let Some(mut member) = self.members.get_mut(&self.local_node_id) {
|
||||
member.health = NodeHealth::Left;
|
||||
}
|
||||
|
||||
// Broadcast leave message
|
||||
let leave_msg = GossipMessage::Leave {
|
||||
node_id: self.local_node_id.clone(),
|
||||
};
|
||||
|
||||
self.broadcast_event(MembershipEvent::Leave {
|
||||
node_id: self.local_node_id.clone(),
|
||||
timestamp: Utc::now(),
|
||||
})
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get all cluster members
|
||||
pub fn get_members(&self) -> Vec<Member> {
|
||||
self.members.iter().map(|e| e.value().clone()).collect()
|
||||
}
|
||||
|
||||
/// Get healthy members only
|
||||
pub fn get_healthy_members(&self) -> Vec<Member> {
|
||||
self.members
|
||||
.iter()
|
||||
.filter(|e| e.value().is_healthy())
|
||||
.map(|e| e.value().clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get a specific member
|
||||
pub fn get_member(&self, node_id: &NodeId) -> Option<Member> {
|
||||
self.members.get(node_id).map(|m| m.value().clone())
|
||||
}
|
||||
|
||||
/// Handle incoming gossip message
|
||||
pub async fn handle_message(&self, message: GossipMessage) -> Result<()> {
|
||||
match message {
|
||||
GossipMessage::Ping { from, sequence, .. } => self.handle_ping(from, sequence).await,
|
||||
GossipMessage::Ack { from, sequence, .. } => self.handle_ack(from, sequence).await,
|
||||
GossipMessage::MembershipUpdate { updates, .. } => {
|
||||
self.handle_membership_update(updates).await
|
||||
}
|
||||
GossipMessage::Join {
|
||||
node_id,
|
||||
address,
|
||||
metadata,
|
||||
} => self.handle_join(node_id, address, metadata).await,
|
||||
GossipMessage::Leave { node_id } => self.handle_leave(node_id).await,
|
||||
_ => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the gossip loop
|
||||
async fn run_gossip_loop(&self) {
|
||||
let interval = std::time::Duration::from_millis(self.config.gossip_interval_ms);
|
||||
|
||||
loop {
|
||||
tokio::time::sleep(interval).await;
|
||||
|
||||
// Select random members to gossip with
|
||||
let members = self.get_healthy_members();
|
||||
let targets: Vec<_> = members
|
||||
.into_iter()
|
||||
.filter(|m| m.node_id != self.local_node_id)
|
||||
.take(self.config.gossip_fanout)
|
||||
.collect();
|
||||
|
||||
for target in targets {
|
||||
self.send_ping(target.node_id).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Run failure detection
|
||||
async fn run_failure_detection(&self) {
|
||||
let interval = std::time::Duration::from_secs(5);
|
||||
|
||||
loop {
|
||||
tokio::time::sleep(interval).await;
|
||||
|
||||
let now = Utc::now();
|
||||
let timeout = ChronoDuration::seconds(self.config.suspicion_timeout_seconds as i64);
|
||||
|
||||
for mut entry in self.members.iter_mut() {
|
||||
let member = entry.value_mut();
|
||||
|
||||
if member.node_id == self.local_node_id {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if node has timed out
|
||||
if member.health == NodeHealth::Suspect {
|
||||
let elapsed = now.signed_duration_since(member.last_seen);
|
||||
if elapsed > timeout {
|
||||
debug!("Marking node as dead: {}", member.node_id);
|
||||
member.health = NodeHealth::Dead;
|
||||
|
||||
let event = MembershipEvent::Dead {
|
||||
node_id: member.node_id.clone(),
|
||||
timestamp: now,
|
||||
};
|
||||
|
||||
self.emit_event(event);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Send ping to a node
|
||||
async fn send_ping(&self, target: NodeId) {
|
||||
let mut seq = self.sequence.write().await;
|
||||
*seq += 1;
|
||||
let sequence = *seq;
|
||||
drop(seq);
|
||||
|
||||
let ping = GossipMessage::Ping {
|
||||
from: self.local_node_id.clone(),
|
||||
sequence,
|
||||
timestamp: Utc::now(),
|
||||
};
|
||||
|
||||
// Track pending ack
|
||||
self.pending_acks.insert(
|
||||
sequence,
|
||||
PendingAck {
|
||||
target: target.clone(),
|
||||
sent_at: Utc::now(),
|
||||
},
|
||||
);
|
||||
|
||||
debug!("Sending ping to {}", target);
|
||||
// In production, send actual network message
|
||||
}
|
||||
|
||||
/// Handle ping message
|
||||
async fn handle_ping(&self, from: NodeId, sequence: u64) -> Result<()> {
|
||||
debug!("Received ping from {}", from);
|
||||
|
||||
// Update member status
|
||||
if let Some(mut member) = self.members.get_mut(&from) {
|
||||
member.mark_seen();
|
||||
}
|
||||
|
||||
// Send ack
|
||||
let ack = GossipMessage::Ack {
|
||||
from: self.local_node_id.clone(),
|
||||
to: from,
|
||||
sequence,
|
||||
timestamp: Utc::now(),
|
||||
};
|
||||
|
||||
// In production, send actual network message
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle ack message
|
||||
async fn handle_ack(&self, from: NodeId, sequence: u64) -> Result<()> {
|
||||
debug!("Received ack from {}", from);
|
||||
|
||||
// Remove from pending
|
||||
self.pending_acks.remove(&sequence);
|
||||
|
||||
// Update member status
|
||||
if let Some(mut member) = self.members.get_mut(&from) {
|
||||
member.mark_seen();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle membership update
|
||||
async fn handle_membership_update(&self, updates: Vec<MembershipEvent>) -> Result<()> {
|
||||
for event in updates {
|
||||
match &event {
|
||||
MembershipEvent::Join {
|
||||
node_id, address, ..
|
||||
} => {
|
||||
if !self.members.contains_key(node_id) {
|
||||
let member = Member::new(node_id.clone(), *address);
|
||||
self.members.insert(node_id.clone(), member);
|
||||
}
|
||||
}
|
||||
MembershipEvent::Suspect { node_id, .. } => {
|
||||
if let Some(mut member) = self.members.get_mut(node_id) {
|
||||
member.health = NodeHealth::Suspect;
|
||||
}
|
||||
}
|
||||
MembershipEvent::Dead { node_id, .. } => {
|
||||
if let Some(mut member) = self.members.get_mut(node_id) {
|
||||
member.health = NodeHealth::Dead;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
self.emit_event(event);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle join request
|
||||
async fn handle_join(
|
||||
&self,
|
||||
node_id: NodeId,
|
||||
address: SocketAddr,
|
||||
metadata: HashMap<String, String>,
|
||||
) -> Result<()> {
|
||||
info!("Node joining: {}", node_id);
|
||||
|
||||
let mut member = Member::new(node_id.clone(), address);
|
||||
member.metadata = metadata;
|
||||
|
||||
self.members.insert(node_id.clone(), member);
|
||||
|
||||
let event = MembershipEvent::Join {
|
||||
node_id,
|
||||
address,
|
||||
timestamp: Utc::now(),
|
||||
};
|
||||
|
||||
self.broadcast_event(event).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle leave notification
|
||||
async fn handle_leave(&self, node_id: NodeId) -> Result<()> {
|
||||
info!("Node leaving: {}", node_id);
|
||||
|
||||
if let Some(mut member) = self.members.get_mut(&node_id) {
|
||||
member.health = NodeHealth::Left;
|
||||
}
|
||||
|
||||
let event = MembershipEvent::Leave {
|
||||
node_id,
|
||||
timestamp: Utc::now(),
|
||||
};
|
||||
|
||||
self.emit_event(event);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Broadcast event to all members
|
||||
async fn broadcast_event(&self, event: MembershipEvent) {
|
||||
let mut version = self.version.write().await;
|
||||
*version += 1;
|
||||
drop(version);
|
||||
|
||||
self.emit_event(event);
|
||||
}
|
||||
|
||||
/// Emit event to listeners
|
||||
fn emit_event(&self, event: MembershipEvent) {
|
||||
// In production, call event listeners
|
||||
debug!("Membership event: {:?}", event);
|
||||
}
|
||||
|
||||
/// Add event listener
|
||||
pub async fn add_listener<F>(&self, listener: F)
|
||||
where
|
||||
F: Fn(MembershipEvent) + Send + Sync + 'static,
|
||||
{
|
||||
let mut listeners = self.event_listeners.write().await;
|
||||
listeners.push(Box::new(listener));
|
||||
}
|
||||
|
||||
/// Get membership version
|
||||
pub async fn get_version(&self) -> u64 {
|
||||
*self.version.read().await
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for GossipMembership {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
local_node_id: self.local_node_id.clone(),
|
||||
local_address: self.local_address,
|
||||
config: self.config.clone(),
|
||||
members: Arc::clone(&self.members),
|
||||
version: Arc::clone(&self.version),
|
||||
pending_acks: Arc::clone(&self.pending_acks),
|
||||
sequence: Arc::clone(&self.sequence),
|
||||
event_listeners: Arc::clone(&self.event_listeners),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::net::{IpAddr, Ipv4Addr};

    /// Loopback socket address helper for tests.
    fn create_test_address(port: u16) -> SocketAddr {
        let loopback = IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1));
        SocketAddr::new(loopback, port)
    }

    /// A fresh membership contains exactly one member: the local node.
    #[tokio::test]
    async fn test_gossip_membership() {
        let membership = GossipMembership::new(
            "node-1".to_string(),
            create_test_address(8000),
            GossipConfig::default(),
        );

        assert_eq!(membership.get_members().len(), 1);
    }

    /// Joining adds a member; leaving flips it to `Left` without removal.
    #[tokio::test]
    async fn test_join_leave() {
        let membership = GossipMembership::new(
            "node-1".to_string(),
            create_test_address(8000),
            GossipConfig::default(),
        );

        membership
            .handle_join(
                "node-2".to_string(),
                create_test_address(8001),
                HashMap::new(),
            )
            .await
            .unwrap();
        assert_eq!(membership.get_members().len(), 2);

        membership.handle_leave("node-2".to_string()).await.unwrap();

        let departed = membership.get_member(&"node-2".to_string()).unwrap();
        assert_eq!(departed.health, NodeHealth::Left);
    }

    /// `mark_seen` revives a suspected member.
    #[test]
    fn test_member() {
        let mut member = Member::new("test".to_string(), create_test_address(8000));
        assert!(member.is_healthy());

        member.health = NodeHealth::Suspect;
        assert!(!member.is_healthy());

        member.mark_seen();
        assert!(member.is_healthy());
    }
}
|
||||
25
vendor/ruvector/crates/ruvector-graph/src/distributed/mod.rs
vendored
Normal file
25
vendor/ruvector/crates/ruvector-graph/src/distributed/mod.rs
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
//! Distributed graph query capabilities
|
||||
//!
|
||||
//! This module provides comprehensive distributed and federated graph operations:
|
||||
//! - Graph sharding with multiple partitioning strategies
|
||||
//! - Distributed query coordination and execution
|
||||
//! - Cross-cluster federation for multi-cluster queries
|
||||
//! - Graph-aware replication extending ruvector-replication
|
||||
//! - Gossip-based cluster membership and health monitoring
|
||||
//! - High-performance gRPC communication layer
|
||||
|
||||
pub mod coordinator;
|
||||
pub mod federation;
|
||||
pub mod gossip;
|
||||
pub mod replication;
|
||||
pub mod rpc;
|
||||
pub mod shard;
|
||||
|
||||
pub use coordinator::{Coordinator, QueryPlan, ShardCoordinator};
|
||||
pub use federation::{ClusterRegistry, FederatedQuery, Federation, RemoteCluster};
|
||||
pub use gossip::{GossipConfig, GossipMembership, MembershipEvent, NodeHealth};
|
||||
pub use replication::{GraphReplication, GraphReplicationConfig, ReplicationStrategy};
|
||||
pub use rpc::{GraphRpcService, RpcClient, RpcServer};
|
||||
pub use shard::{
|
||||
EdgeCutMinimizer, GraphShard, HashPartitioner, RangePartitioner, ShardMetadata, ShardStrategy,
|
||||
};
|
||||
407
vendor/ruvector/crates/ruvector-graph/src/distributed/replication.rs
vendored
Normal file
407
vendor/ruvector/crates/ruvector-graph/src/distributed/replication.rs
vendored
Normal file
@@ -0,0 +1,407 @@
|
||||
//! Graph-aware data replication extending ruvector-replication
|
||||
//!
|
||||
//! Provides graph-specific replication strategies:
|
||||
//! - Vertex-cut replication for high-degree nodes
|
||||
//! - Edge replication with consistency guarantees
|
||||
//! - Subgraph replication for locality
|
||||
//! - Conflict-free replicated graphs (CRG)
|
||||
|
||||
use crate::distributed::shard::{EdgeData, GraphShard, NodeData, NodeId, ShardId};
|
||||
use crate::{GraphError, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use dashmap::DashMap;
|
||||
use ruvector_replication::{
|
||||
Replica, ReplicaRole, ReplicaSet, ReplicationLog, SyncManager, SyncMode,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
use tracing::{debug, info, warn};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Graph replication strategy
///
/// Selects how graph mutations are copied across shards; applied in
/// `GraphReplication::replicate_node_add`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ReplicationStrategy {
    /// Replicate entire shards
    FullShard,
    /// Replicate high-degree nodes (vertex-cut)
    VertexCut,
    /// Replicate based on subgraph locality
    ///
    /// NOTE(review): currently behaves like `FullShard` in
    /// `replicate_node_add` — confirm this is intended.
    Subgraph,
    /// Hybrid approach
    ///
    /// NOTE(review): currently behaves like `FullShard` in
    /// `replicate_node_add` — confirm this is intended.
    Hybrid,
}
|
||||
|
||||
/// Graph replication configuration
///
/// Knobs for the [`GraphReplication`] manager; see `Default` for the
/// stock values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphReplicationConfig {
    /// Replication factor (number of copies)
    pub replication_factor: usize,
    /// Replication strategy
    pub strategy: ReplicationStrategy,
    /// High-degree threshold for vertex-cut (nodes whose tracked degree
    /// reaches this value are replicated across multiple shards)
    pub high_degree_threshold: usize,
    /// Synchronization mode
    pub sync_mode: SyncMode,
    /// Enable conflict resolution
    ///
    /// NOTE(review): not consulted anywhere in this module yet.
    pub enable_conflict_resolution: bool,
    /// Replication timeout in seconds
    ///
    /// NOTE(review): not consulted anywhere in this module yet.
    pub timeout_seconds: u64,
}
|
||||
|
||||
impl Default for GraphReplicationConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
replication_factor: 3,
|
||||
strategy: ReplicationStrategy::FullShard,
|
||||
high_degree_threshold: 100,
|
||||
sync_mode: SyncMode::Async,
|
||||
enable_conflict_resolution: true,
|
||||
timeout_seconds: 30,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Graph replication manager
///
/// Tracks per-shard replica sets and sync managers, plus node-degree
/// information used by the vertex-cut strategy. All maps are shared
/// concurrent structures (`DashMap` behind `Arc`).
pub struct GraphReplication {
    /// Configuration
    config: GraphReplicationConfig,
    /// Replica sets per shard
    replica_sets: Arc<DashMap<ShardId, Arc<ReplicaSet>>>,
    /// Sync managers per shard
    sync_managers: Arc<DashMap<ShardId, Arc<SyncManager>>>,
    /// Observed degree per node (incremented on each replicated edge;
    /// consulted by the vertex-cut strategy)
    high_degree_nodes: Arc<DashMap<NodeId, usize>>,
    /// Node replication metadata: for vertex-cut nodes, the shard ids
    /// (stringified) that hold a copy
    node_replicas: Arc<DashMap<NodeId, Vec<String>>>,
}
|
||||
|
||||
impl GraphReplication {
|
||||
/// Create a new graph replication manager
|
||||
pub fn new(config: GraphReplicationConfig) -> Self {
|
||||
Self {
|
||||
config,
|
||||
replica_sets: Arc::new(DashMap::new()),
|
||||
sync_managers: Arc::new(DashMap::new()),
|
||||
high_degree_nodes: Arc::new(DashMap::new()),
|
||||
node_replicas: Arc::new(DashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize replication for a shard
|
||||
pub fn initialize_shard_replication(
|
||||
&self,
|
||||
shard_id: ShardId,
|
||||
primary_node: String,
|
||||
replica_nodes: Vec<String>,
|
||||
) -> Result<()> {
|
||||
info!(
|
||||
"Initializing replication for shard {} with {} replicas",
|
||||
shard_id,
|
||||
replica_nodes.len()
|
||||
);
|
||||
|
||||
// Create replica set
|
||||
let mut replica_set = ReplicaSet::new(format!("shard-{}", shard_id));
|
||||
|
||||
// Add primary replica
|
||||
replica_set
|
||||
.add_replica(
|
||||
&primary_node,
|
||||
&format!("{}:9001", primary_node),
|
||||
ReplicaRole::Primary,
|
||||
)
|
||||
.map_err(|e| GraphError::ReplicationError(e))?;
|
||||
|
||||
// Add secondary replicas
|
||||
for (idx, node) in replica_nodes.iter().enumerate() {
|
||||
replica_set
|
||||
.add_replica(
|
||||
&format!("{}-replica-{}", node, idx),
|
||||
&format!("{}:9001", node),
|
||||
ReplicaRole::Secondary,
|
||||
)
|
||||
.map_err(|e| GraphError::ReplicationError(e))?;
|
||||
}
|
||||
|
||||
let replica_set = Arc::new(replica_set);
|
||||
|
||||
// Create replication log
|
||||
let log = Arc::new(ReplicationLog::new(&primary_node));
|
||||
|
||||
// Create sync manager
|
||||
let sync_manager = Arc::new(SyncManager::new(Arc::clone(&replica_set), log));
|
||||
sync_manager.set_sync_mode(self.config.sync_mode.clone());
|
||||
|
||||
self.replica_sets.insert(shard_id, replica_set);
|
||||
self.sync_managers.insert(shard_id, sync_manager);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Replicate a node addition
|
||||
pub async fn replicate_node_add(&self, shard_id: ShardId, node: NodeData) -> Result<()> {
|
||||
debug!(
|
||||
"Replicating node addition: {} to shard {}",
|
||||
node.id, shard_id
|
||||
);
|
||||
|
||||
// Determine replication strategy
|
||||
match self.config.strategy {
|
||||
ReplicationStrategy::FullShard => {
|
||||
self.replicate_to_shard(shard_id, ReplicationOp::AddNode(node))
|
||||
.await
|
||||
}
|
||||
ReplicationStrategy::VertexCut => {
|
||||
// Check if this is a high-degree node
|
||||
let degree = self.get_node_degree(&node.id);
|
||||
if degree >= self.config.high_degree_threshold {
|
||||
// Replicate to multiple shards
|
||||
self.replicate_high_degree_node(node).await
|
||||
} else {
|
||||
self.replicate_to_shard(shard_id, ReplicationOp::AddNode(node))
|
||||
.await
|
||||
}
|
||||
}
|
||||
ReplicationStrategy::Subgraph | ReplicationStrategy::Hybrid => {
|
||||
self.replicate_to_shard(shard_id, ReplicationOp::AddNode(node))
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Replicate an edge addition
|
||||
pub async fn replicate_edge_add(&self, shard_id: ShardId, edge: EdgeData) -> Result<()> {
|
||||
debug!(
|
||||
"Replicating edge addition: {} to shard {}",
|
||||
edge.id, shard_id
|
||||
);
|
||||
|
||||
// Update degree information
|
||||
self.increment_node_degree(&edge.from);
|
||||
self.increment_node_degree(&edge.to);
|
||||
|
||||
self.replicate_to_shard(shard_id, ReplicationOp::AddEdge(edge))
|
||||
.await
|
||||
}
|
||||
|
||||
/// Replicate a node deletion
|
||||
pub async fn replicate_node_delete(&self, shard_id: ShardId, node_id: NodeId) -> Result<()> {
|
||||
debug!(
|
||||
"Replicating node deletion: {} from shard {}",
|
||||
node_id, shard_id
|
||||
);
|
||||
|
||||
self.replicate_to_shard(shard_id, ReplicationOp::DeleteNode(node_id))
|
||||
.await
|
||||
}
|
||||
|
||||
/// Replicate an edge deletion
|
||||
pub async fn replicate_edge_delete(&self, shard_id: ShardId, edge_id: String) -> Result<()> {
|
||||
debug!(
|
||||
"Replicating edge deletion: {} from shard {}",
|
||||
edge_id, shard_id
|
||||
);
|
||||
|
||||
self.replicate_to_shard(shard_id, ReplicationOp::DeleteEdge(edge_id))
|
||||
.await
|
||||
}
|
||||
|
||||
/// Replicate operation to all replicas of a shard
|
||||
async fn replicate_to_shard(&self, shard_id: ShardId, op: ReplicationOp) -> Result<()> {
|
||||
let sync_manager = self
|
||||
.sync_managers
|
||||
.get(&shard_id)
|
||||
.ok_or_else(|| GraphError::ShardError(format!("Shard {} not initialized", shard_id)))?;
|
||||
|
||||
// Serialize operation
|
||||
let data = bincode::encode_to_vec(&op, bincode::config::standard())
|
||||
.map_err(|e| GraphError::SerializationError(e.to_string()))?;
|
||||
|
||||
// Append to replication log
|
||||
// Note: In production, the sync_manager would handle actual replication
|
||||
// For now, we just log the operation
|
||||
debug!("Replicating operation for shard {}", shard_id);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Replicate high-degree node to multiple shards
|
||||
async fn replicate_high_degree_node(&self, node: NodeData) -> Result<()> {
|
||||
info!(
|
||||
"Replicating high-degree node {} to multiple shards",
|
||||
node.id
|
||||
);
|
||||
|
||||
// Replicate to additional shards based on degree
|
||||
let degree = self.get_node_degree(&node.id);
|
||||
let replica_count =
|
||||
(degree / self.config.high_degree_threshold).min(self.config.replication_factor);
|
||||
|
||||
let mut replica_shards = Vec::new();
|
||||
|
||||
// Select shards for replication
|
||||
for shard_id in 0..replica_count {
|
||||
replica_shards.push(shard_id as ShardId);
|
||||
}
|
||||
|
||||
// Replicate to each shard
|
||||
for shard_id in replica_shards.clone() {
|
||||
self.replicate_to_shard(shard_id, ReplicationOp::AddNode(node.clone()))
|
||||
.await?;
|
||||
}
|
||||
|
||||
// Store replica locations
|
||||
self.node_replicas.insert(
|
||||
node.id.clone(),
|
||||
replica_shards.iter().map(|s| s.to_string()).collect(),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get node degree
|
||||
fn get_node_degree(&self, node_id: &NodeId) -> usize {
|
||||
self.high_degree_nodes
|
||||
.get(node_id)
|
||||
.map(|d| *d.value())
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
/// Increment node degree
|
||||
fn increment_node_degree(&self, node_id: &NodeId) {
|
||||
self.high_degree_nodes
|
||||
.entry(node_id.clone())
|
||||
.and_modify(|d| *d += 1)
|
||||
.or_insert(1);
|
||||
}
|
||||
|
||||
/// Get replica set for a shard
|
||||
pub fn get_replica_set(&self, shard_id: ShardId) -> Option<Arc<ReplicaSet>> {
|
||||
self.replica_sets
|
||||
.get(&shard_id)
|
||||
.map(|r| Arc::clone(r.value()))
|
||||
}
|
||||
|
||||
/// Get sync manager for a shard
|
||||
pub fn get_sync_manager(&self, shard_id: ShardId) -> Option<Arc<SyncManager>> {
|
||||
self.sync_managers
|
||||
.get(&shard_id)
|
||||
.map(|s| Arc::clone(s.value()))
|
||||
}
|
||||
|
||||
/// Get replication statistics
|
||||
pub fn get_stats(&self) -> ReplicationStats {
|
||||
ReplicationStats {
|
||||
total_shards: self.replica_sets.len(),
|
||||
high_degree_nodes: self.high_degree_nodes.len(),
|
||||
replicated_nodes: self.node_replicas.len(),
|
||||
strategy: self.config.strategy,
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform health check on all replicas
|
||||
pub async fn health_check(&self) -> HashMap<ShardId, ReplicaHealth> {
|
||||
let mut health = HashMap::new();
|
||||
|
||||
for entry in self.replica_sets.iter() {
|
||||
let shard_id = *entry.key();
|
||||
let replica_set = entry.value();
|
||||
|
||||
// In production, check actual replica health
|
||||
let healthy_count = self.config.replication_factor;
|
||||
|
||||
health.insert(
|
||||
shard_id,
|
||||
ReplicaHealth {
|
||||
total_replicas: self.config.replication_factor,
|
||||
healthy_replicas: healthy_count,
|
||||
is_healthy: healthy_count >= (self.config.replication_factor / 2 + 1),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
health
|
||||
}
|
||||
|
||||
/// Get configuration
///
/// Borrowed accessor for the replication configuration this instance
/// was constructed with; callers clone if they need ownership.
pub fn config(&self) -> &GraphReplicationConfig {
    &self.config
}
|
||||
}
|
||||
|
||||
/// Replication operation
///
/// Internal mutation record shipped to replica shards; one variant per
/// graph write the replication layer forwards.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum ReplicationOp {
    /// Insert a new node (full payload).
    AddNode(NodeData),
    /// Insert a new edge (full payload).
    AddEdge(EdgeData),
    /// Remove a node by id.
    DeleteNode(NodeId),
    /// Remove an edge by its string id.
    DeleteEdge(String),
    /// Overwrite an existing node's data.
    UpdateNode(NodeData),
    /// Overwrite an existing edge's data.
    UpdateEdge(EdgeData),
}
|
||||
|
||||
/// Replication statistics
///
/// Point-in-time snapshot returned by `get_stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReplicationStats {
    /// Number of shards with initialized replication.
    pub total_shards: usize,
    /// Number of nodes currently tracked as high-degree.
    pub high_degree_nodes: usize,
    /// Number of nodes with recorded replica locations.
    pub replicated_nodes: usize,
    /// Replication strategy from configuration.
    pub strategy: ReplicationStrategy,
}
|
||||
|
||||
/// Replica health information
///
/// Per-shard summary produced by `health_check`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReplicaHealth {
    /// Replicas configured for the shard.
    pub total_replicas: usize,
    /// Replicas considered responsive.
    pub healthy_replicas: usize,
    /// True when a majority quorum of replicas is healthy.
    pub is_healthy: bool,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    // Initializing shard replication should register both a replica set
    // and a sync manager for that shard.
    #[tokio::test]
    async fn test_graph_replication() {
        let config = GraphReplicationConfig::default();
        let replication = GraphReplication::new(config);

        replication
            .initialize_shard_replication(0, "node-1".to_string(), vec!["node-2".to_string()])
            .unwrap();

        assert!(replication.get_replica_set(0).is_some());
        assert!(replication.get_sync_manager(0).is_some());
    }

    // Replicating a node add should succeed once the target shard's
    // replication has been initialized.
    #[tokio::test]
    async fn test_node_replication() {
        let config = GraphReplicationConfig::default();
        let replication = GraphReplication::new(config);

        replication
            .initialize_shard_replication(0, "node-1".to_string(), vec!["node-2".to_string()])
            .unwrap();

        let node = NodeData {
            id: "test-node".to_string(),
            properties: HashMap::new(),
            labels: vec!["Test".to_string()],
        };

        let result = replication.replicate_node_add(0, node).await;
        assert!(result.is_ok());
    }

    // A fresh instance reports empty stats and the default strategy.
    #[test]
    fn test_replication_stats() {
        let config = GraphReplicationConfig::default();
        let replication = GraphReplication::new(config);

        let stats = replication.get_stats();
        assert_eq!(stats.total_shards, 0);
        assert_eq!(stats.strategy, ReplicationStrategy::FullShard);
    }
}
|
||||
515
vendor/ruvector/crates/ruvector-graph/src/distributed/rpc.rs
vendored
Normal file
515
vendor/ruvector/crates/ruvector-graph/src/distributed/rpc.rs
vendored
Normal file
@@ -0,0 +1,515 @@
|
||||
//! gRPC-based inter-node communication for distributed graph queries
|
||||
//!
|
||||
//! Provides high-performance RPC communication layer:
|
||||
//! - Query execution RPC
|
||||
//! - Data replication RPC
|
||||
//! - Cluster coordination RPC
|
||||
//! - Streaming results for large queries
|
||||
|
||||
use crate::distributed::coordinator::{QueryPlan, QueryResult};
|
||||
use crate::distributed::shard::{EdgeData, NodeData, NodeId, ShardId};
|
||||
use crate::{GraphError, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
#[cfg(feature = "federation")]
|
||||
use tonic::{Request, Response, Status};
|
||||
|
||||
/// Stand-in for `tonic::Status` when the `federation` feature is
/// disabled, so types referencing `Status` still compile.
#[cfg(not(feature = "federation"))]
pub struct Status;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
/// RPC request for executing a query
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecuteQueryRequest {
    /// Query to execute (Cypher syntax)
    pub query: String,
    /// Optional parameters, keyed by parameter name.
    pub parameters: std::collections::HashMap<String, serde_json::Value>,
    /// Transaction ID (if part of a transaction); `None` for
    /// standalone queries.
    pub transaction_id: Option<String>,
}

/// RPC response for query execution
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecuteQueryResponse {
    /// Query result (rows/aggregates/stats); present even on failure.
    pub result: QueryResult,
    /// Success indicator
    pub success: bool,
    /// Error message if failed; `None` on success.
    pub error: Option<String>,
}
|
||||
|
||||
/// RPC request for replicating data
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReplicateDataRequest {
    /// Shard ID to replicate to
    pub shard_id: ShardId,
    /// Operation type (the mutation to apply on the replica).
    pub operation: ReplicationOperation,
}

/// Replication operation types
///
/// One variant per graph mutation that can cross the wire. Mirrors the
/// coordinator-side `ReplicationOp` but is part of the public RPC API.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ReplicationOperation {
    /// Insert a new node.
    AddNode(NodeData),
    /// Insert a new edge.
    AddEdge(EdgeData),
    /// Remove a node by id.
    DeleteNode(NodeId),
    /// Remove an edge by its string id.
    DeleteEdge(String),
    /// Overwrite an existing node.
    UpdateNode(NodeData),
    /// Overwrite an existing edge.
    UpdateEdge(EdgeData),
}

/// RPC response for replication
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReplicateDataResponse {
    /// Success indicator
    pub success: bool,
    /// Error message if failed; `None` on success.
    pub error: Option<String>,
}
|
||||
|
||||
/// RPC request for health check
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthCheckRequest {
    /// Node ID performing the check (the caller's identity).
    pub node_id: String,
}

/// RPC response for health check
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthCheckResponse {
    /// Node is healthy
    pub healthy: bool,
    /// Current load (0.0 - 1.0)
    pub load: f64,
    /// Number of active queries
    pub active_queries: usize,
    /// Uptime in seconds
    pub uptime_seconds: u64,
}
|
||||
|
||||
/// RPC request for shard info
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GetShardInfoRequest {
    /// Shard ID to query.
    pub shard_id: ShardId,
}

/// RPC response for shard info
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GetShardInfoResponse {
    /// Shard ID (echoes the request).
    pub shard_id: ShardId,
    /// Number of nodes
    pub node_count: usize,
    /// Number of edges
    pub edge_count: usize,
    /// Shard size in bytes
    pub size_bytes: u64,
}
|
||||
|
||||
/// Graph RPC service trait (would be implemented via tonic in production)
///
/// Only compiled with the `federation` feature, since the `Status`
/// error type comes from tonic. All methods follow the gRPC
/// request/response shape and return `Status` on transport-level error.
#[cfg(feature = "federation")]
#[tonic::async_trait]
pub trait GraphRpcService: Send + Sync {
    /// Execute a query on this node
    async fn execute_query(
        &self,
        request: ExecuteQueryRequest,
    ) -> std::result::Result<ExecuteQueryResponse, Status>;

    /// Replicate data to this node
    async fn replicate_data(
        &self,
        request: ReplicateDataRequest,
    ) -> std::result::Result<ReplicateDataResponse, Status>;

    /// Health check
    async fn health_check(
        &self,
        request: HealthCheckRequest,
    ) -> std::result::Result<HealthCheckResponse, Status>;

    /// Get shard information
    async fn get_shard_info(
        &self,
        request: GetShardInfoRequest,
    ) -> std::result::Result<GetShardInfoResponse, Status>;
}
|
||||
|
||||
/// RPC client for communicating with remote nodes
///
/// One client per remote node; see `RpcConnectionPool` for reuse across
/// the cluster.
pub struct RpcClient {
    /// Target node address (host:port).
    target_address: String,
    /// Connection timeout in seconds (default 30; see `with_timeout`).
    timeout_seconds: u64,
}
|
||||
|
||||
impl RpcClient {
|
||||
/// Create a new RPC client
|
||||
pub fn new(target_address: String) -> Self {
|
||||
Self {
|
||||
target_address,
|
||||
timeout_seconds: 30,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set connection timeout
|
||||
pub fn with_timeout(mut self, timeout_seconds: u64) -> Self {
|
||||
self.timeout_seconds = timeout_seconds;
|
||||
self
|
||||
}
|
||||
|
||||
/// Execute a query on the remote node
|
||||
pub async fn execute_query(
|
||||
&self,
|
||||
request: ExecuteQueryRequest,
|
||||
) -> Result<ExecuteQueryResponse> {
|
||||
debug!(
|
||||
"Executing remote query on {}: {}",
|
||||
self.target_address, request.query
|
||||
);
|
||||
|
||||
// In production, make actual gRPC call using tonic
|
||||
// For now, simulate response
|
||||
Ok(ExecuteQueryResponse {
|
||||
result: QueryResult {
|
||||
query_id: uuid::Uuid::new_v4().to_string(),
|
||||
nodes: Vec::new(),
|
||||
edges: Vec::new(),
|
||||
aggregates: std::collections::HashMap::new(),
|
||||
stats: crate::distributed::coordinator::QueryStats {
|
||||
execution_time_ms: 0,
|
||||
shards_queried: 0,
|
||||
nodes_scanned: 0,
|
||||
edges_scanned: 0,
|
||||
cached: false,
|
||||
},
|
||||
},
|
||||
success: true,
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Replicate data to the remote node
|
||||
pub async fn replicate_data(
|
||||
&self,
|
||||
request: ReplicateDataRequest,
|
||||
) -> Result<ReplicateDataResponse> {
|
||||
debug!(
|
||||
"Replicating data to {} for shard {}",
|
||||
self.target_address, request.shard_id
|
||||
);
|
||||
|
||||
// In production, make actual gRPC call
|
||||
Ok(ReplicateDataResponse {
|
||||
success: true,
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Perform health check on remote node
|
||||
pub async fn health_check(&self, node_id: String) -> Result<HealthCheckResponse> {
|
||||
debug!("Health check on {}", self.target_address);
|
||||
|
||||
// In production, make actual gRPC call
|
||||
Ok(HealthCheckResponse {
|
||||
healthy: true,
|
||||
load: 0.5,
|
||||
active_queries: 0,
|
||||
uptime_seconds: 3600,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get shard information from remote node
|
||||
pub async fn get_shard_info(&self, shard_id: ShardId) -> Result<GetShardInfoResponse> {
|
||||
debug!(
|
||||
"Getting shard info for {} from {}",
|
||||
shard_id, self.target_address
|
||||
);
|
||||
|
||||
// In production, make actual gRPC call
|
||||
Ok(GetShardInfoResponse {
|
||||
shard_id,
|
||||
node_count: 0,
|
||||
edge_count: 0,
|
||||
size_bytes: 0,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// RPC server for handling incoming requests
///
/// Federation build: carries the service implementation that handles
/// each incoming RPC.
#[cfg(feature = "federation")]
pub struct RpcServer {
    /// Server address to bind to
    bind_address: String,
    /// Service implementation
    service: Arc<dyn GraphRpcService>,
}
|
||||
|
||||
/// RPC server stub for builds without the `federation` feature; keeps
/// the same name/shape minus the tonic-backed service field.
#[cfg(not(feature = "federation"))]
pub struct RpcServer {
    /// Server address to bind to
    bind_address: String,
}
|
||||
|
||||
#[cfg(feature = "federation")]
impl RpcServer {
    /// Create a new RPC server bound to `bind_address`, dispatching to
    /// the given service implementation.
    pub fn new(bind_address: String, service: Arc<dyn GraphRpcService>) -> Self {
        Self {
            bind_address,
            service,
        }
    }

    /// Start the RPC server.
    ///
    /// NOTE(review): stubbed — logs only; no listener is bound yet.
    pub async fn start(&self) -> Result<()> {
        info!("Starting RPC server on {}", self.bind_address);

        // In production, start actual gRPC server using tonic
        // For now, just log
        debug!("RPC server would start on {}", self.bind_address);

        Ok(())
    }

    /// Stop the RPC server (stubbed; logs only).
    pub async fn stop(&self) -> Result<()> {
        info!("Stopping RPC server");
        Ok(())
    }
}
|
||||
|
||||
#[cfg(not(feature = "federation"))]
impl RpcServer {
    /// Create a new RPC server stub bound to `bind_address` (no service
    /// dispatch without the `federation` feature).
    pub fn new(bind_address: String) -> Self {
        Self { bind_address }
    }

    /// Start the RPC server.
    ///
    /// NOTE(review): stubbed — logs only; no listener is bound.
    pub async fn start(&self) -> Result<()> {
        info!("Starting RPC server on {}", self.bind_address);

        // In production, start actual gRPC server using tonic
        // For now, just log
        debug!("RPC server would start on {}", self.bind_address);

        Ok(())
    }

    /// Stop the RPC server (stubbed; logs only).
    pub async fn stop(&self) -> Result<()> {
        info!("Stopping RPC server");
        Ok(())
    }
}
|
||||
|
||||
/// Default implementation of GraphRpcService
///
/// Tracks uptime from construction and a live count of in-flight
/// queries, both reported by `health_check`.
#[cfg(feature = "federation")]
pub struct DefaultGraphRpcService {
    /// Node ID
    node_id: String,
    /// Start time for uptime calculation
    start_time: std::time::Instant,
    /// Active queries counter (incremented/decremented around each
    /// `execute_query` call).
    active_queries: Arc<RwLock<usize>>,
}
|
||||
|
||||
#[cfg(feature = "federation")]
impl DefaultGraphRpcService {
    /// Create a new default service identified by `node_id`, with the
    /// uptime clock started now and zero active queries.
    pub fn new(node_id: String) -> Self {
        Self {
            node_id,
            start_time: std::time::Instant::now(),
            active_queries: Arc::new(RwLock::new(0)),
        }
    }
}
|
||||
|
||||
#[cfg(feature = "federation")]
#[tonic::async_trait]
impl GraphRpcService for DefaultGraphRpcService {
    /// Execute a query locally, tracking it in the active-query counter
    /// for the duration of the call. Currently a stub returning an
    /// empty successful result.
    async fn execute_query(
        &self,
        request: ExecuteQueryRequest,
    ) -> std::result::Result<ExecuteQueryResponse, Status> {
        // Increment active queries
        {
            let mut count = self.active_queries.write().await;
            *count += 1;
        }

        debug!("Executing query: {}", request.query);

        // In production, execute actual query
        let result = QueryResult {
            query_id: uuid::Uuid::new_v4().to_string(),
            nodes: Vec::new(),
            edges: Vec::new(),
            aggregates: std::collections::HashMap::new(),
            stats: crate::distributed::coordinator::QueryStats {
                execution_time_ms: 0,
                shards_queried: 0,
                nodes_scanned: 0,
                edges_scanned: 0,
                cached: false,
            },
        };

        // Decrement active queries
        // NOTE(review): if real execution gains an early-return error
        // path (`?`) between the increment and this decrement, the
        // counter will leak — a drop guard would be safer then.
        {
            let mut count = self.active_queries.write().await;
            *count -= 1;
        }

        Ok(ExecuteQueryResponse {
            result,
            success: true,
            error: None,
        })
    }

    /// Replicate data onto this node (stubbed; always reports success).
    async fn replicate_data(
        &self,
        request: ReplicateDataRequest,
    ) -> std::result::Result<ReplicateDataResponse, Status> {
        debug!("Replicating data for shard {}", request.shard_id);

        // In production, perform actual replication
        Ok(ReplicateDataResponse {
            success: true,
            error: None,
        })
    }

    /// Report this node's health: uptime since construction and the
    /// current in-flight query count. Load is a placeholder.
    async fn health_check(
        &self,
        _request: HealthCheckRequest,
    ) -> std::result::Result<HealthCheckResponse, Status> {
        let uptime = self.start_time.elapsed().as_secs();
        let active = *self.active_queries.read().await;

        Ok(HealthCheckResponse {
            healthy: true,
            load: 0.5, // Would calculate actual load
            active_queries: active,
            uptime_seconds: uptime,
        })
    }

    /// Report shard statistics (stubbed; returns zeroed counts).
    async fn get_shard_info(
        &self,
        request: GetShardInfoRequest,
    ) -> std::result::Result<GetShardInfoResponse, Status> {
        // In production, get actual shard info
        Ok(GetShardInfoResponse {
            shard_id: request.shard_id,
            node_count: 0,
            edge_count: 0,
            size_bytes: 0,
        })
    }
}
|
||||
|
||||
/// RPC connection pool for managing connections to multiple nodes
///
/// Thread-safe: clients are stored in a concurrent map and shared via
/// `Arc`, so the pool can be used from many tasks.
pub struct RpcConnectionPool {
    /// Map of node_id to RPC client
    clients: Arc<dashmap::DashMap<String, Arc<RpcClient>>>,
}
|
||||
|
||||
impl RpcConnectionPool {
    /// Create a new, empty connection pool.
    pub fn new() -> Self {
        Self {
            clients: Arc::new(dashmap::DashMap::new()),
        }
    }

    /// Get or create a client for a node.
    ///
    /// NOTE(review): if a node_id is later reused with a different
    /// address, the cached client (old address) is returned — callers
    /// must `remove_client` first when a node's address changes.
    pub fn get_client(&self, node_id: &str, address: &str) -> Arc<RpcClient> {
        self.clients
            .entry(node_id.to_string())
            .or_insert_with(|| Arc::new(RpcClient::new(address.to_string())))
            .clone()
    }

    /// Remove a client from the pool (no-op if absent).
    pub fn remove_client(&self, node_id: &str) {
        self.clients.remove(node_id);
    }

    /// Get number of active connections (pooled clients).
    pub fn connection_count(&self) -> usize {
        self.clients.len()
    }
}
|
||||
|
||||
/// `Default` delegates to `new`, producing an empty pool.
impl Default for RpcConnectionPool {
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // The stub client should simulate a successful query round-trip.
    #[tokio::test]
    async fn test_rpc_client() {
        let client = RpcClient::new("localhost:9000".to_string());

        let request = ExecuteQueryRequest {
            query: "MATCH (n) RETURN n".to_string(),
            parameters: std::collections::HashMap::new(),
            transaction_id: None,
        };

        let response = client.execute_query(request).await.unwrap();
        assert!(response.success);
    }

    // The default service should execute (stubbed) queries successfully.
    #[tokio::test]
    async fn test_default_service() {
        let service = DefaultGraphRpcService::new("test-node".to_string());

        let request = ExecuteQueryRequest {
            query: "MATCH (n) RETURN n".to_string(),
            parameters: std::collections::HashMap::new(),
            transaction_id: None,
        };

        let response = service.execute_query(request).await.unwrap();
        assert!(response.success);
    }

    // Pool should create one client per node id and drop them on removal.
    #[tokio::test]
    async fn test_connection_pool() {
        let pool = RpcConnectionPool::new();

        let client1 = pool.get_client("node-1", "localhost:9000");
        let client2 = pool.get_client("node-2", "localhost:9001");

        assert_eq!(pool.connection_count(), 2);

        pool.remove_client("node-1");
        assert_eq!(pool.connection_count(), 1);
    }

    // Health check reports healthy with no in-flight queries.
    #[tokio::test]
    async fn test_health_check() {
        let service = DefaultGraphRpcService::new("test-node".to_string());

        let request = HealthCheckRequest {
            node_id: "test".to_string(),
        };

        let response = service.health_check(request).await.unwrap();
        assert!(response.healthy);
        assert_eq!(response.active_queries, 0);
    }
}
|
||||
595
vendor/ruvector/crates/ruvector-graph/src/distributed/shard.rs
vendored
Normal file
595
vendor/ruvector/crates/ruvector-graph/src/distributed/shard.rs
vendored
Normal file
@@ -0,0 +1,595 @@
|
||||
//! Graph sharding strategies for distributed hypergraphs
|
||||
//!
|
||||
//! Provides multiple partitioning strategies optimized for graph workloads:
|
||||
//! - Hash-based node partitioning for uniform distribution
|
||||
//! - Range-based partitioning for locality-aware queries
|
||||
//! - Edge-cut minimization for reducing cross-shard communication
|
||||
|
||||
use crate::{GraphError, Result};
|
||||
use blake3::Hasher;
|
||||
use chrono::{DateTime, Utc};
|
||||
use dashmap::DashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
use tracing::{debug, info, warn};
|
||||
use uuid::Uuid;
|
||||
use xxhash_rust::xxh3::xxh3_64;
|
||||
|
||||
/// Unique identifier for a graph node
|
||||
pub type NodeId = String;
|
||||
|
||||
/// Unique identifier for a graph edge
|
||||
pub type EdgeId = String;
|
||||
|
||||
/// Shard identifier
|
||||
pub type ShardId = u32;
|
||||
|
||||
/// Graph sharding strategy
///
/// Selects how nodes are assigned to shards; see the partitioner types
/// below for the corresponding implementations.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ShardStrategy {
    /// Hash-based partitioning using consistent hashing
    Hash,
    /// Range-based partitioning for ordered node IDs
    Range,
    /// Edge-cut minimization for graph partitioning
    EdgeCut,
    /// Custom partitioning strategy
    Custom,
}
|
||||
|
||||
/// Metadata about a graph shard
///
/// Bookkeeping record for a single shard: size counters, placement
/// (primary + replicas), timestamps, and the strategy that produced it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardMetadata {
    /// Shard identifier
    pub shard_id: ShardId,
    /// Number of nodes in this shard
    pub node_count: usize,
    /// Number of edges in this shard
    pub edge_count: usize,
    /// Number of edges crossing to other shards
    pub cross_shard_edges: usize,
    /// Primary node responsible for this shard
    pub primary_node: String,
    /// Replica nodes
    pub replicas: Vec<String>,
    /// Creation timestamp
    pub created_at: DateTime<Utc>,
    /// Last modification timestamp
    pub modified_at: DateTime<Utc>,
    /// Partitioning strategy used
    pub strategy: ShardStrategy,
}
|
||||
|
||||
impl ShardMetadata {
|
||||
/// Create new shard metadata
|
||||
pub fn new(shard_id: ShardId, primary_node: String, strategy: ShardStrategy) -> Self {
|
||||
Self {
|
||||
shard_id,
|
||||
node_count: 0,
|
||||
edge_count: 0,
|
||||
cross_shard_edges: 0,
|
||||
primary_node,
|
||||
replicas: Vec::new(),
|
||||
created_at: Utc::now(),
|
||||
modified_at: Utc::now(),
|
||||
strategy,
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate edge cut ratio (cross-shard edges / total edges)
|
||||
pub fn edge_cut_ratio(&self) -> f64 {
|
||||
if self.edge_count == 0 {
|
||||
0.0
|
||||
} else {
|
||||
self.cross_shard_edges as f64 / self.edge_count as f64
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Hash-based node partitioner
pub struct HashPartitioner {
    /// Total number of shards
    shard_count: u32,
    /// Virtual nodes per physical shard for better distribution.
    /// NOTE(review): not yet consulted by any method in this impl —
    /// confirm it is intended for a future consistent-hash ring.
    virtual_nodes: u32,
}
|
||||
|
||||
impl HashPartitioner {
|
||||
/// Create a new hash partitioner
|
||||
pub fn new(shard_count: u32) -> Self {
|
||||
assert!(shard_count > 0, "shard_count must be greater than zero");
|
||||
Self {
|
||||
shard_count,
|
||||
virtual_nodes: 150, // Similar to consistent hashing best practices
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the shard ID for a given node ID using xxHash
|
||||
pub fn get_shard(&self, node_id: &NodeId) -> ShardId {
|
||||
let hash = xxh3_64(node_id.as_bytes());
|
||||
(hash % self.shard_count as u64) as ShardId
|
||||
}
|
||||
|
||||
/// Get the shard ID using BLAKE3 for cryptographic strength (alternative)
|
||||
pub fn get_shard_secure(&self, node_id: &NodeId) -> ShardId {
|
||||
let mut hasher = Hasher::new();
|
||||
hasher.update(node_id.as_bytes());
|
||||
let hash = hasher.finalize();
|
||||
let hash_bytes = hash.as_bytes();
|
||||
let hash_u64 = u64::from_le_bytes([
|
||||
hash_bytes[0],
|
||||
hash_bytes[1],
|
||||
hash_bytes[2],
|
||||
hash_bytes[3],
|
||||
hash_bytes[4],
|
||||
hash_bytes[5],
|
||||
hash_bytes[6],
|
||||
hash_bytes[7],
|
||||
]);
|
||||
(hash_u64 % self.shard_count as u64) as ShardId
|
||||
}
|
||||
|
||||
/// Get multiple candidate shards for replication
|
||||
pub fn get_replica_shards(&self, node_id: &NodeId, replica_count: usize) -> Vec<ShardId> {
|
||||
let mut shards = Vec::with_capacity(replica_count);
|
||||
let primary = self.get_shard(node_id);
|
||||
shards.push(primary);
|
||||
|
||||
// Generate additional shards using salted hashing
|
||||
for i in 1..replica_count {
|
||||
let salted_id = format!("{}-replica-{}", node_id, i);
|
||||
let shard = self.get_shard(&salted_id);
|
||||
if !shards.contains(&shard) {
|
||||
shards.push(shard);
|
||||
}
|
||||
}
|
||||
|
||||
shards
|
||||
}
|
||||
}
|
||||
|
||||
/// Range-based node partitioner for ordered node IDs
pub struct RangePartitioner {
    /// Total number of shards (kept in sync with `ranges.len()` when
    /// boundaries are present).
    shard_count: u32,
    /// Range boundaries, sorted ascending: shard `i` holds node IDs
    /// `<= ranges[i]` (upper bound of each shard's range).
    ranges: Vec<String>,
}
|
||||
|
||||
impl RangePartitioner {
|
||||
/// Create a new range partitioner with automatic range distribution
|
||||
pub fn new(shard_count: u32) -> Self {
|
||||
Self {
|
||||
shard_count,
|
||||
ranges: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a range partitioner with explicit boundaries
|
||||
pub fn with_boundaries(boundaries: Vec<String>) -> Self {
|
||||
Self {
|
||||
shard_count: boundaries.len() as u32,
|
||||
ranges: boundaries,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the shard ID for a node based on range boundaries
|
||||
pub fn get_shard(&self, node_id: &NodeId) -> ShardId {
|
||||
if self.ranges.is_empty() {
|
||||
// Fallback to simple modulo if no ranges defined
|
||||
let hash = xxh3_64(node_id.as_bytes());
|
||||
return (hash % self.shard_count as u64) as ShardId;
|
||||
}
|
||||
|
||||
// Binary search through sorted ranges
|
||||
for (idx, boundary) in self.ranges.iter().enumerate() {
|
||||
if node_id <= boundary {
|
||||
return idx as ShardId;
|
||||
}
|
||||
}
|
||||
|
||||
// Last shard for values beyond all boundaries
|
||||
(self.shard_count - 1) as ShardId
|
||||
}
|
||||
|
||||
/// Update range boundaries based on data distribution
|
||||
pub fn update_boundaries(&mut self, new_boundaries: Vec<String>) {
|
||||
info!(
|
||||
"Updating range boundaries: old={}, new={}",
|
||||
self.ranges.len(),
|
||||
new_boundaries.len()
|
||||
);
|
||||
self.ranges = new_boundaries;
|
||||
self.shard_count = self.ranges.len() as u32;
|
||||
}
|
||||
}
|
||||
|
||||
/// Edge-cut minimization using METIS-like graph partitioning
///
/// Accumulates an (undirected) adjacency structure plus edge weights,
/// then computes node→shard assignments via coarsen / greedy-partition
/// / Kernighan-Lin-style refinement.
pub struct EdgeCutMinimizer {
    /// Total number of shards
    shard_count: u32,
    /// Node to shard assignments (populated by `compute_partitioning`).
    node_assignments: Arc<DashMap<NodeId, ShardId>>,
    /// Edge information for partitioning decisions, keyed by the
    /// (from, to) pair as passed to `add_edge`.
    edge_weights: Arc<DashMap<(NodeId, NodeId), f64>>,
    /// Adjacency list representation (symmetric).
    adjacency: Arc<DashMap<NodeId, HashSet<NodeId>>>,
}
|
||||
|
||||
impl EdgeCutMinimizer {
|
||||
/// Create a new edge-cut minimizer
|
||||
pub fn new(shard_count: u32) -> Self {
|
||||
Self {
|
||||
shard_count,
|
||||
node_assignments: Arc::new(DashMap::new()),
|
||||
edge_weights: Arc::new(DashMap::new()),
|
||||
adjacency: Arc::new(DashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add an edge to the graph for partitioning consideration
|
||||
pub fn add_edge(&self, from: NodeId, to: NodeId, weight: f64) {
|
||||
self.edge_weights.insert((from.clone(), to.clone()), weight);
|
||||
|
||||
// Update adjacency list
|
||||
self.adjacency
|
||||
.entry(from.clone())
|
||||
.or_insert_with(HashSet::new)
|
||||
.insert(to.clone());
|
||||
|
||||
self.adjacency
|
||||
.entry(to)
|
||||
.or_insert_with(HashSet::new)
|
||||
.insert(from);
|
||||
}
|
||||
|
||||
/// Get the shard assignment for a node
|
||||
pub fn get_shard(&self, node_id: &NodeId) -> Option<ShardId> {
|
||||
self.node_assignments.get(node_id).map(|r| *r.value())
|
||||
}
|
||||
|
||||
/// Compute initial partitioning using multilevel k-way partitioning
|
||||
pub fn compute_partitioning(&self) -> Result<HashMap<NodeId, ShardId>> {
|
||||
info!("Computing edge-cut minimized partitioning");
|
||||
|
||||
let nodes: Vec<_> = self.adjacency.iter().map(|e| e.key().clone()).collect();
|
||||
|
||||
if nodes.is_empty() {
|
||||
return Ok(HashMap::new());
|
||||
}
|
||||
|
||||
// Phase 1: Coarsening - merge highly connected nodes
|
||||
let coarse_graph = self.coarsen_graph(&nodes);
|
||||
|
||||
// Phase 2: Initial partitioning using greedy approach
|
||||
let mut assignments = self.initial_partition(&coarse_graph);
|
||||
|
||||
// Phase 3: Refinement using Kernighan-Lin algorithm
|
||||
self.refine_partition(&mut assignments);
|
||||
|
||||
// Store assignments
|
||||
for (node, shard) in &assignments {
|
||||
self.node_assignments.insert(node.clone(), *shard);
|
||||
}
|
||||
|
||||
info!(
|
||||
"Partitioning complete: {} nodes across {} shards",
|
||||
assignments.len(),
|
||||
self.shard_count
|
||||
);
|
||||
|
||||
Ok(assignments)
|
||||
}
|
||||
|
||||
/// Coarsen the graph by merging highly connected nodes
|
||||
fn coarsen_graph(&self, nodes: &[NodeId]) -> HashMap<NodeId, Vec<NodeId>> {
|
||||
let mut coarse: HashMap<NodeId, Vec<NodeId>> = HashMap::new();
|
||||
let mut visited = HashSet::new();
|
||||
|
||||
for node in nodes {
|
||||
if visited.contains(node) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut group = vec![node.clone()];
|
||||
visited.insert(node.clone());
|
||||
|
||||
// Find best matching neighbor based on edge weight
|
||||
if let Some(neighbors) = self.adjacency.get(node) {
|
||||
let mut best_neighbor: Option<(NodeId, f64)> = None;
|
||||
|
||||
for neighbor in neighbors.iter() {
|
||||
if visited.contains(neighbor) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let weight = self
|
||||
.edge_weights
|
||||
.get(&(node.clone(), neighbor.clone()))
|
||||
.map(|w| *w.value())
|
||||
.unwrap_or(1.0);
|
||||
|
||||
if let Some((_, best_weight)) = best_neighbor {
|
||||
if weight > best_weight {
|
||||
best_neighbor = Some((neighbor.clone(), weight));
|
||||
}
|
||||
} else {
|
||||
best_neighbor = Some((neighbor.clone(), weight));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some((neighbor, _)) = best_neighbor {
|
||||
group.push(neighbor.clone());
|
||||
visited.insert(neighbor);
|
||||
}
|
||||
}
|
||||
|
||||
let representative = node.clone();
|
||||
coarse.insert(representative, group);
|
||||
}
|
||||
|
||||
coarse
|
||||
}
|
||||
|
||||
/// Initial partition using greedy approach
|
||||
fn initial_partition(
|
||||
&self,
|
||||
coarse_graph: &HashMap<NodeId, Vec<NodeId>>,
|
||||
) -> HashMap<NodeId, ShardId> {
|
||||
let mut assignments = HashMap::new();
|
||||
let mut shard_sizes: Vec<usize> = vec![0; self.shard_count as usize];
|
||||
|
||||
for (representative, group) in coarse_graph {
|
||||
// Assign to least-loaded shard
|
||||
let shard = shard_sizes
|
||||
.iter()
|
||||
.enumerate()
|
||||
.min_by_key(|(_, size)| *size)
|
||||
.map(|(idx, _)| idx as ShardId)
|
||||
.unwrap_or(0);
|
||||
|
||||
for node in group {
|
||||
assignments.insert(node.clone(), shard);
|
||||
shard_sizes[shard as usize] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
assignments
|
||||
}
|
||||
|
||||
/// Refine partition using simplified Kernighan-Lin algorithm
|
||||
fn refine_partition(&self, assignments: &mut HashMap<NodeId, ShardId>) {
|
||||
const MAX_ITERATIONS: usize = 10;
|
||||
let mut improved = true;
|
||||
let mut iteration = 0;
|
||||
|
||||
while improved && iteration < MAX_ITERATIONS {
|
||||
improved = false;
|
||||
iteration += 1;
|
||||
|
||||
for (node, current_shard) in assignments.clone().iter() {
|
||||
let current_cost = self.compute_node_cost(node, *current_shard, assignments);
|
||||
|
||||
// Try moving to each other shard
|
||||
for target_shard in 0..self.shard_count {
|
||||
if target_shard == *current_shard {
|
||||
continue;
|
||||
}
|
||||
|
||||
let new_cost = self.compute_node_cost(node, target_shard, assignments);
|
||||
|
||||
if new_cost < current_cost {
|
||||
assignments.insert(node.clone(), target_shard);
|
||||
improved = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
debug!("Refinement iteration {}: improved={}", iteration, improved);
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the cost (number of cross-shard edges) for a node in a given shard
|
||||
fn compute_node_cost(
|
||||
&self,
|
||||
node: &NodeId,
|
||||
shard: ShardId,
|
||||
assignments: &HashMap<NodeId, ShardId>,
|
||||
) -> usize {
|
||||
let mut cross_shard_edges = 0;
|
||||
|
||||
if let Some(neighbors) = self.adjacency.get(node) {
|
||||
for neighbor in neighbors.iter() {
|
||||
if let Some(neighbor_shard) = assignments.get(neighbor) {
|
||||
if *neighbor_shard != shard {
|
||||
cross_shard_edges += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cross_shard_edges
|
||||
}
|
||||
|
||||
/// Calculate total edge cut across all shards
|
||||
pub fn calculate_edge_cut(&self, assignments: &HashMap<NodeId, ShardId>) -> usize {
|
||||
let mut cut = 0;
|
||||
|
||||
for entry in self.edge_weights.iter() {
|
||||
let ((from, to), _) = entry.pair();
|
||||
let from_shard = assignments.get(from);
|
||||
let to_shard = assignments.get(to);
|
||||
|
||||
if from_shard.is_some() && to_shard.is_some() && from_shard != to_shard {
|
||||
cut += 1;
|
||||
}
|
||||
}
|
||||
|
||||
cut
|
||||
}
|
||||
}
|
||||
|
||||
/// Graph shard containing partitioned data.
///
/// A shard owns the subset of nodes assigned to it by the partitioner and
/// the edges stored alongside them, including edges whose other endpoint
/// lives on a different shard.
pub struct GraphShard {
    /// Shard metadata (shard id, strategy, edge-cut statistics).
    metadata: ShardMetadata,
    /// Nodes in this shard, keyed by node id; `DashMap` permits concurrent
    /// access through `&self` without an external lock.
    nodes: Arc<DashMap<NodeId, NodeData>>,
    /// Edges in this shard (including cross-shard edges), keyed by edge id.
    edges: Arc<DashMap<EdgeId, EdgeData>>,
    /// Partitioning strategy, copied out of `metadata` at construction.
    strategy: ShardStrategy,
}
|
||||
|
||||
/// Node data in the graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeData {
    /// Unique node identifier.
    pub id: NodeId,
    /// Arbitrary JSON-valued properties attached to the node.
    pub properties: HashMap<String, serde_json::Value>,
    /// Labels classifying the node (e.g. `"TestLabel"` in the tests below).
    pub labels: Vec<String>,
}
|
||||
|
||||
/// Edge data in the graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdgeData {
    /// Unique edge identifier.
    pub id: EdgeId,
    /// Source node id.
    pub from: NodeId,
    /// Destination node id.
    pub to: NodeId,
    /// Relationship type of this edge.
    pub edge_type: String,
    /// Arbitrary JSON-valued properties attached to the edge.
    pub properties: HashMap<String, serde_json::Value>,
}
|
||||
|
||||
impl GraphShard {
|
||||
/// Create a new graph shard
|
||||
pub fn new(metadata: ShardMetadata) -> Self {
|
||||
let strategy = metadata.strategy;
|
||||
Self {
|
||||
metadata,
|
||||
nodes: Arc::new(DashMap::new()),
|
||||
edges: Arc::new(DashMap::new()),
|
||||
strategy,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a node to this shard
|
||||
pub fn add_node(&self, node: NodeData) -> Result<()> {
|
||||
self.nodes.insert(node.id.clone(), node);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add an edge to this shard
|
||||
pub fn add_edge(&self, edge: EdgeData) -> Result<()> {
|
||||
self.edges.insert(edge.id.clone(), edge);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get a node by ID
|
||||
pub fn get_node(&self, node_id: &NodeId) -> Option<NodeData> {
|
||||
self.nodes.get(node_id).map(|n| n.value().clone())
|
||||
}
|
||||
|
||||
/// Get an edge by ID
|
||||
pub fn get_edge(&self, edge_id: &EdgeId) -> Option<EdgeData> {
|
||||
self.edges.get(edge_id).map(|e| e.value().clone())
|
||||
}
|
||||
|
||||
/// Get shard metadata
|
||||
pub fn metadata(&self) -> &ShardMetadata {
|
||||
&self.metadata
|
||||
}
|
||||
|
||||
/// Get node count
|
||||
pub fn node_count(&self) -> usize {
|
||||
self.nodes.len()
|
||||
}
|
||||
|
||||
/// Get edge count
|
||||
pub fn edge_count(&self) -> usize {
|
||||
self.edges.len()
|
||||
}
|
||||
|
||||
/// List all nodes in this shard
|
||||
pub fn list_nodes(&self) -> Vec<NodeData> {
|
||||
self.nodes.iter().map(|e| e.value().clone()).collect()
|
||||
}
|
||||
|
||||
/// List all edges in this shard
|
||||
pub fn list_edges(&self) -> Vec<EdgeData> {
|
||||
self.edges.iter().map(|e| e.value().clone()).collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_hash_partitioner() {
        let partitioner = HashPartitioner::new(16);

        let first = "node-1".to_string();
        let second = "node-2".to_string();

        let first_shard = partitioner.get_shard(&first);
        let second_shard = partitioner.get_shard(&second);

        // Every assignment must fall inside the configured shard range.
        assert!(first_shard < 16);
        assert!(second_shard < 16);

        // Hash partitioning is deterministic: the same key always maps to
        // the same shard.
        assert_eq!(first_shard, partitioner.get_shard(&first));
    }

    #[test]
    fn test_range_partitioner() {
        let partitioner =
            RangePartitioner::with_boundaries(vec!["m".to_string(), "z".to_string()]);

        // Keys below the first boundary land on shard 0; later keys on
        // shard 1 (per this partitioner's boundary handling).
        assert_eq!(partitioner.get_shard(&"apple".to_string()), 0);
        assert_eq!(partitioner.get_shard(&"orange".to_string()), 1);
        assert_eq!(partitioner.get_shard(&"zebra".to_string()), 1);
    }

    #[test]
    fn test_edge_cut_minimizer() {
        let minimizer = EdgeCutMinimizer::new(2);

        // Build a simple path graph: A - B - C - D.
        minimizer.add_edge("A".to_string(), "B".to_string(), 1.0);
        minimizer.add_edge("B".to_string(), "C".to_string(), 1.0);
        minimizer.add_edge("C".to_string(), "D".to_string(), 1.0);

        let assignments = minimizer.compute_partitioning().unwrap();
        let cut = minimizer.calculate_edge_cut(&assignments);

        // A reasonable 2-way split of a 3-edge path cuts at most 2 edges.
        assert!(cut <= 2);
    }

    #[test]
    fn test_shard_metadata() {
        let metadata = ShardMetadata::new(0, "node-1".to_string(), ShardStrategy::Hash);

        assert_eq!(metadata.shard_id, 0);
        // A fresh shard has no edges, so its cut ratio starts at zero.
        assert_eq!(metadata.edge_cut_ratio(), 0.0);
    }

    #[test]
    fn test_graph_shard() {
        let metadata = ShardMetadata::new(0, "node-1".to_string(), ShardStrategy::Hash);
        let shard = GraphShard::new(metadata);

        let node = NodeData {
            id: "test-node".to_string(),
            properties: HashMap::new(),
            labels: vec!["TestLabel".to_string()],
        };
        shard.add_node(node).unwrap();

        assert_eq!(shard.node_count(), 1);
        assert!(shard.get_node(&"test-node".to_string()).is_some());
    }
}
|
||||
Reference in New Issue
Block a user