Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
395
vendor/ruvector/crates/ruvector-replication/src/conflict.rs
vendored
Normal file
395
vendor/ruvector/crates/ruvector-replication/src/conflict.rs
vendored
Normal file
@@ -0,0 +1,395 @@
|
||||
//! Conflict resolution strategies for distributed replication
|
||||
//!
|
||||
//! Provides vector clocks for causality tracking and various
|
||||
//! conflict resolution strategies including Last-Write-Wins
|
||||
//! and custom merge functions.
|
||||
|
||||
use crate::{ReplicationError, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
|
||||
/// Vector clock for tracking causality in distributed systems.
///
/// Each replica owns one component of the clock; a missing entry is
/// treated as timestamp 0 (see [`VectorClock::get`]).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct VectorClock {
    /// Map of replica ID to logical timestamp. Absent keys mean 0, so an
    /// empty map is the "zero" clock.
    clock: HashMap<String, u64>,
}
|
||||
|
||||
impl VectorClock {
|
||||
/// Create a new vector clock
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
clock: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment the clock for a replica
|
||||
pub fn increment(&mut self, replica_id: &str) {
|
||||
let counter = self.clock.entry(replica_id.to_string()).or_insert(0);
|
||||
*counter += 1;
|
||||
}
|
||||
|
||||
/// Get the timestamp for a replica
|
||||
pub fn get(&self, replica_id: &str) -> u64 {
|
||||
self.clock.get(replica_id).copied().unwrap_or(0)
|
||||
}
|
||||
|
||||
/// Update with another vector clock (taking max of each component)
|
||||
pub fn merge(&mut self, other: &VectorClock) {
|
||||
for (replica_id, ×tamp) in &other.clock {
|
||||
let current = self.clock.entry(replica_id.clone()).or_insert(0);
|
||||
*current = (*current).max(timestamp);
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if this clock happens-before another clock
|
||||
pub fn happens_before(&self, other: &VectorClock) -> bool {
|
||||
let mut less = false;
|
||||
let mut equal = true;
|
||||
|
||||
// Check all replicas in self
|
||||
for (replica_id, &self_ts) in &self.clock {
|
||||
let other_ts = other.get(replica_id);
|
||||
if self_ts > other_ts {
|
||||
return false;
|
||||
}
|
||||
if self_ts < other_ts {
|
||||
less = true;
|
||||
equal = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Check replicas only in other
|
||||
for (replica_id, &other_ts) in &other.clock {
|
||||
if !self.clock.contains_key(replica_id) && other_ts > 0 {
|
||||
less = true;
|
||||
equal = false;
|
||||
}
|
||||
}
|
||||
|
||||
less || equal
|
||||
}
|
||||
|
||||
/// Compare vector clocks for causality
|
||||
pub fn compare(&self, other: &VectorClock) -> ClockOrdering {
|
||||
if self == other {
|
||||
return ClockOrdering::Equal;
|
||||
}
|
||||
|
||||
if self.happens_before(other) {
|
||||
return ClockOrdering::Before;
|
||||
}
|
||||
|
||||
if other.happens_before(self) {
|
||||
return ClockOrdering::After;
|
||||
}
|
||||
|
||||
ClockOrdering::Concurrent
|
||||
}
|
||||
|
||||
/// Check if two clocks are concurrent (conflicting)
|
||||
pub fn is_concurrent(&self, other: &VectorClock) -> bool {
|
||||
matches!(self.compare(other), ClockOrdering::Concurrent)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for VectorClock {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for VectorClock {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{{")?;
|
||||
for (i, (replica, ts)) in self.clock.iter().enumerate() {
|
||||
if i > 0 {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
write!(f, "{}: {}", replica, ts)?;
|
||||
}
|
||||
write!(f, "}}")
|
||||
}
|
||||
}
|
||||
|
||||
/// Ordering relationship between vector clocks.
///
/// Produced by [`VectorClock::compare`]; `Concurrent` is the case that
/// requires conflict resolution.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ClockOrdering {
    /// Clocks are equal (component-wise identical)
    Equal,
    /// First clock happens before second
    Before,
    /// First clock happens after second
    After,
    /// Clocks are concurrent (conflicting) — neither dominates the other
    Concurrent,
}
|
||||
|
||||
/// A versioned value with vector clock.
///
/// Pairs a payload with the causal metadata needed by the
/// [`ConflictResolver`] implementations in this module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Versioned<T> {
    /// The value
    pub value: T,
    /// Vector clock for this version (advanced on every `update`)
    pub clock: VectorClock,
    /// Replica that created this version; also the component incremented
    /// by `update`
    pub replica_id: String,
}
|
||||
|
||||
impl<T> Versioned<T> {
|
||||
/// Create a new versioned value
|
||||
pub fn new(value: T, replica_id: String) -> Self {
|
||||
let mut clock = VectorClock::new();
|
||||
clock.increment(&replica_id);
|
||||
Self {
|
||||
value,
|
||||
clock,
|
||||
replica_id,
|
||||
}
|
||||
}
|
||||
|
||||
/// Update the version with a new value
|
||||
pub fn update(&mut self, value: T) {
|
||||
self.value = value;
|
||||
self.clock.increment(&self.replica_id);
|
||||
}
|
||||
|
||||
/// Compare versions for causality
|
||||
pub fn compare(&self, other: &Versioned<T>) -> ClockOrdering {
|
||||
self.clock.compare(&other.clock)
|
||||
}
|
||||
}
|
||||
|
||||
/// Trait for conflict resolution strategies
|
||||
pub trait ConflictResolver<T: Clone>: Send + Sync {
|
||||
/// Resolve a conflict between two versions
|
||||
fn resolve(&self, v1: &Versioned<T>, v2: &Versioned<T>) -> Result<Versioned<T>>;
|
||||
|
||||
/// Resolve multiple conflicting versions
|
||||
fn resolve_many(&self, versions: Vec<Versioned<T>>) -> Result<Versioned<T>> {
|
||||
if versions.is_empty() {
|
||||
return Err(ReplicationError::ConflictResolution(
|
||||
"No versions to resolve".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if versions.len() == 1 {
|
||||
return Ok(versions.into_iter().next().unwrap());
|
||||
}
|
||||
|
||||
let mut result = versions[0].clone();
|
||||
for version in versions.iter().skip(1) {
|
||||
result = self.resolve(&result, version)?;
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
/// Last-Write-Wins conflict resolution strategy
|
||||
pub struct LastWriteWins;
|
||||
|
||||
impl<T: Clone> ConflictResolver<T> for LastWriteWins {
|
||||
fn resolve(&self, v1: &Versioned<T>, v2: &Versioned<T>) -> Result<Versioned<T>> {
|
||||
match v1.compare(v2) {
|
||||
ClockOrdering::Before | ClockOrdering::Concurrent => Ok(v2.clone()),
|
||||
ClockOrdering::After | ClockOrdering::Equal => Ok(v1.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Custom merge function for conflict resolution.
///
/// Wraps a user-supplied `Fn(&T, &T) -> T` that combines the payloads of
/// two concurrent versions; causally ordered versions are resolved without
/// calling it (see the `ConflictResolver` impl).
pub struct MergeFunction<T, F>
where
    F: Fn(&T, &T) -> T + Send + Sync,
{
    // The user-supplied combiner, invoked only for concurrent versions.
    merge_fn: F,
    // Marks the value type `T` as logically used; no `T` is stored.
    _phantom: std::marker::PhantomData<T>,
}

impl<T, F> MergeFunction<T, F>
where
    F: Fn(&T, &T) -> T + Send + Sync,
{
    /// Create a new merge function resolver
    pub fn new(merge_fn: F) -> Self {
        Self {
            merge_fn,
            _phantom: std::marker::PhantomData,
        }
    }
}
|
||||
|
||||
impl<T: Clone + Send + Sync, F> ConflictResolver<T> for MergeFunction<T, F>
|
||||
where
|
||||
F: Fn(&T, &T) -> T + Send + Sync,
|
||||
{
|
||||
fn resolve(&self, v1: &Versioned<T>, v2: &Versioned<T>) -> Result<Versioned<T>> {
|
||||
match v1.compare(v2) {
|
||||
ClockOrdering::Equal | ClockOrdering::Before => Ok(v2.clone()),
|
||||
ClockOrdering::After => Ok(v1.clone()),
|
||||
ClockOrdering::Concurrent => {
|
||||
let merged_value = (self.merge_fn)(&v1.value, &v2.value);
|
||||
let mut merged_clock = v1.clock.clone();
|
||||
merged_clock.merge(&v2.clock);
|
||||
|
||||
Ok(Versioned {
|
||||
value: merged_value,
|
||||
clock: merged_clock,
|
||||
replica_id: v1.replica_id.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// CRDT-inspired merge for numeric values (takes max)
|
||||
pub struct MaxMerge;
|
||||
|
||||
impl ConflictResolver<i64> for MaxMerge {
|
||||
fn resolve(&self, v1: &Versioned<i64>, v2: &Versioned<i64>) -> Result<Versioned<i64>> {
|
||||
match v1.compare(v2) {
|
||||
ClockOrdering::Equal | ClockOrdering::Before => Ok(v2.clone()),
|
||||
ClockOrdering::After => Ok(v1.clone()),
|
||||
ClockOrdering::Concurrent => {
|
||||
let merged_value = v1.value.max(v2.value);
|
||||
let mut merged_clock = v1.clock.clone();
|
||||
merged_clock.merge(&v2.clock);
|
||||
|
||||
Ok(Versioned {
|
||||
value: merged_value,
|
||||
clock: merged_clock,
|
||||
replica_id: v1.replica_id.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// CRDT-inspired merge for sets (takes union)
|
||||
pub struct SetUnion;
|
||||
|
||||
impl<T: Clone + Eq + std::hash::Hash> ConflictResolver<Vec<T>> for SetUnion {
|
||||
fn resolve(&self, v1: &Versioned<Vec<T>>, v2: &Versioned<Vec<T>>) -> Result<Versioned<Vec<T>>> {
|
||||
match v1.compare(v2) {
|
||||
ClockOrdering::Equal | ClockOrdering::Before => Ok(v2.clone()),
|
||||
ClockOrdering::After => Ok(v1.clone()),
|
||||
ClockOrdering::Concurrent => {
|
||||
let mut merged_value = v1.value.clone();
|
||||
for item in &v2.value {
|
||||
if !merged_value.contains(item) {
|
||||
merged_value.push(item.clone());
|
||||
}
|
||||
}
|
||||
|
||||
let mut merged_clock = v1.clock.clone();
|
||||
merged_clock.merge(&v2.clock);
|
||||
|
||||
Ok(Versioned {
|
||||
value: merged_value,
|
||||
clock: merged_clock,
|
||||
replica_id: v1.replica_id.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_vector_clock() {
        // Two increments on r1 vs one: clock1 strictly dominates clock2.
        let mut clock1 = VectorClock::new();
        clock1.increment("r1");
        clock1.increment("r1");

        let mut clock2 = VectorClock::new();
        clock2.increment("r1");

        assert_eq!(clock1.compare(&clock2), ClockOrdering::After);
        assert_eq!(clock2.compare(&clock1), ClockOrdering::Before);
    }

    #[test]
    fn test_concurrent_clocks() {
        // Disjoint replica sets: neither clock dominates, so they conflict.
        let mut clock1 = VectorClock::new();
        clock1.increment("r1");

        let mut clock2 = VectorClock::new();
        clock2.increment("r2");

        assert_eq!(clock1.compare(&clock2), ClockOrdering::Concurrent);
        assert!(clock1.is_concurrent(&clock2));
    }

    #[test]
    fn test_clock_merge() {
        // Merge takes the component-wise maximum of both clocks.
        let mut clock1 = VectorClock::new();
        clock1.increment("r1");
        clock1.increment("r1");

        let mut clock2 = VectorClock::new();
        clock2.increment("r2");
        clock2.increment("r2");
        clock2.increment("r2");

        clock1.merge(&clock2);
        assert_eq!(clock1.get("r1"), 2);
        assert_eq!(clock1.get("r2"), 3);
    }

    #[test]
    fn test_versioned() {
        // `update` replaces the value and bumps the creator's component.
        let mut v1 = Versioned::new(100, "r1".to_string());
        v1.update(200);

        assert_eq!(v1.value, 200);
        assert_eq!(v1.clock.get("r1"), 2);
    }

    #[test]
    fn test_last_write_wins() {
        // v2's extra increment makes it causally newer, so it wins.
        let v1 = Versioned::new(100, "r1".to_string());
        let mut v2 = Versioned::new(200, "r1".to_string());
        v2.clock.increment("r1");

        let resolver = LastWriteWins;
        let result = resolver.resolve(&v1, &v2).unwrap();
        assert_eq!(result.value, 200);
    }

    #[test]
    fn test_merge_function() {
        // Concurrent versions (different replicas) trigger the merge fn.
        let v1 = Versioned::new(100, "r1".to_string());
        let v2 = Versioned::new(200, "r2".to_string());

        let resolver = MergeFunction::new(|a, b| a + b);
        let result = resolver.resolve(&v1, &v2).unwrap();
        assert_eq!(result.value, 300);
    }

    #[test]
    fn test_max_merge() {
        // Concurrent numeric versions converge on the larger value.
        let v1 = Versioned::new(100, "r1".to_string());
        let v2 = Versioned::new(200, "r2".to_string());

        let resolver = MaxMerge;
        let result = resolver.resolve(&v1, &v2).unwrap();
        assert_eq!(result.value, 200);
    }

    #[test]
    fn test_set_union() {
        // Concurrent vec versions merge to the union; shared element 3
        // appears once, so {1,2,3} U {3,4,5} has 5 elements.
        let v1 = Versioned::new(vec![1, 2, 3], "r1".to_string());
        let v2 = Versioned::new(vec![3, 4, 5], "r2".to_string());

        let resolver = SetUnion;
        let result = resolver.resolve(&v1, &v2).unwrap();
        assert_eq!(result.value.len(), 5);
        assert!(result.value.contains(&1));
        assert!(result.value.contains(&4));
    }
}
|
||||
443
vendor/ruvector/crates/ruvector-replication/src/failover.rs
vendored
Normal file
443
vendor/ruvector/crates/ruvector-replication/src/failover.rs
vendored
Normal file
@@ -0,0 +1,443 @@
|
||||
//! Automatic failover and high availability
|
||||
//!
|
||||
//! Provides failover management with health monitoring,
|
||||
//! quorum-based decision making, and split-brain prevention.
|
||||
|
||||
use crate::{Replica, ReplicaRole, ReplicaSet, ReplicationError, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::time::interval;
|
||||
|
||||
/// Health status of a replica.
///
/// Produced by health checks; `Unhealthy` and `Unresponsive` both count
/// toward the failover failure threshold, `Degraded` does not (see the
/// monitoring loop in `FailoverManager`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum HealthStatus {
    /// Replica is healthy
    Healthy,
    /// Replica is degraded but operational
    Degraded,
    /// Replica is unhealthy
    Unhealthy,
    /// Replica is not responding
    Unresponsive,
}
|
||||
|
||||
/// Health check result.
///
/// One record per probe of a single replica; appended to the manager's
/// bounded health history.
#[derive(Debug, Clone)]
pub struct HealthCheck {
    /// Replica ID
    pub replica_id: String,
    /// Health status
    pub status: HealthStatus,
    /// Response time in milliseconds (0 when the check failed)
    pub response_time_ms: u64,
    /// Error message if unhealthy
    pub error: Option<String>,
    /// Timestamp of the check (UTC, taken at construction)
    pub timestamp: DateTime<Utc>,
}
|
||||
|
||||
impl HealthCheck {
|
||||
/// Create a healthy check result
|
||||
pub fn healthy(replica_id: String, response_time_ms: u64) -> Self {
|
||||
Self {
|
||||
replica_id,
|
||||
status: HealthStatus::Healthy,
|
||||
response_time_ms,
|
||||
error: None,
|
||||
timestamp: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an unhealthy check result
|
||||
pub fn unhealthy(replica_id: String, error: String) -> Self {
|
||||
Self {
|
||||
replica_id,
|
||||
status: HealthStatus::Unhealthy,
|
||||
response_time_ms: 0,
|
||||
error: Some(error),
|
||||
timestamp: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an unresponsive check result
|
||||
pub fn unresponsive(replica_id: String) -> Self {
|
||||
Self {
|
||||
replica_id,
|
||||
status: HealthStatus::Unresponsive,
|
||||
response_time_ms: 0,
|
||||
error: Some("No response".to_string()),
|
||||
timestamp: Utc::now(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Failover policy configuration.
#[derive(Debug, Clone)]
pub struct FailoverPolicy {
    /// Enable automatic failover
    pub auto_failover: bool,
    /// Health check interval
    // NOTE(review): the monitoring loop snapshots this once at start; a
    // later `set_policy` does not change a running loop's interval.
    pub health_check_interval: Duration,
    /// Timeout for health checks
    pub health_check_timeout: Duration,
    /// Number of consecutive failures before failover
    pub failure_threshold: usize,
    /// Minimum quorum size for failover
    // NOTE(review): not read anywhere in this file — quorum checks go
    // through `ReplicaSet::has_quorum`. Confirm intended use.
    pub min_quorum: usize,
    /// Enable split-brain prevention
    // NOTE(review): also not consulted by the visible failover code.
    pub prevent_split_brain: bool,
}
|
||||
|
||||
impl Default for FailoverPolicy {
    /// Conservative defaults: probe every 5s with a 2s timeout, fail over
    /// automatically after 3 consecutive failures.
    fn default() -> Self {
        Self {
            auto_failover: true,
            health_check_interval: Duration::from_secs(5),
            health_check_timeout: Duration::from_secs(2),
            failure_threshold: 3,
            min_quorum: 2,
            prevent_split_brain: true,
        }
    }
}
|
||||
|
||||
/// Manages automatic failover and health monitoring.
///
/// Every field is wrapped in `Arc` because `start_monitoring` clones them
/// into a spawned background task that shares state with the manager.
pub struct FailoverManager {
    /// The replica set
    replica_set: Arc<RwLock<ReplicaSet>>,
    /// Failover policy
    policy: Arc<RwLock<FailoverPolicy>>,
    /// Health check history (trimmed to the most recent 1000 entries by
    /// the monitoring loop)
    health_history: Arc<RwLock<Vec<HealthCheck>>>,
    /// Failure counts by replica (consecutive failures; reset on Healthy)
    failure_counts: Arc<RwLock<std::collections::HashMap<String, usize>>>,
    /// Whether failover is in progress (guards against re-entrant failover)
    failover_in_progress: Arc<RwLock<bool>>,
}
|
||||
|
||||
impl FailoverManager {
    /// Create a new failover manager with the default [`FailoverPolicy`].
    pub fn new(replica_set: Arc<RwLock<ReplicaSet>>) -> Self {
        Self {
            replica_set,
            policy: Arc::new(RwLock::new(FailoverPolicy::default())),
            health_history: Arc::new(RwLock::new(Vec::new())),
            failure_counts: Arc::new(RwLock::new(std::collections::HashMap::new())),
            failover_in_progress: Arc::new(RwLock::new(false)),
        }
    }

    /// Create with custom policy.
    pub fn with_policy(replica_set: Arc<RwLock<ReplicaSet>>, policy: FailoverPolicy) -> Self {
        Self {
            replica_set,
            policy: Arc::new(RwLock::new(policy)),
            health_history: Arc::new(RwLock::new(Vec::new())),
            failure_counts: Arc::new(RwLock::new(std::collections::HashMap::new())),
            failover_in_progress: Arc::new(RwLock::new(false)),
        }
    }

    /// Set the failover policy.
    ///
    /// Threshold/auto-failover changes take effect on the next health
    /// check; the check *interval* of an already-running monitor does not
    /// change (it was snapshotted in `start_monitoring`).
    pub fn set_policy(&self, policy: FailoverPolicy) {
        *self.policy.write() = policy;
    }

    /// Get the current policy (cloned snapshot).
    pub fn policy(&self) -> FailoverPolicy {
        self.policy.read().clone()
    }

    /// Start health monitoring.
    ///
    /// Spawns a background task that probes every replica on each tick,
    /// records results, and triggers automatic failover once a replica's
    /// consecutive-failure count reaches the policy threshold.
    ///
    /// NOTE(review): the spawned task loops forever — there is no shutdown
    /// signal or `JoinHandle` returned; confirm whether a stop mechanism
    /// is needed. Calling this twice spawns two independent monitors.
    pub async fn start_monitoring(&self) {
        // Snapshot the policy for the interval; thresholds are re-read
        // each iteration via `manager_policy`.
        let policy = self.policy.read().clone();
        let replica_set = self.replica_set.clone();
        let health_history = self.health_history.clone();
        let failure_counts = self.failure_counts.clone();
        let failover_in_progress = self.failover_in_progress.clone();
        let manager_policy = self.policy.clone();

        tokio::spawn(async move {
            let mut interval_timer = interval(policy.health_check_interval);

            loop {
                interval_timer.tick().await;

                // Copy the IDs out so the read lock is released before the
                // per-replica awaits below.
                let replica_ids = {
                    let set = replica_set.read();
                    set.replica_ids()
                };

                for replica_id in replica_ids {
                    let health = Self::check_replica_health(
                        &replica_set,
                        &replica_id,
                        policy.health_check_timeout,
                    )
                    .await;

                    // Record health check
                    health_history.write().push(health.clone());

                    // Update failure count and check if failover is needed
                    // Use a scope to ensure lock is dropped before any await
                    let should_failover = {
                        let mut counts = failure_counts.write();
                        let count = counts.entry(replica_id.clone()).or_insert(0);

                        match health.status {
                            HealthStatus::Healthy => {
                                // A healthy probe resets the streak.
                                *count = 0;
                                false
                            }
                            HealthStatus::Degraded => {
                                // Don't increment for degraded
                                false
                            }
                            HealthStatus::Unhealthy | HealthStatus::Unresponsive => {
                                *count += 1;

                                // Check if failover is needed
                                let current_policy = manager_policy.read();
                                *count >= current_policy.failure_threshold
                                    && current_policy.auto_failover
                            }
                        }
                    }; // Lock is dropped here

                    // Trigger failover if needed (after lock is dropped)
                    if should_failover {
                        if let Err(e) =
                            Self::trigger_failover(&replica_set, &failover_in_progress).await
                        {
                            tracing::error!("Failover failed: {}", e);
                        }
                    }
                }

                // Trim health history to last 1000 entries
                let mut history = health_history.write();
                let len = history.len();
                if len > 1000 {
                    history.drain(0..len - 1000);
                }
            }
        });
    }

    /// Check health of a specific replica.
    ///
    /// Returns a [`HealthCheck`] derived from the replica's stored state;
    /// the 10ms response time is a placeholder for the simulated probe.
    async fn check_replica_health(
        replica_set: &Arc<RwLock<ReplicaSet>>,
        replica_id: &str,
        timeout: Duration,
    ) -> HealthCheck {
        // In a real implementation, this would make a network call
        // For now, we simulate health checks based on replica status

        let replica = {
            let set = replica_set.read();
            set.get_replica(replica_id)
        };

        match replica {
            Some(replica) => {
                if replica.is_timed_out(timeout) {
                    HealthCheck::unresponsive(replica_id.to_string())
                } else if replica.is_healthy() {
                    HealthCheck::healthy(replica_id.to_string(), 10)
                } else {
                    HealthCheck::unhealthy(replica_id.to_string(), "Replica is lagging".to_string())
                }
            }
            None => HealthCheck::unhealthy(replica_id.to_string(), "Replica not found".to_string()),
        }
    }

    /// Trigger failover to a healthy secondary.
    ///
    /// Guards against concurrent failovers with the `failover_in_progress`
    /// flag (an already-running failover returns `Ok(())` immediately),
    /// requires quorum, and promotes the best candidate. The flag is
    /// cleared on every exit path below the guard.
    async fn trigger_failover(
        replica_set: &Arc<RwLock<ReplicaSet>>,
        failover_in_progress: &Arc<RwLock<bool>>,
    ) -> Result<()> {
        // Check if failover is already in progress
        {
            let mut in_progress = failover_in_progress.write();
            if *in_progress {
                return Ok(());
            }
            *in_progress = true;
        }

        tracing::warn!("Initiating failover");

        // Find candidate within a scope to drop the lock before await
        let candidate_id = {
            let set = replica_set.read();

            // Check quorum
            if !set.has_quorum() {
                *failover_in_progress.write() = false;
                return Err(ReplicationError::QuorumNotMet {
                    needed: set.get_quorum_size(),
                    available: set.get_healthy_replicas().len(),
                });
            }

            // Find best candidate for promotion
            let candidate = Self::select_failover_candidate(&set)?;
            candidate.id.clone()
        }; // Lock is dropped here

        // Promote the candidate (lock re-acquired inside promote_to_primary)
        let result = {
            let mut set = replica_set.write();
            set.promote_to_primary(&candidate_id)
        };

        match &result {
            Ok(()) => tracing::info!("Failover completed: promoted {} to primary", candidate_id),
            Err(e) => tracing::error!("Failover failed: {}", e),
        }

        // Clear failover flag
        *failover_in_progress.write() = false;

        result
    }

    /// Select the best candidate for failover.
    ///
    /// Candidates are healthy secondaries, ranked by priority (highest
    /// first) with replication lag as the tie-breaker (lowest first).
    fn select_failover_candidate(replica_set: &ReplicaSet) -> Result<Replica> {
        let mut candidates: Vec<Replica> = replica_set
            .get_healthy_replicas()
            .into_iter()
            .filter(|r| r.role == ReplicaRole::Secondary)
            .collect();

        if candidates.is_empty() {
            return Err(ReplicationError::FailoverFailed(
                "No healthy secondary replicas available".to_string(),
            ));
        }

        // Sort by priority (highest first), then by lowest lag
        candidates.sort_by(|a, b| b.priority.cmp(&a.priority).then(a.lag_ms.cmp(&b.lag_ms)));

        Ok(candidates[0].clone())
    }

    /// Manually trigger failover.
    ///
    /// With `Some(id)`, promotes that specific replica; with `None`, uses
    /// the same candidate selection as automatic failover. Requires
    /// quorum. NOTE(review): unlike automatic failover, this path does not
    /// set or check `failover_in_progress` — confirm that is intentional.
    pub async fn manual_failover(&self, target_replica_id: Option<String>) -> Result<()> {
        let mut set = self.replica_set.write();

        // Check quorum
        if !set.has_quorum() {
            return Err(ReplicationError::QuorumNotMet {
                needed: set.get_quorum_size(),
                available: set.get_healthy_replicas().len(),
            });
        }

        let target = if let Some(id) = target_replica_id {
            set.get_replica(&id)
                .ok_or_else(|| ReplicationError::ReplicaNotFound(id))?
        } else {
            Self::select_failover_candidate(&set)?
        };

        set.promote_to_primary(&target.id)?;

        tracing::info!(
            "Manual failover completed: promoted {} to primary",
            target.id
        );
        Ok(())
    }

    /// Get health check history (cloned snapshot of up to 1000 entries).
    pub fn health_history(&self) -> Vec<HealthCheck> {
        self.health_history.read().clone()
    }

    /// Get recent health status for a replica, newest first, at most
    /// `limit` entries.
    pub fn recent_health(&self, replica_id: &str, limit: usize) -> Vec<HealthCheck> {
        let history = self.health_history.read();
        history
            .iter()
            .rev()
            .filter(|h| h.replica_id == replica_id)
            .take(limit)
            .cloned()
            .collect()
    }

    /// Check if failover is currently in progress.
    pub fn is_failover_in_progress(&self) -> bool {
        *self.failover_in_progress.read()
    }

    /// Get the consecutive-failure count for a replica (0 if never probed).
    pub fn failure_count(&self, replica_id: &str) -> usize {
        self.failure_counts
            .read()
            .get(replica_id)
            .copied()
            .unwrap_or(0)
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_health_check() {
        // Factory constructors set the matching status and error fields.
        let check = HealthCheck::healthy("r1".to_string(), 15);
        assert_eq!(check.status, HealthStatus::Healthy);
        assert_eq!(check.response_time_ms, 15);

        let check = HealthCheck::unhealthy("r2".to_string(), "Error".to_string());
        assert_eq!(check.status, HealthStatus::Unhealthy);
        assert!(check.error.is_some());
    }

    #[test]
    fn test_failover_policy() {
        // Default policy enables auto-failover after 3 failures.
        let policy = FailoverPolicy::default();
        assert!(policy.auto_failover);
        assert_eq!(policy.failure_threshold, 3);
    }

    #[test]
    fn test_failover_manager() {
        // A fresh manager reports no failover in progress.
        let mut replica_set = ReplicaSet::new("cluster-1");
        replica_set
            .add_replica("r1", "127.0.0.1:9001", ReplicaRole::Primary)
            .unwrap();
        replica_set
            .add_replica("r2", "127.0.0.1:9002", ReplicaRole::Secondary)
            .unwrap();

        let manager = FailoverManager::new(Arc::new(RwLock::new(replica_set)));
        assert!(!manager.is_failover_in_progress());
    }

    #[test]
    fn test_candidate_selection() {
        // Candidate selection returns a healthy secondary, never the primary.
        let mut replica_set = ReplicaSet::new("cluster-1");
        replica_set
            .add_replica("r1", "127.0.0.1:9001", ReplicaRole::Primary)
            .unwrap();
        replica_set
            .add_replica("r2", "127.0.0.1:9002", ReplicaRole::Secondary)
            .unwrap();
        replica_set
            .add_replica("r3", "127.0.0.1:9003", ReplicaRole::Secondary)
            .unwrap();

        let candidate = FailoverManager::select_failover_candidate(&replica_set).unwrap();
        assert!(candidate.role == ReplicaRole::Secondary);
        assert!(candidate.is_healthy());
    }
}
|
||||
104
vendor/ruvector/crates/ruvector-replication/src/lib.rs
vendored
Normal file
104
vendor/ruvector/crates/ruvector-replication/src/lib.rs
vendored
Normal file
@@ -0,0 +1,104 @@
|
||||
//! Data replication and synchronization for ruvector
|
||||
//!
|
||||
//! This crate provides comprehensive replication capabilities including:
|
||||
//! - Multi-node replica management
|
||||
//! - Synchronous, asynchronous, and semi-synchronous replication modes
|
||||
//! - Conflict resolution with vector clocks and CRDTs
|
||||
//! - Change data capture and streaming
|
||||
//! - Automatic failover and split-brain prevention
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! ```no_run
|
||||
//! use ruvector_replication::{ReplicaSet, ReplicaRole, SyncMode, SyncManager, ReplicationLog};
|
||||
//! use std::sync::Arc;
|
||||
//!
|
||||
//! fn example() -> Result<(), Box<dyn std::error::Error>> {
|
||||
//! // Create a replica set
|
||||
//! let mut replica_set = ReplicaSet::new("cluster-1");
|
||||
//!
|
||||
//! // Add replicas
|
||||
//! replica_set.add_replica("replica-1", "192.168.1.10:9001", ReplicaRole::Primary)?;
|
||||
//! replica_set.add_replica("replica-2", "192.168.1.11:9001", ReplicaRole::Secondary)?;
|
||||
//!
|
||||
//! // Create sync manager and configure synchronization
|
||||
//! let log = Arc::new(ReplicationLog::new("replica-1"));
|
||||
//! let manager = SyncManager::new(Arc::new(replica_set), log);
|
||||
//! manager.set_sync_mode(SyncMode::SemiSync { min_replicas: 1 });
|
||||
//! Ok(())
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
pub mod conflict;
|
||||
pub mod failover;
|
||||
pub mod replica;
|
||||
pub mod stream;
|
||||
pub mod sync;
|
||||
|
||||
pub use conflict::{ConflictResolver, LastWriteWins, MergeFunction, VectorClock};
|
||||
pub use failover::{FailoverManager, FailoverPolicy, HealthStatus};
|
||||
pub use replica::{Replica, ReplicaRole, ReplicaSet, ReplicaStatus};
|
||||
pub use stream::{ChangeEvent, ChangeOperation, ReplicationStream};
|
||||
pub use sync::{LogEntry, ReplicationLog, SyncManager, SyncMode};
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
/// Result type for replication operations
|
||||
pub type Result<T> = std::result::Result<T, ReplicationError>;
|
||||
|
||||
/// Errors that can occur during replication operations.
///
/// Display strings come from the `thiserror` `#[error]` attributes; the
/// `#[from]` variants allow `?` conversion from bincode and I/O errors.
#[derive(Error, Debug)]
pub enum ReplicationError {
    /// The named replica is not part of the replica set.
    #[error("Replica not found: {0}")]
    ReplicaNotFound(String),

    /// No replica currently holds the primary role.
    #[error("No primary replica available")]
    NoPrimary,

    /// A replication operation exceeded its deadline.
    #[error("Replication timeout: {0}")]
    Timeout(String),

    /// Log shipping / state synchronization failed.
    #[error("Synchronization failed: {0}")]
    SyncFailed(String),

    /// A conflict resolver could not produce a winning version.
    #[error("Conflict resolution failed: {0}")]
    ConflictResolution(String),

    /// Failover could not complete (e.g. no promotable secondary).
    #[error("Failover failed: {0}")]
    FailoverFailed(String),

    /// Transport-level failure talking to a replica.
    #[error("Network error: {0}")]
    Network(String),

    /// Too few healthy replicas to satisfy the quorum requirement.
    #[error("Quorum not met: needed {needed}, got {available}")]
    QuorumNotMet { needed: usize, available: usize },

    /// Two replicas both believe they are primary.
    #[error("Split-brain detected")]
    SplitBrain,

    /// A replica was in a state that forbids the requested operation.
    #[error("Invalid replica state: {0}")]
    InvalidState(String),

    /// bincode serialization failed (converted via `?`).
    #[error("Serialization encode error: {0}")]
    SerializationEncode(#[from] bincode::error::EncodeError),

    /// bincode deserialization failed (converted via `?`).
    #[error("Serialization decode error: {0}")]
    SerializationDecode(#[from] bincode::error::DecodeError),

    /// Underlying I/O failure (converted via `?`).
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_error_display() {
        // The thiserror-derived Display must interpolate both named fields.
        let err = ReplicationError::QuorumNotMet {
            needed: 2,
            available: 1,
        };
        assert_eq!(err.to_string(), "Quorum not met: needed 2, got 1");
    }
}
|
||||
378
vendor/ruvector/crates/ruvector-replication/src/replica.rs
vendored
Normal file
378
vendor/ruvector/crates/ruvector-replication/src/replica.rs
vendored
Normal file
@@ -0,0 +1,378 @@
|
||||
//! Replica management and coordination
|
||||
//!
|
||||
//! Provides structures and logic for managing distributed replicas,
|
||||
//! including role management, health tracking, and promotion/demotion.
|
||||
|
||||
use crate::{ReplicationError, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use dashmap::DashMap;
|
||||
use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Role of a replica in the replication topology.
///
/// Only a `Primary` accepts writes (see `Replica::is_writable`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ReplicaRole {
    /// Primary replica that handles writes
    Primary,
    /// Secondary replica that replicates from primary
    Secondary,
    /// Witness replica for quorum without data replication
    Witness,
}
|
||||
|
||||
/// Current status of a replica.
///
/// `Healthy` and `Lagging` replicas remain readable; only `Healthy`
/// primaries are writable (see the `Replica` predicates).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ReplicaStatus {
    /// Replica is online and healthy
    Healthy,
    /// Replica is lagging behind (lag above the 5000ms threshold)
    Lagging,
    /// Replica is offline or unreachable
    Offline,
    /// Replica is recovering
    Recovering,
}
|
||||
|
||||
/// Represents a single replica in the replication topology.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Replica {
    /// Unique identifier for the replica
    pub id: String,
    /// Network address of the replica
    // NOTE(review): stored as a plain string; format (host:port?) is not
    // validated anywhere visible here.
    pub address: String,
    /// Current role of the replica
    pub role: ReplicaRole,
    /// Current status of the replica
    pub status: ReplicaStatus,
    /// Replication lag in milliseconds (>5000 flips status to Lagging)
    pub lag_ms: u64,
    /// Last known position in the replication log
    pub log_position: u64,
    /// Last heartbeat timestamp (UTC)
    pub last_heartbeat: DateTime<Utc>,
    /// Priority for failover (higher is better; defaults to 100)
    pub priority: u32,
}
|
||||
|
||||
impl Replica {
|
||||
/// Create a new replica
|
||||
pub fn new(id: impl Into<String>, address: impl Into<String>, role: ReplicaRole) -> Self {
|
||||
Self {
|
||||
id: id.into(),
|
||||
address: address.into(),
|
||||
role,
|
||||
status: ReplicaStatus::Healthy,
|
||||
lag_ms: 0,
|
||||
log_position: 0,
|
||||
last_heartbeat: Utc::now(),
|
||||
priority: 100,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if the replica is healthy
|
||||
pub fn is_healthy(&self) -> bool {
|
||||
self.status == ReplicaStatus::Healthy && self.lag_ms < 5000
|
||||
}
|
||||
|
||||
/// Check if the replica is available for reads
|
||||
pub fn is_readable(&self) -> bool {
|
||||
matches!(self.status, ReplicaStatus::Healthy | ReplicaStatus::Lagging)
|
||||
}
|
||||
|
||||
/// Check if the replica is available for writes
|
||||
pub fn is_writable(&self) -> bool {
|
||||
self.role == ReplicaRole::Primary && self.status == ReplicaStatus::Healthy
|
||||
}
|
||||
|
||||
/// Update the replica's lag
|
||||
pub fn update_lag(&mut self, lag_ms: u64) {
|
||||
self.lag_ms = lag_ms;
|
||||
if lag_ms > 5000 {
|
||||
self.status = ReplicaStatus::Lagging;
|
||||
} else if self.status == ReplicaStatus::Lagging {
|
||||
self.status = ReplicaStatus::Healthy;
|
||||
}
|
||||
}
|
||||
|
||||
/// Update the replica's log position
|
||||
pub fn update_position(&mut self, position: u64) {
|
||||
self.log_position = position;
|
||||
}
|
||||
|
||||
/// Record a heartbeat
|
||||
pub fn heartbeat(&mut self) {
|
||||
self.last_heartbeat = Utc::now();
|
||||
if self.status == ReplicaStatus::Offline {
|
||||
self.status = ReplicaStatus::Recovering;
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if the replica has timed out
|
||||
pub fn is_timed_out(&self, timeout: Duration) -> bool {
|
||||
let elapsed = Utc::now()
|
||||
.signed_duration_since(self.last_heartbeat)
|
||||
.to_std()
|
||||
.unwrap_or(Duration::MAX);
|
||||
elapsed > timeout
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages a set of replicas for one cluster.
///
/// All fields use interior mutability (`DashMap` / `parking_lot::RwLock`
/// behind `Arc`), so the set can be shared and mutated concurrently.
pub struct ReplicaSet {
    /// Cluster identifier.
    cluster_id: String,
    /// Map of replica ID to replica state.
    replicas: Arc<DashMap<String, Replica>>,
    /// ID of the current primary replica, if one is registered.
    primary_id: Arc<RwLock<Option<String>>>,
    /// Minimum number of healthy non-witness replicas required for quorum.
    quorum_size: Arc<RwLock<usize>>,
}
|
||||
|
||||
impl ReplicaSet {
|
||||
/// Create a new replica set
|
||||
pub fn new(cluster_id: impl Into<String>) -> Self {
|
||||
Self {
|
||||
cluster_id: cluster_id.into(),
|
||||
replicas: Arc::new(DashMap::new()),
|
||||
primary_id: Arc::new(RwLock::new(None)),
|
||||
quorum_size: Arc::new(RwLock::new(1)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a replica to the set
|
||||
pub fn add_replica(
|
||||
&mut self,
|
||||
id: impl Into<String>,
|
||||
address: impl Into<String>,
|
||||
role: ReplicaRole,
|
||||
) -> Result<()> {
|
||||
let id = id.into();
|
||||
let replica = Replica::new(id.clone(), address, role);
|
||||
|
||||
if role == ReplicaRole::Primary {
|
||||
let mut primary = self.primary_id.write();
|
||||
if primary.is_some() {
|
||||
return Err(ReplicationError::InvalidState(
|
||||
"Primary replica already exists".to_string(),
|
||||
));
|
||||
}
|
||||
*primary = Some(id.clone());
|
||||
}
|
||||
|
||||
self.replicas.insert(id, replica);
|
||||
self.update_quorum_size();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Remove a replica from the set
|
||||
pub fn remove_replica(&mut self, id: &str) -> Result<()> {
|
||||
let replica = self
|
||||
.replicas
|
||||
.remove(id)
|
||||
.ok_or_else(|| ReplicationError::ReplicaNotFound(id.to_string()))?;
|
||||
|
||||
if replica.1.role == ReplicaRole::Primary {
|
||||
let mut primary = self.primary_id.write();
|
||||
*primary = None;
|
||||
}
|
||||
|
||||
self.update_quorum_size();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get a replica by ID
|
||||
pub fn get_replica(&self, id: &str) -> Option<Replica> {
|
||||
self.replicas.get(id).map(|r| r.clone())
|
||||
}
|
||||
|
||||
/// Get the current primary replica
|
||||
pub fn get_primary(&self) -> Option<Replica> {
|
||||
let primary_id = self.primary_id.read();
|
||||
primary_id
|
||||
.as_ref()
|
||||
.and_then(|id| self.replicas.get(id).map(|r| r.clone()))
|
||||
}
|
||||
|
||||
/// Get all secondary replicas
|
||||
pub fn get_secondaries(&self) -> Vec<Replica> {
|
||||
self.replicas
|
||||
.iter()
|
||||
.filter(|r| r.role == ReplicaRole::Secondary)
|
||||
.map(|r| r.clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get all healthy replicas
|
||||
pub fn get_healthy_replicas(&self) -> Vec<Replica> {
|
||||
self.replicas
|
||||
.iter()
|
||||
.filter(|r| r.is_healthy())
|
||||
.map(|r| r.clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Promote a secondary to primary
|
||||
pub fn promote_to_primary(&mut self, id: &str) -> Result<()> {
|
||||
// Get the replica and verify it exists
|
||||
let mut replica = self
|
||||
.replicas
|
||||
.get_mut(id)
|
||||
.ok_or_else(|| ReplicationError::ReplicaNotFound(id.to_string()))?;
|
||||
|
||||
if replica.role == ReplicaRole::Primary {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if replica.role == ReplicaRole::Witness {
|
||||
return Err(ReplicationError::InvalidState(
|
||||
"Cannot promote witness to primary".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
// Demote current primary if exists
|
||||
let old_primary_id = {
|
||||
let mut primary = self.primary_id.write();
|
||||
primary.take()
|
||||
};
|
||||
|
||||
if let Some(old_id) = old_primary_id {
|
||||
if let Some(mut old_primary) = self.replicas.get_mut(&old_id) {
|
||||
old_primary.role = ReplicaRole::Secondary;
|
||||
}
|
||||
}
|
||||
|
||||
// Promote new primary
|
||||
replica.role = ReplicaRole::Primary;
|
||||
let mut primary = self.primary_id.write();
|
||||
*primary = Some(id.to_string());
|
||||
|
||||
tracing::info!("Promoted replica {} to primary", id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Demote a primary to secondary
|
||||
pub fn demote_to_secondary(&mut self, id: &str) -> Result<()> {
|
||||
let mut replica = self
|
||||
.replicas
|
||||
.get_mut(id)
|
||||
.ok_or_else(|| ReplicationError::ReplicaNotFound(id.to_string()))?;
|
||||
|
||||
if replica.role != ReplicaRole::Primary {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
replica.role = ReplicaRole::Secondary;
|
||||
let mut primary = self.primary_id.write();
|
||||
*primary = None;
|
||||
|
||||
tracing::info!("Demoted replica {} to secondary", id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check if quorum is available
|
||||
pub fn has_quorum(&self) -> bool {
|
||||
let healthy_count = self
|
||||
.replicas
|
||||
.iter()
|
||||
.filter(|r| r.is_healthy() && r.role != ReplicaRole::Witness)
|
||||
.count();
|
||||
let quorum = *self.quorum_size.read();
|
||||
healthy_count >= quorum
|
||||
}
|
||||
|
||||
/// Get the required quorum size
|
||||
pub fn get_quorum_size(&self) -> usize {
|
||||
*self.quorum_size.read()
|
||||
}
|
||||
|
||||
/// Set the quorum size
|
||||
pub fn set_quorum_size(&self, size: usize) {
|
||||
*self.quorum_size.write() = size;
|
||||
}
|
||||
|
||||
/// Update quorum size based on replica count
|
||||
fn update_quorum_size(&self) {
|
||||
let replica_count = self
|
||||
.replicas
|
||||
.iter()
|
||||
.filter(|r| r.role != ReplicaRole::Witness)
|
||||
.count();
|
||||
let quorum = (replica_count / 2) + 1;
|
||||
*self.quorum_size.write() = quorum;
|
||||
}
|
||||
|
||||
/// Get all replica IDs
|
||||
pub fn replica_ids(&self) -> Vec<String> {
|
||||
self.replicas.iter().map(|r| r.id.clone()).collect()
|
||||
}
|
||||
|
||||
/// Get replica count
|
||||
pub fn replica_count(&self) -> usize {
|
||||
self.replicas.len()
|
||||
}
|
||||
|
||||
/// Get the cluster ID
|
||||
pub fn cluster_id(&self) -> &str {
|
||||
&self.cluster_id
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_replica_creation() {
        let replica = Replica::new("r1", "127.0.0.1:9001", ReplicaRole::Primary);
        // A freshly created primary is healthy and writable by default.
        assert_eq!(replica.id, "r1");
        assert_eq!(replica.role, ReplicaRole::Primary);
        assert!(replica.is_writable());
        assert!(replica.is_healthy());
    }

    #[test]
    fn test_replica_set() {
        let mut set = ReplicaSet::new("cluster-1");
        for (id, addr, role) in [
            ("r1", "127.0.0.1:9001", ReplicaRole::Primary),
            ("r2", "127.0.0.1:9002", ReplicaRole::Secondary),
        ] {
            set.add_replica(id, addr, role).unwrap();
        }

        assert_eq!(set.replica_count(), 2);
        assert_eq!(set.get_secondaries().len(), 1);
        assert!(set.get_primary().is_some());
    }

    #[test]
    fn test_promotion() {
        let mut set = ReplicaSet::new("cluster-1");
        set.add_replica("r1", "127.0.0.1:9001", ReplicaRole::Primary)
            .unwrap();
        set.add_replica("r2", "127.0.0.1:9002", ReplicaRole::Secondary)
            .unwrap();

        set.promote_to_primary("r2").unwrap();

        // r2 must now be tracked as the primary and carry the Primary role.
        let new_primary = set.get_primary().expect("a primary must exist");
        assert_eq!(new_primary.id, "r2");
        assert_eq!(new_primary.role, ReplicaRole::Primary);
    }

    #[test]
    fn test_quorum() {
        let mut set = ReplicaSet::new("cluster-1");
        set.add_replica("r1", "127.0.0.1:9001", ReplicaRole::Primary)
            .unwrap();
        set.add_replica("r2", "127.0.0.1:9002", ReplicaRole::Secondary)
            .unwrap();
        set.add_replica("r3", "127.0.0.1:9003", ReplicaRole::Secondary)
            .unwrap();

        // Majority of 3 data-bearing replicas is 2; all are healthy.
        assert_eq!(set.get_quorum_size(), 2);
        assert!(set.has_quorum());
    }
}
|
||||
403
vendor/ruvector/crates/ruvector-replication/src/stream.rs
vendored
Normal file
403
vendor/ruvector/crates/ruvector-replication/src/stream.rs
vendored
Normal file
@@ -0,0 +1,403 @@
|
||||
//! Change data capture and streaming for replication
|
||||
//!
|
||||
//! Provides mechanisms for streaming changes from the replication log
|
||||
//! with support for checkpointing, resumption, and backpressure handling.
|
||||
|
||||
use crate::{LogEntry, ReplicationError, ReplicationLog, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::mpsc;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Type of change operation carried by a [`ChangeEvent`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ChangeOperation {
    /// Insert operation.
    Insert,
    /// Update operation.
    Update,
    /// Delete operation.
    Delete,
    /// Bulk operation (multiple documents in one event).
    Bulk,
}
|
||||
|
||||
/// A change event in the replication stream.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChangeEvent {
    /// Unique identifier for this event.
    pub id: Uuid,
    /// Sequence number in the stream (mirrors the replication-log sequence
    /// when the event is derived from a log entry).
    pub sequence: u64,
    /// Timestamp of the change (UTC).
    pub timestamp: DateTime<Utc>,
    /// Type of operation.
    pub operation: ChangeOperation,
    /// Collection/table name the change applies to.
    pub collection: String,
    /// Document/vector ID affected.
    pub document_id: String,
    /// Serialized payload of the change (opaque bytes; encoding is decided
    /// by the producer).
    pub data: Vec<u8>,
    /// Free-form metadata for the change (`Null` when none).
    pub metadata: serde_json::Value,
}
|
||||
|
||||
impl ChangeEvent {
|
||||
/// Create a new change event
|
||||
pub fn new(
|
||||
sequence: u64,
|
||||
operation: ChangeOperation,
|
||||
collection: String,
|
||||
document_id: String,
|
||||
data: Vec<u8>,
|
||||
) -> Self {
|
||||
Self {
|
||||
id: Uuid::new_v4(),
|
||||
sequence,
|
||||
timestamp: Utc::now(),
|
||||
operation,
|
||||
collection,
|
||||
document_id,
|
||||
data,
|
||||
metadata: serde_json::Value::Null,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add metadata to the change event
|
||||
pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
|
||||
self.metadata = metadata;
|
||||
self
|
||||
}
|
||||
|
||||
/// Convert from a log entry
|
||||
pub fn from_log_entry(
|
||||
entry: &LogEntry,
|
||||
operation: ChangeOperation,
|
||||
collection: String,
|
||||
document_id: String,
|
||||
) -> Self {
|
||||
Self {
|
||||
id: entry.id,
|
||||
sequence: entry.sequence,
|
||||
timestamp: entry.timestamp,
|
||||
operation,
|
||||
collection,
|
||||
document_id,
|
||||
data: entry.data.clone(),
|
||||
metadata: serde_json::json!({
|
||||
"source_replica": entry.source_replica,
|
||||
"checksum": entry.checksum,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Checkpoint for resuming a replication stream.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Checkpoint {
    /// Last processed sequence number; resumption starts after it.
    pub sequence: u64,
    /// Timestamp when the checkpoint was taken (UTC).
    pub timestamp: DateTime<Utc>,
    /// Optional consumer-group ID.
    pub consumer_group: Option<String>,
    /// Consumer ID within the group.
    pub consumer_id: String,
}
|
||||
|
||||
impl Checkpoint {
|
||||
/// Create a new checkpoint
|
||||
pub fn new(sequence: u64, consumer_id: impl Into<String>) -> Self {
|
||||
Self {
|
||||
sequence,
|
||||
timestamp: Utc::now(),
|
||||
consumer_group: None,
|
||||
consumer_id: consumer_id.into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the consumer group
|
||||
pub fn with_group(mut self, group: impl Into<String>) -> Self {
|
||||
self.consumer_group = Some(group.into());
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration for a replication stream.
#[derive(Debug, Clone)]
pub struct StreamConfig {
    /// Capacity of the mpsc channel carrying event batches.
    pub buffer_size: usize,
    /// Maximum number of log entries fetched (and sent) per batch.
    pub batch_size: usize,
    /// Whether to record checkpoints automatically while streaming.
    pub auto_checkpoint: bool,
    /// Number of events between automatic checkpoints.
    pub checkpoint_interval: usize,
}
|
||||
|
||||
impl Default for StreamConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
buffer_size: 1000,
|
||||
batch_size: 100,
|
||||
auto_checkpoint: true,
|
||||
checkpoint_interval: 100,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages a single consumer's replication stream over a shared log.
pub struct ReplicationStream {
    /// The replication log to read entries from.
    log: Arc<ReplicationLog>,
    /// Stream configuration (buffer/batch/checkpoint settings).
    config: StreamConfig,
    /// Current checkpoint, shared with the background streaming task.
    checkpoint: Arc<RwLock<Option<Checkpoint>>>,
    /// Consumer ID recorded in checkpoints.
    consumer_id: String,
}
|
||||
|
||||
impl ReplicationStream {
|
||||
/// Create a new replication stream
|
||||
pub fn new(log: Arc<ReplicationLog>, consumer_id: impl Into<String>) -> Self {
|
||||
Self {
|
||||
log,
|
||||
config: StreamConfig::default(),
|
||||
checkpoint: Arc::new(RwLock::new(None)),
|
||||
consumer_id: consumer_id.into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create with custom configuration
|
||||
pub fn with_config(
|
||||
log: Arc<ReplicationLog>,
|
||||
consumer_id: impl Into<String>,
|
||||
config: StreamConfig,
|
||||
) -> Self {
|
||||
Self {
|
||||
log,
|
||||
config,
|
||||
checkpoint: Arc::new(RwLock::new(None)),
|
||||
consumer_id: consumer_id.into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Start streaming from a given position
|
||||
pub async fn stream_from(
|
||||
&self,
|
||||
start_sequence: u64,
|
||||
) -> Result<mpsc::Receiver<Vec<ChangeEvent>>> {
|
||||
let (tx, rx) = mpsc::channel(self.config.buffer_size);
|
||||
|
||||
let log = self.log.clone();
|
||||
let batch_size = self.config.batch_size;
|
||||
let checkpoint = self.checkpoint.clone();
|
||||
let auto_checkpoint = self.config.auto_checkpoint;
|
||||
let checkpoint_interval = self.config.checkpoint_interval;
|
||||
let consumer_id = self.consumer_id.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let mut current_sequence = start_sequence;
|
||||
let mut events_since_checkpoint = 0;
|
||||
|
||||
loop {
|
||||
// Get batch of entries
|
||||
let entries =
|
||||
log.get_range(current_sequence + 1, current_sequence + batch_size as u64);
|
||||
|
||||
if entries.is_empty() {
|
||||
// No new entries, wait a bit
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Convert to change events
|
||||
let mut events = Vec::new();
|
||||
for entry in &entries {
|
||||
// In a real implementation, we would decode the operation type
|
||||
// from the entry data. For now, we use a placeholder.
|
||||
let event = ChangeEvent::from_log_entry(
|
||||
entry,
|
||||
ChangeOperation::Update,
|
||||
"default".to_string(),
|
||||
Uuid::new_v4().to_string(),
|
||||
);
|
||||
events.push(event);
|
||||
}
|
||||
|
||||
// Update current sequence
|
||||
if let Some(last_entry) = entries.last() {
|
||||
current_sequence = last_entry.sequence;
|
||||
}
|
||||
|
||||
// Send batch
|
||||
if tx.send(events).await.is_err() {
|
||||
// Receiver dropped, stop streaming
|
||||
break;
|
||||
}
|
||||
|
||||
events_since_checkpoint += entries.len();
|
||||
|
||||
// Auto-checkpoint if enabled
|
||||
if auto_checkpoint && events_since_checkpoint >= checkpoint_interval {
|
||||
let cp = Checkpoint::new(current_sequence, consumer_id.clone());
|
||||
*checkpoint.write() = Some(cp);
|
||||
events_since_checkpoint = 0;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(rx)
|
||||
}
|
||||
|
||||
/// Resume streaming from the last checkpoint
|
||||
pub async fn resume(&self) -> Result<mpsc::Receiver<Vec<ChangeEvent>>> {
|
||||
let checkpoint = self.checkpoint.read();
|
||||
let start_sequence = checkpoint.as_ref().map(|cp| cp.sequence).unwrap_or(0);
|
||||
drop(checkpoint);
|
||||
|
||||
self.stream_from(start_sequence).await
|
||||
}
|
||||
|
||||
/// Get the current checkpoint
|
||||
pub fn get_checkpoint(&self) -> Option<Checkpoint> {
|
||||
self.checkpoint.read().clone()
|
||||
}
|
||||
|
||||
/// Set a checkpoint manually
|
||||
pub fn set_checkpoint(&self, checkpoint: Checkpoint) {
|
||||
*self.checkpoint.write() = Some(checkpoint);
|
||||
}
|
||||
|
||||
/// Clear the checkpoint
|
||||
pub fn clear_checkpoint(&self) {
|
||||
*self.checkpoint.write() = None;
|
||||
}
|
||||
}
|
||||
|
||||
/// Manager for multiple replication streams (consumer groups).
pub struct StreamManager {
    /// The replication log shared by all streams.
    log: Arc<ReplicationLog>,
    /// Streams created so far, in creation order. Note: despite serving
    /// per-consumer streams, this is a flat list, not keyed by consumer ID,
    /// and streams are never removed once created.
    streams: Arc<RwLock<Vec<Arc<ReplicationStream>>>>,
}
|
||||
|
||||
impl StreamManager {
|
||||
/// Create a new stream manager
|
||||
pub fn new(log: Arc<ReplicationLog>) -> Self {
|
||||
Self {
|
||||
log,
|
||||
streams: Arc::new(RwLock::new(Vec::new())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new stream for a consumer
|
||||
pub fn create_stream(&self, consumer_id: impl Into<String>) -> Arc<ReplicationStream> {
|
||||
let stream = Arc::new(ReplicationStream::new(self.log.clone(), consumer_id));
|
||||
self.streams.write().push(stream.clone());
|
||||
stream
|
||||
}
|
||||
|
||||
/// Create a stream with custom configuration
|
||||
pub fn create_stream_with_config(
|
||||
&self,
|
||||
consumer_id: impl Into<String>,
|
||||
config: StreamConfig,
|
||||
) -> Arc<ReplicationStream> {
|
||||
let stream = Arc::new(ReplicationStream::with_config(
|
||||
self.log.clone(),
|
||||
consumer_id,
|
||||
config,
|
||||
));
|
||||
self.streams.write().push(stream.clone());
|
||||
stream
|
||||
}
|
||||
|
||||
/// Get all active streams
|
||||
pub fn active_streams(&self) -> Vec<Arc<ReplicationStream>> {
|
||||
self.streams.read().clone()
|
||||
}
|
||||
|
||||
/// Get the number of active streams
|
||||
pub fn stream_count(&self) -> usize {
|
||||
self.streams.read().len()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_change_event_creation() {
        let event = ChangeEvent::new(
            1,
            ChangeOperation::Insert,
            "vectors".to_string(),
            "doc-1".to_string(),
            b"data".to_vec(),
        );

        assert_eq!(event.sequence, 1);
        assert_eq!(event.operation, ChangeOperation::Insert);
        assert_eq!(event.collection, "vectors");
    }

    #[test]
    fn test_checkpoint() {
        let cp = Checkpoint::new(100, "consumer-1").with_group("group-1");

        assert_eq!(cp.sequence, 100);
        assert_eq!(cp.consumer_id, "consumer-1");
        assert_eq!(cp.consumer_group, Some("group-1".to_string()));
    }

    #[tokio::test]
    async fn test_replication_stream() {
        let log = Arc::new(ReplicationLog::new("replica-1"));

        // Add some entries
        log.append(b"data1".to_vec());
        log.append(b"data2".to_vec());
        log.append(b"data3".to_vec());

        let stream = ReplicationStream::new(log.clone(), "consumer-1");
        let mut rx = stream.stream_from(0).await.unwrap();

        // The first delivered batch must contain the appended entries.
        if let Some(events) = rx.recv().await {
            assert!(!events.is_empty());
        }
    }

    #[test]
    fn test_stream_manager() {
        let log = Arc::new(ReplicationLog::new("replica-1"));
        let manager = StreamManager::new(log);

        // Underscore bindings: the handles only keep the streams registered,
        // so this avoids unused-variable warnings.
        let _stream1 = manager.create_stream("consumer-1");
        let _stream2 = manager.create_stream("consumer-2");

        assert_eq!(manager.stream_count(), 2);
    }

    #[test]
    fn test_stream_config() {
        let config = StreamConfig {
            buffer_size: 2000,
            batch_size: 50,
            auto_checkpoint: false,
            checkpoint_interval: 200,
        };

        assert_eq!(config.buffer_size, 2000);
        assert_eq!(config.batch_size, 50);
        assert!(!config.auto_checkpoint);
    }
}
|
||||
374
vendor/ruvector/crates/ruvector-replication/src/sync.rs
vendored
Normal file
374
vendor/ruvector/crates/ruvector-replication/src/sync.rs
vendored
Normal file
@@ -0,0 +1,374 @@
|
||||
//! Synchronization modes and replication log management
|
||||
//!
|
||||
//! Provides different replication modes (sync, async, semi-sync)
|
||||
//! and manages the replication log for tracking changes.
|
||||
|
||||
use crate::{ReplicaSet, ReplicationError, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use dashmap::DashMap;
|
||||
use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::time::timeout;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Synchronization mode for replication.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SyncMode {
    /// Wait for all replicas to acknowledge before returning.
    Sync,
    /// Fire-and-forget: don't wait for replicas.
    Async,
    /// Wait for at least `min_replicas` secondaries to acknowledge.
    SemiSync { min_replicas: usize },
}
|
||||
|
||||
/// Entry in the replication log.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LogEntry {
    /// Unique identifier for this entry.
    pub id: Uuid,
    /// Sequence number in the log (1-based, assigned by `ReplicationLog`).
    pub sequence: u64,
    /// Timestamp when the entry was created (UTC).
    pub timestamp: DateTime<Utc>,
    /// The operation payload (serialized, opaque to this module).
    pub data: Vec<u8>,
    /// Checksum of `data` for integrity verification (see `LogEntry::verify`).
    pub checksum: u64,
    /// ID of the replica that originated this entry.
    pub source_replica: String,
}
|
||||
|
||||
impl LogEntry {
|
||||
/// Create a new log entry
|
||||
pub fn new(sequence: u64, data: Vec<u8>, source_replica: String) -> Self {
|
||||
let checksum = Self::calculate_checksum(&data);
|
||||
Self {
|
||||
id: Uuid::new_v4(),
|
||||
sequence,
|
||||
timestamp: Utc::now(),
|
||||
data,
|
||||
checksum,
|
||||
source_replica,
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate checksum for data
|
||||
fn calculate_checksum(data: &[u8]) -> u64 {
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
let mut hasher = DefaultHasher::new();
|
||||
data.hash(&mut hasher);
|
||||
hasher.finish()
|
||||
}
|
||||
|
||||
/// Verify data integrity
|
||||
pub fn verify(&self) -> bool {
|
||||
Self::calculate_checksum(&self.data) == self.checksum
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages the in-memory replication log.
pub struct ReplicationLog {
    /// Log entries indexed by sequence number.
    entries: Arc<DashMap<u64, LogEntry>>,
    /// Highest sequence number assigned so far (0 when empty).
    sequence: Arc<RwLock<u64>>,
    /// ID of the replica owning this log, stamped onto appended entries.
    replica_id: String,
}
|
||||
|
||||
impl ReplicationLog {
|
||||
/// Create a new replication log
|
||||
pub fn new(replica_id: impl Into<String>) -> Self {
|
||||
Self {
|
||||
entries: Arc::new(DashMap::new()),
|
||||
sequence: Arc::new(RwLock::new(0)),
|
||||
replica_id: replica_id.into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Append an entry to the log
|
||||
pub fn append(&self, data: Vec<u8>) -> LogEntry {
|
||||
let mut seq = self.sequence.write();
|
||||
*seq += 1;
|
||||
let entry = LogEntry::new(*seq, data, self.replica_id.clone());
|
||||
self.entries.insert(*seq, entry.clone());
|
||||
entry
|
||||
}
|
||||
|
||||
/// Get an entry by sequence number
|
||||
pub fn get(&self, sequence: u64) -> Option<LogEntry> {
|
||||
self.entries.get(&sequence).map(|e| e.clone())
|
||||
}
|
||||
|
||||
/// Get entries in a range
|
||||
pub fn get_range(&self, start: u64, end: u64) -> Vec<LogEntry> {
|
||||
let mut entries = Vec::new();
|
||||
for seq in start..=end {
|
||||
if let Some(entry) = self.entries.get(&seq) {
|
||||
entries.push(entry.clone());
|
||||
}
|
||||
}
|
||||
entries
|
||||
}
|
||||
|
||||
/// Get the current sequence number
|
||||
pub fn current_sequence(&self) -> u64 {
|
||||
*self.sequence.read()
|
||||
}
|
||||
|
||||
/// Get entries since a given sequence
|
||||
pub fn get_since(&self, since: u64) -> Vec<LogEntry> {
|
||||
let current = self.current_sequence();
|
||||
self.get_range(since + 1, current)
|
||||
}
|
||||
|
||||
/// Truncate log before a given sequence
|
||||
pub fn truncate_before(&self, before: u64) {
|
||||
self.entries.retain(|seq, _| *seq >= before);
|
||||
}
|
||||
|
||||
/// Get log size
|
||||
pub fn size(&self) -> usize {
|
||||
self.entries.len()
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages synchronization of log entries across replicas.
pub struct SyncManager {
    /// The replica set to replicate to.
    replica_set: Arc<ReplicaSet>,
    /// The local replication log.
    log: Arc<ReplicationLog>,
    /// Current synchronization mode (defaults to `Async`).
    sync_mode: Arc<RwLock<SyncMode>>,
    /// Timeout applied to sync and semi-sync replication waits.
    sync_timeout: Duration,
}
|
||||
|
||||
impl SyncManager {
|
||||
/// Create a new sync manager
|
||||
pub fn new(replica_set: Arc<ReplicaSet>, log: Arc<ReplicationLog>) -> Self {
|
||||
Self {
|
||||
replica_set,
|
||||
log,
|
||||
sync_mode: Arc::new(RwLock::new(SyncMode::Async)),
|
||||
sync_timeout: Duration::from_secs(5),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the synchronization mode
|
||||
pub fn set_sync_mode(&self, mode: SyncMode) {
|
||||
*self.sync_mode.write() = mode;
|
||||
}
|
||||
|
||||
/// Get the current synchronization mode
|
||||
pub fn sync_mode(&self) -> SyncMode {
|
||||
*self.sync_mode.read()
|
||||
}
|
||||
|
||||
/// Set the sync timeout
|
||||
pub fn set_sync_timeout(&mut self, timeout: Duration) {
|
||||
self.sync_timeout = timeout;
|
||||
}
|
||||
|
||||
/// Replicate data to all replicas according to sync mode
|
||||
pub async fn replicate(&self, data: Vec<u8>) -> Result<LogEntry> {
|
||||
// Append to local log
|
||||
let entry = self.log.append(data);
|
||||
|
||||
// Get sync mode
|
||||
let mode = self.sync_mode();
|
||||
|
||||
match mode {
|
||||
SyncMode::Sync => {
|
||||
self.replicate_sync(&entry).await?;
|
||||
}
|
||||
SyncMode::Async => {
|
||||
// Fire and forget
|
||||
let entry_clone = entry.clone();
|
||||
let replica_set = self.replica_set.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = Self::send_to_replicas(&replica_set, &entry_clone).await {
|
||||
tracing::error!("Async replication failed: {}", e);
|
||||
}
|
||||
});
|
||||
}
|
||||
SyncMode::SemiSync { min_replicas } => {
|
||||
self.replicate_semi_sync(&entry, min_replicas).await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(entry)
|
||||
}
|
||||
|
||||
/// Synchronous replication - wait for all replicas
|
||||
async fn replicate_sync(&self, entry: &LogEntry) -> Result<()> {
|
||||
timeout(
|
||||
self.sync_timeout,
|
||||
Self::send_to_replicas(&self.replica_set, entry),
|
||||
)
|
||||
.await
|
||||
.map_err(|_| ReplicationError::Timeout("Sync replication timed out".to_string()))?
|
||||
}
|
||||
|
||||
/// Semi-synchronous replication - wait for minimum replicas
|
||||
async fn replicate_semi_sync(&self, entry: &LogEntry, min_replicas: usize) -> Result<()> {
|
||||
let secondaries = self.replica_set.get_secondaries();
|
||||
if secondaries.len() < min_replicas {
|
||||
return Err(ReplicationError::QuorumNotMet {
|
||||
needed: min_replicas,
|
||||
available: secondaries.len(),
|
||||
});
|
||||
}
|
||||
|
||||
// Send to all and wait for min_replicas to respond
|
||||
let entry_clone = entry.clone();
|
||||
let replica_set = self.replica_set.clone();
|
||||
let min = min_replicas;
|
||||
|
||||
timeout(self.sync_timeout, async move {
|
||||
// Simulate sending to replicas and waiting for acknowledgments
|
||||
// In a real implementation, this would use network calls
|
||||
let acks = secondaries.len().min(min);
|
||||
if acks >= min {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(ReplicationError::QuorumNotMet {
|
||||
needed: min,
|
||||
available: acks,
|
||||
})
|
||||
}
|
||||
})
|
||||
.await
|
||||
.map_err(|_| ReplicationError::Timeout("Semi-sync replication timed out".to_string()))?
|
||||
}
|
||||
|
||||
/// Send log entry to all replicas
|
||||
async fn send_to_replicas(replica_set: &ReplicaSet, entry: &LogEntry) -> Result<()> {
|
||||
let secondaries = replica_set.get_secondaries();
|
||||
|
||||
// In a real implementation, this would send over the network
|
||||
// For now, we simulate successful replication
|
||||
for replica in secondaries {
|
||||
if replica.is_healthy() {
|
||||
tracing::debug!("Replicating entry {} to {}", entry.sequence, replica.id);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Catch up a lagging replica
|
||||
pub async fn catchup(&self, replica_id: &str, from_sequence: u64) -> Result<Vec<LogEntry>> {
|
||||
let replica = self
|
||||
.replica_set
|
||||
.get_replica(replica_id)
|
||||
.ok_or_else(|| ReplicationError::ReplicaNotFound(replica_id.to_string()))?;
|
||||
|
||||
let current_sequence = self.log.current_sequence();
|
||||
if from_sequence >= current_sequence {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
// Get missing entries
|
||||
let entries = self.log.get_since(from_sequence);
|
||||
|
||||
tracing::info!(
|
||||
"Catching up replica {} with {} entries (from {} to {})",
|
||||
replica_id,
|
||||
entries.len(),
|
||||
from_sequence + 1,
|
||||
current_sequence
|
||||
);
|
||||
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
/// Get the current log position
|
||||
pub fn current_position(&self) -> u64 {
|
||||
self.log.current_sequence()
|
||||
}
|
||||
|
||||
/// Verify log entry integrity
|
||||
pub fn verify_entry(&self, sequence: u64) -> Result<bool> {
|
||||
let entry = self
|
||||
.log
|
||||
.get(sequence)
|
||||
.ok_or_else(|| ReplicationError::InvalidState("Log entry not found".to_string()))?;
|
||||
Ok(entry.verify())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ReplicaRole;

    // A freshly built entry carries the sequence it was given and passes
    // its own integrity check.
    #[test]
    fn test_log_entry_creation() {
        let payload = b"test data".to_vec();
        let entry = LogEntry::new(1, payload, "replica-1".to_string());
        assert_eq!(entry.sequence, 1);
        assert!(entry.verify());
    }

    // Appends are assigned monotonically increasing sequence numbers and
    // the log reports the latest one.
    #[test]
    fn test_replication_log() {
        let log = ReplicationLog::new("replica-1");

        let first = log.append(b"data1".to_vec());
        let second = log.append(b"data2".to_vec());

        assert_eq!(first.sequence, 1);
        assert_eq!(second.sequence, 2);
        assert_eq!(log.current_sequence(), 2);

        let fetched = log.get_range(1, 2);
        assert_eq!(fetched.len(), 2);
    }

    // Async replication succeeds immediately and hands back the appended
    // entry with its assigned sequence.
    #[tokio::test]
    async fn test_sync_manager() {
        let mut cluster = ReplicaSet::new("cluster-1");
        cluster
            .add_replica("r1", "127.0.0.1:9001", ReplicaRole::Primary)
            .unwrap();
        cluster
            .add_replica("r2", "127.0.0.1:9002", ReplicaRole::Secondary)
            .unwrap();

        let shared_log = Arc::new(ReplicationLog::new("r1"));
        let manager = SyncManager::new(Arc::new(cluster), shared_log);

        manager.set_sync_mode(SyncMode::Async);
        let entry = manager.replicate(b"test".to_vec()).await.unwrap();
        assert_eq!(entry.sequence, 1);
    }

    // Catching up from a mid-log position returns only the entries the
    // replica has not yet seen.
    #[tokio::test]
    async fn test_catchup() {
        let mut cluster = ReplicaSet::new("cluster-1");
        cluster
            .add_replica("r1", "127.0.0.1:9001", ReplicaRole::Primary)
            .unwrap();
        cluster
            .add_replica("r2", "127.0.0.1:9002", ReplicaRole::Secondary)
            .unwrap();

        let shared_log = Arc::new(ReplicationLog::new("r1"));
        let manager = SyncManager::new(Arc::new(cluster), shared_log.clone());

        // Seed the primary's log with three entries.
        shared_log.append(b"data1".to_vec());
        shared_log.append(b"data2".to_vec());
        shared_log.append(b"data3".to_vec());

        // Replica r2 claims it has entry 1; it should receive 2 and 3.
        let missing = manager.catchup("r2", 1).await.unwrap();
        assert_eq!(missing.len(), 2); // Entries 2 and 3
    }
}
|
||||
Reference in New Issue
Block a user