Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,28 @@
[package]
name = "ruvector-replication"
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
authors.workspace = true
repository.workspace = true
readme = "README.md"
description = "Data replication and synchronization for ruvector"
[dependencies]
ruvector-core = { version = "2.0.1", path = "../ruvector-core" }
tokio = { workspace = true, features = ["time"] }
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
dashmap = { workspace = true }
parking_lot = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true, features = ["serde"] }
futures = { workspace = true }
rand = { workspace = true }
bincode = { workspace = true }
[dev-dependencies]
tokio = { workspace = true, features = ["rt-multi-thread", "macros", "test-util"] }

View File

@@ -0,0 +1,226 @@
# ruvector-replication
[![Crates.io](https://img.shields.io/crates/v/ruvector-replication.svg)](https://crates.io/crates/ruvector-replication)
[![docs.rs](https://docs.rs/ruvector-replication/badge.svg)](https://docs.rs/ruvector-replication)
[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![Rust](https://img.shields.io/badge/rust-1.77%2B-orange.svg)](https://www.rust-lang.org)
**Multi-master vector replication with quorum writes, vector clocks, and automatic conflict resolution.**
```toml
[dependencies]
ruvector-replication = "0.1.1"
```
When your vector database runs on more than one node, you need a way to keep data in sync without losing writes or slowing down queries. ruvector-replication handles that: it replicates vectors across nodes, resolves conflicts automatically, and lets you trade off consistency versus speed per-write. It plugs into the [RuVector](https://github.com/ruvnet/ruvector) ecosystem alongside Raft consensus and auto-sharding.
| | Single-node vector DB | ruvector-replication |
|---|---|---|
| **Availability** | One node goes down, everything stops | Replicas serve reads and accept writes |
| **Write scaling** | One writer | Multi-master -- write to any node |
| **Conflict handling** | N/A | Vector clocks, last-write-wins, or CRDTs |
| **Consistency control** | N/A | Per-write: One, Quorum, or All |
| **Sync efficiency** | N/A | Incremental deltas with compression |
| **Recovery** | Manual restore from backup | Automatic replica recovery |
## Quick Start
```rust
use ruvector_replication::{Replicator, ReplicationConfig, ConsistencyLevel};
use std::time::Duration;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let config = ReplicationConfig {
replication_factor: 3,
consistency_level: ConsistencyLevel::Quorum,
sync_interval: Duration::from_millis(100),
batch_size: 1000,
compression: true,
..Default::default()
};
let replicator = Replicator::new(config).await?;
replicator.start().await?;
Ok(())
}
```
## Key Features
| Feature | What It Does | Why It Matters |
|---------|-------------|----------------|
| **Multi-master replication** | Write to any node in the cluster | No single point of failure for writes |
| **Configurable consistency** | Choose One, Quorum, or All per write | Trade latency for safety on a per-operation basis |
| **Vector clock conflict resolution** | Track causal ordering across nodes | Detect and resolve concurrent writes correctly |
| **CRDT support** | Conflict-free replicated data types | Guaranteed convergence without coordination |
| **Change streams** | Real-time replication event stream | Monitor sync status and react to changes |
| **Incremental sync with compression** | Only send deltas, compressed on the wire | Minimize bandwidth between nodes |
| **Automatic recovery** | Replicas catch up after failures | No manual intervention on node restart |
| **Bandwidth throttling** | Cap replication throughput | Protect production traffic from replication storms |
### Write with Replication
```rust
use ruvector_replication::{Replicator, WriteOptions, ConsistencyLevel};
use std::time::Duration;
// Write with quorum consistency
let options = WriteOptions {
consistency: ConsistencyLevel::Quorum,
timeout: Duration::from_secs(5),
};
replicator.write(vector_entry, options).await?;
// Write with eventual consistency (faster)
let options = WriteOptions {
consistency: ConsistencyLevel::One,
..Default::default()
};
replicator.write(vector_entry, options).await?;
```
### Monitor Replication
```rust
// Get replication lag
let lag = replicator.lag().await?;
println!("Replication lag: {:?}", lag);
// Get replica status
for replica in replicator.replicas().await? {
println!("{}: {} (lag: {}ms)",
replica.id,
replica.status,
replica.lag_ms
);
}
// Subscribe to replication events
let mut stream = replicator.events().await?;
while let Some(event) = stream.next().await {
match event {
ReplicationEvent::Synced { node_id, entries } => {
println!("Synced {} entries to {}", entries, node_id);
}
ReplicationEvent::Conflict { key, resolution } => {
println!("Conflict on {}: {:?}", key, resolution);
}
_ => {}
}
}
```
## API Overview
### Core Types
```rust
// Replication configuration
pub struct ReplicationConfig {
pub replication_factor: usize,
pub consistency_level: ConsistencyLevel,
pub sync_interval: Duration,
pub batch_size: usize,
pub compression: bool,
pub conflict_resolution: ConflictResolution,
}
// Consistency levels
pub enum ConsistencyLevel {
One, // Write to one replica
Quorum, // Write to majority
All, // Write to all replicas
}
// Conflict resolution strategies
pub enum ConflictResolution {
LastWriteWins,
VectorClock,
Custom(Box<dyn ConflictResolver>),
}
// Replica information
pub struct ReplicaInfo {
pub id: NodeId,
pub status: ReplicaStatus,
pub lag_ms: u64,
pub last_sync: DateTime<Utc>,
}
```
### Replicator Operations
```rust
impl Replicator {
pub async fn new(config: ReplicationConfig) -> Result<Self>;
pub async fn start(&self) -> Result<()>;
pub async fn stop(&self) -> Result<()>;
// Write operations
pub async fn write(&self, entry: VectorEntry, options: WriteOptions) -> Result<()>;
pub async fn write_batch(&self, entries: Vec<VectorEntry>, options: WriteOptions) -> Result<()>;
// Monitoring
pub async fn lag(&self) -> Result<Duration>;
pub async fn replicas(&self) -> Result<Vec<ReplicaInfo>>;
pub async fn events(&self) -> Result<impl Stream<Item = ReplicationEvent>>;
// Management
pub async fn add_replica(&self, node_id: NodeId) -> Result<()>;
pub async fn remove_replica(&self, node_id: NodeId) -> Result<()>;
pub async fn force_sync(&self, node_id: NodeId) -> Result<()>;
}
```
## Architecture
```
┌─────────────────────────────────────────────────────────┐
│ Replication Flow │
│ │
│ Client │
│ │ │
│ ▼ │
│ ┌──────────┐ Quorum Write ┌──────────┐ │
│ │ Primary │────────────────────▶│ Replica 1│ │
│ │ │ │ │ │
│ │ Vectors │────────────────────▶│ Vectors │ │
│ └──────────┘ └──────────┘ │
│ │ │
│ │ Async Replication │
│ └──────────────────────────▶┌──────────┐ │
│ │ Replica 2│ │
│ │ │ │
│ │ Vectors │ │
│ └──────────┘ │
└─────────────────────────────────────────────────────────┘
```
## Related Crates
- **[ruvector-core](../ruvector-core/)** - Core vector database engine
- **[ruvector-cluster](../ruvector-cluster/)** - Clustering and sharding
- **[ruvector-raft](../ruvector-raft/)** - Raft consensus
## Documentation
- **[Main README](../../README.md)** - Complete project overview
- **[API Documentation](https://docs.rs/ruvector-replication)** - Full API reference
- **[GitHub Repository](https://github.com/ruvnet/ruvector)** - Source code
## License
**MIT License** - see [LICENSE](../../LICENSE) for details.
---
<div align="center">
**Part of [RuVector](https://github.com/ruvnet/ruvector) - Built by [rUv](https://ruv.io)**
[![Star on GitHub](https://img.shields.io/github/stars/ruvnet/ruvector?style=social)](https://github.com/ruvnet/ruvector)
[Documentation](https://docs.rs/ruvector-replication) | [Crates.io](https://crates.io/crates/ruvector-replication) | [GitHub](https://github.com/ruvnet/ruvector)
</div>

View File

@@ -0,0 +1,395 @@
//! Conflict resolution strategies for distributed replication
//!
//! Provides vector clocks for causality tracking and various
//! conflict resolution strategies including Last-Write-Wins
//! and custom merge functions.
use crate::{ReplicationError, Result};
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::collections::HashMap;
use std::fmt;
/// Vector clock for tracking causality in distributed systems.
///
/// Each replica owns one component of the clock: it increments its own
/// component on local events and merges remote clocks on receipt, which
/// allows any two versions to be classified as ordered or concurrent.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct VectorClock {
    /// Map of replica ID to that replica's logical timestamp.
    /// Absent entries are treated as timestamp 0.
    clock: HashMap<String, u64>,
}
impl VectorClock {
/// Create a new vector clock
pub fn new() -> Self {
Self {
clock: HashMap::new(),
}
}
/// Increment the clock for a replica
pub fn increment(&mut self, replica_id: &str) {
let counter = self.clock.entry(replica_id.to_string()).or_insert(0);
*counter += 1;
}
/// Get the timestamp for a replica
pub fn get(&self, replica_id: &str) -> u64 {
self.clock.get(replica_id).copied().unwrap_or(0)
}
/// Update with another vector clock (taking max of each component)
pub fn merge(&mut self, other: &VectorClock) {
for (replica_id, &timestamp) in &other.clock {
let current = self.clock.entry(replica_id.clone()).or_insert(0);
*current = (*current).max(timestamp);
}
}
/// Check if this clock happens-before another clock
pub fn happens_before(&self, other: &VectorClock) -> bool {
let mut less = false;
let mut equal = true;
// Check all replicas in self
for (replica_id, &self_ts) in &self.clock {
let other_ts = other.get(replica_id);
if self_ts > other_ts {
return false;
}
if self_ts < other_ts {
less = true;
equal = false;
}
}
// Check replicas only in other
for (replica_id, &other_ts) in &other.clock {
if !self.clock.contains_key(replica_id) && other_ts > 0 {
less = true;
equal = false;
}
}
less || equal
}
/// Compare vector clocks for causality
pub fn compare(&self, other: &VectorClock) -> ClockOrdering {
if self == other {
return ClockOrdering::Equal;
}
if self.happens_before(other) {
return ClockOrdering::Before;
}
if other.happens_before(self) {
return ClockOrdering::After;
}
ClockOrdering::Concurrent
}
/// Check if two clocks are concurrent (conflicting)
pub fn is_concurrent(&self, other: &VectorClock) -> bool {
matches!(self.compare(other), ClockOrdering::Concurrent)
}
}
impl Default for VectorClock {
fn default() -> Self {
Self::new()
}
}
impl fmt::Display for VectorClock {
    /// Formats the clock as `{replica: ts, ...}`.
    ///
    /// Entries are sorted by replica ID so the output is deterministic —
    /// `HashMap` iteration order is unspecified and would otherwise make
    /// log lines and test output unstable.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut entries: Vec<_> = self.clock.iter().collect();
        entries.sort_by(|a, b| a.0.cmp(b.0));
        write!(f, "{{")?;
        for (i, (replica, ts)) in entries.into_iter().enumerate() {
            if i > 0 {
                write!(f, ", ")?;
            }
            write!(f, "{}: {}", replica, ts)?;
        }
        write!(f, "}}")
    }
}
/// Ordering relationship between two vector clocks.
///
/// Unlike a total order, vector clocks form a partial order: two clocks may
/// be `Concurrent`, meaning neither causally precedes the other.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ClockOrdering {
    /// Clocks are equal (same causal history).
    Equal,
    /// First clock happens before second (first is causally older).
    Before,
    /// First clock happens after second (first is causally newer).
    After,
    /// Clocks are concurrent (conflicting); conflict resolution is required.
    Concurrent,
}
/// A value paired with the vector clock describing its causal history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Versioned<T> {
    /// The wrapped value.
    pub value: T,
    /// Vector clock for this version; advanced on every update.
    pub clock: VectorClock,
    /// ID of the replica that created (and locally updates) this version.
    pub replica_id: String,
}
impl<T> Versioned<T> {
/// Create a new versioned value
pub fn new(value: T, replica_id: String) -> Self {
let mut clock = VectorClock::new();
clock.increment(&replica_id);
Self {
value,
clock,
replica_id,
}
}
/// Update the version with a new value
pub fn update(&mut self, value: T) {
self.value = value;
self.clock.increment(&self.replica_id);
}
/// Compare versions for causality
pub fn compare(&self, other: &Versioned<T>) -> ClockOrdering {
self.clock.compare(&other.clock)
}
}
/// Trait for conflict resolution strategies.
///
/// Implementors define how two conflicting versions of a value are
/// reconciled into a single surviving version.
pub trait ConflictResolver<T: Clone>: Send + Sync {
    /// Resolve a conflict between two versions, producing the winner.
    fn resolve(&self, v1: &Versioned<T>, v2: &Versioned<T>) -> Result<Versioned<T>>;

    /// Resolve multiple conflicting versions by folding [`Self::resolve`]
    /// pairwise over them, left to right.
    ///
    /// # Errors
    ///
    /// Returns `ReplicationError::ConflictResolution` when `versions` is
    /// empty, and propagates any error from `resolve`.
    fn resolve_many(&self, versions: Vec<Versioned<T>>) -> Result<Versioned<T>> {
        let mut iter = versions.into_iter();
        // Take the first version by value rather than cloning it out of the
        // vec; this also handles the empty and single-element cases.
        let mut result = iter.next().ok_or_else(|| {
            ReplicationError::ConflictResolution("No versions to resolve".to_string())
        })?;
        for version in iter {
            result = self.resolve(&result, &version)?;
        }
        Ok(result)
    }
}
/// Last-Write-Wins conflict resolution strategy.
pub struct LastWriteWins;

impl<T: Clone> ConflictResolver<T> for LastWriteWins {
    /// Keeps the causally-later version.
    ///
    /// NOTE(review): there is no wall-clock timestamp here — when the
    /// versions are `Concurrent` the tie-break arbitrarily favors `v2`
    /// (the incoming version), so "last write" is determined by argument
    /// order rather than real time.
    fn resolve(&self, v1: &Versioned<T>, v2: &Versioned<T>) -> Result<Versioned<T>> {
        match v1.compare(v2) {
            ClockOrdering::Before | ClockOrdering::Concurrent => Ok(v2.clone()),
            ClockOrdering::After | ClockOrdering::Equal => Ok(v1.clone()),
        }
    }
}
/// Custom merge function for conflict resolution.
///
/// Wraps a user-supplied closure that combines two concurrent values into
/// one; causally-ordered versions are resolved without calling the closure.
pub struct MergeFunction<T, F>
where
    F: Fn(&T, &T) -> T + Send + Sync,
{
    /// Closure invoked only for concurrent (conflicting) versions.
    merge_fn: F,
    // Marks the value type T without storing one.
    _phantom: std::marker::PhantomData<T>,
}

impl<T, F> MergeFunction<T, F>
where
    F: Fn(&T, &T) -> T + Send + Sync,
{
    /// Create a new merge function resolver around `merge_fn`.
    pub fn new(merge_fn: F) -> Self {
        Self {
            merge_fn,
            _phantom: std::marker::PhantomData,
        }
    }
}
impl<T: Clone + Send + Sync, F> ConflictResolver<T> for MergeFunction<T, F>
where
    F: Fn(&T, &T) -> T + Send + Sync,
{
    /// Keeps the causally-later version; for concurrent versions, combines
    /// the values with the merge closure and unions the clocks.
    ///
    /// The merged version keeps `v1`'s `replica_id` as its owner.
    fn resolve(&self, v1: &Versioned<T>, v2: &Versioned<T>) -> Result<Versioned<T>> {
        match v1.compare(v2) {
            ClockOrdering::Equal | ClockOrdering::Before => Ok(v2.clone()),
            ClockOrdering::After => Ok(v1.clone()),
            ClockOrdering::Concurrent => {
                let merged_value = (self.merge_fn)(&v1.value, &v2.value);
                // Union of both clocks so the merged version causally
                // dominates both inputs.
                let mut merged_clock = v1.clock.clone();
                merged_clock.merge(&v2.clock);
                Ok(Versioned {
                    value: merged_value,
                    clock: merged_clock,
                    replica_id: v1.replica_id.clone(),
                })
            }
        }
    }
}
/// CRDT-inspired merge for numeric values (takes the maximum).
///
/// Taking the max is commutative, associative, and idempotent, so replicas
/// converge regardless of merge order.
pub struct MaxMerge;

impl ConflictResolver<i64> for MaxMerge {
    /// Keeps the causally-later version; for concurrent versions, keeps the
    /// larger value and unions the clocks (owner stays `v1.replica_id`).
    fn resolve(&self, v1: &Versioned<i64>, v2: &Versioned<i64>) -> Result<Versioned<i64>> {
        match v1.compare(v2) {
            ClockOrdering::Equal | ClockOrdering::Before => Ok(v2.clone()),
            ClockOrdering::After => Ok(v1.clone()),
            ClockOrdering::Concurrent => {
                let merged_value = v1.value.max(v2.value);
                let mut merged_clock = v1.clock.clone();
                merged_clock.merge(&v2.clock);
                Ok(Versioned {
                    value: merged_value,
                    clock: merged_clock,
                    replica_id: v1.replica_id.clone(),
                })
            }
        }
    }
}
/// CRDT-inspired merge for sets represented as `Vec` (takes the union).
pub struct SetUnion;

impl<T: Clone + Eq + std::hash::Hash> ConflictResolver<Vec<T>> for SetUnion {
    /// Keeps the causally-later version; for concurrent versions, produces
    /// the union: all of `v1`'s elements in their original order, followed
    /// by each element of `v2` not already present.
    fn resolve(&self, v1: &Versioned<Vec<T>>, v2: &Versioned<Vec<T>>) -> Result<Versioned<Vec<T>>> {
        match v1.compare(v2) {
            ClockOrdering::Equal | ClockOrdering::Before => Ok(v2.clone()),
            ClockOrdering::After => Ok(v1.clone()),
            ClockOrdering::Concurrent => {
                let mut merged_value = v1.value.clone();
                // Track membership in a HashSet (T is already Eq + Hash) so
                // the union is O(n + m) instead of the O(n * m) cost of a
                // `Vec::contains` scan per element.
                let mut seen: std::collections::HashSet<&T> = v1.value.iter().collect();
                for item in &v2.value {
                    // `insert` returns true only for unseen items, which
                    // also deduplicates repeats within `v2` itself — the
                    // same behavior the original `contains` check had.
                    if seen.insert(item) {
                        merged_value.push(item.clone());
                    }
                }
                let mut merged_clock = v1.clock.clone();
                merged_clock.merge(&v2.clock);
                Ok(Versioned {
                    value: merged_value,
                    clock: merged_clock,
                    replica_id: v1.replica_id.clone(),
                })
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // Clock with more local events on the same replica is causally After.
    #[test]
    fn test_vector_clock() {
        let mut clock1 = VectorClock::new();
        clock1.increment("r1");
        clock1.increment("r1");
        let mut clock2 = VectorClock::new();
        clock2.increment("r1");
        assert_eq!(clock1.compare(&clock2), ClockOrdering::After);
        assert_eq!(clock2.compare(&clock1), ClockOrdering::Before);
    }
    // Events on disjoint replicas are concurrent (neither dominates).
    #[test]
    fn test_concurrent_clocks() {
        let mut clock1 = VectorClock::new();
        clock1.increment("r1");
        let mut clock2 = VectorClock::new();
        clock2.increment("r2");
        assert_eq!(clock1.compare(&clock2), ClockOrdering::Concurrent);
        assert!(clock1.is_concurrent(&clock2));
    }
    // Merge takes the component-wise maximum of both clocks.
    #[test]
    fn test_clock_merge() {
        let mut clock1 = VectorClock::new();
        clock1.increment("r1");
        clock1.increment("r1");
        let mut clock2 = VectorClock::new();
        clock2.increment("r2");
        clock2.increment("r2");
        clock2.increment("r2");
        clock1.merge(&clock2);
        assert_eq!(clock1.get("r1"), 2);
        assert_eq!(clock1.get("r2"), 3);
    }
    // Versioned::update replaces the value and bumps the owner's component.
    #[test]
    fn test_versioned() {
        let mut v1 = Versioned::new(100, "r1".to_string());
        v1.update(200);
        assert_eq!(v1.value, 200);
        assert_eq!(v1.clock.get("r1"), 2);
    }
    // Causally-later version (v2 has the higher r1 timestamp) wins.
    #[test]
    fn test_last_write_wins() {
        let v1 = Versioned::new(100, "r1".to_string());
        let mut v2 = Versioned::new(200, "r1".to_string());
        v2.clock.increment("r1");
        let resolver = LastWriteWins;
        let result = resolver.resolve(&v1, &v2).unwrap();
        assert_eq!(result.value, 200);
    }
    // Concurrent versions are combined by the user-supplied closure.
    #[test]
    fn test_merge_function() {
        let v1 = Versioned::new(100, "r1".to_string());
        let v2 = Versioned::new(200, "r2".to_string());
        let resolver = MergeFunction::new(|a, b| a + b);
        let result = resolver.resolve(&v1, &v2).unwrap();
        assert_eq!(result.value, 300);
    }
    // Concurrent numeric versions resolve to the maximum value.
    #[test]
    fn test_max_merge() {
        let v1 = Versioned::new(100, "r1".to_string());
        let v2 = Versioned::new(200, "r2".to_string());
        let resolver = MaxMerge;
        let result = resolver.resolve(&v1, &v2).unwrap();
        assert_eq!(result.value, 200);
    }
    // Union of {1,2,3} and {3,4,5} has 5 distinct elements.
    #[test]
    fn test_set_union() {
        let v1 = Versioned::new(vec![1, 2, 3], "r1".to_string());
        let v2 = Versioned::new(vec![3, 4, 5], "r2".to_string());
        let resolver = SetUnion;
        let result = resolver.resolve(&v1, &v2).unwrap();
        assert_eq!(result.value.len(), 5);
        assert!(result.value.contains(&1));
        assert!(result.value.contains(&4));
    }
}

View File

@@ -0,0 +1,443 @@
//! Automatic failover and high availability
//!
//! Provides failover management with health monitoring,
//! quorum-based decision making, and split-brain prevention.
use crate::{Replica, ReplicaRole, ReplicaSet, ReplicationError, Result};
use chrono::{DateTime, Utc};
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::time::Duration;
use tokio::time::interval;
/// Health status of a replica as observed by the failover manager.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum HealthStatus {
    /// Replica is healthy; resets its failure count.
    Healthy,
    /// Replica is degraded but operational; does not count toward failover.
    Degraded,
    /// Replica is unhealthy; counts toward the failure threshold.
    Unhealthy,
    /// Replica is not responding at all; counts toward the failure threshold.
    Unresponsive,
}
/// Result of a single health probe against one replica.
#[derive(Debug, Clone)]
pub struct HealthCheck {
    /// ID of the probed replica.
    pub replica_id: String,
    /// Observed health status.
    pub status: HealthStatus,
    /// Response time in milliseconds (0 when the probe failed).
    pub response_time_ms: u64,
    /// Error message when the replica is unhealthy or unresponsive.
    pub error: Option<String>,
    /// Timestamp at which the check was recorded.
    pub timestamp: DateTime<Utc>,
}
impl HealthCheck {
/// Create a healthy check result
pub fn healthy(replica_id: String, response_time_ms: u64) -> Self {
Self {
replica_id,
status: HealthStatus::Healthy,
response_time_ms,
error: None,
timestamp: Utc::now(),
}
}
/// Create an unhealthy check result
pub fn unhealthy(replica_id: String, error: String) -> Self {
Self {
replica_id,
status: HealthStatus::Unhealthy,
response_time_ms: 0,
error: Some(error),
timestamp: Utc::now(),
}
}
/// Create an unresponsive check result
pub fn unresponsive(replica_id: String) -> Self {
Self {
replica_id,
status: HealthStatus::Unresponsive,
response_time_ms: 0,
error: Some("No response".to_string()),
timestamp: Utc::now(),
}
}
}
/// Failover policy configuration.
#[derive(Debug, Clone)]
pub struct FailoverPolicy {
    /// Enable automatic failover when the failure threshold is reached.
    pub auto_failover: bool,
    /// Interval between health-check sweeps over all replicas.
    pub health_check_interval: Duration,
    /// Per-replica timeout for a single health check.
    pub health_check_timeout: Duration,
    /// Number of consecutive failed checks before failover is triggered.
    pub failure_threshold: usize,
    /// Minimum quorum size required to proceed with a failover.
    pub min_quorum: usize,
    /// Enable split-brain prevention.
    /// NOTE(review): this flag is not read anywhere in this file — confirm
    /// it is consumed elsewhere before relying on it.
    pub prevent_split_brain: bool,
}
impl Default for FailoverPolicy {
    /// Conservative defaults: probe every 5s with a 2s timeout, and fail
    /// over automatically after 3 consecutive failures.
    fn default() -> Self {
        Self {
            auto_failover: true,
            health_check_interval: Duration::from_secs(5),
            health_check_timeout: Duration::from_secs(2),
            failure_threshold: 3,
            min_quorum: 2,
            prevent_split_brain: true,
        }
    }
}
/// Manages automatic failover and health monitoring.
///
/// All state is held behind `Arc<RwLock<...>>` so the background monitoring
/// task spawned by `start_monitoring` can share it with callers.
pub struct FailoverManager {
    /// The replica set being monitored.
    replica_set: Arc<RwLock<ReplicaSet>>,
    /// Failover policy; may be swapped at runtime via `set_policy`.
    policy: Arc<RwLock<FailoverPolicy>>,
    /// Rolling health-check history (trimmed to the most recent entries).
    health_history: Arc<RwLock<Vec<HealthCheck>>>,
    /// Consecutive-failure counts keyed by replica ID.
    failure_counts: Arc<RwLock<std::collections::HashMap<String, usize>>>,
    /// Guard preventing overlapping failover attempts.
    failover_in_progress: Arc<RwLock<bool>>,
}
impl FailoverManager {
/// Create a new failover manager
pub fn new(replica_set: Arc<RwLock<ReplicaSet>>) -> Self {
Self {
replica_set,
policy: Arc::new(RwLock::new(FailoverPolicy::default())),
health_history: Arc::new(RwLock::new(Vec::new())),
failure_counts: Arc::new(RwLock::new(std::collections::HashMap::new())),
failover_in_progress: Arc::new(RwLock::new(false)),
}
}
/// Create with custom policy
pub fn with_policy(replica_set: Arc<RwLock<ReplicaSet>>, policy: FailoverPolicy) -> Self {
Self {
replica_set,
policy: Arc::new(RwLock::new(policy)),
health_history: Arc::new(RwLock::new(Vec::new())),
failure_counts: Arc::new(RwLock::new(std::collections::HashMap::new())),
failover_in_progress: Arc::new(RwLock::new(false)),
}
}
/// Set the failover policy
pub fn set_policy(&self, policy: FailoverPolicy) {
*self.policy.write() = policy;
}
/// Get the current policy
pub fn policy(&self) -> FailoverPolicy {
self.policy.read().clone()
}
/// Start health monitoring
pub async fn start_monitoring(&self) {
let policy = self.policy.read().clone();
let replica_set = self.replica_set.clone();
let health_history = self.health_history.clone();
let failure_counts = self.failure_counts.clone();
let failover_in_progress = self.failover_in_progress.clone();
let manager_policy = self.policy.clone();
tokio::spawn(async move {
let mut interval_timer = interval(policy.health_check_interval);
loop {
interval_timer.tick().await;
let replica_ids = {
let set = replica_set.read();
set.replica_ids()
};
for replica_id in replica_ids {
let health = Self::check_replica_health(
&replica_set,
&replica_id,
policy.health_check_timeout,
)
.await;
// Record health check
health_history.write().push(health.clone());
// Update failure count and check if failover is needed
// Use a scope to ensure lock is dropped before any await
let should_failover = {
let mut counts = failure_counts.write();
let count = counts.entry(replica_id.clone()).or_insert(0);
match health.status {
HealthStatus::Healthy => {
*count = 0;
false
}
HealthStatus::Degraded => {
// Don't increment for degraded
false
}
HealthStatus::Unhealthy | HealthStatus::Unresponsive => {
*count += 1;
// Check if failover is needed
let current_policy = manager_policy.read();
*count >= current_policy.failure_threshold
&& current_policy.auto_failover
}
}
}; // Lock is dropped here
// Trigger failover if needed (after lock is dropped)
if should_failover {
if let Err(e) =
Self::trigger_failover(&replica_set, &failover_in_progress).await
{
tracing::error!("Failover failed: {}", e);
}
}
}
// Trim health history to last 1000 entries
let mut history = health_history.write();
let len = history.len();
if len > 1000 {
history.drain(0..len - 1000);
}
}
});
}
/// Check health of a specific replica
async fn check_replica_health(
replica_set: &Arc<RwLock<ReplicaSet>>,
replica_id: &str,
timeout: Duration,
) -> HealthCheck {
// In a real implementation, this would make a network call
// For now, we simulate health checks based on replica status
let replica = {
let set = replica_set.read();
set.get_replica(replica_id)
};
match replica {
Some(replica) => {
if replica.is_timed_out(timeout) {
HealthCheck::unresponsive(replica_id.to_string())
} else if replica.is_healthy() {
HealthCheck::healthy(replica_id.to_string(), 10)
} else {
HealthCheck::unhealthy(replica_id.to_string(), "Replica is lagging".to_string())
}
}
None => HealthCheck::unhealthy(replica_id.to_string(), "Replica not found".to_string()),
}
}
/// Trigger failover to a healthy secondary
async fn trigger_failover(
replica_set: &Arc<RwLock<ReplicaSet>>,
failover_in_progress: &Arc<RwLock<bool>>,
) -> Result<()> {
// Check if failover is already in progress
{
let mut in_progress = failover_in_progress.write();
if *in_progress {
return Ok(());
}
*in_progress = true;
}
tracing::warn!("Initiating failover");
// Find candidate within a scope to drop the lock before await
let candidate_id = {
let set = replica_set.read();
// Check quorum
if !set.has_quorum() {
*failover_in_progress.write() = false;
return Err(ReplicationError::QuorumNotMet {
needed: set.get_quorum_size(),
available: set.get_healthy_replicas().len(),
});
}
// Find best candidate for promotion
let candidate = Self::select_failover_candidate(&set)?;
candidate.id.clone()
}; // Lock is dropped here
// Promote the candidate (lock re-acquired inside promote_to_primary)
let result = {
let mut set = replica_set.write();
set.promote_to_primary(&candidate_id)
};
match &result {
Ok(()) => tracing::info!("Failover completed: promoted {} to primary", candidate_id),
Err(e) => tracing::error!("Failover failed: {}", e),
}
// Clear failover flag
*failover_in_progress.write() = false;
result
}
/// Select the best candidate for failover
fn select_failover_candidate(replica_set: &ReplicaSet) -> Result<Replica> {
let mut candidates: Vec<Replica> = replica_set
.get_healthy_replicas()
.into_iter()
.filter(|r| r.role == ReplicaRole::Secondary)
.collect();
if candidates.is_empty() {
return Err(ReplicationError::FailoverFailed(
"No healthy secondary replicas available".to_string(),
));
}
// Sort by priority (highest first), then by lowest lag
candidates.sort_by(|a, b| b.priority.cmp(&a.priority).then(a.lag_ms.cmp(&b.lag_ms)));
Ok(candidates[0].clone())
}
/// Manually trigger failover
pub async fn manual_failover(&self, target_replica_id: Option<String>) -> Result<()> {
let mut set = self.replica_set.write();
// Check quorum
if !set.has_quorum() {
return Err(ReplicationError::QuorumNotMet {
needed: set.get_quorum_size(),
available: set.get_healthy_replicas().len(),
});
}
let target = if let Some(id) = target_replica_id {
set.get_replica(&id)
.ok_or_else(|| ReplicationError::ReplicaNotFound(id))?
} else {
Self::select_failover_candidate(&set)?
};
set.promote_to_primary(&target.id)?;
tracing::info!(
"Manual failover completed: promoted {} to primary",
target.id
);
Ok(())
}
/// Get health check history
pub fn health_history(&self) -> Vec<HealthCheck> {
self.health_history.read().clone()
}
/// Get recent health status for a replica
pub fn recent_health(&self, replica_id: &str, limit: usize) -> Vec<HealthCheck> {
let history = self.health_history.read();
history
.iter()
.rev()
.filter(|h| h.replica_id == replica_id)
.take(limit)
.cloned()
.collect()
}
/// Check if failover is currently in progress
pub fn is_failover_in_progress(&self) -> bool {
*self.failover_in_progress.read()
}
/// Get failure count for a replica
pub fn failure_count(&self, replica_id: &str) -> usize {
self.failure_counts
.read()
.get(replica_id)
.copied()
.unwrap_or(0)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    // Constructor helpers set the matching status and error fields.
    #[test]
    fn test_health_check() {
        let check = HealthCheck::healthy("r1".to_string(), 15);
        assert_eq!(check.status, HealthStatus::Healthy);
        assert_eq!(check.response_time_ms, 15);
        let check = HealthCheck::unhealthy("r2".to_string(), "Error".to_string());
        assert_eq!(check.status, HealthStatus::Unhealthy);
        assert!(check.error.is_some());
    }
    // Default policy enables auto-failover after 3 consecutive failures.
    #[test]
    fn test_failover_policy() {
        let policy = FailoverPolicy::default();
        assert!(policy.auto_failover);
        assert_eq!(policy.failure_threshold, 3);
    }
    // A fresh manager starts with no failover in progress.
    #[test]
    fn test_failover_manager() {
        let mut replica_set = ReplicaSet::new("cluster-1");
        replica_set
            .add_replica("r1", "127.0.0.1:9001", ReplicaRole::Primary)
            .unwrap();
        replica_set
            .add_replica("r2", "127.0.0.1:9002", ReplicaRole::Secondary)
            .unwrap();
        let manager = FailoverManager::new(Arc::new(RwLock::new(replica_set)));
        assert!(!manager.is_failover_in_progress());
    }
    // Candidate selection returns a healthy secondary, never the primary.
    #[test]
    fn test_candidate_selection() {
        let mut replica_set = ReplicaSet::new("cluster-1");
        replica_set
            .add_replica("r1", "127.0.0.1:9001", ReplicaRole::Primary)
            .unwrap();
        replica_set
            .add_replica("r2", "127.0.0.1:9002", ReplicaRole::Secondary)
            .unwrap();
        replica_set
            .add_replica("r3", "127.0.0.1:9003", ReplicaRole::Secondary)
            .unwrap();
        let candidate = FailoverManager::select_failover_candidate(&replica_set).unwrap();
        assert!(candidate.role == ReplicaRole::Secondary);
        assert!(candidate.is_healthy());
    }
}

View File

@@ -0,0 +1,104 @@
//! Data replication and synchronization for ruvector
//!
//! This crate provides comprehensive replication capabilities including:
//! - Multi-node replica management
//! - Synchronous, asynchronous, and semi-synchronous replication modes
//! - Conflict resolution with vector clocks and CRDTs
//! - Change data capture and streaming
//! - Automatic failover and split-brain prevention
//!
//! # Examples
//!
//! ```no_run
//! use ruvector_replication::{ReplicaSet, ReplicaRole, SyncMode, SyncManager, ReplicationLog};
//! use std::sync::Arc;
//!
//! fn example() -> Result<(), Box<dyn std::error::Error>> {
//! // Create a replica set
//! let mut replica_set = ReplicaSet::new("cluster-1");
//!
//! // Add replicas
//! replica_set.add_replica("replica-1", "192.168.1.10:9001", ReplicaRole::Primary)?;
//! replica_set.add_replica("replica-2", "192.168.1.11:9001", ReplicaRole::Secondary)?;
//!
//! // Create sync manager and configure synchronization
//! let log = Arc::new(ReplicationLog::new("replica-1"));
//! let manager = SyncManager::new(Arc::new(replica_set), log);
//! manager.set_sync_mode(SyncMode::SemiSync { min_replicas: 1 });
//! Ok(())
//! }
//! ```
pub mod conflict;
pub mod failover;
pub mod replica;
pub mod stream;
pub mod sync;
pub use conflict::{ConflictResolver, LastWriteWins, MergeFunction, VectorClock};
pub use failover::{FailoverManager, FailoverPolicy, HealthStatus};
pub use replica::{Replica, ReplicaRole, ReplicaSet, ReplicaStatus};
pub use stream::{ChangeEvent, ChangeOperation, ReplicationStream};
pub use sync::{LogEntry, ReplicationLog, SyncManager, SyncMode};
use thiserror::Error;
/// Result type for replication operations.
pub type Result<T> = std::result::Result<T, ReplicationError>;
/// Errors that can occur during replication operations.
#[derive(Error, Debug)]
pub enum ReplicationError {
    /// A replica with the given ID is not part of the replica set.
    #[error("Replica not found: {0}")]
    ReplicaNotFound(String),
    /// No primary replica is available to accept writes.
    #[error("No primary replica available")]
    NoPrimary,
    /// A replication operation exceeded its deadline.
    #[error("Replication timeout: {0}")]
    Timeout(String),
    /// Synchronization between replicas failed.
    #[error("Synchronization failed: {0}")]
    SyncFailed(String),
    /// Conflicting versions could not be resolved.
    #[error("Conflict resolution failed: {0}")]
    ConflictResolution(String),
    /// A failover attempt did not complete.
    #[error("Failover failed: {0}")]
    FailoverFailed(String),
    /// A network-level error occurred while contacting a replica.
    #[error("Network error: {0}")]
    Network(String),
    /// Fewer healthy replicas were available than the required quorum.
    #[error("Quorum not met: needed {needed}, got {available}")]
    QuorumNotMet { needed: usize, available: usize },
    /// Two replicas both believe they are primary.
    #[error("Split-brain detected")]
    SplitBrain,
    /// A replica was found in a state the operation cannot handle.
    #[error("Invalid replica state: {0}")]
    InvalidState(String),
    /// Failed to encode a value with bincode.
    #[error("Serialization encode error: {0}")]
    SerializationEncode(#[from] bincode::error::EncodeError),
    /// Failed to decode a value with bincode.
    #[error("Serialization decode error: {0}")]
    SerializationDecode(#[from] bincode::error::DecodeError),
    /// An underlying I/O operation failed.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
}
#[cfg(test)]
mod tests {
    use super::*;
    // Pins the thiserror-generated Display format for a structured variant.
    #[test]
    fn test_error_display() {
        let err = ReplicationError::QuorumNotMet {
            needed: 2,
            available: 1,
        };
        assert_eq!(err.to_string(), "Quorum not met: needed 2, got 1");
    }
}

View File

@@ -0,0 +1,378 @@
//! Replica management and coordination
//!
//! Provides structures and logic for managing distributed replicas,
//! including role management, health tracking, and promotion/demotion.
use crate::{ReplicationError, Result};
use chrono::{DateTime, Utc};
use dashmap::DashMap;
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::time::Duration;
use uuid::Uuid;
/// Role of a replica in the replication topology.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ReplicaRole {
    /// Primary replica that handles writes.
    Primary,
    /// Secondary replica that replicates from the primary and may be
    /// promoted during failover.
    Secondary,
    /// Witness replica that participates in quorum without replicating data.
    Witness,
}
/// Current status of a replica
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ReplicaStatus {
    /// Replica is online and healthy
    Healthy,
    /// Replica is lagging behind (still readable, but not counted healthy)
    Lagging,
    /// Replica is offline or unreachable (neither readable nor writable)
    Offline,
    /// Replica is recovering; entered from `Offline` when a heartbeat arrives
    Recovering,
}
/// Represents a single replica in the replication topology
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Replica {
    /// Unique identifier for the replica
    pub id: String,
    /// Network address of the replica (opaque string; not validated here)
    pub address: String,
    /// Current role of the replica
    pub role: ReplicaRole,
    /// Current status of the replica
    pub status: ReplicaStatus,
    /// Replication lag in milliseconds
    pub lag_ms: u64,
    /// Last known position in the replication log
    pub log_position: u64,
    /// Last heartbeat timestamp (UTC)
    pub last_heartbeat: DateTime<Utc>,
    /// Priority for failover (higher is better; `new` defaults this to 100)
    pub priority: u32,
}
impl Replica {
    /// Replication lag (in milliseconds) at or above which a replica is
    /// considered lagging rather than healthy.
    ///
    /// Shared by [`Self::is_healthy`] and [`Self::update_lag`] so the two
    /// agree at the boundary. Previously `is_healthy` used `lag_ms < 5000`
    /// while `update_lag` used `lag_ms > 5000`, so a replica at exactly
    /// 5000 ms kept status `Healthy` while `is_healthy` reported `false`.
    pub const MAX_HEALTHY_LAG_MS: u64 = 5000;

    /// Create a new replica that starts out `Healthy` with zero lag and
    /// the default failover priority (100).
    pub fn new(id: impl Into<String>, address: impl Into<String>, role: ReplicaRole) -> Self {
        Self {
            id: id.into(),
            address: address.into(),
            role,
            status: ReplicaStatus::Healthy,
            lag_ms: 0,
            log_position: 0,
            last_heartbeat: Utc::now(),
            priority: 100,
        }
    }

    /// Check if the replica is healthy: status is `Healthy` and lag is
    /// below [`Self::MAX_HEALTHY_LAG_MS`].
    pub fn is_healthy(&self) -> bool {
        self.status == ReplicaStatus::Healthy && self.lag_ms < Self::MAX_HEALTHY_LAG_MS
    }

    /// Check if the replica is available for reads (healthy or merely lagging).
    pub fn is_readable(&self) -> bool {
        matches!(self.status, ReplicaStatus::Healthy | ReplicaStatus::Lagging)
    }

    /// Check if the replica is available for writes (healthy primary only).
    pub fn is_writable(&self) -> bool {
        self.role == ReplicaRole::Primary && self.status == ReplicaStatus::Healthy
    }

    /// Update the replica's lag and derive its status from the new value.
    ///
    /// Lag at or beyond the threshold marks the replica `Lagging`; falling
    /// back under the threshold restores `Healthy` only from `Lagging`
    /// (an `Offline`/`Recovering` replica is not revived by a lag update).
    pub fn update_lag(&mut self, lag_ms: u64) {
        self.lag_ms = lag_ms;
        if lag_ms >= Self::MAX_HEALTHY_LAG_MS {
            self.status = ReplicaStatus::Lagging;
        } else if self.status == ReplicaStatus::Lagging {
            self.status = ReplicaStatus::Healthy;
        }
    }

    /// Update the replica's last known replication-log position.
    pub fn update_position(&mut self, position: u64) {
        self.log_position = position;
    }

    /// Record a heartbeat at the current time.
    ///
    /// An `Offline` replica transitions to `Recovering`; it is not marked
    /// `Healthy` directly, since it still has to catch up.
    pub fn heartbeat(&mut self) {
        self.last_heartbeat = Utc::now();
        if self.status == ReplicaStatus::Offline {
            self.status = ReplicaStatus::Recovering;
        }
    }

    /// Check if the replica has missed heartbeats for longer than `timeout`.
    pub fn is_timed_out(&self, timeout: Duration) -> bool {
        // A negative signed duration (clock skew) fails `to_std`; treat it
        // as "maximally elapsed" so skewed clocks count as timed out.
        let elapsed = Utc::now()
            .signed_duration_since(self.last_heartbeat)
            .to_std()
            .unwrap_or(Duration::MAX);
        elapsed > timeout
    }
}
/// Manages a set of replicas
///
/// All fields use interior mutability (`DashMap` / `parking_lot::RwLock`)
/// behind `Arc`s, so read accessors take `&self`.
pub struct ReplicaSet {
    /// Cluster identifier
    cluster_id: String,
    /// Map of replica ID to replica
    replicas: Arc<DashMap<String, Replica>>,
    /// Current primary replica ID (`None` when no primary is registered)
    primary_id: Arc<RwLock<Option<String>>>,
    /// Minimum number of replicas for quorum (recomputed on add/remove)
    quorum_size: Arc<RwLock<usize>>,
}
impl ReplicaSet {
    /// Create a new, empty replica set with an initial quorum size of 1.
    pub fn new(cluster_id: impl Into<String>) -> Self {
        Self {
            cluster_id: cluster_id.into(),
            replicas: Arc::new(DashMap::new()),
            primary_id: Arc::new(RwLock::new(None)),
            quorum_size: Arc::new(RwLock::new(1)),
        }
    }
    /// Add a replica to the set.
    ///
    /// Fails with `InvalidState` when a primary is added while one already
    /// exists. Recomputes the quorum size afterwards.
    pub fn add_replica(
        &mut self,
        id: impl Into<String>,
        address: impl Into<String>,
        role: ReplicaRole,
    ) -> Result<()> {
        let id = id.into();
        let replica = Replica::new(id.clone(), address, role);
        if role == ReplicaRole::Primary {
            let mut primary = self.primary_id.write();
            if primary.is_some() {
                return Err(ReplicationError::InvalidState(
                    "Primary replica already exists".to_string(),
                ));
            }
            *primary = Some(id.clone());
        }
        self.replicas.insert(id, replica);
        self.update_quorum_size();
        Ok(())
    }
    /// Remove a replica from the set; clears the primary pointer when the
    /// removed replica was the primary.
    pub fn remove_replica(&mut self, id: &str) -> Result<()> {
        let (_, removed) = self
            .replicas
            .remove(id)
            .ok_or_else(|| ReplicationError::ReplicaNotFound(id.to_string()))?;
        if removed.role == ReplicaRole::Primary {
            *self.primary_id.write() = None;
        }
        self.update_quorum_size();
        Ok(())
    }
    /// Get a snapshot (clone) of a replica by ID.
    pub fn get_replica(&self, id: &str) -> Option<Replica> {
        self.replicas.get(id).map(|r| r.clone())
    }
    /// Get a snapshot of the current primary replica, if any.
    pub fn get_primary(&self) -> Option<Replica> {
        let primary_id = self.primary_id.read();
        primary_id
            .as_ref()
            .and_then(|id| self.replicas.get(id).map(|r| r.clone()))
    }
    /// Get snapshots of all secondary replicas.
    pub fn get_secondaries(&self) -> Vec<Replica> {
        self.replicas
            .iter()
            .filter(|r| r.role == ReplicaRole::Secondary)
            .map(|r| r.clone())
            .collect()
    }
    /// Get snapshots of all healthy replicas (any role).
    pub fn get_healthy_replicas(&self) -> Vec<Replica> {
        self.replicas
            .iter()
            .filter(|r| r.is_healthy())
            .map(|r| r.clone())
            .collect()
    }
    /// Promote a secondary to primary, demoting the current primary (if
    /// any) to secondary.
    ///
    /// No-op when the replica is already primary; fails for witnesses and
    /// unknown IDs.
    ///
    /// Fix: the previous implementation held a `DashMap` guard for the
    /// target replica across a second `get_mut` on the old primary.
    /// `DashMap` documents that holding any reference into the map while
    /// locking another entry may deadlock (same-shard lock re-entry), so
    /// every guard is now dropped before any further map access.
    pub fn promote_to_primary(&mut self, id: &str) -> Result<()> {
        // Validate the target inside a scope so the map guard is released
        // before we touch any other entry or lock.
        {
            let replica = self
                .replicas
                .get(id)
                .ok_or_else(|| ReplicationError::ReplicaNotFound(id.to_string()))?;
            if replica.role == ReplicaRole::Primary {
                return Ok(());
            }
            if replica.role == ReplicaRole::Witness {
                return Err(ReplicationError::InvalidState(
                    "Cannot promote witness to primary".to_string(),
                ));
            }
        }
        // Demote the current primary if one exists.
        let old_primary_id = self.primary_id.write().take();
        if let Some(old_id) = old_primary_id {
            if let Some(mut old_primary) = self.replicas.get_mut(&old_id) {
                old_primary.role = ReplicaRole::Secondary;
            }
        }
        // Promote the new primary; the guard is dropped before the
        // primary_id lock is taken again.
        if let Some(mut replica) = self.replicas.get_mut(id) {
            replica.role = ReplicaRole::Primary;
        }
        *self.primary_id.write() = Some(id.to_string());
        tracing::info!("Promoted replica {} to primary", id);
        Ok(())
    }
    /// Demote a primary to secondary; no-op when the replica is not primary.
    pub fn demote_to_secondary(&mut self, id: &str) -> Result<()> {
        // Scope the DashMap guard so it is dropped before primary_id is
        // locked (same deadlock-hygiene rule as promote_to_primary).
        {
            let mut replica = self
                .replicas
                .get_mut(id)
                .ok_or_else(|| ReplicationError::ReplicaNotFound(id.to_string()))?;
            if replica.role != ReplicaRole::Primary {
                return Ok(());
            }
            replica.role = ReplicaRole::Secondary;
        }
        *self.primary_id.write() = None;
        tracing::info!("Demoted replica {} to secondary", id);
        Ok(())
    }
    /// Check if quorum is available among healthy, non-witness replicas.
    pub fn has_quorum(&self) -> bool {
        let healthy_count = self
            .replicas
            .iter()
            .filter(|r| r.is_healthy() && r.role != ReplicaRole::Witness)
            .count();
        healthy_count >= *self.quorum_size.read()
    }
    /// Get the required quorum size.
    pub fn get_quorum_size(&self) -> usize {
        *self.quorum_size.read()
    }
    /// Set the quorum size manually.
    ///
    /// NOTE: the value is overwritten by the next `add_replica` /
    /// `remove_replica`, which recompute a majority quorum.
    pub fn set_quorum_size(&self, size: usize) {
        *self.quorum_size.write() = size;
    }
    /// Recompute quorum as a strict majority of non-witness replicas.
    fn update_quorum_size(&self) {
        let replica_count = self
            .replicas
            .iter()
            .filter(|r| r.role != ReplicaRole::Witness)
            .count();
        *self.quorum_size.write() = (replica_count / 2) + 1;
    }
    /// Get all replica IDs (unordered).
    pub fn replica_ids(&self) -> Vec<String> {
        self.replicas.iter().map(|r| r.id.clone()).collect()
    }
    /// Get replica count.
    pub fn replica_count(&self) -> usize {
        self.replicas.len()
    }
    /// Get the cluster ID.
    pub fn cluster_id(&self) -> &str {
        &self.cluster_id
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_replica_creation() {
        // A fresh primary starts Healthy with zero lag, so it is writable.
        let replica = Replica::new("r1", "127.0.0.1:9001", ReplicaRole::Primary);
        assert_eq!(replica.id, "r1");
        assert_eq!(replica.role, ReplicaRole::Primary);
        assert!(replica.is_healthy());
        assert!(replica.is_writable());
    }
    #[test]
    fn test_replica_set() {
        let mut set = ReplicaSet::new("cluster-1");
        set.add_replica("r1", "127.0.0.1:9001", ReplicaRole::Primary)
            .unwrap();
        set.add_replica("r2", "127.0.0.1:9002", ReplicaRole::Secondary)
            .unwrap();
        assert_eq!(set.replica_count(), 2);
        assert!(set.get_primary().is_some());
        assert_eq!(set.get_secondaries().len(), 1);
    }
    #[test]
    fn test_promotion() {
        let mut set = ReplicaSet::new("cluster-1");
        set.add_replica("r1", "127.0.0.1:9001", ReplicaRole::Primary)
            .unwrap();
        set.add_replica("r2", "127.0.0.1:9002", ReplicaRole::Secondary)
            .unwrap();
        // Promoting r2 must also demote the old primary r1.
        set.promote_to_primary("r2").unwrap();
        let primary = set.get_primary().unwrap();
        assert_eq!(primary.id, "r2");
        assert_eq!(primary.role, ReplicaRole::Primary);
    }
    #[test]
    fn test_quorum() {
        let mut set = ReplicaSet::new("cluster-1");
        set.add_replica("r1", "127.0.0.1:9001", ReplicaRole::Primary)
            .unwrap();
        set.add_replica("r2", "127.0.0.1:9002", ReplicaRole::Secondary)
            .unwrap();
        set.add_replica("r3", "127.0.0.1:9003", ReplicaRole::Secondary)
            .unwrap();
        // Majority of 3 data-bearing replicas is 2; all are healthy.
        assert_eq!(set.get_quorum_size(), 2);
        assert!(set.has_quorum());
    }
}

View File

@@ -0,0 +1,403 @@
//! Change data capture and streaming for replication
//!
//! Provides mechanisms for streaming changes from the replication log
//! with support for checkpointing, resumption, and backpressure handling.
use crate::{LogEntry, ReplicationError, ReplicationLog, Result};
use chrono::{DateTime, Utc};
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tokio::sync::mpsc;
use uuid::Uuid;
/// Type of change operation
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ChangeOperation {
    /// Insert operation
    Insert,
    /// Update operation
    Update,
    /// Delete operation
    Delete,
    /// Bulk operation (batched changes of mixed kinds)
    Bulk,
}
/// A change event in the replication stream
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChangeEvent {
    /// Unique identifier for this event
    pub id: Uuid,
    /// Sequence number in the stream (mirrors the replication-log sequence)
    pub sequence: u64,
    /// Timestamp of the change (UTC)
    pub timestamp: DateTime<Utc>,
    /// Type of operation
    pub operation: ChangeOperation,
    /// Collection/table name
    pub collection: String,
    /// Document/vector ID affected
    pub document_id: String,
    /// Serialized data for the change
    pub data: Vec<u8>,
    /// Metadata for the change (`Value::Null` when none was attached)
    pub metadata: serde_json::Value,
}
impl ChangeEvent {
    /// Create a new change event
    ///
    /// Assigns a fresh random UUID, stamps the event with the current UTC
    /// time, and leaves `metadata` as JSON `null`.
    pub fn new(
        sequence: u64,
        operation: ChangeOperation,
        collection: String,
        document_id: String,
        data: Vec<u8>,
    ) -> Self {
        Self {
            id: Uuid::new_v4(),
            sequence,
            timestamp: Utc::now(),
            operation,
            collection,
            document_id,
            data,
            metadata: serde_json::Value::Null,
        }
    }
    /// Add metadata to the change event (builder-style; replaces any
    /// previously attached metadata)
    pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
        self.metadata = metadata;
        self
    }
    /// Convert from a log entry
    ///
    /// Reuses the entry's id, sequence and timestamp, clones its payload,
    /// and records the source replica and checksum in `metadata`.
    pub fn from_log_entry(
        entry: &LogEntry,
        operation: ChangeOperation,
        collection: String,
        document_id: String,
    ) -> Self {
        Self {
            id: entry.id,
            sequence: entry.sequence,
            timestamp: entry.timestamp,
            operation,
            collection,
            document_id,
            data: entry.data.clone(),
            metadata: serde_json::json!({
                "source_replica": entry.source_replica,
                "checksum": entry.checksum,
            }),
        }
    }
}
/// Checkpoint for resuming a replication stream
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Checkpoint {
    /// Last processed sequence number (resumption restarts after this)
    pub sequence: u64,
    /// Timestamp of the checkpoint (UTC, taken at construction)
    pub timestamp: DateTime<Utc>,
    /// Optional consumer group ID
    pub consumer_group: Option<String>,
    /// Consumer ID within the group
    pub consumer_id: String,
}
impl Checkpoint {
    /// Create a new checkpoint stamped with the current UTC time and no
    /// consumer group.
    pub fn new(sequence: u64, consumer_id: impl Into<String>) -> Self {
        Self {
            sequence,
            timestamp: Utc::now(),
            consumer_group: None,
            consumer_id: consumer_id.into(),
        }
    }
    /// Set the consumer group (builder-style)
    pub fn with_group(mut self, group: impl Into<String>) -> Self {
        self.consumer_group = Some(group.into());
        self
    }
}
/// Configuration for a replication stream
#[derive(Debug, Clone)]
pub struct StreamConfig {
    /// Buffer size for the channel (batches buffered before backpressure)
    pub buffer_size: usize,
    /// Maximum number of events fetched and sent per batch
    pub batch_size: usize,
    /// Enable automatic checkpointing
    pub auto_checkpoint: bool,
    /// Checkpoint interval (number of delivered events between checkpoints)
    pub checkpoint_interval: usize,
}
impl Default for StreamConfig {
    /// Defaults: 1000-batch channel buffer, 100-event batches, automatic
    /// checkpointing every 100 events.
    fn default() -> Self {
        Self {
            buffer_size: 1000,
            batch_size: 100,
            auto_checkpoint: true,
            checkpoint_interval: 100,
        }
    }
}
/// Manages a replication stream
///
/// Wraps a [`ReplicationLog`] and delivers its entries as batched
/// [`ChangeEvent`]s over a tokio channel, tracking a resumable checkpoint.
pub struct ReplicationStream {
    /// The replication log
    log: Arc<ReplicationLog>,
    /// Stream configuration
    config: StreamConfig,
    /// Current checkpoint (`None` until one is set or auto-recorded)
    checkpoint: Arc<RwLock<Option<Checkpoint>>>,
    /// Consumer ID recorded into auto-created checkpoints
    consumer_id: String,
}
impl ReplicationStream {
    /// Create a new replication stream with the default [`StreamConfig`].
    pub fn new(log: Arc<ReplicationLog>, consumer_id: impl Into<String>) -> Self {
        Self {
            log,
            config: StreamConfig::default(),
            checkpoint: Arc::new(RwLock::new(None)),
            consumer_id: consumer_id.into(),
        }
    }
    /// Create with custom configuration
    pub fn with_config(
        log: Arc<ReplicationLog>,
        consumer_id: impl Into<String>,
        config: StreamConfig,
    ) -> Self {
        Self {
            log,
            config,
            checkpoint: Arc::new(RwLock::new(None)),
            consumer_id: consumer_id.into(),
        }
    }
    /// Start streaming from a given position
    ///
    /// Spawns a background task that reads batches of log entries strictly
    /// after `start_sequence`, converts them to [`ChangeEvent`]s, and sends
    /// each batch down the returned channel. When the log has no new
    /// entries the task sleeps 100 ms and polls again; the task exits only
    /// when the receiver is dropped.
    pub async fn stream_from(
        &self,
        start_sequence: u64,
    ) -> Result<mpsc::Receiver<Vec<ChangeEvent>>> {
        let (tx, rx) = mpsc::channel(self.config.buffer_size);
        // Clone everything the background task needs so it owns its state.
        let log = self.log.clone();
        let batch_size = self.config.batch_size;
        let checkpoint = self.checkpoint.clone();
        let auto_checkpoint = self.config.auto_checkpoint;
        let checkpoint_interval = self.config.checkpoint_interval;
        let consumer_id = self.consumer_id.clone();
        tokio::spawn(async move {
            let mut current_sequence = start_sequence;
            let mut events_since_checkpoint = 0;
            loop {
                // Get batch of entries (range is inclusive on both ends).
                let entries =
                    log.get_range(current_sequence + 1, current_sequence + batch_size as u64);
                if entries.is_empty() {
                    // No new entries, wait a bit
                    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
                    continue;
                }
                // Convert to change events
                let mut events = Vec::new();
                for entry in &entries {
                    // In a real implementation, we would decode the operation type
                    // from the entry data. For now, we use a placeholder.
                    // NOTE(review): operation is hard-coded to Update and the
                    // document id is a fresh random UUID — downstream
                    // consumers cannot rely on either field yet.
                    let event = ChangeEvent::from_log_entry(
                        entry,
                        ChangeOperation::Update,
                        "default".to_string(),
                        Uuid::new_v4().to_string(),
                    );
                    events.push(event);
                }
                // Advance past the last entry actually read (gaps in the
                // log would otherwise stall progress).
                if let Some(last_entry) = entries.last() {
                    current_sequence = last_entry.sequence;
                }
                // Send batch
                if tx.send(events).await.is_err() {
                    // Receiver dropped, stop streaming
                    break;
                }
                events_since_checkpoint += entries.len();
                // Auto-checkpoint if enabled; progress is recorded only
                // after the batch was successfully delivered.
                if auto_checkpoint && events_since_checkpoint >= checkpoint_interval {
                    let cp = Checkpoint::new(current_sequence, consumer_id.clone());
                    *checkpoint.write() = Some(cp);
                    events_since_checkpoint = 0;
                }
            }
        });
        Ok(rx)
    }
    /// Resume streaming from the last checkpoint
    ///
    /// The checkpointed sequence counts as already processed: streaming
    /// restarts at `sequence + 1`. Without a checkpoint, the whole log is
    /// replayed from the start.
    pub async fn resume(&self) -> Result<mpsc::Receiver<Vec<ChangeEvent>>> {
        let checkpoint = self.checkpoint.read();
        let start_sequence = checkpoint.as_ref().map(|cp| cp.sequence).unwrap_or(0);
        // Release the read lock before awaiting.
        drop(checkpoint);
        self.stream_from(start_sequence).await
    }
    /// Get the current checkpoint
    pub fn get_checkpoint(&self) -> Option<Checkpoint> {
        self.checkpoint.read().clone()
    }
    /// Set a checkpoint manually
    pub fn set_checkpoint(&self, checkpoint: Checkpoint) {
        *self.checkpoint.write() = Some(checkpoint);
    }
    /// Clear the checkpoint
    pub fn clear_checkpoint(&self) {
        *self.checkpoint.write() = None;
    }
}
/// Manager for multiple replication streams (consumer groups)
pub struct StreamManager {
    /// The replication log shared by every stream this manager creates
    log: Arc<ReplicationLog>,
    /// Active streams in creation order
    /// (NOTE: a flat list, not keyed by consumer ID — duplicate consumer
    /// IDs are not prevented, and streams are never removed)
    streams: Arc<RwLock<Vec<Arc<ReplicationStream>>>>,
}
impl StreamManager {
/// Create a new stream manager
pub fn new(log: Arc<ReplicationLog>) -> Self {
Self {
log,
streams: Arc::new(RwLock::new(Vec::new())),
}
}
/// Create a new stream for a consumer
pub fn create_stream(&self, consumer_id: impl Into<String>) -> Arc<ReplicationStream> {
let stream = Arc::new(ReplicationStream::new(self.log.clone(), consumer_id));
self.streams.write().push(stream.clone());
stream
}
/// Create a stream with custom configuration
pub fn create_stream_with_config(
&self,
consumer_id: impl Into<String>,
config: StreamConfig,
) -> Arc<ReplicationStream> {
let stream = Arc::new(ReplicationStream::with_config(
self.log.clone(),
consumer_id,
config,
));
self.streams.write().push(stream.clone());
stream
}
/// Get all active streams
pub fn active_streams(&self) -> Vec<Arc<ReplicationStream>> {
self.streams.read().clone()
}
/// Get the number of active streams
pub fn stream_count(&self) -> usize {
self.streams.read().len()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_change_event_creation() {
        let event = ChangeEvent::new(
            1,
            ChangeOperation::Insert,
            "vectors".to_string(),
            "doc-1".to_string(),
            b"data".to_vec(),
        );
        assert_eq!(event.sequence, 1);
        assert_eq!(event.operation, ChangeOperation::Insert);
        assert_eq!(event.collection, "vectors");
    }
    #[test]
    fn test_checkpoint() {
        let cp = Checkpoint::new(100, "consumer-1").with_group("group-1");
        assert_eq!(cp.sequence, 100);
        assert_eq!(cp.consumer_id, "consumer-1");
        assert_eq!(cp.consumer_group, Some("group-1".to_string()));
    }
    #[tokio::test]
    async fn test_replication_stream() {
        let log = Arc::new(ReplicationLog::new("replica-1"));
        // Add some entries
        log.append(b"data1".to_vec());
        log.append(b"data2".to_vec());
        log.append(b"data3".to_vec());
        let stream = ReplicationStream::new(log.clone(), "consumer-1");
        let mut rx = stream.stream_from(0).await.unwrap();
        // The stream must deliver a non-empty first batch. The previous
        // `if let Some(events)` form passed vacuously when nothing arrived.
        let events = rx.recv().await.expect("expected a batch of change events");
        assert!(!events.is_empty());
    }
    #[test]
    fn test_stream_manager() {
        let log = Arc::new(ReplicationLog::new("replica-1"));
        let manager = StreamManager::new(log);
        // Underscore bindings: the handles exist only for their side effect
        // of registering with the manager (avoids unused-variable warnings).
        let _stream1 = manager.create_stream("consumer-1");
        let _stream2 = manager.create_stream("consumer-2");
        assert_eq!(manager.stream_count(), 2);
    }
    #[test]
    fn test_stream_config() {
        let config = StreamConfig {
            buffer_size: 2000,
            batch_size: 50,
            auto_checkpoint: false,
            checkpoint_interval: 200,
        };
        assert_eq!(config.buffer_size, 2000);
        assert_eq!(config.batch_size, 50);
        assert!(!config.auto_checkpoint);
    }
}

View File

@@ -0,0 +1,374 @@
//! Synchronization modes and replication log management
//!
//! Provides different replication modes (sync, async, semi-sync)
//! and manages the replication log for tracking changes.
use crate::{ReplicaSet, ReplicationError, Result};
use chrono::{DateTime, Utc};
use dashmap::DashMap;
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::time::Duration;
use tokio::time::timeout;
use uuid::Uuid;
/// Synchronization mode for replication
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SyncMode {
    /// Wait for all replicas to acknowledge
    Sync,
    /// Don't wait for replicas (fan-out happens in the background)
    Async,
    /// Wait for a minimum number of replicas
    /// (`min_replicas` = acknowledgments required before the write returns)
    SemiSync { min_replicas: usize },
}
/// Entry in the replication log
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LogEntry {
    /// Unique identifier for this entry
    pub id: Uuid,
    /// Sequence number in the log (assigned by `ReplicationLog::append`)
    pub sequence: u64,
    /// Timestamp when the entry was created (UTC)
    pub timestamp: DateTime<Utc>,
    /// The operation data (serialized)
    pub data: Vec<u8>,
    /// Checksum for data integrity (computed over `data` at creation)
    pub checksum: u64,
    /// ID of the replica that originated this entry
    pub source_replica: String,
}
impl LogEntry {
    /// Create a new log entry
    ///
    /// Assigns a random UUID, stamps the entry with the current UTC time,
    /// and computes an integrity checksum over `data`.
    pub fn new(sequence: u64, data: Vec<u8>, source_replica: String) -> Self {
        let checksum = Self::calculate_checksum(&data);
        Self {
            id: Uuid::new_v4(),
            sequence,
            timestamp: Utc::now(),
            data,
            checksum,
            source_replica,
        }
    }
    /// Calculate the checksum for data using 64-bit FNV-1a.
    ///
    /// Entries derive `Serialize` and are shipped between replicas, so the
    /// checksum must hash identically on every node. The previous
    /// implementation used `std::collections::hash_map::DefaultHasher`,
    /// whose algorithm is documented as NOT stable across Rust releases,
    /// so `verify()` could fail between nodes built with different
    /// toolchains. FNV-1a is a fixed, well-known 64-bit hash.
    fn calculate_checksum(data: &[u8]) -> u64 {
        const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
        const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;
        data.iter().fold(FNV_OFFSET_BASIS, |hash, &byte| {
            (hash ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
        })
    }
    /// Verify data integrity
    ///
    /// Returns `true` when a fresh checksum of `data` matches the stored one.
    pub fn verify(&self) -> bool {
        Self::calculate_checksum(&self.data) == self.checksum
    }
}
/// Manages the replication log
///
/// An in-memory, append-only log of [`LogEntry`] values keyed by a
/// monotonically increasing sequence number (starting at 1).
pub struct ReplicationLog {
    /// Log entries indexed by sequence number
    entries: Arc<DashMap<u64, LogEntry>>,
    /// Current (highest assigned) sequence number; 0 means empty
    sequence: Arc<RwLock<u64>>,
    /// Replica ID recorded as the source of appended entries
    replica_id: String,
}
impl ReplicationLog {
/// Create a new replication log
pub fn new(replica_id: impl Into<String>) -> Self {
Self {
entries: Arc::new(DashMap::new()),
sequence: Arc::new(RwLock::new(0)),
replica_id: replica_id.into(),
}
}
/// Append an entry to the log
pub fn append(&self, data: Vec<u8>) -> LogEntry {
let mut seq = self.sequence.write();
*seq += 1;
let entry = LogEntry::new(*seq, data, self.replica_id.clone());
self.entries.insert(*seq, entry.clone());
entry
}
/// Get an entry by sequence number
pub fn get(&self, sequence: u64) -> Option<LogEntry> {
self.entries.get(&sequence).map(|e| e.clone())
}
/// Get entries in a range
pub fn get_range(&self, start: u64, end: u64) -> Vec<LogEntry> {
let mut entries = Vec::new();
for seq in start..=end {
if let Some(entry) = self.entries.get(&seq) {
entries.push(entry.clone());
}
}
entries
}
/// Get the current sequence number
pub fn current_sequence(&self) -> u64 {
*self.sequence.read()
}
/// Get entries since a given sequence
pub fn get_since(&self, since: u64) -> Vec<LogEntry> {
let current = self.current_sequence();
self.get_range(since + 1, current)
}
/// Truncate log before a given sequence
pub fn truncate_before(&self, before: u64) {
self.entries.retain(|seq, _| *seq >= before);
}
/// Get log size
pub fn size(&self) -> usize {
self.entries.len()
}
}
/// Manages synchronization across replicas
pub struct SyncManager {
    /// The replica set whose secondaries receive replicated entries
    replica_set: Arc<ReplicaSet>,
    /// Replication log (local source of truth; appended before fan-out)
    log: Arc<ReplicationLog>,
    /// Synchronization mode (defaults to `Async` in `new`)
    sync_mode: Arc<RwLock<SyncMode>>,
    /// Timeout for synchronous operations (`Sync` and `SemiSync` modes)
    sync_timeout: Duration,
}
impl SyncManager {
    /// Create a new sync manager, defaulting to async replication with a
    /// 5-second timeout for the synchronous modes.
    pub fn new(replica_set: Arc<ReplicaSet>, log: Arc<ReplicationLog>) -> Self {
        Self {
            replica_set,
            log,
            sync_mode: Arc::new(RwLock::new(SyncMode::Async)),
            sync_timeout: Duration::from_secs(5),
        }
    }
    /// Set the synchronization mode
    pub fn set_sync_mode(&self, mode: SyncMode) {
        *self.sync_mode.write() = mode;
    }
    /// Get the current synchronization mode
    pub fn sync_mode(&self) -> SyncMode {
        *self.sync_mode.read()
    }
    /// Set the timeout applied to `Sync` and `SemiSync` replication.
    pub fn set_sync_timeout(&mut self, timeout: Duration) {
        self.sync_timeout = timeout;
    }
    /// Append `data` to the local log and replicate it according to the
    /// current [`SyncMode`], returning the appended entry.
    ///
    /// In `Async` mode the fan-out runs in a background task and failures
    /// are only logged, never surfaced to the caller.
    pub async fn replicate(&self, data: Vec<u8>) -> Result<LogEntry> {
        // Append locally first; replication ships this entry to secondaries.
        let entry = self.log.append(data);
        match self.sync_mode() {
            SyncMode::Sync => {
                self.replicate_sync(&entry).await?;
            }
            SyncMode::Async => {
                // Fire and forget
                let entry_clone = entry.clone();
                let replica_set = self.replica_set.clone();
                tokio::spawn(async move {
                    if let Err(e) = Self::send_to_replicas(&replica_set, &entry_clone).await {
                        tracing::error!("Async replication failed: {}", e);
                    }
                });
            }
            SyncMode::SemiSync { min_replicas } => {
                self.replicate_semi_sync(&entry, min_replicas).await?;
            }
        }
        Ok(entry)
    }
    /// Synchronous replication - wait for all replicas, bounded by
    /// `sync_timeout`.
    async fn replicate_sync(&self, entry: &LogEntry) -> Result<()> {
        timeout(
            self.sync_timeout,
            Self::send_to_replicas(&self.replica_set, entry),
        )
        .await
        .map_err(|_| ReplicationError::Timeout("Sync replication timed out".to_string()))?
    }
    /// Semi-synchronous replication - wait for acknowledgments from at
    /// least `min_replicas` secondaries, bounded by `sync_timeout`.
    ///
    /// Fails fast with `QuorumNotMet` when fewer secondaries exist than
    /// required. (Cleanup: the previous version cloned the entry and the
    /// replica set into locals it never used.)
    async fn replicate_semi_sync(&self, entry: &LogEntry, min_replicas: usize) -> Result<()> {
        let secondaries = self.replica_set.get_secondaries();
        if secondaries.len() < min_replicas {
            return Err(ReplicationError::QuorumNotMet {
                needed: min_replicas,
                available: secondaries.len(),
            });
        }
        tracing::debug!("Semi-sync replicating entry {}", entry.sequence);
        timeout(self.sync_timeout, async move {
            // Simulate sending to replicas and waiting for acknowledgments.
            // In a real implementation, this would use network calls; here
            // every known secondary is assumed to acknowledge.
            let acks = secondaries.len();
            if acks >= min_replicas {
                Ok(())
            } else {
                Err(ReplicationError::QuorumNotMet {
                    needed: min_replicas,
                    available: acks,
                })
            }
        })
        .await
        .map_err(|_| ReplicationError::Timeout("Semi-sync replication timed out".to_string()))?
    }
    /// Send a log entry to all healthy secondary replicas.
    async fn send_to_replicas(replica_set: &ReplicaSet, entry: &LogEntry) -> Result<()> {
        // In a real implementation, this would send over the network.
        // For now, we simulate successful replication.
        for replica in replica_set.get_secondaries() {
            if replica.is_healthy() {
                tracing::debug!("Replicating entry {} to {}", entry.sequence, replica.id);
            }
        }
        Ok(())
    }
    /// Return the log entries a lagging replica is missing, i.e. everything
    /// strictly after `from_sequence`.
    pub async fn catchup(&self, replica_id: &str, from_sequence: u64) -> Result<Vec<LogEntry>> {
        // Verify the replica exists before shipping entries.
        if self.replica_set.get_replica(replica_id).is_none() {
            return Err(ReplicationError::ReplicaNotFound(replica_id.to_string()));
        }
        let current_sequence = self.log.current_sequence();
        if from_sequence >= current_sequence {
            // Already caught up.
            return Ok(Vec::new());
        }
        let entries = self.log.get_since(from_sequence);
        tracing::info!(
            "Catching up replica {} with {} entries (from {} to {})",
            replica_id,
            entries.len(),
            from_sequence + 1,
            current_sequence
        );
        Ok(entries)
    }
    /// Get the current log position
    pub fn current_position(&self) -> u64 {
        self.log.current_sequence()
    }
    /// Verify the checksum of the log entry at `sequence`.
    pub fn verify_entry(&self, sequence: u64) -> Result<bool> {
        let entry = self
            .log
            .get(sequence)
            .ok_or_else(|| ReplicationError::InvalidState("Log entry not found".to_string()))?;
        Ok(entry.verify())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ReplicaRole;
    #[test]
    fn test_log_entry_creation() {
        // The checksum computed at construction must verify against the data.
        let data = b"test data".to_vec();
        let entry = LogEntry::new(1, data, "replica-1".to_string());
        assert_eq!(entry.sequence, 1);
        assert!(entry.verify());
    }
    #[test]
    fn test_replication_log() {
        // Sequence numbers start at 1 and increase monotonically.
        let log = ReplicationLog::new("replica-1");
        let entry1 = log.append(b"data1".to_vec());
        let entry2 = log.append(b"data2".to_vec());
        assert_eq!(entry1.sequence, 1);
        assert_eq!(entry2.sequence, 2);
        assert_eq!(log.current_sequence(), 2);
        let entries = log.get_range(1, 2);
        assert_eq!(entries.len(), 2);
    }
    #[tokio::test]
    async fn test_sync_manager() {
        let mut replica_set = ReplicaSet::new("cluster-1");
        replica_set
            .add_replica("r1", "127.0.0.1:9001", ReplicaRole::Primary)
            .unwrap();
        replica_set
            .add_replica("r2", "127.0.0.1:9002", ReplicaRole::Secondary)
            .unwrap();
        let log = Arc::new(ReplicationLog::new("r1"));
        let manager = SyncManager::new(Arc::new(replica_set), log);
        // Async mode returns as soon as the local append succeeds.
        manager.set_sync_mode(SyncMode::Async);
        let entry = manager.replicate(b"test".to_vec()).await.unwrap();
        assert_eq!(entry.sequence, 1);
    }
    #[tokio::test]
    async fn test_catchup() {
        let mut replica_set = ReplicaSet::new("cluster-1");
        replica_set
            .add_replica("r1", "127.0.0.1:9001", ReplicaRole::Primary)
            .unwrap();
        replica_set
            .add_replica("r2", "127.0.0.1:9002", ReplicaRole::Secondary)
            .unwrap();
        let log = Arc::new(ReplicationLog::new("r1"));
        let manager = SyncManager::new(Arc::new(replica_set), log.clone());
        // Add some entries
        log.append(b"data1".to_vec());
        log.append(b"data2".to_vec());
        log.append(b"data3".to_vec());
        // Catchup from position 1 is exclusive of position 1 itself.
        let entries = manager.catchup("r2", 1).await.unwrap();
        assert_eq!(entries.len(), 2); // Entries 2 and 3
    }
}