Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
52
vendor/ruvector/crates/ruvector-snapshot/src/error.rs
vendored
Normal file
52
vendor/ruvector/crates/ruvector-snapshot/src/error.rs
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
use thiserror::Error;
|
||||
|
||||
/// Result type for snapshot operations
pub type Result<T> = std::result::Result<T, SnapshotError>;

/// Errors that can occur during snapshot operations
#[derive(Error, Debug)]
pub enum SnapshotError {
    /// The requested snapshot id does not exist in the storage backend.
    #[error("Snapshot not found: {0}")]
    SnapshotNotFound(String),

    /// Snapshot data failed integrity validation.
    #[error("Corrupted snapshot: {0}")]
    CorruptedSnapshot(String),

    /// A storage backend operation failed (also used for input validation
    /// failures via the `storage` helper constructor).
    #[error("Storage error: {0}")]
    StorageError(String),

    /// Compression or decompression of snapshot data failed.
    #[error("Compression error: {0}")]
    CompressionError(String),

    /// Underlying filesystem I/O failure; `#[from]` lets `?` convert
    /// `std::io::Error` automatically.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),

    /// Binary (de)serialization of snapshot data failed.
    #[error("Serialization error: {0}")]
    SerializationError(String),

    /// JSON (de)serialization of snapshot metadata failed; `#[from]` lets `?`
    /// convert `serde_json::Error` automatically.
    #[error("JSON error: {0}")]
    JsonError(#[from] serde_json::Error),

    /// Checksum recorded in metadata does not match the loaded data.
    #[error("Invalid checksum: expected {expected}, got {actual}")]
    InvalidChecksum { expected: String, actual: String },

    /// Collection-level error.
    // NOTE(review): not constructed anywhere in this crate's visible code —
    // confirm it is still needed by external callers.
    #[error("Collection error: {0}")]
    CollectionError(String),
}
|
||||
|
||||
impl SnapshotError {
|
||||
/// Create a storage error with a custom message
|
||||
pub fn storage<S: Into<String>>(msg: S) -> Self {
|
||||
SnapshotError::StorageError(msg.into())
|
||||
}
|
||||
|
||||
/// Create a corrupted snapshot error with a custom message
|
||||
pub fn corrupted<S: Into<String>>(msg: S) -> Self {
|
||||
SnapshotError::CorruptedSnapshot(msg.into())
|
||||
}
|
||||
|
||||
/// Create a compression error with a custom message
|
||||
pub fn compression<S: Into<String>>(msg: S) -> Self {
|
||||
SnapshotError::CompressionError(msg.into())
|
||||
}
|
||||
}
|
||||
27
vendor/ruvector/crates/ruvector-snapshot/src/lib.rs
vendored
Normal file
27
vendor/ruvector/crates/ruvector-snapshot/src/lib.rs
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
//! Snapshot and restore functionality for rUvector collections
//!
//! This crate provides backup and restore capabilities for vector collections,
//! including compression, checksums, and multiple storage backends.

// Modules are private; their public surface is re-exported below.
mod error;
mod manager;
mod snapshot;
mod storage;

pub use error::{Result, SnapshotError};
pub use manager::SnapshotManager;
pub use snapshot::{Snapshot, SnapshotData, SnapshotMetadata, VectorRecord};
pub use storage::{LocalStorage, SnapshotStorage};

#[cfg(test)]
mod tests {
    use super::*;

    // Typing each re-export here makes this a compile-time check that the
    // public API surface resolves.
    #[test]
    fn test_module_exports() {
        // Verify all public exports are accessible
        let _: Option<SnapshotError> = None;
        let _: Option<SnapshotManager> = None;
        let _: Option<Snapshot> = None;
    }
}
|
||||
294
vendor/ruvector/crates/ruvector-snapshot/src/manager.rs
vendored
Normal file
294
vendor/ruvector/crates/ruvector-snapshot/src/manager.rs
vendored
Normal file
@@ -0,0 +1,294 @@
|
||||
use crate::error::{Result, SnapshotError};
|
||||
use crate::snapshot::{Snapshot, SnapshotData};
|
||||
use crate::storage::SnapshotStorage;
|
||||
|
||||
/// Manages snapshot operations for collections
///
/// Thin orchestration layer over a pluggable [`SnapshotStorage`] backend:
/// validates inputs, then delegates persistence to `storage`.
pub struct SnapshotManager {
    // Boxed trait object so any backend (local filesystem, etc.) can be used.
    storage: Box<dyn SnapshotStorage>,
}
|
||||
|
||||
impl SnapshotManager {
|
||||
/// Create a new snapshot manager with the given storage backend
|
||||
pub fn new(storage: Box<dyn SnapshotStorage>) -> Self {
|
||||
Self { storage }
|
||||
}
|
||||
|
||||
/// Create a snapshot of a collection
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `snapshot_data` - The complete snapshot data including vectors and configuration
|
||||
///
|
||||
/// # Returns
|
||||
/// * `Snapshot` - Metadata about the created snapshot
|
||||
pub async fn create_snapshot(&self, snapshot_data: SnapshotData) -> Result<Snapshot> {
|
||||
// Validate snapshot data
|
||||
if snapshot_data.vectors.is_empty() {
|
||||
return Err(SnapshotError::storage(
|
||||
"Cannot create snapshot of empty collection",
|
||||
));
|
||||
}
|
||||
|
||||
// Verify all vectors have the same dimension
|
||||
let expected_dim = snapshot_data.config.dimension;
|
||||
for (idx, vector) in snapshot_data.vectors.iter().enumerate() {
|
||||
if vector.vector.len() != expected_dim {
|
||||
return Err(SnapshotError::storage(format!(
|
||||
"Vector {} has dimension {} but expected {}",
|
||||
idx,
|
||||
vector.vector.len(),
|
||||
expected_dim
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
// Save the snapshot
|
||||
self.storage.save(&snapshot_data).await
|
||||
}
|
||||
|
||||
/// Restore a snapshot by ID
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `id` - The unique snapshot identifier
|
||||
///
|
||||
/// # Returns
|
||||
/// * `SnapshotData` - The complete snapshot data including vectors and configuration
|
||||
pub async fn restore_snapshot(&self, id: &str) -> Result<SnapshotData> {
|
||||
if id.is_empty() {
|
||||
return Err(SnapshotError::storage("Snapshot ID cannot be empty"));
|
||||
}
|
||||
|
||||
self.storage.load(id).await
|
||||
}
|
||||
|
||||
/// List all available snapshots
|
||||
///
|
||||
/// # Returns
|
||||
/// * `Vec<Snapshot>` - List of all snapshot metadata, sorted by creation date (newest first)
|
||||
pub async fn list_snapshots(&self) -> Result<Vec<Snapshot>> {
|
||||
self.storage.list().await
|
||||
}
|
||||
|
||||
/// List snapshots for a specific collection
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `collection_name` - Name of the collection to filter by
|
||||
///
|
||||
/// # Returns
|
||||
/// * `Vec<Snapshot>` - List of snapshots for the specified collection
|
||||
pub async fn list_snapshots_for_collection(
|
||||
&self,
|
||||
collection_name: &str,
|
||||
) -> Result<Vec<Snapshot>> {
|
||||
let all_snapshots = self.storage.list().await?;
|
||||
Ok(all_snapshots
|
||||
.into_iter()
|
||||
.filter(|s| s.collection_name == collection_name)
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Delete a snapshot by ID
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `id` - The unique snapshot identifier
|
||||
pub async fn delete_snapshot(&self, id: &str) -> Result<()> {
|
||||
if id.is_empty() {
|
||||
return Err(SnapshotError::storage("Snapshot ID cannot be empty"));
|
||||
}
|
||||
|
||||
self.storage.delete(id).await
|
||||
}
|
||||
|
||||
/// Get snapshot metadata by ID
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `id` - The unique snapshot identifier
|
||||
///
|
||||
/// # Returns
|
||||
/// * `Snapshot` - Metadata about the snapshot
|
||||
pub async fn get_snapshot_info(&self, id: &str) -> Result<Snapshot> {
|
||||
let snapshots = self.storage.list().await?;
|
||||
snapshots
|
||||
.into_iter()
|
||||
.find(|s| s.id == id)
|
||||
.ok_or_else(|| SnapshotError::SnapshotNotFound(id.to_string()))
|
||||
}
|
||||
|
||||
/// Delete old snapshots, keeping only the N most recent
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `collection_name` - Name of the collection
|
||||
/// * `keep_count` - Number of recent snapshots to keep
|
||||
///
|
||||
/// # Returns
|
||||
/// * `usize` - Number of snapshots deleted
|
||||
pub async fn cleanup_old_snapshots(
|
||||
&self,
|
||||
collection_name: &str,
|
||||
keep_count: usize,
|
||||
) -> Result<usize> {
|
||||
let snapshots = self.list_snapshots_for_collection(collection_name).await?;
|
||||
|
||||
if snapshots.len() <= keep_count {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let to_delete = &snapshots[keep_count..];
|
||||
let mut deleted = 0;
|
||||
|
||||
for snapshot in to_delete {
|
||||
if self.storage.delete(&snapshot.id).await.is_ok() {
|
||||
deleted += 1;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(deleted)
|
||||
}
|
||||
|
||||
/// Get the total size of all snapshots in bytes
|
||||
pub async fn total_size(&self) -> Result<u64> {
|
||||
let snapshots = self.storage.list().await?;
|
||||
Ok(snapshots.iter().map(|s| s.size_bytes).sum())
|
||||
}
|
||||
|
||||
/// Get the total size of snapshots for a specific collection
|
||||
pub async fn collection_size(&self, collection_name: &str) -> Result<u64> {
|
||||
let snapshots = self.list_snapshots_for_collection(collection_name).await?;
|
||||
Ok(snapshots.iter().map(|s| s.size_bytes).sum())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::snapshot::{CollectionConfig, DistanceMetric, VectorRecord};
    use crate::storage::LocalStorage;
    use std::path::PathBuf;

    // Build a SnapshotData fixture: `vector_count` 3-dimensional vectors with
    // deterministic ids (v0, v1, …) and no payloads.
    fn create_test_snapshot_data(name: &str, vector_count: usize) -> SnapshotData {
        let config = CollectionConfig {
            dimension: 3,
            metric: DistanceMetric::Cosine,
            hnsw_config: None,
        };

        let vectors = (0..vector_count)
            .map(|i| {
                VectorRecord::new(
                    format!("v{}", i),
                    vec![i as f32, (i + 1) as f32, (i + 2) as f32],
                    None,
                )
            })
            .collect();

        SnapshotData::new(name.to_string(), config, vectors)
    }

    // Round-trip: save a snapshot, then restore it and compare id/count.
    #[tokio::test]
    async fn test_create_and_restore_snapshot() {
        let temp_dir = std::env::temp_dir().join("ruvector-manager-test");
        let storage = Box::new(LocalStorage::new(temp_dir.clone()));
        let manager = SnapshotManager::new(storage);

        let snapshot_data = create_test_snapshot_data("test-collection", 5);
        let id = snapshot_data.id().to_string();

        // Create snapshot
        let snapshot = manager.create_snapshot(snapshot_data).await.unwrap();
        assert_eq!(snapshot.id, id);
        assert_eq!(snapshot.vectors_count, 5);

        // Restore snapshot
        let restored = manager.restore_snapshot(&id).await.unwrap();
        assert_eq!(restored.id(), id);
        assert_eq!(restored.vectors_count(), 5);

        // Cleanup (best-effort; shared temp dir may be used by parallel runs)
        let _ = manager.delete_snapshot(&id).await;
        let _ = std::fs::remove_dir_all(temp_dir);
    }

    #[tokio::test]
    async fn test_list_snapshots() {
        let temp_dir = std::env::temp_dir().join("ruvector-list-test");
        let storage = Box::new(LocalStorage::new(temp_dir.clone()));
        let manager = SnapshotManager::new(storage);

        // Create multiple snapshots for two different collections.
        let snapshot1 = create_test_snapshot_data("collection-1", 3);
        let snapshot2 = create_test_snapshot_data("collection-2", 5);

        let id1 = snapshot1.id().to_string();
        let id2 = snapshot2.id().to_string();

        manager.create_snapshot(snapshot1).await.unwrap();
        manager.create_snapshot(snapshot2).await.unwrap();

        // List all — `>= 2` because the directory may hold leftovers.
        let all_snapshots = manager.list_snapshots().await.unwrap();
        assert!(all_snapshots.len() >= 2);

        // List by collection: filter should isolate collection-1's snapshot.
        let collection1_snapshots = manager
            .list_snapshots_for_collection("collection-1")
            .await
            .unwrap();
        assert_eq!(collection1_snapshots.len(), 1);

        // Cleanup
        let _ = manager.delete_snapshot(&id1).await;
        let _ = manager.delete_snapshot(&id2).await;
        let _ = std::fs::remove_dir_all(temp_dir);
    }

    #[tokio::test]
    async fn test_cleanup_old_snapshots() {
        let temp_dir = std::env::temp_dir().join("ruvector-cleanup-test");
        let storage = Box::new(LocalStorage::new(temp_dir.clone()));
        let manager = SnapshotManager::new(storage);

        // Create multiple snapshots for the same collection; the short sleep
        // spaces out created_at timestamps so newest-first ordering is stable.
        for i in 0..5 {
            let snapshot_data = create_test_snapshot_data("test-collection", i + 1);
            manager.create_snapshot(snapshot_data).await.unwrap();
            tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
        }

        // Cleanup, keeping only the 2 most recent of the 5.
        let deleted = manager
            .cleanup_old_snapshots("test-collection", 2)
            .await
            .unwrap();
        assert_eq!(deleted, 3);

        // Verify only 2 remain
        let remaining = manager
            .list_snapshots_for_collection("test-collection")
            .await
            .unwrap();
        assert_eq!(remaining.len(), 2);

        // Cleanup
        let _ = std::fs::remove_dir_all(temp_dir);
    }

    #[tokio::test]
    async fn test_snapshot_validation() {
        let temp_dir = std::env::temp_dir().join("ruvector-validation-test");
        let storage = Box::new(LocalStorage::new(temp_dir.clone()));
        let manager = SnapshotManager::new(storage);

        // Empty collections are rejected by create_snapshot.
        let config = CollectionConfig {
            dimension: 3,
            metric: DistanceMetric::Cosine,
            hnsw_config: None,
        };
        let empty_data = SnapshotData::new("empty".to_string(), config, vec![]);
        let result = manager.create_snapshot(empty_data).await;
        assert!(result.is_err());

        // Cleanup
        let _ = std::fs::remove_dir_all(temp_dir);
    }
}
|
||||
195
vendor/ruvector/crates/ruvector-snapshot/src/snapshot.rs
vendored
Normal file
195
vendor/ruvector/crates/ruvector-snapshot/src/snapshot.rs
vendored
Normal file
@@ -0,0 +1,195 @@
|
||||
use bincode::{Decode, Encode};
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
|
||||
/// Snapshot metadata and information
///
/// This is the lightweight record written to the `.metadata.json` sidecar
/// file; the full vector payload lives in the compressed `.snapshot.gz` file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Snapshot {
    /// Unique snapshot identifier
    pub id: String,

    /// Name of the collection this snapshot represents
    pub collection_name: String,

    /// Timestamp when the snapshot was created
    pub created_at: DateTime<Utc>,

    /// Number of vectors in the snapshot
    pub vectors_count: usize,

    /// SHA-256 checksum of the snapshot data (computed before compression)
    pub checksum: String,

    /// Size of the snapshot in bytes (compressed)
    pub size_bytes: u64,
}
|
||||
|
||||
/// Complete snapshot data including metadata and vectors
///
/// This is the payload serialized with bincode, compressed, and written to
/// disk by the storage backend.
#[derive(Debug, Serialize, Deserialize, Encode, Decode)]
pub struct SnapshotData {
    /// Snapshot metadata
    pub metadata: SnapshotMetadata,

    /// Collection configuration
    pub config: CollectionConfig,

    /// All vectors in the collection
    pub vectors: Vec<VectorRecord>,
}
|
||||
|
||||
impl SnapshotData {
|
||||
/// Create a new snapshot data instance
|
||||
pub fn new(
|
||||
collection_name: String,
|
||||
config: CollectionConfig,
|
||||
vectors: Vec<VectorRecord>,
|
||||
) -> Self {
|
||||
Self {
|
||||
metadata: SnapshotMetadata {
|
||||
id: uuid::Uuid::new_v4().to_string(),
|
||||
collection_name,
|
||||
created_at: Utc::now().to_rfc3339(),
|
||||
version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
},
|
||||
config,
|
||||
vectors,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the number of vectors in this snapshot
|
||||
pub fn vectors_count(&self) -> usize {
|
||||
self.vectors.len()
|
||||
}
|
||||
|
||||
/// Get the snapshot ID
|
||||
pub fn id(&self) -> &str {
|
||||
&self.metadata.id
|
||||
}
|
||||
|
||||
/// Get the collection name
|
||||
pub fn collection_name(&self) -> &str {
|
||||
&self.metadata.collection_name
|
||||
}
|
||||
}
|
||||
|
||||
/// Snapshot metadata
///
/// Embedded inside [`SnapshotData`] and serialized with it; `created_at` is a
/// string (not `DateTime`) so the struct stays bincode-encodable.
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct SnapshotMetadata {
    /// Unique snapshot identifier (UUID v4, assigned by `SnapshotData::new`)
    pub id: String,

    /// Name of the collection
    pub collection_name: String,

    /// Creation timestamp (RFC3339 format)
    pub created_at: String,

    /// Version of the snapshot format (set to the crate version)
    pub version: String,
}
|
||||
|
||||
/// Collection configuration stored in snapshot
///
/// Captured alongside the vectors so a restore can rebuild the collection
/// with identical settings.
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct CollectionConfig {
    /// Vector dimension; every `VectorRecord` must match this length
    pub dimension: usize,

    /// Distance metric
    pub metric: DistanceMetric,

    /// HNSW configuration (`None` when the collection has no HNSW index config)
    pub hnsw_config: Option<HnswConfig>,
}
|
||||
|
||||
/// Distance metric for vector similarity
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
|
||||
pub enum DistanceMetric {
|
||||
Cosine,
|
||||
Euclidean,
|
||||
DotProduct,
|
||||
}
|
||||
|
||||
/// HNSW index configuration
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct HnswConfig {
    // Field names follow the standard HNSW parameter names; exact semantics
    // are defined by the index implementation that consumes this config.
    /// HNSW `M` parameter (connections per node)
    pub m: usize,
    /// Candidate-list size used during index construction
    pub ef_construction: usize,
    /// Candidate-list size used during search
    pub ef_search: usize,
}
|
||||
|
||||
/// Individual vector record in a snapshot
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct VectorRecord {
    /// Unique vector identifier
    pub id: String,

    /// Vector data
    pub vector: Vec<f32>,

    /// Optional metadata payload (stored as JSON string for bincode compatibility)
    // NOTE(review): `#[serde(skip)]` means the payload is OMITTED from any
    // serde-based serialization of this struct, while bincode's Encode/Decode
    // (used by the storage backend) still round-trips it via `with_serde` —
    // confirm this asymmetry is intended.
    #[serde(skip)]
    #[bincode(with_serde)]
    payload_json: Option<String>,
}
|
||||
|
||||
impl VectorRecord {
|
||||
/// Create a new vector record
|
||||
pub fn new(id: String, vector: Vec<f32>, payload: Option<Value>) -> Self {
|
||||
let payload_json = payload.and_then(|v| serde_json::to_string(&v).ok());
|
||||
Self {
|
||||
id,
|
||||
vector,
|
||||
payload_json,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the payload as a serde_json::Value
|
||||
pub fn payload(&self) -> Option<Value> {
|
||||
self.payload_json
|
||||
.as_ref()
|
||||
.and_then(|s| serde_json::from_str(s).ok())
|
||||
}
|
||||
|
||||
/// Set the payload from a serde_json::Value
|
||||
pub fn set_payload(&mut self, payload: Option<Value>) {
|
||||
self.payload_json = payload.and_then(|v| serde_json::to_string(&v).ok());
|
||||
}
|
||||
|
||||
/// Get the dimension of this vector
|
||||
pub fn dimension(&self) -> usize {
|
||||
self.vector.len()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // A record with no payload should keep its id and report its length.
    #[test]
    fn test_vector_record_creation() {
        let record = VectorRecord::new("test-1".to_string(), vec![1.0, 2.0, 3.0], None);
        assert_eq!(record.id, "test-1");
        assert_eq!(record.dimension(), 3);
    }

    // SnapshotData::new should count vectors, keep the collection name, and
    // assign a non-empty generated id.
    #[test]
    fn test_snapshot_data_creation() {
        let config = CollectionConfig {
            dimension: 3,
            metric: DistanceMetric::Cosine,
            hnsw_config: None,
        };

        let vectors = vec![
            VectorRecord::new("v1".to_string(), vec![1.0, 0.0, 0.0], None),
            VectorRecord::new("v2".to_string(), vec![0.0, 1.0, 0.0], None),
        ];

        let data = SnapshotData::new("test-collection".to_string(), config, vectors);

        assert_eq!(data.vectors_count(), 2);
        assert_eq!(data.collection_name(), "test-collection");
        assert!(!data.id().is_empty());
    }
}
|
||||
276
vendor/ruvector/crates/ruvector-snapshot/src/storage.rs
vendored
Normal file
276
vendor/ruvector/crates/ruvector-snapshot/src/storage.rs
vendored
Normal file
@@ -0,0 +1,276 @@
|
||||
use async_trait::async_trait;
|
||||
use flate2::read::GzDecoder;
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::io::{Read, Write};
|
||||
use std::path::PathBuf;
|
||||
use tokio::fs;
|
||||
|
||||
use crate::error::{Result, SnapshotError};
|
||||
use crate::snapshot::{Snapshot, SnapshotData};
|
||||
|
||||
/// Trait for snapshot storage backends
///
/// Implementations must be `Send + Sync` so a `Box<dyn SnapshotStorage>` can
/// be shared across async tasks.
#[async_trait]
pub trait SnapshotStorage: Send + Sync {
    /// Save a snapshot to storage, returning the resulting metadata.
    async fn save(&self, snapshot: &SnapshotData) -> Result<Snapshot>;

    /// Load the full snapshot data for the given id.
    async fn load(&self, id: &str) -> Result<SnapshotData>;

    /// List metadata for all available snapshots.
    async fn list(&self) -> Result<Vec<Snapshot>>;

    /// Delete the snapshot with the given id from storage.
    async fn delete(&self, id: &str) -> Result<()>;
}
|
||||
|
||||
/// Local filesystem storage backend
///
/// Stores each snapshot as two files under `base_path`: a gzip-compressed
/// bincode payload (`<id>.snapshot.gz`) and a JSON metadata sidecar
/// (`<id>.metadata.json`).
pub struct LocalStorage {
    // Directory that holds all snapshot files; created lazily on first use.
    base_path: PathBuf,
}
|
||||
|
||||
impl LocalStorage {
|
||||
/// Create a new local storage instance
|
||||
pub fn new(base_path: PathBuf) -> Self {
|
||||
Self { base_path }
|
||||
}
|
||||
|
||||
/// Get the path for a snapshot file
|
||||
fn snapshot_path(&self, id: &str) -> PathBuf {
|
||||
self.base_path.join(format!("{}.snapshot.gz", id))
|
||||
}
|
||||
|
||||
/// Get the path for a snapshot metadata file
|
||||
fn metadata_path(&self, id: &str) -> PathBuf {
|
||||
self.base_path.join(format!("{}.metadata.json", id))
|
||||
}
|
||||
|
||||
/// Compress data using gzip
|
||||
fn compress(data: &[u8]) -> Result<Vec<u8>> {
|
||||
let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
|
||||
encoder
|
||||
.write_all(data)
|
||||
.map_err(|e| SnapshotError::compression(format!("Compression failed: {}", e)))?;
|
||||
encoder
|
||||
.finish()
|
||||
.map_err(|e| SnapshotError::compression(format!("Finish compression failed: {}", e)))
|
||||
}
|
||||
|
||||
/// Decompress gzip data
|
||||
fn decompress(data: &[u8]) -> Result<Vec<u8>> {
|
||||
let mut decoder = GzDecoder::new(data);
|
||||
let mut decompressed = Vec::new();
|
||||
decoder
|
||||
.read_to_end(&mut decompressed)
|
||||
.map_err(|e| SnapshotError::compression(format!("Decompression failed: {}", e)))?;
|
||||
Ok(decompressed)
|
||||
}
|
||||
|
||||
/// Calculate SHA-256 checksum
|
||||
fn calculate_checksum(data: &[u8]) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(data);
|
||||
format!("{:x}", hasher.finalize())
|
||||
}
|
||||
|
||||
/// Ensure the base directory exists
|
||||
async fn ensure_dir(&self) -> Result<()> {
|
||||
if !self.base_path.exists() {
|
||||
fs::create_dir_all(&self.base_path).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
impl SnapshotStorage for LocalStorage {
    // On-disk format: bincode-encode the data, checksum the UNCOMPRESSED
    // bytes, gzip them to `<id>.snapshot.gz`, and write a JSON metadata
    // sidecar to `<id>.metadata.json`. `load` must mirror this order exactly.
    async fn save(&self, snapshot_data: &SnapshotData) -> Result<Snapshot> {
        self.ensure_dir().await?;

        let id = snapshot_data.id().to_string();
        let snapshot_path = self.snapshot_path(&id);
        let metadata_path = self.metadata_path(&id);

        // Serialize snapshot data with bincode's standard configuration.
        let config = bincode::config::standard();
        let serialized = bincode::encode_to_vec(snapshot_data, config)
            .map_err(|e| SnapshotError::SerializationError(e.to_string()))?;

        // Calculate checksum before compression, so `load` can verify it
        // after decompressing.
        let checksum = Self::calculate_checksum(&serialized);

        // Compress data; the reported size is the compressed size on disk.
        let compressed = Self::compress(&serialized)?;
        let size_bytes = compressed.len() as u64;

        // Write compressed data
        fs::write(&snapshot_path, &compressed).await?;

        // Re-parse the RFC3339 creation timestamp into a typed DateTime for
        // the metadata record.
        let created_at = chrono::DateTime::parse_from_rfc3339(&snapshot_data.metadata.created_at)
            .map_err(|e| SnapshotError::storage(format!("Invalid timestamp: {}", e)))?
            .with_timezone(&chrono::Utc);

        let snapshot = Snapshot {
            id: id.clone(),
            collection_name: snapshot_data.collection_name().to_string(),
            created_at,
            vectors_count: snapshot_data.vectors_count(),
            checksum,
            size_bytes,
        };

        // Write metadata sidecar as pretty-printed JSON.
        let metadata_json = serde_json::to_string_pretty(&snapshot)?;
        fs::write(&metadata_path, metadata_json).await?;

        Ok(snapshot)
    }

    async fn load(&self, id: &str) -> Result<SnapshotData> {
        let snapshot_path = self.snapshot_path(id);
        let metadata_path = self.metadata_path(id);

        // Missing payload file -> not found. A present payload with a missing
        // metadata sidecar surfaces as an IoError from the read below.
        if !snapshot_path.exists() {
            return Err(SnapshotError::SnapshotNotFound(id.to_string()));
        }

        // Load the metadata sidecar; it carries the expected checksum.
        let metadata_json = fs::read_to_string(&metadata_path).await?;
        let snapshot: Snapshot = serde_json::from_str(&metadata_json)?;

        // Load compressed data
        let compressed = fs::read(&snapshot_path).await?;

        // Decompress
        let decompressed = Self::decompress(&compressed)?;

        // Verify checksum of the decompressed bytes against the recorded one
        // (save computed it on the pre-compression bytes).
        let actual_checksum = Self::calculate_checksum(&decompressed);
        if actual_checksum != snapshot.checksum {
            return Err(SnapshotError::InvalidChecksum {
                expected: snapshot.checksum,
                actual: actual_checksum,
            });
        }

        // Deserialize with the same bincode configuration used by `save`.
        let config = bincode::config::standard();
        let (snapshot_data, _): (SnapshotData, usize) =
            bincode::decode_from_slice(&decompressed, config)
                .map_err(|e| SnapshotError::SerializationError(e.to_string()))?;

        Ok(snapshot_data)
    }

    async fn list(&self) -> Result<Vec<Snapshot>> {
        self.ensure_dir().await?;

        let mut snapshots = Vec::new();
        let mut entries = fs::read_dir(&self.base_path).await?;

        // Scan for `*.metadata.json` files; unparsable sidecars are skipped
        // silently rather than failing the whole listing.
        while let Some(entry) = entries.next_entry().await? {
            let path = entry.path();
            if let Some(extension) = path.extension() {
                if extension == "json" {
                    if let Some(file_name) = path.file_stem() {
                        let file_name_str = file_name.to_string_lossy();
                        if file_name_str.ends_with(".metadata") {
                            let contents = fs::read_to_string(&path).await?;
                            if let Ok(snapshot) = serde_json::from_str::<Snapshot>(&contents) {
                                snapshots.push(snapshot);
                            }
                        }
                    }
                }
            }
        }

        // Sort by creation date (newest first)
        snapshots.sort_by(|a, b| b.created_at.cmp(&a.created_at));

        Ok(snapshots)
    }

    async fn delete(&self, id: &str) -> Result<()> {
        let snapshot_path = self.snapshot_path(id);
        let metadata_path = self.metadata_path(id);

        if !snapshot_path.exists() {
            return Err(SnapshotError::SnapshotNotFound(id.to_string()));
        }

        // Delete both files; the metadata sidecar is removed only if present.
        fs::remove_file(&snapshot_path).await?;

        if metadata_path.exists() {
            fs::remove_file(&metadata_path).await?;
        }

        Ok(())
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::snapshot::{CollectionConfig, DistanceMetric, VectorRecord};

    // Compressing then decompressing must reproduce the input exactly.
    #[test]
    fn test_compression_roundtrip() {
        let data = b"Hello, World! This is test data for compression.";
        let compressed = LocalStorage::compress(data).unwrap();
        let decompressed = LocalStorage::decompress(&compressed).unwrap();
        assert_eq!(data.to_vec(), decompressed);
    }

    #[test]
    fn test_checksum_calculation() {
        let data = b"test data";
        let checksum = LocalStorage::calculate_checksum(data);
        assert_eq!(checksum.len(), 64); // SHA-256 produces 64 hex characters
    }

    // Full backend lifecycle against a real temp directory:
    // save -> list -> load -> delete.
    #[tokio::test]
    async fn test_local_storage_roundtrip() {
        let temp_dir = std::env::temp_dir().join("ruvector-snapshot-test");
        let storage = LocalStorage::new(temp_dir.clone());

        let config = CollectionConfig {
            dimension: 3,
            metric: DistanceMetric::Cosine,
            hnsw_config: None,
        };

        let vectors = vec![
            VectorRecord::new("v1".to_string(), vec![1.0, 0.0, 0.0], None),
            VectorRecord::new("v2".to_string(), vec![0.0, 1.0, 0.0], None),
        ];

        let snapshot_data = SnapshotData::new("test-collection".to_string(), config, vectors);
        let id = snapshot_data.id().to_string();

        // Save
        let snapshot = storage.save(&snapshot_data).await.unwrap();
        assert_eq!(snapshot.id, id);
        assert_eq!(snapshot.vectors_count, 2);

        // List — `!is_empty` rather than an exact count, since the shared
        // temp dir may contain leftovers from other runs.
        let snapshots = storage.list().await.unwrap();
        assert!(!snapshots.is_empty());

        // Load
        let loaded = storage.load(&id).await.unwrap();
        assert_eq!(loaded.id(), id);
        assert_eq!(loaded.vectors_count(), 2);

        // Delete
        storage.delete(&id).await.unwrap();

        // Cleanup (best-effort)
        let _ = std::fs::remove_dir_all(temp_dir);
    }
}
|
||||
Reference in New Issue
Block a user