Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
22
vendor/ruvector/crates/ruvector-collections/Cargo.toml
vendored
Normal file
22
vendor/ruvector/crates/ruvector-collections/Cargo.toml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
[package]
|
||||
name = "ruvector-collections"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
authors.workspace = true
|
||||
repository.workspace = true
|
||||
readme = "README.md"
|
||||
description = "High-performance collection management for Ruvector vector databases"
|
||||
|
||||
[dependencies]
|
||||
ruvector-core = { version = "2.0.2", path = "../ruvector-core" }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
dashmap = { workspace = true }
|
||||
parking_lot = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
bincode = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
196
vendor/ruvector/crates/ruvector-collections/README.md
vendored
Normal file
196
vendor/ruvector/crates/ruvector-collections/README.md
vendored
Normal file
@@ -0,0 +1,196 @@
|
||||
# Ruvector Collections
|
||||
|
||||
[Crates.io](https://crates.io/crates/ruvector-collections)
|
||||
[Documentation](https://docs.rs/ruvector-collections)
|
||||
[License: MIT](https://opensource.org/licenses/MIT)
|
||||
[Rust](https://www.rust-lang.org)
|
||||
|
||||
**High-performance collection management for Ruvector vector databases.**
|
||||
|
||||
`ruvector-collections` provides multi-tenant collection support with isolated namespaces, schema management, and collection-level configuration. Part of the [Ruvector](https://github.com/ruvnet/ruvector) ecosystem.
|
||||
|
||||
## Why Ruvector Collections?
|
||||
|
||||
- **Multi-Tenant**: Isolated collections with separate namespaces
|
||||
- **Schema Support**: Define and enforce vector schemas
|
||||
- **Collection Configs**: Per-collection settings for dimensions, metrics
|
||||
- **Thread-Safe**: Concurrent access with DashMap
|
||||
- **Metadata Support**: Rich collection metadata and tagging
|
||||
|
||||
## Features
|
||||
|
||||
### Core Capabilities
|
||||
|
||||
- **Collection CRUD**: Create, read, update, delete collections
|
||||
- **Namespace Isolation**: Logical separation between collections
|
||||
- **Schema Validation**: Enforce vector dimensions and types
|
||||
- **Metadata Management**: Tags, descriptions, custom properties
|
||||
- **Alias Support**: Human-readable names for collections
|
||||
|
||||
### Advanced Features
|
||||
|
||||
- **Collection Groups**: Organize collections hierarchically
|
||||
- **Access Control**: Collection-level permissions (planned)
|
||||
- **Versioning**: Collection schema versioning
|
||||
- **Migration**: Tools for collection migration
|
||||
- **Statistics**: Per-collection metrics and stats
|
||||
|
||||
## Installation
|
||||
|
||||
Add `ruvector-collections` to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
ruvector-collections = "0.1.1"
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Create a Collection
|
||||
|
||||
```rust
|
||||
use ruvector_collections::{CollectionManager, CollectionConfig, Schema};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Create collection manager
|
||||
let manager = CollectionManager::new()?;
|
||||
|
||||
// Define collection schema
|
||||
let schema = Schema {
|
||||
dimensions: 384,
|
||||
distance_metric: DistanceMetric::Cosine,
|
||||
vector_type: VectorType::Float32,
|
||||
};
|
||||
|
||||
// Create collection with config
|
||||
let config = CollectionConfig {
|
||||
name: "documents".to_string(),
|
||||
schema,
|
||||
description: Some("Document embeddings".to_string()),
|
||||
metadata: serde_json::json!({
|
||||
"model": "text-embedding-3-small",
|
||||
"created_by": "data-pipeline"
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let collection = manager.create_collection(config)?;
|
||||
println!("Created collection: {}", collection.id);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
### Manage Collections
|
||||
|
||||
```rust
|
||||
use ruvector_collections::CollectionManager;
|
||||
|
||||
let manager = CollectionManager::new()?;
|
||||
|
||||
// List all collections
|
||||
for collection in manager.list_collections()? {
|
||||
println!("{}: {} vectors", collection.name, collection.count);
|
||||
}
|
||||
|
||||
// Get collection by name
|
||||
let docs = manager.get_collection("documents")?;
|
||||
|
||||
// Update collection metadata
|
||||
manager.update_collection("documents", |c| {
|
||||
c.metadata["last_updated"] = serde_json::json!(chrono::Utc::now());
|
||||
})?;
|
||||
|
||||
// Delete collection
|
||||
manager.delete_collection("old_collection")?;
|
||||
```
|
||||
|
||||
### Collection Aliases
|
||||
|
||||
```rust
|
||||
// Create alias for collection
|
||||
manager.create_alias("docs", "documents_v2")?;
|
||||
|
||||
// Swap alias to new collection (zero-downtime migration)
|
||||
manager.swap_alias("docs", "documents_v3")?;
|
||||
|
||||
// Access via alias
|
||||
let collection = manager.get_collection_by_alias("docs")?;
|
||||
```
|
||||
|
||||
## API Overview
|
||||
|
||||
### Core Types
|
||||
|
||||
```rust
|
||||
// Collection configuration
|
||||
pub struct CollectionConfig {
|
||||
pub name: String,
|
||||
pub schema: Schema,
|
||||
pub description: Option<String>,
|
||||
pub metadata: serde_json::Value,
|
||||
pub replicas: usize,
|
||||
pub shards: usize,
|
||||
}
|
||||
|
||||
// Vector schema
|
||||
pub struct Schema {
|
||||
pub dimensions: usize,
|
||||
pub distance_metric: DistanceMetric,
|
||||
pub vector_type: VectorType,
|
||||
}
|
||||
|
||||
// Collection info
|
||||
pub struct Collection {
|
||||
pub id: Uuid,
|
||||
pub name: String,
|
||||
pub schema: Schema,
|
||||
pub count: usize,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
pub metadata: serde_json::Value,
|
||||
}
|
||||
```
|
||||
|
||||
### Manager Operations
|
||||
|
||||
```rust
|
||||
impl CollectionManager {
|
||||
pub fn new() -> Result<Self>;
|
||||
pub fn create_collection(&self, config: CollectionConfig) -> Result<Collection>;
|
||||
pub fn get_collection(&self, name: &str) -> Result<Option<Collection>>;
|
||||
pub fn list_collections(&self) -> Result<Vec<Collection>>;
|
||||
pub fn update_collection<F>(&self, name: &str, f: F) -> Result<Collection>;
|
||||
pub fn delete_collection(&self, name: &str) -> Result<bool>;
|
||||
pub fn create_alias(&self, alias: &str, collection: &str) -> Result<()>;
|
||||
pub fn delete_alias(&self, alias: &str) -> Result<bool>;
|
||||
}
|
||||
```
|
||||
|
||||
## Related Crates
|
||||
|
||||
- **[ruvector-core](../ruvector-core/)** - Core vector database engine
|
||||
- **[ruvector-server](../ruvector-server/)** - REST API server
|
||||
- **[ruvector-filter](../ruvector-filter/)** - Metadata filtering
|
||||
|
||||
## Documentation
|
||||
|
||||
- **[Main README](../../README.md)** - Complete project overview
|
||||
- **[API Documentation](https://docs.rs/ruvector-collections)** - Full API reference
|
||||
- **[GitHub Repository](https://github.com/ruvnet/ruvector)** - Source code
|
||||
|
||||
## License
|
||||
|
||||
**MIT License** - see [LICENSE](../../LICENSE) for details.
|
||||
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
|
||||
**Part of [Ruvector](https://github.com/ruvnet/ruvector) - Built by [rUv](https://ruv.io)**
|
||||
|
||||
[Ruvector on GitHub](https://github.com/ruvnet/ruvector)
|
||||
|
||||
[Documentation](https://docs.rs/ruvector-collections) | [Crates.io](https://crates.io/crates/ruvector-collections) | [GitHub](https://github.com/ruvnet/ruvector)
|
||||
|
||||
</div>
|
||||
253
vendor/ruvector/crates/ruvector-collections/src/collection.rs
vendored
Normal file
253
vendor/ruvector/crates/ruvector-collections/src/collection.rs
vendored
Normal file
@@ -0,0 +1,253 @@
|
||||
//! Collection types and operations
|
||||
|
||||
use ruvector_core::types::{DistanceMetric, HnswConfig, QuantizationConfig};
|
||||
use ruvector_core::vector_db::VectorDB;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::{CollectionError, Result};
|
||||
|
||||
/// Configuration for creating a collection
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CollectionConfig {
|
||||
/// Vector dimensions
|
||||
pub dimensions: usize,
|
||||
|
||||
/// Distance metric for similarity calculation
|
||||
pub distance_metric: DistanceMetric,
|
||||
|
||||
/// HNSW index configuration
|
||||
pub hnsw_config: Option<HnswConfig>,
|
||||
|
||||
/// Quantization configuration
|
||||
pub quantization: Option<QuantizationConfig>,
|
||||
|
||||
/// Whether to store payload data on disk
|
||||
pub on_disk_payload: bool,
|
||||
}
|
||||
|
||||
impl CollectionConfig {
|
||||
/// Validate the configuration
|
||||
pub fn validate(&self) -> Result<()> {
|
||||
if self.dimensions == 0 {
|
||||
return Err(CollectionError::InvalidConfiguration {
|
||||
message: "Dimensions must be greater than 0".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if self.dimensions > 100_000 {
|
||||
return Err(CollectionError::InvalidConfiguration {
|
||||
message: "Dimensions exceeds maximum of 100,000".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Validate HNSW config if present
|
||||
if let Some(ref hnsw_config) = self.hnsw_config {
|
||||
if hnsw_config.m == 0 {
|
||||
return Err(CollectionError::InvalidConfiguration {
|
||||
message: "HNSW M parameter must be greater than 0".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if hnsw_config.ef_construction < hnsw_config.m {
|
||||
return Err(CollectionError::InvalidConfiguration {
|
||||
message: "HNSW ef_construction must be >= M".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if hnsw_config.ef_search == 0 {
|
||||
return Err(CollectionError::InvalidConfiguration {
|
||||
message: "HNSW ef_search must be greater than 0".to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create a default configuration for the given dimensions
|
||||
pub fn with_dimensions(dimensions: usize) -> Self {
|
||||
Self {
|
||||
dimensions,
|
||||
distance_metric: DistanceMetric::Cosine,
|
||||
hnsw_config: Some(HnswConfig::default()),
|
||||
quantization: Some(QuantizationConfig::Scalar),
|
||||
on_disk_payload: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A collection of vectors with its own configuration
|
||||
pub struct Collection {
|
||||
/// Collection name
|
||||
pub name: String,
|
||||
|
||||
/// Collection configuration
|
||||
pub config: CollectionConfig,
|
||||
|
||||
/// Underlying vector database
|
||||
pub db: VectorDB,
|
||||
|
||||
/// When the collection was created (Unix timestamp in seconds)
|
||||
pub created_at: i64,
|
||||
|
||||
/// When the collection was last updated (Unix timestamp in seconds)
|
||||
pub updated_at: i64,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Collection {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("Collection")
|
||||
.field("name", &self.name)
|
||||
.field("config", &self.config)
|
||||
.field("created_at", &self.created_at)
|
||||
.field("updated_at", &self.updated_at)
|
||||
.field("db", &"<VectorDB>")
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Collection {
|
||||
/// Create a new collection
|
||||
pub fn new(name: String, config: CollectionConfig, storage_path: String) -> Result<Self> {
|
||||
// Validate configuration
|
||||
config.validate()?;
|
||||
|
||||
// Create VectorDB with the configuration
|
||||
let db_options = ruvector_core::types::DbOptions {
|
||||
dimensions: config.dimensions,
|
||||
distance_metric: config.distance_metric,
|
||||
storage_path,
|
||||
hnsw_config: config.hnsw_config.clone(),
|
||||
quantization: config.quantization.clone(),
|
||||
};
|
||||
|
||||
let db = VectorDB::new(db_options)?;
|
||||
|
||||
let now = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs() as i64;
|
||||
|
||||
Ok(Self {
|
||||
name,
|
||||
config,
|
||||
db,
|
||||
created_at: now,
|
||||
updated_at: now,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get collection statistics
|
||||
pub fn stats(&self) -> Result<CollectionStats> {
|
||||
let vectors_count = self.db.len()?;
|
||||
|
||||
Ok(CollectionStats {
|
||||
vectors_count,
|
||||
segments_count: 1, // Single segment for now
|
||||
disk_size_bytes: 0, // TODO: Implement disk size calculation
|
||||
ram_size_bytes: 0, // TODO: Implement RAM size calculation
|
||||
})
|
||||
}
|
||||
|
||||
/// Update the last modified timestamp
|
||||
pub fn touch(&mut self) {
|
||||
self.updated_at = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs() as i64;
|
||||
}
|
||||
}
|
||||
|
||||
/// Statistics about a collection
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CollectionStats {
|
||||
/// Number of vectors in the collection
|
||||
pub vectors_count: usize,
|
||||
|
||||
/// Number of segments (partitions) in the collection
|
||||
pub segments_count: usize,
|
||||
|
||||
/// Total disk space used (bytes)
|
||||
pub disk_size_bytes: u64,
|
||||
|
||||
/// Total RAM used (bytes)
|
||||
pub ram_size_bytes: u64,
|
||||
}
|
||||
|
||||
impl CollectionStats {
|
||||
/// Check if the collection is empty
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.vectors_count == 0
|
||||
}
|
||||
|
||||
/// Get human-readable disk size
|
||||
pub fn disk_size_human(&self) -> String {
|
||||
format_bytes(self.disk_size_bytes)
|
||||
}
|
||||
|
||||
/// Get human-readable RAM size
|
||||
pub fn ram_size_human(&self) -> String {
|
||||
format_bytes(self.ram_size_bytes)
|
||||
}
|
||||
}
|
||||
|
||||
/// Render a byte count as a human-readable size.
///
/// Zero is rendered as `"0 B"`. Any other value is divided by 1024 until it
/// drops below 1024 or the largest unit (`TB`) is reached, then printed with
/// two decimals, e.g. `512` -> `"512.00 B"` and `1536` -> `"1.50 KB"`.
fn format_bytes(bytes: u64) -> String {
    const UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];

    if bytes == 0 {
        return String::from("0 B");
    }

    let mut value = bytes as f64;
    let mut unit = UNITS[0];

    // Step up one unit at a time; values beyond the last unit stay in TB.
    for &next_unit in &UNITS[1..] {
        if value < 1024.0 {
            break;
        }
        value /= 1024.0;
        unit = next_unit;
    }

    format!("{:.2} {}", value, unit)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Config with the given dimensions and no index or quantization settings.
    fn bare_config(dimensions: usize) -> CollectionConfig {
        CollectionConfig {
            dimensions,
            distance_metric: DistanceMetric::Cosine,
            hnsw_config: None,
            quantization: None,
            on_disk_payload: true,
        }
    }

    #[test]
    fn test_collection_config_validation() {
        // The preset configuration is accepted.
        assert!(CollectionConfig::with_dimensions(384).validate().is_ok());

        // Zero dimensions are rejected.
        assert!(bare_config(0).validate().is_err());

        // Dimensions above the 100,000 limit are rejected.
        assert!(bare_config(200_000).validate().is_err());
    }

    #[test]
    fn test_format_bytes() {
        let cases: [(u64, &str); 6] = [
            (0, "0 B"),
            (512, "512.00 B"),
            (1024, "1.00 KB"),
            (1536, "1.50 KB"),
            (1048576, "1.00 MB"),
            (1073741824, "1.00 GB"),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(format_bytes(input), expected);
        }
    }
}
|
||||
102
vendor/ruvector/crates/ruvector-collections/src/error.rs
vendored
Normal file
102
vendor/ruvector/crates/ruvector-collections/src/error.rs
vendored
Normal file
@@ -0,0 +1,102 @@
|
||||
//! Error types for collection management
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
/// Result type for collection operations
|
||||
pub type Result<T> = std::result::Result<T, CollectionError>;
|
||||
|
||||
/// Errors that can occur during collection management
|
||||
#[derive(Debug, Error)]
|
||||
pub enum CollectionError {
|
||||
/// Collection was not found
|
||||
#[error("Collection not found: {name}")]
|
||||
CollectionNotFound {
|
||||
/// Name of the missing collection
|
||||
name: String,
|
||||
},
|
||||
|
||||
/// Collection already exists
|
||||
#[error("Collection already exists: {name}")]
|
||||
CollectionAlreadyExists {
|
||||
/// Name of the existing collection
|
||||
name: String,
|
||||
},
|
||||
|
||||
/// Alias was not found
|
||||
#[error("Alias not found: {alias}")]
|
||||
AliasNotFound {
|
||||
/// Name of the missing alias
|
||||
alias: String,
|
||||
},
|
||||
|
||||
/// Alias already exists
|
||||
#[error("Alias already exists: {alias}")]
|
||||
AliasAlreadyExists {
|
||||
/// Name of the existing alias
|
||||
alias: String,
|
||||
},
|
||||
|
||||
/// Invalid collection configuration
|
||||
#[error("Invalid configuration: {message}")]
|
||||
InvalidConfiguration {
|
||||
/// Error message
|
||||
message: String,
|
||||
},
|
||||
|
||||
/// Alias points to non-existent collection
|
||||
#[error("Alias '{alias}' points to non-existent collection '{collection}'")]
|
||||
InvalidAlias {
|
||||
/// Alias name
|
||||
alias: String,
|
||||
/// Target collection name
|
||||
collection: String,
|
||||
},
|
||||
|
||||
/// Cannot delete collection with active aliases
|
||||
#[error("Cannot delete collection '{collection}' because it has active aliases: {aliases:?}")]
|
||||
CollectionHasAliases {
|
||||
/// Collection name
|
||||
collection: String,
|
||||
/// List of aliases
|
||||
aliases: Vec<String>,
|
||||
},
|
||||
|
||||
/// Invalid collection name
|
||||
#[error("Invalid collection name: {name} - {reason}")]
|
||||
InvalidName {
|
||||
/// Collection name
|
||||
name: String,
|
||||
/// Reason for invalidity
|
||||
reason: String,
|
||||
},
|
||||
|
||||
/// Core database error
|
||||
#[error("Database error: {0}")]
|
||||
DatabaseError(#[from] ruvector_core::error::RuvectorError),
|
||||
|
||||
/// IO error
|
||||
#[error("IO error: {0}")]
|
||||
IoError(#[from] std::io::Error),
|
||||
|
||||
/// Serialization error
|
||||
#[error("Serialization error: {0}")]
|
||||
SerializationError(String),
|
||||
}
|
||||
|
||||
impl From<serde_json::Error> for CollectionError {
|
||||
fn from(err: serde_json::Error) -> Self {
|
||||
CollectionError::SerializationError(err.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<bincode::error::EncodeError> for CollectionError {
|
||||
fn from(err: bincode::error::EncodeError) -> Self {
|
||||
CollectionError::SerializationError(err.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<bincode::error::DecodeError> for CollectionError {
|
||||
fn from(err: bincode::error::DecodeError) -> Self {
|
||||
CollectionError::SerializationError(err.to_string())
|
||||
}
|
||||
}
|
||||
53
vendor/ruvector/crates/ruvector-collections/src/lib.rs
vendored
Normal file
53
vendor/ruvector/crates/ruvector-collections/src/lib.rs
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
//! # Ruvector Collections
|
||||
//!
|
||||
//! Multi-collection management with aliases for organizing vector databases.
|
||||
//!
|
||||
//! ## Features
|
||||
//!
|
||||
//! - **Multiple Collections**: Organize vectors into separate collections
|
||||
//! - **Alias Management**: Create aliases for collection names
|
||||
//! - **Collection Statistics**: Track collection metrics
|
||||
//! - **Thread-safe**: Concurrent access using DashMap
|
||||
//! - **Persistence**: Store collections on disk
|
||||
//!
|
||||
//! ## Example
|
||||
//!
|
||||
//! ```no_run
|
||||
//! use ruvector_collections::{CollectionManager, CollectionConfig};
|
||||
//! use ruvector_core::types::{DistanceMetric, HnswConfig};
|
||||
//! use std::path::PathBuf;
|
||||
//!
|
||||
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
//! // Create a collection manager
|
||||
//! let manager = CollectionManager::new(PathBuf::from("./collections"))?;
|
||||
//!
|
||||
//! // Create a collection
|
||||
//! let config = CollectionConfig {
|
||||
//! dimensions: 384,
|
||||
//! distance_metric: DistanceMetric::Cosine,
|
||||
//! hnsw_config: Some(HnswConfig::default()),
|
||||
//! quantization: None,
|
||||
//! on_disk_payload: true,
|
||||
//! };
|
||||
//!
|
||||
//! manager.create_collection("documents", config)?;
|
||||
//!
|
||||
//! // Create an alias
|
||||
//! manager.create_alias("current_docs", "documents")?;
|
||||
//!
|
||||
//! // Get collection by name or alias
|
||||
//! let collection = manager.get_collection("current_docs").unwrap();
|
||||
//! # Ok(())
|
||||
//! # }
|
||||
//! ```
|
||||
|
||||
#![warn(missing_docs)]
|
||||
#![warn(clippy::all)]
|
||||
|
||||
pub mod collection;
|
||||
pub mod error;
|
||||
pub mod manager;
|
||||
|
||||
pub use collection::{Collection, CollectionConfig, CollectionStats};
|
||||
pub use error::{CollectionError, Result};
|
||||
pub use manager::CollectionManager;
|
||||
522
vendor/ruvector/crates/ruvector-collections/src/manager.rs
vendored
Normal file
522
vendor/ruvector/crates/ruvector-collections/src/manager.rs
vendored
Normal file
@@ -0,0 +1,522 @@
|
||||
//! Collection manager for multi-collection operations
|
||||
|
||||
use dashmap::DashMap;
|
||||
use parking_lot::RwLock;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::collection::{Collection, CollectionConfig, CollectionStats};
|
||||
use crate::error::{CollectionError, Result};
|
||||
|
||||
/// Metadata for persisting collections
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||
struct CollectionMetadata {
|
||||
name: String,
|
||||
config: CollectionConfig,
|
||||
created_at: i64,
|
||||
updated_at: i64,
|
||||
}
|
||||
|
||||
/// Manages multiple vector collections with alias support
|
||||
#[derive(Debug)]
|
||||
pub struct CollectionManager {
|
||||
/// Active collections
|
||||
collections: DashMap<String, Arc<RwLock<Collection>>>,
|
||||
|
||||
/// Alias mappings (alias -> collection_name)
|
||||
aliases: DashMap<String, String>,
|
||||
|
||||
/// Base path for storing collections
|
||||
base_path: PathBuf,
|
||||
}
|
||||
|
||||
impl CollectionManager {
|
||||
/// Create a new collection manager
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `base_path` - Directory where collections will be stored
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```no_run
|
||||
/// use ruvector_collections::CollectionManager;
|
||||
/// use std::path::PathBuf;
|
||||
///
|
||||
/// let manager = CollectionManager::new(PathBuf::from("./collections")).unwrap();
|
||||
/// ```
|
||||
pub fn new(base_path: PathBuf) -> Result<Self> {
|
||||
// Create base directory if it doesn't exist
|
||||
std::fs::create_dir_all(&base_path)?;
|
||||
|
||||
let manager = Self {
|
||||
collections: DashMap::new(),
|
||||
aliases: DashMap::new(),
|
||||
base_path,
|
||||
};
|
||||
|
||||
// Load existing collections
|
||||
manager.load_collections()?;
|
||||
|
||||
Ok(manager)
|
||||
}
|
||||
|
||||
/// Create a new collection
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `name` - Collection name (must be unique)
|
||||
/// * `config` - Collection configuration
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns `CollectionAlreadyExists` if a collection with the same name exists
|
||||
pub fn create_collection(&self, name: &str, config: CollectionConfig) -> Result<()> {
|
||||
// Validate collection name
|
||||
Self::validate_name(name)?;
|
||||
|
||||
// Check if collection already exists
|
||||
if self.collections.contains_key(name) {
|
||||
return Err(CollectionError::CollectionAlreadyExists {
|
||||
name: name.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Check if an alias with this name exists
|
||||
if self.aliases.contains_key(name) {
|
||||
return Err(CollectionError::InvalidName {
|
||||
name: name.to_string(),
|
||||
reason: "An alias with this name already exists".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Create storage path for this collection
|
||||
let storage_path = self.base_path.join(name);
|
||||
std::fs::create_dir_all(&storage_path)?;
|
||||
|
||||
let db_path = storage_path
|
||||
.join("vectors.db")
|
||||
.to_string_lossy()
|
||||
.to_string();
|
||||
|
||||
// Create collection
|
||||
let collection = Collection::new(name.to_string(), config, db_path)?;
|
||||
|
||||
// Save metadata
|
||||
self.save_collection_metadata(&collection)?;
|
||||
|
||||
// Add to collections map
|
||||
self.collections
|
||||
.insert(name.to_string(), Arc::new(RwLock::new(collection)));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete a collection
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `name` - Collection name to delete
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns `CollectionNotFound` if collection doesn't exist
|
||||
/// Returns `CollectionHasAliases` if collection has active aliases
|
||||
pub fn delete_collection(&self, name: &str) -> Result<()> {
|
||||
// Check if collection exists
|
||||
if !self.collections.contains_key(name) {
|
||||
return Err(CollectionError::CollectionNotFound {
|
||||
name: name.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Check for active aliases
|
||||
let active_aliases: Vec<String> = self
|
||||
.aliases
|
||||
.iter()
|
||||
.filter(|entry| entry.value() == name)
|
||||
.map(|entry| entry.key().clone())
|
||||
.collect();
|
||||
|
||||
if !active_aliases.is_empty() {
|
||||
return Err(CollectionError::CollectionHasAliases {
|
||||
collection: name.to_string(),
|
||||
aliases: active_aliases,
|
||||
});
|
||||
}
|
||||
|
||||
// Remove from collections map
|
||||
self.collections.remove(name);
|
||||
|
||||
// Delete from disk
|
||||
let collection_path = self.base_path.join(name);
|
||||
if collection_path.exists() {
|
||||
std::fs::remove_dir_all(&collection_path)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get a collection by name or alias
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `name` - Collection name or alias
|
||||
pub fn get_collection(&self, name: &str) -> Option<Arc<RwLock<Collection>>> {
|
||||
// Try to resolve as alias first
|
||||
let collection_name = self.resolve_alias(name).unwrap_or_else(|| name.to_string());
|
||||
|
||||
self.collections
|
||||
.get(&collection_name)
|
||||
.map(|entry| entry.value().clone())
|
||||
}
|
||||
|
||||
/// List all collection names
|
||||
pub fn list_collections(&self) -> Vec<String> {
|
||||
self.collections
|
||||
.iter()
|
||||
.map(|entry| entry.key().clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Check if a collection exists
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `name` - Collection name (not alias)
|
||||
pub fn collection_exists(&self, name: &str) -> bool {
|
||||
self.collections.contains_key(name)
|
||||
}
|
||||
|
||||
/// Get statistics for a collection
|
||||
pub fn collection_stats(&self, name: &str) -> Result<CollectionStats> {
|
||||
let collection =
|
||||
self.get_collection(name)
|
||||
.ok_or_else(|| CollectionError::CollectionNotFound {
|
||||
name: name.to_string(),
|
||||
})?;
|
||||
|
||||
let guard = collection.read();
|
||||
guard.stats()
|
||||
}
|
||||
|
||||
// ===== Alias Management =====
|
||||
|
||||
/// Create an alias for a collection
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `alias` - Alias name (must be unique)
|
||||
/// * `collection` - Target collection name
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns `AliasAlreadyExists` if alias already exists
|
||||
/// Returns `CollectionNotFound` if target collection doesn't exist
|
||||
pub fn create_alias(&self, alias: &str, collection: &str) -> Result<()> {
|
||||
// Validate alias name
|
||||
Self::validate_name(alias)?;
|
||||
|
||||
// Check if alias already exists
|
||||
if self.aliases.contains_key(alias) {
|
||||
return Err(CollectionError::AliasAlreadyExists {
|
||||
alias: alias.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Check if a collection with this name exists
|
||||
if self.collections.contains_key(alias) {
|
||||
return Err(CollectionError::InvalidName {
|
||||
name: alias.to_string(),
|
||||
reason: "A collection with this name already exists".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Verify target collection exists
|
||||
if !self.collections.contains_key(collection) {
|
||||
return Err(CollectionError::CollectionNotFound {
|
||||
name: collection.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Create alias
|
||||
self.aliases
|
||||
.insert(alias.to_string(), collection.to_string());
|
||||
|
||||
// Save aliases
|
||||
self.save_aliases()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete an alias
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `alias` - Alias name to delete
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns `AliasNotFound` if alias doesn't exist
|
||||
pub fn delete_alias(&self, alias: &str) -> Result<()> {
|
||||
if self.aliases.remove(alias).is_none() {
|
||||
return Err(CollectionError::AliasNotFound {
|
||||
alias: alias.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Save aliases
|
||||
self.save_aliases()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Switch an alias to point to a different collection
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `alias` - Alias name
|
||||
/// * `new_collection` - New target collection name
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns `AliasNotFound` if alias doesn't exist
|
||||
/// Returns `CollectionNotFound` if new collection doesn't exist
|
||||
pub fn switch_alias(&self, alias: &str, new_collection: &str) -> Result<()> {
|
||||
// Verify alias exists
|
||||
if !self.aliases.contains_key(alias) {
|
||||
return Err(CollectionError::AliasNotFound {
|
||||
alias: alias.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Verify new collection exists
|
||||
if !self.collections.contains_key(new_collection) {
|
||||
return Err(CollectionError::CollectionNotFound {
|
||||
name: new_collection.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Update alias
|
||||
self.aliases
|
||||
.insert(alias.to_string(), new_collection.to_string());
|
||||
|
||||
// Save aliases
|
||||
self.save_aliases()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Resolve an alias to a collection name
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `name_or_alias` - Collection name or alias
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// `Some(collection_name)` if it's an alias, `None` if it's not an alias
|
||||
pub fn resolve_alias(&self, name_or_alias: &str) -> Option<String> {
|
||||
self.aliases
|
||||
.get(name_or_alias)
|
||||
.map(|entry| entry.value().clone())
|
||||
}
|
||||
|
||||
/// List all aliases with their target collections
|
||||
pub fn list_aliases(&self) -> Vec<(String, String)> {
|
||||
self.aliases
|
||||
.iter()
|
||||
.map(|entry| (entry.key().clone(), entry.value().clone()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Check if a name is an alias
|
||||
pub fn is_alias(&self, name: &str) -> bool {
|
||||
self.aliases.contains_key(name)
|
||||
}
|
||||
|
||||
// ===== Internal Methods =====
|
||||
|
||||
/// Validate a collection or alias name
|
||||
fn validate_name(name: &str) -> Result<()> {
|
||||
if name.is_empty() {
|
||||
return Err(CollectionError::InvalidName {
|
||||
name: name.to_string(),
|
||||
reason: "Name cannot be empty".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if name.len() > 255 {
|
||||
return Err(CollectionError::InvalidName {
|
||||
name: name.to_string(),
|
||||
reason: "Name too long (max 255 characters)".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Only allow alphanumeric, hyphens, underscores
|
||||
if !name
|
||||
.chars()
|
||||
.all(|c| c.is_alphanumeric() || c == '-' || c == '_')
|
||||
{
|
||||
return Err(CollectionError::InvalidName {
|
||||
name: name.to_string(),
|
||||
reason: "Name can only contain letters, numbers, hyphens, and underscores"
|
||||
.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load existing collections from disk
|
||||
fn load_collections(&self) -> Result<()> {
|
||||
if !self.base_path.exists() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Load aliases
|
||||
self.load_aliases()?;
|
||||
|
||||
// Scan for collection directories
|
||||
for entry in std::fs::read_dir(&self.base_path)? {
|
||||
let entry = entry?;
|
||||
let path = entry.path();
|
||||
|
||||
if path.is_dir() {
|
||||
let name = path
|
||||
.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
// Skip special directories
|
||||
if name.starts_with('.') || name == "aliases.json" {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Try to load collection metadata
|
||||
if let Ok(metadata) = self.load_collection_metadata(&name) {
|
||||
let db_path = path.join("vectors.db").to_string_lossy().to_string();
|
||||
|
||||
// Recreate collection
|
||||
if let Ok(mut collection) =
|
||||
Collection::new(metadata.name.clone(), metadata.config, db_path)
|
||||
{
|
||||
collection.created_at = metadata.created_at;
|
||||
collection.updated_at = metadata.updated_at;
|
||||
|
||||
self.collections
|
||||
.insert(name.clone(), Arc::new(RwLock::new(collection)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Save collection metadata to disk
|
||||
fn save_collection_metadata(&self, collection: &Collection) -> Result<()> {
|
||||
let metadata = CollectionMetadata {
|
||||
name: collection.name.clone(),
|
||||
config: collection.config.clone(),
|
||||
created_at: collection.created_at,
|
||||
updated_at: collection.updated_at,
|
||||
};
|
||||
|
||||
let metadata_path = self.base_path.join(&collection.name).join("metadata.json");
|
||||
|
||||
let json = serde_json::to_string_pretty(&metadata)?;
|
||||
std::fs::write(metadata_path, json)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load collection metadata from disk
|
||||
fn load_collection_metadata(&self, name: &str) -> Result<CollectionMetadata> {
|
||||
let metadata_path = self.base_path.join(name).join("metadata.json");
|
||||
let json = std::fs::read_to_string(metadata_path)?;
|
||||
let metadata: CollectionMetadata = serde_json::from_str(&json)?;
|
||||
Ok(metadata)
|
||||
}
|
||||
|
||||
/// Save aliases to disk
|
||||
fn save_aliases(&self) -> Result<()> {
|
||||
let aliases: HashMap<String, String> = self
|
||||
.aliases
|
||||
.iter()
|
||||
.map(|entry| (entry.key().clone(), entry.value().clone()))
|
||||
.collect();
|
||||
|
||||
let aliases_path = self.base_path.join("aliases.json");
|
||||
let json = serde_json::to_string_pretty(&aliases)?;
|
||||
std::fs::write(aliases_path, json)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load aliases from disk
|
||||
fn load_aliases(&self) -> Result<()> {
|
||||
let aliases_path = self.base_path.join("aliases.json");
|
||||
|
||||
if !aliases_path.exists() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let json = std::fs::read_to_string(aliases_path)?;
|
||||
let aliases: HashMap<String, String> = serde_json::from_str(&json)?;
|
||||
|
||||
for (alias, collection) in aliases {
|
||||
self.aliases.insert(alias, collection);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Name validation accepts the allowed character set and rejects empty,
    /// over-long and otherwise malformed names.
    #[test]
    fn test_validate_name() {
        assert!(CollectionManager::validate_name("valid-name_123").is_ok());
        assert!(CollectionManager::validate_name("").is_err());
        assert!(CollectionManager::validate_name("invalid name").is_err());
        assert!(CollectionManager::validate_name("invalid/name").is_err());

        // Length boundary: 255 is the longest accepted name.
        assert!(CollectionManager::validate_name(&"a".repeat(255)).is_ok());
        assert!(CollectionManager::validate_name(&"a".repeat(256)).is_err());
    }

    /// End-to-end check: create a collection, alias it, resolve through the
    /// alias, then tear everything down.
    #[test]
    fn test_collection_manager() -> Result<()> {
        // Per-process directory so concurrent test runs on the same machine
        // cannot delete each other's data.
        let temp_dir = std::env::temp_dir()
            .join(format!("ruvector_test_collections_{}", std::process::id()));
        let _ = std::fs::remove_dir_all(&temp_dir);

        let manager = CollectionManager::new(temp_dir.clone())?;

        // Create collection
        let config = CollectionConfig::with_dimensions(128);
        manager.create_collection("test", config)?;

        assert!(manager.collection_exists("test"));
        assert_eq!(manager.list_collections().len(), 1);

        // Create alias
        manager.create_alias("test_alias", "test")?;
        assert!(manager.is_alias("test_alias"));
        assert_eq!(
            manager.resolve_alias("test_alias"),
            Some("test".to_string())
        );

        // Get collection by alias
        assert!(manager.get_collection("test_alias").is_some());

        // Cleanup
        manager.delete_alias("test_alias")?;
        manager.delete_collection("test")?;
        let _ = std::fs::remove_dir_all(&temp_dir);

        Ok(())
    }
}
|
||||
Reference in New Issue
Block a user