Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,22 @@
[package]
name = "ruvector-collections"
version.workspace = true
edition.workspace = true
license.workspace = true
authors.workspace = true
repository.workspace = true
readme = "README.md"
description = "High-performance collection management for Ruvector vector databases"
[dependencies]
ruvector-core = { version = "2.0.2", path = "../ruvector-core" }
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }
dashmap = { workspace = true }
parking_lot = { workspace = true }
uuid = { workspace = true }
bincode = { workspace = true }
chrono = { workspace = true }
[dev-dependencies]

View File

@@ -0,0 +1,196 @@
# Ruvector Collections
[![Crates.io](https://img.shields.io/crates/v/ruvector-collections.svg)](https://crates.io/crates/ruvector-collections)
[![Documentation](https://docs.rs/ruvector-collections/badge.svg)](https://docs.rs/ruvector-collections)
[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![Rust](https://img.shields.io/badge/rust-1.77%2B-orange.svg)](https://www.rust-lang.org)
**High-performance collection management for Ruvector vector databases.**
`ruvector-collections` provides multi-tenant collection support with isolated namespaces, schema management, and collection-level configuration. Part of the [Ruvector](https://github.com/ruvnet/ruvector) ecosystem.
## Why Ruvector Collections?
- **Multi-Tenant**: Isolated collections with separate namespaces
- **Schema Support**: Define and enforce vector schemas
- **Collection Configs**: Per-collection settings for dimensions, metrics
- **Thread-Safe**: Concurrent access with DashMap
- **Metadata Support**: Rich collection metadata and tagging
## Features
### Core Capabilities
- **Collection CRUD**: Create, read, update, delete collections
- **Namespace Isolation**: Logical separation between collections
- **Schema Validation**: Enforce vector dimensions and types
- **Metadata Management**: Tags, descriptions, custom properties
- **Alias Support**: Human-readable names for collections
### Advanced Features
- **Collection Groups**: Organize collections hierarchically
- **Access Control**: Collection-level permissions (planned)
- **Versioning**: Collection schema versioning
- **Migration**: Tools for collection migration
- **Statistics**: Per-collection metrics and stats
## Installation
Add `ruvector-collections` to your `Cargo.toml`:
```toml
[dependencies]
ruvector-collections = "0.1.1"
```
## Quick Start
### Create a Collection
```rust
use ruvector_collections::{CollectionManager, CollectionConfig, Schema};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// Create collection manager
let manager = CollectionManager::new()?;
// Define collection schema
let schema = Schema {
dimensions: 384,
distance_metric: DistanceMetric::Cosine,
vector_type: VectorType::Float32,
};
// Create collection with config
let config = CollectionConfig {
name: "documents".to_string(),
schema,
description: Some("Document embeddings".to_string()),
metadata: serde_json::json!({
"model": "text-embedding-3-small",
"created_by": "data-pipeline"
}),
..Default::default()
};
let collection = manager.create_collection(config)?;
println!("Created collection: {}", collection.id);
Ok(())
}
```
### Manage Collections
```rust
use ruvector_collections::CollectionManager;
let manager = CollectionManager::new()?;
// List all collections
for collection in manager.list_collections()? {
println!("{}: {} vectors", collection.name, collection.count);
}
// Get collection by name
let docs = manager.get_collection("documents")?;
// Update collection metadata
manager.update_collection("documents", |c| {
c.metadata["last_updated"] = serde_json::json!(chrono::Utc::now());
})?;
// Delete collection
manager.delete_collection("old_collection")?;
```
### Collection Aliases
```rust
// Create alias for collection
manager.create_alias("docs", "documents_v2")?;
// Switch alias to new collection (zero-downtime migration)
manager.switch_alias("docs", "documents_v3")?;
// Access via alias (`get_collection` resolves aliases and returns an `Option`)
let collection = manager.get_collection("docs");
```
## API Overview
### Core Types
```rust
// Collection configuration
pub struct CollectionConfig {
pub name: String,
pub schema: Schema,
pub description: Option<String>,
pub metadata: serde_json::Value,
pub replicas: usize,
pub shards: usize,
}
// Vector schema
pub struct Schema {
pub dimensions: usize,
pub distance_metric: DistanceMetric,
pub vector_type: VectorType,
}
// Collection info
pub struct Collection {
pub id: Uuid,
pub name: String,
pub schema: Schema,
pub count: usize,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub metadata: serde_json::Value,
}
```
### Manager Operations
```rust
impl CollectionManager {
pub fn new() -> Result<Self>;
pub fn create_collection(&self, config: CollectionConfig) -> Result<Collection>;
pub fn get_collection(&self, name: &str) -> Result<Option<Collection>>;
pub fn list_collections(&self) -> Result<Vec<Collection>>;
pub fn update_collection<F>(&self, name: &str, f: F) -> Result<Collection>;
pub fn delete_collection(&self, name: &str) -> Result<bool>;
pub fn create_alias(&self, alias: &str, collection: &str) -> Result<()>;
pub fn delete_alias(&self, alias: &str) -> Result<bool>;
}
```
## Related Crates
- **[ruvector-core](../ruvector-core/)** - Core vector database engine
- **[ruvector-server](../ruvector-server/)** - REST API server
- **[ruvector-filter](../ruvector-filter/)** - Metadata filtering
## Documentation
- **[Main README](../../README.md)** - Complete project overview
- **[API Documentation](https://docs.rs/ruvector-collections)** - Full API reference
- **[GitHub Repository](https://github.com/ruvnet/ruvector)** - Source code
## License
**MIT License** - see [LICENSE](../../LICENSE) for details.
---
<div align="center">
**Part of [Ruvector](https://github.com/ruvnet/ruvector) - Built by [rUv](https://ruv.io)**
[![Star on GitHub](https://img.shields.io/github/stars/ruvnet/ruvector?style=social)](https://github.com/ruvnet/ruvector)
[Documentation](https://docs.rs/ruvector-collections) | [Crates.io](https://crates.io/crates/ruvector-collections) | [GitHub](https://github.com/ruvnet/ruvector)
</div>

View File

@@ -0,0 +1,253 @@
//! Collection types and operations
use ruvector_core::types::{DistanceMetric, HnswConfig, QuantizationConfig};
use ruvector_core::vector_db::VectorDB;
use serde::{Deserialize, Serialize};
use crate::error::{CollectionError, Result};
/// Configuration for creating a collection.
///
/// Serializable via serde; `CollectionManager` stores it inside each
/// collection's `metadata.json` so the collection can be recreated on load.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionConfig {
/// Number of dimensions per vector. `validate` rejects 0 and values above 100,000.
pub dimensions: usize,
/// Distance metric for similarity calculation
pub distance_metric: DistanceMetric,
/// Optional HNSW index configuration. When present, `validate` checks its
/// `m`, `ef_construction` and `ef_search` values.
pub hnsw_config: Option<HnswConfig>,
/// Optional quantization configuration, handed to the underlying database unchanged.
pub quantization: Option<QuantizationConfig>,
/// Whether to store payload data on disk
// NOTE(review): `Collection::new` does not copy this flag into `DbOptions`;
// confirm where (or whether) it is consumed.
pub on_disk_payload: bool,
}
impl CollectionConfig {
    /// Check that this configuration can be used to build a collection.
    ///
    /// # Errors
    ///
    /// Returns [`CollectionError::InvalidConfiguration`] when:
    /// - `dimensions` is 0 or greater than 100,000;
    /// - an HNSW configuration is present and its `m` is 0, its
    ///   `ef_construction` is smaller than `m`, or its `ef_search` is 0.
    pub fn validate(&self) -> Result<()> {
        // Every failure uses the same variant; only the message differs.
        let invalid = |message: &str| -> Result<()> {
            Err(CollectionError::InvalidConfiguration {
                message: message.to_string(),
            })
        };

        match self.dimensions {
            0 => return invalid("Dimensions must be greater than 0"),
            dims if dims > 100_000 => {
                return invalid("Dimensions exceeds maximum of 100,000");
            }
            _ => {}
        }

        // The HNSW section is optional; it is only checked when supplied.
        if let Some(hnsw) = self.hnsw_config.as_ref() {
            if hnsw.m == 0 {
                return invalid("HNSW M parameter must be greater than 0");
            }
            if hnsw.ef_construction < hnsw.m {
                return invalid("HNSW ef_construction must be >= M");
            }
            if hnsw.ef_search == 0 {
                return invalid("HNSW ef_search must be greater than 0");
            }
        }

        Ok(())
    }

    /// Build a configuration for `dimensions`-dimensional vectors using this
    /// crate's defaults: cosine distance, the default HNSW configuration,
    /// scalar quantization, and on-disk payload enabled.
    ///
    /// The result is not validated here; call [`CollectionConfig::validate`]
    /// if `dimensions` comes from untrusted input.
    pub fn with_dimensions(dimensions: usize) -> Self {
        let hnsw_config = Some(HnswConfig::default());
        let quantization = Some(QuantizationConfig::Scalar);

        Self {
            dimensions,
            distance_metric: DistanceMetric::Cosine,
            hnsw_config,
            quantization,
            on_disk_payload: true,
        }
    }
}
/// A collection of vectors with its own configuration.
///
/// `Debug` is implemented by hand for this type and prints a placeholder
/// instead of the contents of `db`.
pub struct Collection {
/// Collection name
pub name: String,
/// Collection configuration (validated by `Collection::new` before the database is created)
pub config: CollectionConfig,
/// Underlying vector database
pub db: VectorDB,
/// When the collection was created (Unix timestamp in seconds)
pub created_at: i64,
/// When the collection was last updated (Unix timestamp in seconds); refreshed by `touch`
pub updated_at: i64,
}
impl std::fmt::Debug for Collection {
    /// Format the collection as a struct, printing the literal `"<VectorDB>"`
    /// in place of the database handle.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("Collection");
        out.field("name", &self.name);
        out.field("config", &self.config);
        out.field("created_at", &self.created_at);
        out.field("updated_at", &self.updated_at);
        // The database handle is shown as a fixed placeholder string.
        out.field("db", &"<VectorDB>");
        out.finish()
    }
}
impl Collection {
    /// Create a new collection backed by a `VectorDB` at `storage_path`.
    ///
    /// # Arguments
    ///
    /// * `name` - Collection name, stored as-is (not validated here)
    /// * `config` - Collection configuration; validated before use
    /// * `storage_path` - Passed to the database as `DbOptions::storage_path`
    ///
    /// # Errors
    ///
    /// Returns `CollectionError::InvalidConfiguration` when `config` fails
    /// validation, or the converted error reported by `VectorDB::new`.
    pub fn new(name: String, config: CollectionConfig, storage_path: String) -> Result<Self> {
        // Reject bad configurations before touching storage.
        config.validate()?;

        let db_options = ruvector_core::types::DbOptions {
            dimensions: config.dimensions,
            distance_metric: config.distance_metric,
            storage_path,
            hnsw_config: config.hnsw_config.clone(),
            quantization: config.quantization.clone(),
        };
        let db = VectorDB::new(db_options)?;

        // A fresh collection starts with identical creation and update times.
        let now = Self::unix_timestamp_secs();
        Ok(Self {
            name,
            config,
            db,
            created_at: now,
            updated_at: now,
        })
    }

    /// Get collection statistics.
    ///
    /// Only `vectors_count` is measured (via the database's `len`); the
    /// segment count is fixed at 1 and both size fields are reported as 0
    /// until size accounting is implemented.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by the database's `len` call.
    pub fn stats(&self) -> Result<CollectionStats> {
        let vectors_count = self.db.len()?;
        Ok(CollectionStats {
            vectors_count,
            segments_count: 1,  // Single segment for now
            disk_size_bytes: 0, // TODO: Implement disk size calculation
            ram_size_bytes: 0,  // TODO: Implement RAM size calculation
        })
    }

    /// Update the last modified timestamp to the current time.
    pub fn touch(&mut self) {
        self.updated_at = Self::unix_timestamp_secs();
    }

    /// Current wall-clock time as whole seconds relative to the Unix epoch.
    ///
    /// Never panics: a clock set before the epoch yields a negative value
    /// instead of aborting, and out-of-range values saturate rather than wrap.
    fn unix_timestamp_secs() -> i64 {
        // Clamp before casting so the u64 -> i64 conversion cannot wrap.
        let clamp = |secs: u64| secs.min(i64::MAX as u64) as i64;

        match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
            Ok(since_epoch) => clamp(since_epoch.as_secs()),
            // `duration()` is how far the clock sits *before* the epoch.
            Err(before_epoch) => -clamp(before_epoch.duration().as_secs()),
        }
    }
}
/// Statistics about a collection, as produced by `Collection::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionStats {
/// Number of vectors in the collection
pub vectors_count: usize,
/// Number of segments (partitions) in the collection; `Collection::stats` currently always reports 1
pub segments_count: usize,
/// Total disk space used (bytes); `Collection::stats` currently always reports 0 (not yet implemented)
pub disk_size_bytes: u64,
/// Total RAM used (bytes); `Collection::stats` currently always reports 0 (not yet implemented)
pub ram_size_bytes: u64,
}
impl CollectionStats {
    /// Returns `true` when the collection holds no vectors.
    pub fn is_empty(&self) -> bool {
        matches!(self.vectors_count, 0)
    }

    /// Disk usage rendered with a unit suffix (for example `"1.50 KB"`).
    pub fn disk_size_human(&self) -> String {
        let bytes = self.disk_size_bytes;
        format_bytes(bytes)
    }

    /// RAM usage rendered with a unit suffix (for example `"1.50 KB"`).
    pub fn ram_size_human(&self) -> String {
        let bytes = self.ram_size_bytes;
        format_bytes(bytes)
    }
}
/// Render a byte count with a binary (1024-based) unit suffix.
///
/// Zero is rendered as `"0 B"`; every other value is printed with two
/// decimals and the largest unit from B, KB, MB, GB, TB that keeps the number
/// below 1024 (values beyond the TB range stay in TB).
fn format_bytes(bytes: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];

    if bytes == 0 {
        return "0 B".to_string();
    }

    let mut scaled = bytes as f64;
    let mut label = UNITS[0];
    // Step up one unit at a time until the value fits or units run out.
    for next in UNITS.iter().skip(1).copied() {
        if scaled < 1024.0 {
            break;
        }
        scaled /= 1024.0;
        label = next;
    }

    format!("{:.2} {}", scaled, label)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Configuration with no index or quantization settings, so only the
    /// dimension checks in `validate` can fail.
    fn bare_config(dimensions: usize) -> CollectionConfig {
        CollectionConfig {
            dimensions,
            distance_metric: DistanceMetric::Cosine,
            hnsw_config: None,
            quantization: None,
            on_disk_payload: true,
        }
    }

    #[test]
    fn test_collection_config_validation() {
        // Valid config
        assert!(CollectionConfig::with_dimensions(384).validate().is_ok());

        // Invalid: zero dimensions
        assert!(bare_config(0).validate().is_err());

        // Invalid: dimensions too large
        assert!(bare_config(200_000).validate().is_err());
    }

    #[test]
    fn test_format_bytes() {
        let cases: [(u64, &str); 6] = [
            (0, "0 B"),
            (512, "512.00 B"),
            (1024, "1.00 KB"),
            (1536, "1.50 KB"),
            (1048576, "1.00 MB"),
            (1073741824, "1.00 GB"),
        ];
        for &(bytes, expected) in cases.iter() {
            assert_eq!(format_bytes(bytes), expected);
        }
    }
}

View File

@@ -0,0 +1,102 @@
//! Error types for collection management
use thiserror::Error;
/// Result type for collection operations: `std::result::Result` with the
/// error type fixed to [`CollectionError`].
pub type Result<T> = std::result::Result<T, CollectionError>;
/// Errors that can occur during collection management.
///
/// `Display` text comes from the `#[error(...)]` attribute on each variant.
/// The `DatabaseError` and `IoError` variants wrap their source error via
/// `#[from]`, so `?` converts those error types automatically.
#[derive(Debug, Error)]
pub enum CollectionError {
/// Collection was not found
#[error("Collection not found: {name}")]
CollectionNotFound {
/// Name of the missing collection
name: String,
},
/// Collection already exists
#[error("Collection already exists: {name}")]
CollectionAlreadyExists {
/// Name of the existing collection
name: String,
},
/// Alias was not found
#[error("Alias not found: {alias}")]
AliasNotFound {
/// Name of the missing alias
alias: String,
},
/// Alias already exists
#[error("Alias already exists: {alias}")]
AliasAlreadyExists {
/// Name of the existing alias
alias: String,
},
/// Invalid collection configuration (produced by `CollectionConfig::validate`)
#[error("Invalid configuration: {message}")]
InvalidConfiguration {
/// Error message
message: String,
},
/// Alias points to non-existent collection
// NOTE(review): no code in the files shown here constructs this variant;
// confirm whether dangling aliases are meant to be detected on load.
#[error("Alias '{alias}' points to non-existent collection '{collection}'")]
InvalidAlias {
/// Alias name
alias: String,
/// Target collection name
collection: String,
},
/// Cannot delete collection with active aliases
#[error("Cannot delete collection '{collection}' because it has active aliases: {aliases:?}")]
CollectionHasAliases {
/// Collection name
collection: String,
/// List of aliases that still point at the collection
aliases: Vec<String>,
},
/// Invalid collection or alias name (also used when a new name clashes
/// with an existing alias or collection)
#[error("Invalid collection name: {name} - {reason}")]
InvalidName {
/// The rejected name
name: String,
/// Reason for invalidity
reason: String,
},
/// Core database error
#[error("Database error: {0}")]
DatabaseError(#[from] ruvector_core::error::RuvectorError),
/// IO error
#[error("IO error: {0}")]
IoError(#[from] std::io::Error),
/// Serialization error; holds only the display text of the original
/// serde_json or bincode error
#[error("Serialization error: {0}")]
SerializationError(String),
}
impl From<serde_json::Error> for CollectionError {
fn from(err: serde_json::Error) -> Self {
CollectionError::SerializationError(err.to_string())
}
}
impl From<bincode::error::EncodeError> for CollectionError {
fn from(err: bincode::error::EncodeError) -> Self {
CollectionError::SerializationError(err.to_string())
}
}
impl From<bincode::error::DecodeError> for CollectionError {
fn from(err: bincode::error::DecodeError) -> Self {
CollectionError::SerializationError(err.to_string())
}
}

View File

@@ -0,0 +1,53 @@
//! # Ruvector Collections
//!
//! Multi-collection management with aliases for organizing vector databases.
//!
//! ## Features
//!
//! - **Multiple Collections**: Organize vectors into separate collections
//! - **Alias Management**: Create aliases for collection names
//! - **Collection Statistics**: Track collection metrics
//! - **Thread-safe**: Concurrent access using DashMap
//! - **Persistence**: Store collections on disk
//!
//! ## Example
//!
//! ```no_run
//! use ruvector_collections::{CollectionManager, CollectionConfig};
//! use ruvector_core::types::{DistanceMetric, HnswConfig};
//! use std::path::PathBuf;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // Create a collection manager
//! let manager = CollectionManager::new(PathBuf::from("./collections"))?;
//!
//! // Create a collection
//! let config = CollectionConfig {
//! dimensions: 384,
//! distance_metric: DistanceMetric::Cosine,
//! hnsw_config: Some(HnswConfig::default()),
//! quantization: None,
//! on_disk_payload: true,
//! };
//!
//! manager.create_collection("documents", config)?;
//!
//! // Create an alias
//! manager.create_alias("current_docs", "documents")?;
//!
//! // Get collection by name or alias
//! let collection = manager.get_collection("current_docs").unwrap();
//! # Ok(())
//! # }
//! ```
#![warn(missing_docs)]
#![warn(clippy::all)]
pub mod collection;
pub mod error;
pub mod manager;
pub use collection::{Collection, CollectionConfig, CollectionStats};
pub use error::{CollectionError, Result};
pub use manager::CollectionManager;

View File

@@ -0,0 +1,522 @@
//! Collection manager for multi-collection operations
use dashmap::DashMap;
use parking_lot::RwLock;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use crate::collection::{Collection, CollectionConfig, CollectionStats};
use crate::error::{CollectionError, Result};
/// Metadata for persisting collections.
///
/// Written as pretty-printed JSON to `<base_path>/<name>/metadata.json` and
/// read back when the manager scans its base directory on startup.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
struct CollectionMetadata {
/// Collection name, copied from `Collection::name`
name: String,
/// Configuration used to recreate the collection on load
config: CollectionConfig,
/// Creation time, copied from `Collection::created_at`
created_at: i64,
/// Last-update time, copied from `Collection::updated_at`
updated_at: i64,
}
/// Manages multiple vector collections with alias support.
///
/// `create_collection` and `create_alias` each reject a name that is already
/// in use by the other kind, so a lookup by name or alias is unambiguous.
#[derive(Debug)]
pub struct CollectionManager {
/// Active collections, keyed by collection name
collections: DashMap<String, Arc<RwLock<Collection>>>,
/// Alias mappings (alias -> collection_name); persisted to `aliases.json` under `base_path`
aliases: DashMap<String, String>,
/// Base path for storing collections; each collection uses the sub-directory `base_path/<name>`
base_path: PathBuf,
}
impl CollectionManager {
/// Open a collection manager rooted at `base_path`.
///
/// The directory is created when missing, then any aliases and collections
/// already stored under it are loaded.
///
/// # Arguments
///
/// * `base_path` - Directory where collections will be stored
///
/// # Errors
///
/// Returns an error when the directory cannot be created or scanned, or
/// when an existing `aliases.json` cannot be read or parsed.
///
/// # Example
///
/// ```no_run
/// use ruvector_collections::CollectionManager;
/// use std::path::PathBuf;
///
/// let manager = CollectionManager::new(PathBuf::from("./collections")).unwrap();
/// ```
pub fn new(base_path: PathBuf) -> Result<Self> {
    // The storage root must exist before it can be scanned.
    std::fs::create_dir_all(&base_path)?;

    let collections = DashMap::new();
    let aliases = DashMap::new();
    let manager = Self {
        collections,
        aliases,
        base_path,
    };

    // Restore whatever an earlier run left under the root.
    manager.load_collections()?;
    Ok(manager)
}
/// Create a new collection and persist its metadata.
///
/// # Arguments
///
/// * `name` - Collection name (must be unique among collections and aliases)
/// * `config` - Collection configuration
///
/// # Errors
///
/// Returns `InvalidName` if `name` fails validation or is already used by an alias.
/// Returns `CollectionAlreadyExists` if a collection with the same name exists.
/// Also propagates failures from creating the directory, building the
/// collection, or writing its metadata file.
pub fn create_collection(&self, name: &str, config: CollectionConfig) -> Result<()> {
    Self::validate_name(name)?;

    // Collections and aliases share one namespace: reject a clash with either.
    if self.collections.contains_key(name) {
        return Err(CollectionError::CollectionAlreadyExists {
            name: name.to_string(),
        });
    }
    if self.aliases.contains_key(name) {
        return Err(CollectionError::InvalidName {
            name: name.to_string(),
            reason: "An alias with this name already exists".to_string(),
        });
    }

    // Each collection owns `<base_path>/<name>/`, with its database inside.
    let collection_dir = self.base_path.join(name);
    std::fs::create_dir_all(&collection_dir)?;
    let db_file = collection_dir.join("vectors.db");
    let db_path = db_file.to_string_lossy().into_owned();

    let collection = Collection::new(name.to_string(), config, db_path)?;

    // Persist metadata first so the collection is only registered once it is on disk.
    self.save_collection_metadata(&collection)?;

    let shared = Arc::new(RwLock::new(collection));
    self.collections.insert(name.to_string(), shared);
    Ok(())
}
/// Delete a collection from memory and remove its directory from disk.
///
/// # Arguments
///
/// * `name` - Collection name to delete (aliases are not resolved)
///
/// # Errors
///
/// Returns `CollectionNotFound` if collection doesn't exist
/// Returns `CollectionHasAliases` if collection has active aliases
/// Returns an I/O error if the collection directory cannot be removed; in
/// that case the collection has already been dropped from the in-memory map.
pub fn delete_collection(&self, name: &str) -> Result<()> {
    if !self.collections.contains_key(name) {
        return Err(CollectionError::CollectionNotFound {
            name: name.to_string(),
        });
    }

    // Collect every alias that still points at this collection.
    let mut active_aliases: Vec<String> = Vec::new();
    for entry in self.aliases.iter() {
        if entry.value() == name {
            active_aliases.push(entry.key().clone());
        }
    }
    if !active_aliases.is_empty() {
        return Err(CollectionError::CollectionHasAliases {
            collection: name.to_string(),
            aliases: active_aliases,
        });
    }

    // Drop the in-memory entry before touching the files on disk.
    self.collections.remove(name);

    let collection_path = self.base_path.join(name);
    if collection_path.exists() {
        std::fs::remove_dir_all(&collection_path)?;
    }
    Ok(())
}
/// Look up a collection by name or alias.
///
/// An alias takes precedence: when `name` is a registered alias, its target
/// is looked up; otherwise `name` is treated as a collection name.
///
/// # Arguments
///
/// * `name` - Collection name or alias
///
/// # Returns
///
/// A shared handle to the collection, or `None` when nothing matches.
pub fn get_collection(&self, name: &str) -> Option<Arc<RwLock<Collection>>> {
    let target = match self.resolve_alias(name) {
        Some(resolved) => resolved,
        None => name.to_string(),
    };

    let entry = self.collections.get(&target)?;
    Some(Arc::clone(entry.value()))
}
/// Return the names of all loaded collections (aliases are not included).
///
/// The order follows the map's iteration order and is not sorted.
pub fn list_collections(&self) -> Vec<String> {
    let mut names = Vec::new();
    for entry in self.collections.iter() {
        names.push(entry.key().clone());
    }
    names
}
/// Report whether a collection with exactly this name is loaded.
///
/// # Arguments
///
/// * `name` - Collection name (not alias); aliases are not resolved here
pub fn collection_exists(&self, name: &str) -> bool {
    let is_loaded = self.collections.contains_key(name);
    is_loaded
}
/// Get statistics for a collection, addressed by name or alias.
///
/// # Errors
///
/// Returns `CollectionNotFound` (carrying the name as given) when neither a
/// collection nor an alias matches, or the error from `Collection::stats`.
pub fn collection_stats(&self, name: &str) -> Result<CollectionStats> {
    match self.get_collection(name) {
        Some(collection) => {
            // A read lock suffices: statistics never mutate the collection.
            let guard = collection.read();
            guard.stats()
        }
        None => Err(CollectionError::CollectionNotFound {
            name: name.to_string(),
        }),
    }
}
// ===== Alias Management =====
/// Register `alias` as an alternative name for `collection` and persist the
/// alias table.
///
/// # Arguments
///
/// * `alias` - Alias name (must be unique among aliases and collections)
/// * `collection` - Target collection name
///
/// # Errors
///
/// Returns `InvalidName` if `alias` fails validation or a collection already uses that name
/// Returns `AliasAlreadyExists` if alias already exists
/// Returns `CollectionNotFound` if target collection doesn't exist
/// Also propagates failures from writing the alias file.
pub fn create_alias(&self, alias: &str, collection: &str) -> Result<()> {
    Self::validate_name(alias)?;

    if self.aliases.contains_key(alias) {
        return Err(CollectionError::AliasAlreadyExists {
            alias: alias.to_string(),
        });
    }

    // An alias may not shadow a real collection name.
    if self.collections.contains_key(alias) {
        return Err(CollectionError::InvalidName {
            name: alias.to_string(),
            reason: "A collection with this name already exists".to_string(),
        });
    }

    // The target must be an existing collection.
    if !self.collections.contains_key(collection) {
        return Err(CollectionError::CollectionNotFound {
            name: collection.to_string(),
        });
    }

    let target = collection.to_string();
    self.aliases.insert(alias.to_string(), target);

    // Write the updated alias table to disk.
    self.save_aliases()
}
/// Remove an alias and persist the alias table.
///
/// # Arguments
///
/// * `alias` - Alias name to delete
///
/// # Errors
///
/// Returns `AliasNotFound` if alias doesn't exist
/// Also propagates failures from writing the alias file.
pub fn delete_alias(&self, alias: &str) -> Result<()> {
    match self.aliases.remove(alias) {
        // Something was removed, so the on-disk table must be rewritten.
        Some(_) => self.save_aliases(),
        None => Err(CollectionError::AliasNotFound {
            alias: alias.to_string(),
        }),
    }
}
/// Re-point an existing alias at a different collection and persist the
/// alias table.
///
/// # Arguments
///
/// * `alias` - Alias name
/// * `new_collection` - New target collection name
///
/// # Errors
///
/// Returns `AliasNotFound` if alias doesn't exist
/// Returns `CollectionNotFound` if new collection doesn't exist
/// Also propagates failures from writing the alias file.
pub fn switch_alias(&self, alias: &str, new_collection: &str) -> Result<()> {
    let alias_known = self.aliases.contains_key(alias);
    if !alias_known {
        return Err(CollectionError::AliasNotFound {
            alias: alias.to_string(),
        });
    }

    let target_known = self.collections.contains_key(new_collection);
    if !target_known {
        return Err(CollectionError::CollectionNotFound {
            name: new_collection.to_string(),
        });
    }

    // Inserting under an existing key replaces the previous target.
    let target = new_collection.to_string();
    self.aliases.insert(alias.to_string(), target);

    self.save_aliases()
}
/// Look up the collection name an alias points to.
///
/// # Arguments
///
/// * `name_or_alias` - Collection name or alias
///
/// # Returns
///
/// `Some(collection_name)` if it's an alias, `None` if it's not an alias
/// (including when it is the name of a collection).
pub fn resolve_alias(&self, name_or_alias: &str) -> Option<String> {
    let entry = self.aliases.get(name_or_alias)?;
    Some(entry.value().clone())
}
/// Return every alias as an `(alias, collection_name)` pair.
///
/// The order follows the map's iteration order and is not sorted.
pub fn list_aliases(&self) -> Vec<(String, String)> {
    let mut pairs = Vec::new();
    for entry in self.aliases.iter() {
        pairs.push((entry.key().clone(), entry.value().clone()));
    }
    pairs
}
/// Report whether `name` is currently registered as an alias.
pub fn is_alias(&self, name: &str) -> bool {
    let registered = self.aliases.contains_key(name);
    registered
}
// ===== Internal Methods =====
/// Check that a collection or alias name is acceptable.
///
/// A name must be non-empty, at most 255 bytes long (`str::len`), and made
/// up only of alphanumeric characters, hyphens and underscores.
///
/// # Errors
///
/// Returns `InvalidName` carrying the name and the first rule it violates.
fn validate_name(name: &str) -> Result<()> {
    // Rules are checked in order; the first violated one supplies the reason.
    let reason = if name.is_empty() {
        "Name cannot be empty"
    } else if name.len() > 255 {
        "Name too long (max 255 characters)"
    } else if name
        .chars()
        .any(|c| !(c.is_alphanumeric() || c == '-' || c == '_'))
    {
        "Name can only contain letters, numbers, hyphens, and underscores"
    } else {
        return Ok(());
    };

    Err(CollectionError::InvalidName {
        name: name.to_string(),
        reason: reason.to_string(),
    })
}
/// Load existing collections from disk.
///
/// Reads the alias table first, then treats every sub-directory of the base
/// path as a candidate collection. Loading is best-effort per directory: one
/// whose `metadata.json` is missing or unreadable, or whose collection cannot
/// be recreated, is skipped without failing the whole load.
///
/// Directories whose names are not valid UTF-8 are skipped as well. They are
/// no longer collapsed to an empty name, which made the loader read
/// `<base_path>/metadata.json` and could register a collection under `""`.
///
/// # Errors
///
/// Returns an error when the alias file exists but cannot be read or parsed,
/// or when the base directory cannot be listed.
fn load_collections(&self) -> Result<()> {
    if !self.base_path.exists() {
        return Ok(());
    }

    // Load aliases
    self.load_aliases()?;

    // Scan for collection directories
    for entry in std::fs::read_dir(&self.base_path)? {
        let path = entry?.path();
        if !path.is_dir() {
            continue;
        }

        // Skip directories whose name cannot be represented as UTF-8.
        let name = match path.file_name().and_then(|n| n.to_str()) {
            Some(name) => name.to_string(),
            None => continue,
        };

        // Skip special directories
        if name.starts_with('.') || name == "aliases.json" {
            continue;
        }

        // Deliberate best-effort: a directory without usable metadata is ignored.
        let metadata = match self.load_collection_metadata(&name) {
            Ok(metadata) => metadata,
            Err(_) => continue,
        };

        let db_path = path.join("vectors.db").to_string_lossy().to_string();

        // Recreate the collection, then restore its persisted timestamps
        // (`Collection::new` stamps both fields with the current time).
        if let Ok(mut collection) =
            Collection::new(metadata.name.clone(), metadata.config, db_path)
        {
            collection.created_at = metadata.created_at;
            collection.updated_at = metadata.updated_at;
            self.collections
                .insert(name, Arc::new(RwLock::new(collection)));
        }
    }

    Ok(())
}
/// Save collection metadata to disk
fn save_collection_metadata(&self, collection: &Collection) -> Result<()> {
let metadata = CollectionMetadata {
name: collection.name.clone(),
config: collection.config.clone(),
created_at: collection.created_at,
updated_at: collection.updated_at,
};
let metadata_path = self.base_path.join(&collection.name).join("metadata.json");
let json = serde_json::to_string_pretty(&metadata)?;
std::fs::write(metadata_path, json)?;
Ok(())
}
/// Read and parse `<base_path>/<name>/metadata.json`.
///
/// # Errors
///
/// Returns an I/O error if the file cannot be read, or a serialization
/// error if its contents are not valid metadata JSON.
fn load_collection_metadata(&self, name: &str) -> Result<CollectionMetadata> {
    let collection_dir = self.base_path.join(name);
    let json = std::fs::read_to_string(collection_dir.join("metadata.json"))?;
    Ok(serde_json::from_str::<CollectionMetadata>(&json)?)
}
/// Write the whole alias table to `<base_path>/aliases.json` as
/// pretty-printed JSON, replacing any previous file contents.
///
/// # Errors
///
/// Returns a serialization error if the table cannot be encoded, or an I/O
/// error if the file cannot be written.
fn save_aliases(&self) -> Result<()> {
    // Copy the concurrent map into a plain map for serialization.
    let mut snapshot: HashMap<String, String> = HashMap::new();
    for entry in self.aliases.iter() {
        snapshot.insert(entry.key().clone(), entry.value().clone());
    }

    let json = serde_json::to_string_pretty(&snapshot)?;
    std::fs::write(self.base_path.join("aliases.json"), json)?;
    Ok(())
}
/// Read `<base_path>/aliases.json`, if present, and add every entry to the
/// in-memory alias table.
///
/// A missing file is not an error: the table is simply left as it is.
/// Targets are not checked against the set of loaded collections here.
///
/// # Errors
///
/// Returns an I/O error if the file exists but cannot be read, or a
/// serialization error if it is not a JSON object of string pairs.
fn load_aliases(&self) -> Result<()> {
    let aliases_path = self.base_path.join("aliases.json");
    if !aliases_path.exists() {
        return Ok(());
    }

    let json = std::fs::read_to_string(aliases_path)?;
    let parsed: HashMap<String, String> = serde_json::from_str(&json)?;
    parsed.into_iter().for_each(|(alias, target)| {
        self.aliases.insert(alias, target);
    });
    Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_validate_name() {
        // Letters, digits, hyphens and underscores are accepted.
        assert!(CollectionManager::validate_name("valid-name_123").is_ok());

        // Empty names, whitespace and path separators are rejected.
        for bad in ["", "invalid name", "invalid/name"].iter().copied() {
            assert!(CollectionManager::validate_name(bad).is_err());
        }
    }

    #[test]
    fn test_collection_manager() -> Result<()> {
        // Start from a clean directory; leftovers from earlier runs are removed.
        let root = std::env::temp_dir().join("ruvector_test_collections");
        let _ = std::fs::remove_dir_all(&root);
        let manager = CollectionManager::new(root.clone())?;

        // Create collection
        manager.create_collection("test", CollectionConfig::with_dimensions(128))?;
        assert!(manager.collection_exists("test"));
        assert_eq!(manager.list_collections().len(), 1);

        // Create alias
        manager.create_alias("test_alias", "test")?;
        assert!(manager.is_alias("test_alias"));
        let resolved = manager.resolve_alias("test_alias");
        assert_eq!(resolved, Some("test".to_string()));

        // Get collection by alias
        assert!(manager.get_collection("test_alias").is_some());

        // Cleanup: the alias must go first, or the collection cannot be deleted.
        manager.delete_alias("test_alias")?;
        manager.delete_collection("test")?;
        let _ = std::fs::remove_dir_all(&root);
        Ok(())
    }
}