git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
454 lines
13 KiB
Rust
454 lines
13 KiB
Rust
//! Integration tests for end-to-end workflows
//!
//! These tests verify that all components work together correctly.
|
|
|
|
use ruvector_core::types::{DbOptions, DistanceMetric, HnswConfig, SearchQuery};
|
|
use ruvector_core::{VectorDB, VectorEntry};
|
|
use std::collections::HashMap;
|
|
use tempfile::tempdir;
|
|
|
|
// ============================================================================
// End-to-End Workflow Tests
// ============================================================================
|
|
|
|
/// Happy-path workflow: open a database configured with an explicit
/// `HnswConfig` and cosine distance, batch-insert 100 entries that carry
/// metadata, then run a single top-10 query against it.
#[test]
fn test_complete_insert_search_workflow() {
    // The storage path points at a file inside this temporary directory.
    let workspace = tempdir().unwrap();
    let db_file = workspace.path().join("test.db");

    let mut opts = DbOptions::default();
    opts.storage_path = db_file.to_string_lossy().to_string();
    opts.dimensions = 128;
    opts.distance_metric = DistanceMetric::Cosine;
    opts.hnsw_config = Some(HnswConfig {
        m: 16,
        ef_construction: 100,
        ef_search: 50,
        max_elements: 100_000,
    });

    let db = VectorDB::new(opts).unwrap();

    // Insert training data: entry `i` holds the ramp (i + j) * 0.01 and
    // records `i` under the "index" metadata key.
    let mut entries: Vec<VectorEntry> = Vec::with_capacity(100);
    for i in 0..100 {
        let mut metadata = HashMap::new();
        metadata.insert("index".to_string(), serde_json::json!(i));

        entries.push(VectorEntry {
            id: Some(format!("vec_{}", i)),
            vector: (0..128).map(|j| ((i + j) as f32) * 0.01).collect(),
            metadata: Some(metadata),
        });
    }

    let inserted_ids = db.insert_batch(entries).unwrap();
    assert_eq!(inserted_ids.len(), 100);

    // Search for similar vectors using the same ramp that entry 0 was built from.
    let probe: Vec<f32> = (0..128).map(|j| (j as f32) * 0.01).collect();
    let request = SearchQuery {
        vector: probe,
        k: 10,
        filter: None,
        ef_search: Some(100),
    };
    let hits = db.search(request).unwrap();

    assert_eq!(hits.len(), 10);

    // The best hit must come back with both its vector and its metadata.
    let best = &hits[0];
    assert!(best.vector.is_some());
    assert!(best.metadata.is_some());
}
|
|
|
|
/// Bulk workflow: batch-insert 10,000 entries of 384 dimensions into a
/// Euclidean database with the default `HnswConfig`, print how long the insert
/// took, then run ten top-10 queries.
#[test]
fn test_batch_operations_10k_vectors() {
    let workspace = tempdir().unwrap();
    let db_file = workspace.path().join("test.db");

    let mut opts = DbOptions::default();
    opts.storage_path = db_file.to_string_lossy().to_string();
    opts.dimensions = 384;
    opts.distance_metric = DistanceMetric::Euclidean;
    opts.hnsw_config = Some(HnswConfig::default());

    let db = VectorDB::new(opts).unwrap();

    // Generate 10K vectors; entry `i` holds the ramp (i + j) * 0.001.
    println!("Generating 10K vectors...");
    let mut batch: Vec<VectorEntry> = Vec::with_capacity(10_000);
    for i in 0..10_000 {
        batch.push(VectorEntry {
            id: Some(format!("vec_{}", i)),
            vector: (0..384).map(|j| ((i + j) as f32) * 0.001).collect(),
            metadata: None,
        });
    }

    // Batch insert, timing only the insert call itself.
    println!("Batch inserting 10K vectors...");
    let timer = std::time::Instant::now();
    let inserted_ids = db.insert_batch(batch).unwrap();
    let elapsed = timer.elapsed();
    println!("Batch insert took: {:?}", elapsed);

    assert_eq!(inserted_ids.len(), 10_000);
    assert_eq!(db.len().unwrap(), 10_000);

    // Perform multiple searches, each probe shifted by a further 100 steps along the ramp.
    println!("Performing searches...");
    for offset in (0..10).map(|i| i * 100) {
        let probe: Vec<f32> = (0..384).map(|j| ((offset + j) as f32) * 0.001).collect();
        let request = SearchQuery {
            vector: probe,
            k: 10,
            filter: None,
            ef_search: None,
        };

        let hits = db.search(request).unwrap();
        assert_eq!(hits.len(), 10);
    }
}
|
|
|
|
/// Persistence workflow: write ten entries through one `VectorDB` handle, let
/// that handle go out of scope, reopen the same storage path and check that
/// the entries are still there.
#[test]
fn test_persistence_and_reload() {
    let workspace = tempdir().unwrap();
    let db_path = workspace
        .path()
        .join("persistent.db")
        .to_string_lossy()
        .to_string();

    // Both phases open the database with exactly the same settings.
    let open_db = || {
        let mut opts = DbOptions::default();
        opts.storage_path = db_path.clone();
        opts.dimensions = 3;
        opts.hnsw_config = None; // Use flat index for simpler persistence test
        VectorDB::new(opts).unwrap()
    };

    // Create and populate database; the handle is dropped at the end of this scope.
    {
        let db = open_db();

        for i in 0..10 {
            // Entry `i` is [i, 2i, 3i].
            let entry = VectorEntry {
                id: Some(format!("vec_{}", i)),
                vector: (1..=3).map(|factor| (i * factor) as f32).collect(),
                metadata: None,
            };
            db.insert(entry).unwrap();
        }

        assert_eq!(db.len().unwrap(), 10);
    }

    // Reload database
    {
        let db = open_db();

        // Verify data persisted
        assert_eq!(db.len().unwrap(), 10);

        let reloaded = db.get("vec_5").unwrap().unwrap();
        assert_eq!(reloaded.vector, vec![5.0, 10.0, 15.0]);
    }
}
|
|
|
|
/// Mixed workflow on a 64-dimensional database with otherwise default options:
/// batch-insert 50 entries, delete the first 10, insert 10 more one at a time,
/// and finally run a top-20 query. The entry count is checked after each step.
#[test]
fn test_mixed_operations_workflow() {
    let dir = tempdir().unwrap();
    let mut options = DbOptions::default();
    options.storage_path = dir.path().join("test.db").to_string_lossy().to_string();
    options.dimensions = 64;

    let db = VectorDB::new(options).unwrap();

    // Insert initial batch: entry `i` holds the ramp (i + j) * 0.1.
    let initial: Vec<VectorEntry> = (0..50)
        .map(|i| VectorEntry {
            id: Some(format!("vec_{}", i)),
            vector: (0..64).map(|j| ((i + j) as f32) * 0.1).collect(),
            metadata: None,
        })
        .collect();

    db.insert_batch(initial).unwrap();
    assert_eq!(db.len().unwrap(), 50);

    // Delete some vectors
    for i in 0..10 {
        db.delete(&format!("vec_{}", i)).unwrap();
    }
    assert_eq!(db.len().unwrap(), 40);

    // Insert more individual vectors, bringing the count back up to 50.
    for i in 50..60 {
        db.insert(VectorEntry {
            id: Some(format!("vec_{}", i)),
            vector: (0..64).map(|j| ((i + j) as f32) * 0.1).collect(),
            metadata: None,
        })
        .unwrap();
    }
    assert_eq!(db.len().unwrap(), 50);

    // Search
    let query: Vec<f32> = (0..64).map(|j| (j as f32) * 0.1).collect();
    let results = db
        .search(SearchQuery {
            vector: query,
            k: 20,
            filter: None,
            ef_search: None,
        })
        .unwrap();

    // Only require that something comes back; the exact hit count is not pinned here.
    assert!(
        !results.is_empty(),
        "search returned no results after mixed insert/delete operations"
    );
}
|
|
|
|
// ============================================================================
// Different Distance Metrics
// ============================================================================
|
|
|
|
/// Runs the same insert-then-search scenario once per `DistanceMetric`
/// variant, each against a fresh database opened with `hnsw_config = None`,
/// and checks that five results come back ordered by non-decreasing score.
#[test]
fn test_all_distance_metrics() {
    let metrics = [
        DistanceMetric::Euclidean,
        DistanceMetric::Cosine,
        DistanceMetric::DotProduct,
        DistanceMetric::Manhattan,
    ];

    for metric in metrics {
        let dir = tempdir().unwrap();
        let mut options = DbOptions::default();
        options.storage_path = dir.path().join("test.db").to_string_lossy().to_string();
        options.dimensions = 32;
        options.distance_metric = metric;
        options.hnsw_config = None;

        let db = VectorDB::new(options).unwrap();

        // Insert test vectors: entry `i` holds the ramp (i + j) * 0.1.
        for i in 0..20 {
            db.insert(VectorEntry {
                id: Some(format!("vec_{}", i)),
                vector: (0..32).map(|j| ((i + j) as f32) * 0.1).collect(),
                metadata: None,
            })
            .unwrap();
        }

        // Search
        let query: Vec<f32> = (0..32).map(|j| (j as f32) * 0.1).collect();
        let results = db
            .search(SearchQuery {
                vector: query,
                k: 5,
                filter: None,
                ef_search: None,
            })
            .unwrap();

        assert_eq!(results.len(), 5, "Failed for metric {:?}", metric);

        // Verify scores are in ascending order (lower is better for distance).
        // `windows(2)` visits each adjacent pair and yields nothing for fewer
        // than two results, so there is no `len() - 1` underflow to guard.
        for pair in results.windows(2) {
            assert!(
                pair[0].score <= pair[1].score,
                "Results not sorted for metric {:?}: {} > {}",
                metric,
                pair[0].score,
                pair[1].score
            );
        }
    }
}
|
|
|
|
// ============================================================================
// HNSW Configuration Tests
// ============================================================================
|
|
|
|
/// Runs an insert-then-search scenario under three `HnswConfig` settings of
/// increasing `m` / `ef_construction` / `ef_search`, each against a fresh
/// database, and expects ten results every time.
#[test]
fn test_hnsw_different_configurations() {
    // Every configuration shares the same capacity; only the graph parameters vary.
    let make_config = |m, ef_construction, ef_search| HnswConfig {
        m,
        ef_construction,
        ef_search,
        max_elements: 1000,
    };
    let configs = vec![
        make_config(8, 50, 50),
        make_config(16, 100, 100),
        make_config(32, 200, 200),
    ];

    for config in &configs {
        let workspace = tempdir().unwrap();
        let db_file = workspace.path().join("test.db");

        let mut opts = DbOptions::default();
        opts.storage_path = db_file.to_string_lossy().to_string();
        opts.dimensions = 64;
        opts.hnsw_config = Some(config.clone());

        let db = VectorDB::new(opts).unwrap();

        // Insert vectors: entry `i` holds the ramp (i + j) * 0.01.
        let mut batch: Vec<VectorEntry> = Vec::with_capacity(100);
        for i in 0..100 {
            batch.push(VectorEntry {
                id: Some(format!("vec_{}", i)),
                vector: (0..64).map(|j| ((i + j) as f32) * 0.01).collect(),
                metadata: None,
            });
        }
        db.insert_batch(batch).unwrap();

        // Search, passing this configuration's own ef_search as the per-query value.
        let probe: Vec<f32> = (0..64).map(|j| (j as f32) * 0.01).collect();
        let request = SearchQuery {
            vector: probe,
            k: 10,
            filter: None,
            ef_search: Some(config.ef_search),
        };
        let hits = db.search(request).unwrap();

        assert_eq!(hits.len(), 10, "Failed for config M={}", config.m);
    }
}
|
|
|
|
// ============================================================================
// Metadata Filtering Tests
// ============================================================================
|
|
|
|
/// Metadata-filter workflow: insert 50 entries tagged with
/// `category = i % 3` and `value = i / 10`, then run two filtered searches and
/// check that each one returns exactly the matching entries.
///
/// Besides checking that every hit satisfies the filter, the test also pins
/// the number of hits. Without that, a filter that wrongly drops everything
/// would pass, because the per-hit loops would simply never execute.
#[test]
fn test_complex_metadata_filtering() {
    let dir = tempdir().unwrap();
    let mut options = DbOptions::default();
    options.storage_path = dir.path().join("test.db").to_string_lossy().to_string();
    options.dimensions = 16;
    options.hnsw_config = None;

    let db = VectorDB::new(options).unwrap();

    // Insert vectors with different categories and values
    for i in 0..50 {
        let mut metadata = HashMap::new();
        metadata.insert("category".to_string(), serde_json::json!(i % 3));
        metadata.insert("value".to_string(), serde_json::json!(i / 10));

        db.insert(VectorEntry {
            id: Some(format!("vec_{}", i)),
            vector: (0..16).map(|j| ((i + j) as f32) * 0.1).collect(),
            metadata: Some(metadata),
        })
        .unwrap();
    }

    // Search with single filter
    let mut filter1 = HashMap::new();
    filter1.insert("category".to_string(), serde_json::json!(0));

    let query: Vec<f32> = (0..16).map(|j| (j as f32) * 0.1).collect();
    let results1 = db
        .search(SearchQuery {
            vector: query.clone(),
            k: 100,
            filter: Some(filter1),
            ef_search: None,
        })
        .unwrap();

    // i % 3 == 0 holds for i = 0, 3, ..., 48, i.e. 17 of the 50 entries. With
    // k = 100 above the total entry count, all of them are expected back.
    // NOTE(review): assumes the index used when hnsw_config is None searches
    // exhaustively; confirm against the index implementation.
    assert_eq!(
        results1.len(),
        17,
        "category filter should match exactly the entries with i % 3 == 0"
    );

    // Should only get vectors where i % 3 == 0
    for result in &results1 {
        let meta = result.metadata.as_ref().unwrap();
        assert_eq!(meta.get("category").unwrap(), &serde_json::json!(0));
    }

    // Search with different filter
    let mut filter2 = HashMap::new();
    filter2.insert("value".to_string(), serde_json::json!(2));

    let results2 = db
        .search(SearchQuery {
            vector: query,
            k: 100,
            filter: Some(filter2),
            ef_search: None,
        })
        .unwrap();

    // i / 10 == 2 holds for i in 20..30, i.e. 10 entries (same assumption as above).
    assert_eq!(
        results2.len(),
        10,
        "value filter should match exactly the entries with i in 20..30"
    );

    // Should only get vectors where i / 10 == 2 (i.e., i in 20..30)
    for result in &results2 {
        let meta = result.metadata.as_ref().unwrap();
        assert_eq!(meta.get("value").unwrap(), &serde_json::json!(2));
    }
}
|
|
|
|
// ============================================================================
// Error Handling Tests
// ============================================================================
|
|
|
|
/// Inserting a 3-element vector into a database opened with
/// `dimensions = 64` must be rejected with an error.
#[test]
fn test_dimension_validation() {
    let workspace = tempdir().unwrap();
    let db_file = workspace.path().join("test.db");

    let mut opts = DbOptions::default();
    opts.storage_path = db_file.to_string_lossy().to_string();
    opts.dimensions = 64;

    let db = VectorDB::new(opts).unwrap();

    // Try to insert vector with wrong dimensions
    let undersized = VectorEntry {
        id: None,
        vector: vec![1.0, 2.0, 3.0], // Only 3 dimensions, should be 64
        metadata: None,
    };
    let outcome = db.insert(undersized);

    assert!(outcome.is_err());
}
|
|
|
|
/// Queries a 64-dimensional database (opened with `hnsw_config = None`) using
/// a 3-element vector. The outcome is deliberately not inspected: the test
/// only requires that the call returns instead of panicking.
#[test]
fn test_search_with_wrong_dimension() {
    let workspace = tempdir().unwrap();
    let db_file = workspace.path().join("test.db");

    let mut opts = DbOptions::default();
    opts.storage_path = db_file.to_string_lossy().to_string();
    opts.dimensions = 64;
    opts.hnsw_config = None;

    let db = VectorDB::new(opts).unwrap();

    // Insert some vectors
    let seed = VectorEntry {
        id: Some("v1".to_string()),
        vector: (0..64).map(|i| i as f32).collect(),
        metadata: None,
    };
    db.insert(seed).unwrap();

    // Try to search with wrong dimension query
    // Note: This might not error in the current implementation, but should be validated
    let mismatched = vec![1.0, 2.0, 3.0]; // Wrong dimension

    // Depending on implementation, this might error or return empty results
    // The important thing is it doesn't panic
    let _ = db.search(SearchQuery {
        vector: mismatched,
        k: 10,
        filter: None,
        ef_search: None,
    });
}
|