Files
wifi-densepose/crates/ruvector-core/tests/integration_tests.rs
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

454 lines
13 KiB
Rust

//! Integration tests for end-to-end workflows
//!
//! These tests verify that all components work together correctly.
use ruvector_core::types::{DbOptions, DistanceMetric, HnswConfig, SearchQuery};
use ruvector_core::{VectorDB, VectorEntry};
use std::collections::HashMap;
use tempfile::tempdir;
// ============================================================================
// End-to-End Workflow Tests
// ============================================================================
#[test]
fn test_complete_insert_search_workflow() {
let dir = tempdir().unwrap();
let mut options = DbOptions::default();
options.storage_path = dir.path().join("test.db").to_string_lossy().to_string();
options.dimensions = 128;
options.distance_metric = DistanceMetric::Cosine;
options.hnsw_config = Some(HnswConfig {
m: 16,
ef_construction: 100,
ef_search: 50,
max_elements: 100_000,
});
let db = VectorDB::new(options).unwrap();
// Insert training data
let vectors: Vec<VectorEntry> = (0..100)
.map(|i| {
let mut metadata = HashMap::new();
metadata.insert("index".to_string(), serde_json::json!(i));
VectorEntry {
id: Some(format!("vec_{}", i)),
vector: (0..128).map(|j| ((i + j) as f32) * 0.01).collect(),
metadata: Some(metadata),
}
})
.collect();
let ids = db.insert_batch(vectors).unwrap();
assert_eq!(ids.len(), 100);
// Search for similar vectors
let query: Vec<f32> = (0..128).map(|j| (j as f32) * 0.01).collect();
let results = db
.search(SearchQuery {
vector: query,
k: 10,
filter: None,
ef_search: Some(100),
})
.unwrap();
assert_eq!(results.len(), 10);
assert!(results[0].vector.is_some());
assert!(results[0].metadata.is_some());
}
#[test]
fn test_batch_operations_10k_vectors() {
let dir = tempdir().unwrap();
let mut options = DbOptions::default();
options.storage_path = dir.path().join("test.db").to_string_lossy().to_string();
options.dimensions = 384;
options.distance_metric = DistanceMetric::Euclidean;
options.hnsw_config = Some(HnswConfig::default());
let db = VectorDB::new(options).unwrap();
// Generate 10K vectors
println!("Generating 10K vectors...");
let vectors: Vec<VectorEntry> = (0..10_000)
.map(|i| VectorEntry {
id: Some(format!("vec_{}", i)),
vector: (0..384).map(|j| ((i + j) as f32) * 0.001).collect(),
metadata: None,
})
.collect();
// Batch insert
println!("Batch inserting 10K vectors...");
let start = std::time::Instant::now();
let ids = db.insert_batch(vectors).unwrap();
let duration = start.elapsed();
println!("Batch insert took: {:?}", duration);
assert_eq!(ids.len(), 10_000);
assert_eq!(db.len().unwrap(), 10_000);
// Perform multiple searches
println!("Performing searches...");
for i in 0..10 {
let query: Vec<f32> = (0..384).map(|j| ((i * 100 + j) as f32) * 0.001).collect();
let results = db
.search(SearchQuery {
vector: query,
k: 10,
filter: None,
ef_search: None,
})
.unwrap();
assert_eq!(results.len(), 10);
}
}
#[test]
fn test_persistence_and_reload() {
let dir = tempdir().unwrap();
let db_path = dir
.path()
.join("persistent.db")
.to_string_lossy()
.to_string();
// Create and populate database
{
let mut options = DbOptions::default();
options.storage_path = db_path.clone();
options.dimensions = 3;
options.hnsw_config = None; // Use flat index for simpler persistence test
let db = VectorDB::new(options).unwrap();
for i in 0..10 {
db.insert(VectorEntry {
id: Some(format!("vec_{}", i)),
vector: vec![i as f32, (i * 2) as f32, (i * 3) as f32],
metadata: None,
})
.unwrap();
}
assert_eq!(db.len().unwrap(), 10);
}
// Reload database
{
let mut options = DbOptions::default();
options.storage_path = db_path.clone();
options.dimensions = 3;
options.hnsw_config = None;
let db = VectorDB::new(options).unwrap();
// Verify data persisted
assert_eq!(db.len().unwrap(), 10);
let entry = db.get("vec_5").unwrap().unwrap();
assert_eq!(entry.vector, vec![5.0, 10.0, 15.0]);
}
}
#[test]
fn test_mixed_operations_workflow() {
let dir = tempdir().unwrap();
let mut options = DbOptions::default();
options.storage_path = dir.path().join("test.db").to_string_lossy().to_string();
options.dimensions = 64;
let db = VectorDB::new(options).unwrap();
// Insert initial batch
let initial: Vec<VectorEntry> = (0..50)
.map(|i| VectorEntry {
id: Some(format!("vec_{}", i)),
vector: (0..64).map(|j| ((i + j) as f32) * 0.1).collect(),
metadata: None,
})
.collect();
db.insert_batch(initial).unwrap();
assert_eq!(db.len().unwrap(), 50);
// Delete some vectors
for i in 0..10 {
db.delete(&format!("vec_{}", i)).unwrap();
}
assert_eq!(db.len().unwrap(), 40);
// Insert more individual vectors
for i in 50..60 {
db.insert(VectorEntry {
id: Some(format!("vec_{}", i)),
vector: (0..64).map(|j| ((i + j) as f32) * 0.1).collect(),
metadata: None,
})
.unwrap();
}
assert_eq!(db.len().unwrap(), 50);
// Search
let query: Vec<f32> = (0..64).map(|j| (j as f32) * 0.1).collect();
let results = db
.search(SearchQuery {
vector: query,
k: 20,
filter: None,
ef_search: None,
})
.unwrap();
assert!(results.len() > 0);
}
// ============================================================================
// Different Distance Metrics
// ============================================================================
#[test]
fn test_all_distance_metrics() {
let metrics = vec![
DistanceMetric::Euclidean,
DistanceMetric::Cosine,
DistanceMetric::DotProduct,
DistanceMetric::Manhattan,
];
for metric in metrics {
let dir = tempdir().unwrap();
let mut options = DbOptions::default();
options.storage_path = dir.path().join("test.db").to_string_lossy().to_string();
options.dimensions = 32;
options.distance_metric = metric;
options.hnsw_config = None;
let db = VectorDB::new(options).unwrap();
// Insert test vectors
for i in 0..20 {
db.insert(VectorEntry {
id: Some(format!("vec_{}", i)),
vector: (0..32).map(|j| ((i + j) as f32) * 0.1).collect(),
metadata: None,
})
.unwrap();
}
// Search
let query: Vec<f32> = (0..32).map(|j| (j as f32) * 0.1).collect();
let results = db
.search(SearchQuery {
vector: query,
k: 5,
filter: None,
ef_search: None,
})
.unwrap();
assert_eq!(results.len(), 5, "Failed for metric {:?}", metric);
// Verify scores are in ascending order (lower is better for distance)
for i in 0..results.len() - 1 {
assert!(
results[i].score <= results[i + 1].score,
"Results not sorted for metric {:?}: {} > {}",
metric,
results[i].score,
results[i + 1].score
);
}
}
}
// ============================================================================
// HNSW Configuration Tests
// ============================================================================
#[test]
fn test_hnsw_different_configurations() {
let configs = vec![
HnswConfig {
m: 8,
ef_construction: 50,
ef_search: 50,
max_elements: 1000,
},
HnswConfig {
m: 16,
ef_construction: 100,
ef_search: 100,
max_elements: 1000,
},
HnswConfig {
m: 32,
ef_construction: 200,
ef_search: 200,
max_elements: 1000,
},
];
for config in configs {
let dir = tempdir().unwrap();
let mut options = DbOptions::default();
options.storage_path = dir.path().join("test.db").to_string_lossy().to_string();
options.dimensions = 64;
options.hnsw_config = Some(config.clone());
let db = VectorDB::new(options).unwrap();
// Insert vectors
let vectors: Vec<VectorEntry> = (0..100)
.map(|i| VectorEntry {
id: Some(format!("vec_{}", i)),
vector: (0..64).map(|j| ((i + j) as f32) * 0.01).collect(),
metadata: None,
})
.collect();
db.insert_batch(vectors).unwrap();
// Search with different ef_search values
let query: Vec<f32> = (0..64).map(|j| (j as f32) * 0.01).collect();
let results = db
.search(SearchQuery {
vector: query,
k: 10,
filter: None,
ef_search: Some(config.ef_search),
})
.unwrap();
assert_eq!(results.len(), 10, "Failed for config M={}", config.m);
}
}
// ============================================================================
// Metadata Filtering Tests
// ============================================================================
#[test]
fn test_complex_metadata_filtering() {
let dir = tempdir().unwrap();
let mut options = DbOptions::default();
options.storage_path = dir.path().join("test.db").to_string_lossy().to_string();
options.dimensions = 16;
options.hnsw_config = None;
let db = VectorDB::new(options).unwrap();
// Insert vectors with different categories and values
for i in 0..50 {
let mut metadata = HashMap::new();
metadata.insert("category".to_string(), serde_json::json!(i % 3));
metadata.insert("value".to_string(), serde_json::json!(i / 10));
db.insert(VectorEntry {
id: Some(format!("vec_{}", i)),
vector: (0..16).map(|j| ((i + j) as f32) * 0.1).collect(),
metadata: Some(metadata),
})
.unwrap();
}
// Search with single filter
let mut filter1 = HashMap::new();
filter1.insert("category".to_string(), serde_json::json!(0));
let query: Vec<f32> = (0..16).map(|j| (j as f32) * 0.1).collect();
let results1 = db
.search(SearchQuery {
vector: query.clone(),
k: 100,
filter: Some(filter1),
ef_search: None,
})
.unwrap();
// Should only get vectors where i % 3 == 0
for result in &results1 {
let meta = result.metadata.as_ref().unwrap();
assert_eq!(meta.get("category").unwrap(), &serde_json::json!(0));
}
// Search with different filter
let mut filter2 = HashMap::new();
filter2.insert("value".to_string(), serde_json::json!(2));
let results2 = db
.search(SearchQuery {
vector: query,
k: 100,
filter: Some(filter2),
ef_search: None,
})
.unwrap();
// Should only get vectors where i / 10 == 2 (i.e., i in 20..30)
for result in &results2 {
let meta = result.metadata.as_ref().unwrap();
assert_eq!(meta.get("value").unwrap(), &serde_json::json!(2));
}
}
// ============================================================================
// Error Handling Tests
// ============================================================================
#[test]
fn test_dimension_validation() {
let dir = tempdir().unwrap();
let mut options = DbOptions::default();
options.storage_path = dir.path().join("test.db").to_string_lossy().to_string();
options.dimensions = 64;
let db = VectorDB::new(options).unwrap();
// Try to insert vector with wrong dimensions
let result = db.insert(VectorEntry {
id: None,
vector: vec![1.0, 2.0, 3.0], // Only 3 dimensions, should be 64
metadata: None,
});
assert!(result.is_err());
}
#[test]
fn test_search_with_wrong_dimension() {
let dir = tempdir().unwrap();
let mut options = DbOptions::default();
options.storage_path = dir.path().join("test.db").to_string_lossy().to_string();
options.dimensions = 64;
options.hnsw_config = None;
let db = VectorDB::new(options).unwrap();
// Insert some vectors
db.insert(VectorEntry {
id: Some("v1".to_string()),
vector: (0..64).map(|i| i as f32).collect(),
metadata: None,
})
.unwrap();
// Try to search with wrong dimension query
// Note: This might not error in the current implementation, but should be validated
let query = vec![1.0, 2.0, 3.0]; // Wrong dimension
let result = db.search(SearchQuery {
vector: query,
k: 10,
filter: None,
ef_search: None,
});
// Depending on implementation, this might error or return empty results
// The important thing is it doesn't panic
let _ = result;
}