Files
wifi-densepose/crates/ruvector-core/tests/stress_tests.rs
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

487 lines
14 KiB
Rust

//! Stress tests for scalability, concurrency, and resilience
//!
//! These tests push the system to its limits to verify robustness.
use ruvector_core::types::{DbOptions, HnswConfig, SearchQuery};
use ruvector_core::{VectorDB, VectorEntry};
use std::sync::{Arc, Barrier};
use std::thread;
use tempfile::tempdir;
// ============================================================================
// Large-Scale Insertion Tests
// ============================================================================
/// Bulk-loads one million 128-dimensional vectors in 100 batches of 10,000,
/// then runs ten k=10 searches against the populated database.
///
/// Each batch insert and each search is timed and the duration is printed.
/// Every search is required to return exactly 10 results.
#[test]
#[ignore] // Run with: cargo test --test stress_tests -- --ignored --test-threads=1
fn test_million_vector_insertion() {
    use std::time::Instant;

    let scratch = tempdir().unwrap();
    let db_file = scratch.path().join("million.db");

    let mut opts = DbOptions::default();
    opts.storage_path = db_file.to_string_lossy().to_string();
    opts.dimensions = 128;
    opts.hnsw_config = Some(HnswConfig {
        m: 16,
        ef_construction: 100,
        ef_search: 50,
        max_elements: 2_000_000,
    });
    let db = VectorDB::new(opts).unwrap();

    println!("Starting million-vector insertion test...");

    let per_batch = 10_000;
    let batch_count = 100; // Total: 1M vectors

    for batch in 0..batch_count {
        let batch_no = batch + 1;
        println!("Inserting batch {}/{}...", batch_no, batch_count);

        // Global indices covered by this batch; component j of each vector is
        // (global_idx + j) * 0.0001.
        let first = batch * per_batch;
        let entries: Vec<VectorEntry> = (first..first + per_batch)
            .map(|global_idx| VectorEntry {
                id: Some(format!("vec_{}", global_idx)),
                vector: (global_idx..global_idx + 128)
                    .map(|v| (v as f32) * 0.0001)
                    .collect(),
                metadata: None,
            })
            .collect();

        let started = Instant::now();
        db.insert_batch(entries).unwrap();
        let elapsed = started.elapsed();
        println!("Batch {} took: {:?}", batch_no, elapsed);
    }

    let stored = db.len().unwrap();
    println!("Final database size: {}", stored);
    assert_eq!(stored, 1_000_000);

    // Perform some searches to verify functionality
    println!("Testing search on 1M vectors...");
    for round in 0..10 {
        // Component j of the probe is (round * 10000 + j) * 0.0001.
        let offset = round * 10000;
        let probe: Vec<f32> = (offset..offset + 128)
            .map(|v| (v as f32) * 0.0001)
            .collect();

        let started = Instant::now();
        let hits = db
            .search(SearchQuery {
                vector: probe,
                k: 10,
                filter: None,
                ef_search: Some(50),
            })
            .unwrap();
        let elapsed = started.elapsed();

        println!(
            "Search {} took: {:?}, found {} results",
            round + 1,
            elapsed,
            hits.len()
        );
        assert_eq!(hits.len(), 10);
    }
}
// ============================================================================
// Concurrent Query Tests
// ============================================================================
/// Runs 10 threads that each issue 100 k=10 searches against a shared
/// database pre-populated with 1000 64-dimensional vectors.
///
/// A barrier releases all threads together. Every search must return exactly
/// 10 results. Each thread reports its own elapsed time, and the average
/// across threads is printed at the end.
#[test]
fn test_concurrent_queries() {
    use std::time::{Duration, Instant};

    let scratch = tempdir().unwrap();
    let db_file = scratch.path().join("concurrent.db");

    let mut opts = DbOptions::default();
    opts.storage_path = db_file.to_string_lossy().to_string();
    opts.dimensions = 64;
    opts.hnsw_config = Some(HnswConfig::default());
    let db = Arc::new(VectorDB::new(opts).unwrap());

    // Insert test data
    println!("Inserting test data...");
    let seed_entries: Vec<VectorEntry> = (0..1000)
        .map(|i| VectorEntry {
            id: Some(format!("vec_{}", i)),
            // Component j is (i + j) * 0.01.
            vector: (i..i + 64).map(|v| (v as f32) * 0.01).collect(),
            metadata: None,
        })
        .collect();
    db.insert_batch(seed_entries).unwrap();

    // Spawn multiple threads doing concurrent searches
    println!("Starting 10 concurrent query threads...");
    let num_threads = 10;
    let queries_per_thread = 100;
    let start_gate = Arc::new(Barrier::new(num_threads));

    // Collect eagerly: every thread must exist before any join, otherwise the
    // barrier would never be released.
    let workers: Vec<thread::JoinHandle<Duration>> = (0..num_threads)
        .map(|thread_id| {
            let db_handle = Arc::clone(&db);
            let gate = Arc::clone(&start_gate);
            thread::spawn(move || {
                // Wait for all threads to be ready
                gate.wait();

                let started = Instant::now();
                for i in 0..queries_per_thread {
                    // Component j is (thread_id * 1000 + i + j) * 0.01.
                    let base = thread_id * 1000 + i;
                    let probe: Vec<f32> =
                        (base..base + 64).map(|v| (v as f32) * 0.01).collect();
                    let hits = db_handle
                        .search(SearchQuery {
                            vector: probe,
                            k: 10,
                            filter: None,
                            ef_search: None,
                        })
                        .unwrap();
                    assert_eq!(hits.len(), 10);
                }
                let elapsed = started.elapsed();

                println!(
                    "Thread {} completed {} queries in {:?}",
                    thread_id, queries_per_thread, elapsed
                );
                elapsed
            })
        })
        .collect();

    // Wait for all threads and sum their individual durations
    let total_duration: Duration = workers
        .into_iter()
        .map(|worker| worker.join().unwrap())
        .sum();

    let total_queries = num_threads * queries_per_thread;
    println!("Total queries: {}", total_queries);
    println!(
        "Average duration per thread: {:?}",
        total_duration / num_threads as u32
    );
}
/// Mixes 5 reader threads and 2 writer threads on one shared database.
///
/// The database starts with 100 entries. After a common barrier, each reader
/// runs 50 k=5 searches (each must return between 1 and 5 results) and each
/// writer inserts 20 entries with ids unique to that writer.
///
/// Every writer insert is unwrapped, so once all threads have been joined the
/// database must contain exactly the initial entries plus every written
/// entry. Asserting the exact count lets the test catch lost writes, which a
/// lower-bound check cannot.
#[test]
fn test_concurrent_inserts_and_queries() {
    const INITIAL_ENTRIES: usize = 100;
    const SEARCHES_PER_READER: usize = 50;
    const INSERTS_PER_WRITER: usize = 20;

    let dir = tempdir().unwrap();
    let mut options = DbOptions::default();
    options.storage_path = dir
        .path()
        .join("mixed_concurrent.db")
        .to_string_lossy()
        .to_string();
    options.dimensions = 32;
    options.hnsw_config = Some(HnswConfig::default());
    let db = Arc::new(VectorDB::new(options).unwrap());

    // Initial data
    let initial: Vec<VectorEntry> = (0..INITIAL_ENTRIES)
        .map(|i| VectorEntry {
            id: Some(format!("initial_{}", i)),
            vector: (0..32).map(|j| ((i + j) as f32) * 0.1).collect(),
            metadata: None,
        })
        .collect();
    db.insert_batch(initial).unwrap();

    let num_readers = 5;
    let num_writers = 2;
    // One barrier slot per thread so readers and writers start together.
    let barrier = Arc::new(Barrier::new(num_readers + num_writers));
    let mut handles = vec![];

    // Reader threads
    for reader_id in 0..num_readers {
        let db_clone = Arc::clone(&db);
        let barrier_clone = Arc::clone(&barrier);
        let handle = thread::spawn(move || {
            barrier_clone.wait();
            for i in 0..SEARCHES_PER_READER {
                let query: Vec<f32> = (0..32)
                    .map(|j| ((reader_id * 100 + i + j) as f32) * 0.1)
                    .collect();
                let results = db_clone
                    .search(SearchQuery {
                        vector: query,
                        k: 5,
                        filter: None,
                        ef_search: None,
                    })
                    .unwrap();
                assert!(!results.is_empty() && results.len() <= 5);
            }
            println!("Reader {} completed", reader_id);
        });
        handles.push(handle);
    }

    // Writer threads
    for writer_id in 0..num_writers {
        let db_clone = Arc::clone(&db);
        let barrier_clone = Arc::clone(&barrier);
        let handle = thread::spawn(move || {
            barrier_clone.wait();
            for i in 0..INSERTS_PER_WRITER {
                let entry = VectorEntry {
                    // The id embeds the writer id, so writers never collide.
                    id: Some(format!("writer_{}_{}", writer_id, i)),
                    vector: (0..32)
                        .map(|j| ((writer_id * 1000 + i + j) as f32) * 0.1)
                        .collect(),
                    metadata: None,
                };
                db_clone.insert(entry).unwrap();
            }
            println!("Writer {} completed", writer_id);
        });
        handles.push(handle);
    }

    // Wait for all threads; a panic in any thread fails the test here.
    for handle in handles {
        handle.join().unwrap();
    }

    // Verify final state: no successful insert may be lost.
    let final_len = db.len().unwrap();
    println!("Final database size: {}", final_len);
    assert_eq!(
        final_len,
        INITIAL_ENTRIES + num_writers * INSERTS_PER_WRITER,
        "every successful insert must be reflected in the final size"
    );
}
// ============================================================================
// Memory Pressure Tests
// ============================================================================
/// Loads 10,000 vectors of 2048 dimensions in batches of 1,000, checks the
/// stored count, and then runs five k=10 searches that must each return
/// exactly 10 results.
#[test]
#[ignore] // Run with: cargo test --test stress_tests -- --ignored
fn test_memory_pressure_large_vectors() {
    let scratch = tempdir().unwrap();
    let db_file = scratch.path().join("large_vectors.db");

    let mut opts = DbOptions::default();
    opts.storage_path = db_file.to_string_lossy().to_string();
    opts.dimensions = 2048; // Very large vectors
    opts.hnsw_config = Some(HnswConfig {
        m: 8,
        ef_construction: 50,
        ef_search: 50,
        max_elements: 100_000,
    });
    let db = VectorDB::new(opts).unwrap();

    println!("Testing with large 2048-dimensional vectors...");

    let num_vectors = 10_000;
    let batch_size = 1000;
    let total_batches = num_vectors / batch_size;

    for batch in 0..total_batches {
        // Global indices covered by this batch; component j of each vector is
        // (global_idx + j) * 0.0001.
        let first = batch * batch_size;
        let entries: Vec<VectorEntry> = (first..first + batch_size)
            .map(|global_idx| VectorEntry {
                id: Some(format!("vec_{}", global_idx)),
                vector: (global_idx..global_idx + 2048)
                    .map(|v| (v as f32) * 0.0001)
                    .collect(),
                metadata: None,
            })
            .collect();
        db.insert_batch(entries).unwrap();

        println!("Inserted batch {}/{}", batch + 1, total_batches);
    }

    let stored = db.len().unwrap();
    println!("Database size: {}", stored);
    assert_eq!(stored, num_vectors);

    // Perform searches
    for round in 0..5 {
        // Component j of the probe is (round * 1000 + j) * 0.0001.
        let offset = round * 1000;
        let probe: Vec<f32> = (offset..offset + 2048)
            .map(|v| (v as f32) * 0.0001)
            .collect();
        let hits = db
            .search(SearchQuery {
                vector: probe,
                k: 10,
                filter: None,
                ef_search: None,
            })
            .unwrap();
        assert_eq!(hits.len(), 10);
    }
}
// ============================================================================
// Error Recovery Tests
// ============================================================================
/// Exercises operations that may legitimately be rejected and checks that the
/// database is still usable afterwards.
///
/// The outcomes of deleting or fetching an unknown id and of a k=0 search are
/// ignored on purpose: the test only requires that they do not panic. The
/// insert/delete churn and the final insert must succeed.
#[test]
fn test_invalid_operations_dont_crash() {
    let scratch = tempdir().unwrap();
    let mut opts = DbOptions::default();
    opts.storage_path = scratch
        .path()
        .join("test.db")
        .to_string_lossy()
        .to_string();
    opts.dimensions = 32;
    let db = VectorDB::new(opts).unwrap();

    // Builds a 32-dimensional all-ones entry carrying the given id.
    let ones_entry = |id: String| VectorEntry {
        id: Some(id),
        vector: vec![1.0; 32],
        metadata: None,
    };

    // Try various invalid operations

    // 1. Delete non-existent vector
    let _ = db.delete("nonexistent");

    // 2. Get non-existent vector
    let _ = db.get("nonexistent");

    // 3. Search with k=0: should either return empty or error gracefully,
    //    so the outcome is kept but never inspected.
    let _zero_k_outcome = db.search(SearchQuery {
        vector: vec![0.0; 32],
        k: 0,
        filter: None,
        ef_search: None,
    });

    // 4. Insert and immediately delete in rapid succession
    for i in 0..100 {
        let inserted_id = db.insert(ones_entry(format!("temp_{}", i))).unwrap();
        db.delete(&inserted_id).unwrap();
    }

    // Database should still be functional
    db.insert(ones_entry("final".to_string())).unwrap();
    assert!(db.get("final").unwrap().is_some());
}
/// Repeats the same insert, delete and search many times on a database
/// created without an HNSW config.
///
/// Every outcome is discarded; the test passes as long as nothing panics.
#[test]
fn test_repeated_operations() {
    let scratch = tempdir().unwrap();
    let mut opts = DbOptions::default();
    opts.storage_path = scratch
        .path()
        .join("test.db")
        .to_string_lossy()
        .to_string();
    opts.dimensions = 16;
    opts.hnsw_config = None;
    let db = VectorDB::new(opts).unwrap();

    // Insert the same ID multiple times (should replace or error)
    (0..10).for_each(|_| {
        let duplicate = VectorEntry {
            id: Some("same_id".to_string()),
            vector: vec![1.0; 16],
            metadata: None,
        };
        let _ = db.insert(duplicate);
    });

    // Delete the same ID multiple times
    (0..5).for_each(|_| {
        let _ = db.delete("same_id");
    });

    // Search repeatedly with the same query
    let probe = vec![1.0; 16];
    (0..100).for_each(|_| {
        let request = SearchQuery {
            vector: probe.clone(),
            k: 10,
            filter: None,
            ef_search: None,
        };
        let _ = db.search(request);
    });
}
// ============================================================================
// Extreme Parameter Tests
// ============================================================================
/// Checks search behaviour at the extremes of `k` on a 10-entry database
/// created without an HNSW config: a `k` far above the stored count must
/// yield at most 10 results, and `k = 1` must yield exactly one.
#[test]
fn test_extreme_k_values() {
    let scratch = tempdir().unwrap();
    let mut opts = DbOptions::default();
    opts.storage_path = scratch
        .path()
        .join("test.db")
        .to_string_lossy()
        .to_string();
    opts.dimensions = 16;
    opts.hnsw_config = None;
    let db = VectorDB::new(opts).unwrap();

    // Insert some vectors: entry i is the constant vector [i; 16]
    (0..10).for_each(|i| {
        db.insert(VectorEntry {
            id: Some(format!("vec_{}", i)),
            vector: vec![i as f32; 16],
            metadata: None,
        })
        .unwrap();
    });

    // Runs an all-ones query for the requested number of neighbours.
    let search_with_k = |k| {
        db.search(SearchQuery {
            vector: vec![1.0; 16],
            k,
            filter: None,
            ef_search: None,
        })
        .unwrap()
    };

    // Search with k larger than database size
    let oversized = search_with_k(1000);
    // Should return at most 10 results
    assert!(oversized.len() <= 10);

    // Search with k=1
    let single = search_with_k(1);
    assert_eq!(single.len(), 1);
}