Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
237
vendor/ruvector/crates/ruvector-delta-index/src/incremental.rs
vendored
Normal file
237
vendor/ruvector/crates/ruvector-delta-index/src/incremental.rs
vendored
Normal file
@@ -0,0 +1,237 @@
|
||||
//! Incremental index updates
|
||||
//!
|
||||
//! Provides efficient strategies for updating the index without full rebuild.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use ruvector_delta_core::{Delta, VectorDelta};
|
||||
|
||||
use crate::{DeltaHnsw, Result, SearchResult};
|
||||
|
||||
/// Configuration for incremental updates
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IncrementalConfig {
|
||||
/// Minimum delta magnitude to trigger reconnection
|
||||
pub reconnect_threshold: f32,
|
||||
/// Maximum pending updates before batch processing
|
||||
pub batch_threshold: usize,
|
||||
/// Whether to use lazy reconnection
|
||||
pub lazy_reconnect: bool,
|
||||
}
|
||||
|
||||
impl Default for IncrementalConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
reconnect_threshold: 0.1,
|
||||
batch_threshold: 100,
|
||||
lazy_reconnect: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles incremental updates to the HNSW index
|
||||
pub struct IncrementalUpdater {
|
||||
config: IncrementalConfig,
|
||||
pending_updates: HashMap<String, VectorDelta>,
|
||||
total_updates: usize,
|
||||
}
|
||||
|
||||
impl IncrementalUpdater {
|
||||
/// Create a new incremental updater
|
||||
pub fn new(config: IncrementalConfig) -> Self {
|
||||
Self {
|
||||
config,
|
||||
pending_updates: HashMap::new(),
|
||||
total_updates: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Queue an update for batch processing
|
||||
pub fn queue_update(&mut self, id: String, delta: VectorDelta) {
|
||||
self.pending_updates
|
||||
.entry(id)
|
||||
.and_modify(|existing| {
|
||||
*existing = existing.clone().compose(delta.clone());
|
||||
})
|
||||
.or_insert(delta);
|
||||
|
||||
self.total_updates += 1;
|
||||
}
|
||||
|
||||
/// Check if batch processing is needed
|
||||
pub fn needs_flush(&self) -> bool {
|
||||
self.pending_updates.len() >= self.config.batch_threshold
|
||||
}
|
||||
|
||||
/// Flush pending updates to the index
|
||||
pub fn flush(&mut self, index: &mut DeltaHnsw) -> Result<FlushResult> {
|
||||
let mut applied = 0;
|
||||
let mut reconnected = 0;
|
||||
let mut errors = Vec::new();
|
||||
|
||||
let updates: Vec<_> = self.pending_updates.drain().collect();
|
||||
|
||||
for (id, delta) in updates {
|
||||
match index.apply_delta(&id, &delta) {
|
||||
Ok(()) => {
|
||||
applied += 1;
|
||||
|
||||
// Check if reconnection is needed
|
||||
if delta.l2_norm() > self.config.reconnect_threshold {
|
||||
reconnected += 1;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
errors.push((id, e.to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(FlushResult {
|
||||
applied,
|
||||
reconnected,
|
||||
errors,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get number of pending updates
|
||||
pub fn pending_count(&self) -> usize {
|
||||
self.pending_updates.len()
|
||||
}
|
||||
|
||||
/// Get total updates processed
|
||||
pub fn total_updates(&self) -> usize {
|
||||
self.total_updates
|
||||
}
|
||||
|
||||
/// Clear pending updates without applying
|
||||
pub fn clear_pending(&mut self) {
|
||||
self.pending_updates.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of flushing updates
|
||||
#[derive(Debug)]
|
||||
pub struct FlushResult {
|
||||
/// Number of updates applied
|
||||
pub applied: usize,
|
||||
/// Number of nodes reconnected
|
||||
pub reconnected: usize,
|
||||
/// Errors encountered
|
||||
pub errors: Vec<(String, String)>,
|
||||
}
|
||||
|
||||
/// Strategies for handling vector updates
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum UpdateStrategy {
|
||||
/// Apply delta without graph modification
|
||||
DeltaOnly,
|
||||
/// Apply delta and update local neighbors
|
||||
LocalRepair,
|
||||
/// Apply delta and full reconnection
|
||||
FullReconnect,
|
||||
/// Queue for batch processing
|
||||
Deferred,
|
||||
}
|
||||
|
||||
/// Determine the best update strategy based on delta magnitude
|
||||
pub fn select_strategy(delta: &VectorDelta, config: &IncrementalConfig) -> UpdateStrategy {
|
||||
let magnitude = delta.l2_norm();
|
||||
|
||||
if magnitude < config.reconnect_threshold * 0.1 {
|
||||
UpdateStrategy::DeltaOnly
|
||||
} else if magnitude < config.reconnect_threshold {
|
||||
if config.lazy_reconnect {
|
||||
UpdateStrategy::DeltaOnly
|
||||
} else {
|
||||
UpdateStrategy::LocalRepair
|
||||
}
|
||||
} else if magnitude < config.reconnect_threshold * 5.0 {
|
||||
UpdateStrategy::LocalRepair
|
||||
} else {
|
||||
UpdateStrategy::FullReconnect
|
||||
}
|
||||
}
|
||||
|
||||
/// Statistics about incremental updates
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct UpdateStats {
|
||||
/// Total updates applied
|
||||
pub total_applied: usize,
|
||||
/// Updates that triggered reconnection
|
||||
pub reconnections: usize,
|
||||
/// Updates that were delta-only
|
||||
pub delta_only: usize,
|
||||
/// Average delta magnitude
|
||||
pub avg_magnitude: f32,
|
||||
/// Maximum delta magnitude
|
||||
pub max_magnitude: f32,
|
||||
}
|
||||
|
||||
impl UpdateStats {
|
||||
/// Record an update
|
||||
pub fn record(&mut self, delta: &VectorDelta, reconnected: bool) {
|
||||
let mag = delta.l2_norm();
|
||||
|
||||
self.total_applied += 1;
|
||||
if reconnected {
|
||||
self.reconnections += 1;
|
||||
} else {
|
||||
self.delta_only += 1;
|
||||
}
|
||||
|
||||
// Update running average
|
||||
let n = self.total_applied as f32;
|
||||
self.avg_magnitude = self.avg_magnitude * ((n - 1.0) / n) + mag / n;
|
||||
self.max_magnitude = self.max_magnitude.max(mag);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_incremental_updater() {
|
||||
let mut updater = IncrementalUpdater::new(IncrementalConfig::default());
|
||||
|
||||
let delta = VectorDelta::from_dense(vec![0.1, 0.2, 0.3]);
|
||||
updater.queue_update("test".to_string(), delta);
|
||||
|
||||
assert_eq!(updater.pending_count(), 1);
|
||||
assert_eq!(updater.total_updates(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delta_composition() {
|
||||
let mut updater = IncrementalUpdater::new(IncrementalConfig::default());
|
||||
|
||||
let delta1 = VectorDelta::from_dense(vec![1.0, 0.0, 0.0]);
|
||||
let delta2 = VectorDelta::from_dense(vec![0.0, 1.0, 0.0]);
|
||||
|
||||
updater.queue_update("test".to_string(), delta1);
|
||||
updater.queue_update("test".to_string(), delta2);
|
||||
|
||||
// Should compose into single update
|
||||
assert_eq!(updater.pending_count(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_strategy_selection() {
|
||||
let config = IncrementalConfig {
|
||||
reconnect_threshold: 0.5,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Small delta -> DeltaOnly
|
||||
let small = VectorDelta::from_dense(vec![0.01, 0.01, 0.01]);
|
||||
assert_eq!(select_strategy(&small, &config), UpdateStrategy::DeltaOnly);
|
||||
|
||||
// Large delta -> FullReconnect
|
||||
let large = VectorDelta::from_dense(vec![10.0, 10.0, 10.0]);
|
||||
assert_eq!(
|
||||
select_strategy(&large, &config),
|
||||
UpdateStrategy::FullReconnect
|
||||
);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user