Files

215 lines
5.8 KiB
Rust

//! Delta capture optimizations
//!
//! Provides optimized routines for capturing deltas from vector pairs.
use ruvector_delta_core::{Delta, DeltaOp, DeltaValue, VectorDelta};
use smallvec::SmallVec;
/// Configuration for delta capture
#[derive(Debug, Clone)]
pub struct CaptureConfig {
/// Epsilon for considering values as zero
pub epsilon: f32,
/// Sparsity threshold for using sparse representation
pub sparsity_threshold: f32,
/// Maximum dimensions for always using sparse
pub sparse_max_dims: usize,
}
impl Default for CaptureConfig {
fn default() -> Self {
Self {
epsilon: 1e-7,
sparsity_threshold: 0.7,
sparse_max_dims: 10_000,
}
}
}
/// Optimized delta capture with configurable thresholds
pub fn capture_delta(old: &[f32], new: &[f32], config: &CaptureConfig) -> VectorDelta {
assert_eq!(old.len(), new.len(), "Vectors must have same length");
let dimensions = old.len();
// For small vectors, always use sparse initially
if dimensions <= 64 {
return capture_sparse(old, new, config);
}
// For larger vectors, sample to estimate sparsity
let sample_size = (dimensions / 10).max(16).min(256);
let mut non_zero_sample = 0;
for i in (0..dimensions).step_by(dimensions / sample_size) {
if (new[i] - old[i]).abs() > config.epsilon {
non_zero_sample += 1;
}
}
let estimated_sparsity = 1.0 - (non_zero_sample as f32 / sample_size as f32);
if estimated_sparsity > config.sparsity_threshold {
capture_sparse(old, new, config)
} else {
capture_dense(old, new, config)
}
}
/// Capture with sparse representation
fn capture_sparse(old: &[f32], new: &[f32], config: &CaptureConfig) -> VectorDelta {
let dimensions = old.len();
let mut ops: SmallVec<[DeltaOp<f32>; 8]> = SmallVec::new();
for i in 0..dimensions {
let diff = new[i] - old[i];
if diff.abs() > config.epsilon {
ops.push(DeltaOp::new(i as u32, diff));
}
}
VectorDelta::from_sparse(ops, dimensions)
}
/// Capture with dense representation
fn capture_dense(old: &[f32], new: &[f32], config: &CaptureConfig) -> VectorDelta {
let diffs: Vec<f32> = old
.iter()
.zip(new.iter())
.map(|(o, n)| {
let d = n - o;
if d.abs() <= config.epsilon {
0.0
} else {
d
}
})
.collect();
VectorDelta::from_dense(diffs)
}
/// SIMD-accelerated delta capture (when available)
#[cfg(target_feature = "simd128")]
pub fn capture_delta_simd(old: &[f32], new: &[f32], config: &CaptureConfig) -> VectorDelta {
use core::arch::wasm32::*;
let dimensions = old.len();
if dimensions < 4 {
return capture_delta(old, new, config);
}
let chunks = dimensions / 4;
let remainder = dimensions % 4;
let mut diffs = Vec::with_capacity(dimensions);
let epsilon_vec = f32x4_splat(config.epsilon);
let neg_epsilon_vec = f32x4_splat(-config.epsilon);
let zero_vec = f32x4_splat(0.0);
// Process 4 elements at a time
for i in 0..chunks {
let base = i * 4;
unsafe {
let old_chunk = v128_load(old.as_ptr().add(base) as *const v128);
let new_chunk = v128_load(new.as_ptr().add(base) as *const v128);
// Compute differences
let diff = f32x4_sub(new_chunk, old_chunk);
// Zero out small differences
let above_eps = f32x4_gt(diff, epsilon_vec);
let below_neg_eps = f32x4_lt(diff, neg_epsilon_vec);
let significant = v128_or(above_eps, below_neg_eps);
let masked = v128_and(diff, significant);
// Extract to array
let d: [f32; 4] = core::mem::transmute(masked);
diffs.extend_from_slice(&d);
}
}
// Handle remainder
for i in (chunks * 4)..dimensions {
let d = new[i] - old[i];
diffs.push(if d.abs() > config.epsilon { d } else { 0.0 });
}
VectorDelta::from_dense(diffs)
}
/// Batch capture for multiple vector pairs
pub fn capture_batch(
old_vecs: &[&[f32]],
new_vecs: &[&[f32]],
config: &CaptureConfig,
) -> Vec<VectorDelta> {
assert_eq!(
old_vecs.len(),
new_vecs.len(),
"Must have same number of vectors"
);
old_vecs
.iter()
.zip(new_vecs.iter())
.map(|(old, new)| capture_delta(old, new, config))
.collect()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_capture_sparse() {
let old = vec![1.0f32; 100];
let mut new = old.clone();
new[10] = 2.0;
new[50] = 3.0;
let config = CaptureConfig::default();
let delta = capture_delta(&old, &new, &config);
assert!(matches!(delta.value, DeltaValue::Sparse(_)));
assert_eq!(delta.value.nnz(), 2);
}
#[test]
fn test_capture_dense() {
let old = vec![1.0f32; 4];
let new = vec![2.0f32; 4];
let config = CaptureConfig::default();
let delta = capture_delta(&old, &new, &config);
// All changed, should be dense
assert_eq!(delta.value.nnz(), 4);
}
#[test]
fn test_capture_identity() {
let v = vec![1.0f32, 2.0, 3.0];
let config = CaptureConfig::default();
let delta = capture_delta(&v, &v, &config);
assert!(delta.is_identity());
}
#[test]
fn test_epsilon_filtering() {
let old = vec![1.0f32, 2.0, 3.0];
let new = vec![1.0000001, 2.0000001, 3.0000001]; // Very small changes
let config = CaptureConfig {
epsilon: 1e-5,
..Default::default()
};
let delta = capture_delta(&old, &new, &config);
assert!(delta.is_identity());
}
}