Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
680
crates/ruvector-delta-core/src/compression.rs
Normal file
680
crates/ruvector-delta-core/src/compression.rs
Normal file
@@ -0,0 +1,680 @@
|
||||
//! Delta compression strategies
|
||||
//!
|
||||
//! Provides specialized compression for delta data, leveraging
|
||||
//! the statistical properties of change data.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
|
||||
use crate::delta::VectorDelta;
|
||||
use crate::encoding::{DeltaEncoding, HybridEncoding};
|
||||
use crate::error::{DeltaError, Result};
|
||||
|
||||
/// How aggressively delta payloads are compressed.
///
/// Trades compression ratio against CPU time; only codecs that expose a
/// tunable level (e.g. Zstd) consult this setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionLevel {
    /// No compression
    None,
    /// Fast compression (lower ratio)
    Fast,
    /// Balanced compression
    Balanced,
    /// Best compression (slower)
    Best,
}

impl Default for CompressionLevel {
    /// `Balanced` is the default ratio/speed trade-off.
    fn default() -> Self {
        CompressionLevel::Balanced
    }
}
|
||||
|
||||
/// Byte-level compression codecs applied after delta encoding.
///
/// The discriminant is the tag byte written into the compressed header,
/// so the numeric values form part of the serialized format and must not
/// be changed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum CompressionCodec {
    /// No compression
    None = 0,
    /// LZ4 compression
    Lz4 = 1,
    /// Zstandard compression
    Zstd = 2,
    /// Delta-of-delta encoding
    DeltaOfDelta = 3,
    /// Quantization-based compression
    Quantized = 4,
}
|
||||
|
||||
impl TryFrom<u8> for CompressionCodec {
|
||||
type Error = DeltaError;
|
||||
|
||||
fn try_from(value: u8) -> Result<Self> {
|
||||
match value {
|
||||
0 => Ok(Self::None),
|
||||
1 => Ok(Self::Lz4),
|
||||
2 => Ok(Self::Zstd),
|
||||
3 => Ok(Self::DeltaOfDelta),
|
||||
4 => Ok(Self::Quantized),
|
||||
_ => Err(DeltaError::InvalidEncoding(alloc::format!(
|
||||
"Unknown codec: {}",
|
||||
value
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Delta compressor configuration
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CompressorConfig {
|
||||
/// Compression codec
|
||||
pub codec: CompressionCodec,
|
||||
/// Compression level
|
||||
pub level: CompressionLevel,
|
||||
/// Minimum size to compress (bytes)
|
||||
pub min_size: usize,
|
||||
/// Enable checksums
|
||||
pub enable_checksum: bool,
|
||||
}
|
||||
|
||||
impl Default for CompressorConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
codec: CompressionCodec::Lz4,
|
||||
level: CompressionLevel::Balanced,
|
||||
min_size: 64,
|
||||
enable_checksum: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Compressed data header
///
/// Serialized layout (all integers little-endian):
///
/// ```text
/// magic (4) | version (1) | codec (1) | has_checksum (1) |
/// original_size (4) | compressed_size (4) | checksum (8, optional)
/// ```
///
/// i.e. 15 bytes without a checksum, 23 bytes with one.
#[derive(Debug, Clone)]
struct CompressedHeader {
    /// Compression codec used
    codec: CompressionCodec,
    /// Original uncompressed size
    original_size: u32,
    /// Compressed size
    compressed_size: u32,
    /// Optional checksum (FNV-1a)
    checksum: Option<u64>,
}

impl CompressedHeader {
    // ASCII "DELT" read back as a little-endian u32; used to reject
    // buffers that are not compressed-delta frames.
    const MAGIC: u32 = 0x44454C54; // "DELT"
    // Bumped whenever the header layout changes; `from_bytes` refuses
    // any other version.
    const VERSION: u8 = 1;

    /// Serialize this header to the byte layout documented on the type
    /// (15 or 23 bytes depending on whether a checksum is present).
    fn to_bytes(&self) -> Vec<u8> {
        // Capacity hint only; a 23-byte header grows once.
        let mut bytes = Vec::with_capacity(21);

        // Magic (4 bytes)
        bytes.extend_from_slice(&Self::MAGIC.to_le_bytes());

        // Version (1 byte)
        bytes.push(Self::VERSION);

        // Codec (1 byte) — the `#[repr(u8)]` discriminant.
        bytes.push(self.codec as u8);

        // Has checksum flag (1 byte)
        bytes.push(if self.checksum.is_some() { 1 } else { 0 });

        // Original size (4 bytes)
        bytes.extend_from_slice(&self.original_size.to_le_bytes());

        // Compressed size (4 bytes)
        bytes.extend_from_slice(&self.compressed_size.to_le_bytes());

        // Checksum (8 bytes if present)
        if let Some(cs) = self.checksum {
            bytes.extend_from_slice(&cs.to_le_bytes());
        }

        bytes
    }

    /// Parse a header from the front of `bytes`.
    ///
    /// Returns the header and the number of bytes consumed (15 or 23),
    /// so the caller can locate the payload that follows. Fails on a
    /// short buffer, wrong magic, version mismatch, or unknown codec tag.
    fn from_bytes(bytes: &[u8]) -> Result<(Self, usize)> {
        // 15 bytes is the checksum-less minimum; the checksum extension
        // is re-checked below once the flag is known.
        if bytes.len() < 15 {
            return Err(DeltaError::DecompressionError("Header too small".into()));
        }

        let magic = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
        if magic != Self::MAGIC {
            return Err(DeltaError::DecompressionError(
                "Invalid magic number".into(),
            ));
        }

        let version = bytes[4];
        if version != Self::VERSION {
            return Err(DeltaError::VersionMismatch {
                expected: Self::VERSION as u32,
                actual: version as u32,
            });
        }

        // Codec tag byte; `TryFrom` rejects unknown values.
        let codec = CompressionCodec::try_from(bytes[5])?;
        let has_checksum = bytes[6] != 0;

        let original_size = u32::from_le_bytes([bytes[7], bytes[8], bytes[9], bytes[10]]);
        let compressed_size = u32::from_le_bytes([bytes[11], bytes[12], bytes[13], bytes[14]]);

        let (checksum, header_size) = if has_checksum {
            if bytes.len() < 23 {
                return Err(DeltaError::DecompressionError(
                    "Header too small for checksum".into(),
                ));
            }
            let cs = u64::from_le_bytes([
                bytes[15], bytes[16], bytes[17], bytes[18], bytes[19], bytes[20], bytes[21],
                bytes[22],
            ]);
            (Some(cs), 23)
        } else {
            (None, 15)
        };

        Ok((
            Self {
                codec,
                original_size,
                compressed_size,
                checksum,
            },
            header_size,
        ))
    }
}
|
||||
|
||||
/// Delta compressor for efficient storage
///
/// Pairs a [`HybridEncoding`] (delta <-> bytes serialization) with an
/// optional byte-level compression codec selected by [`CompressorConfig`].
pub struct DeltaCompressor {
    // Codec, level, minimum-size and checksum settings.
    config: CompressorConfig,
    // Serializer applied before compression / after decompression.
    encoding: HybridEncoding,
}
|
||||
|
||||
impl DeltaCompressor {
|
||||
/// Create a new compressor with default configuration
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
config: CompressorConfig::default(),
|
||||
encoding: HybridEncoding::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create with custom configuration
|
||||
pub fn with_config(config: CompressorConfig) -> Self {
|
||||
Self {
|
||||
config,
|
||||
encoding: HybridEncoding::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Compress a delta
|
||||
pub fn compress(&self, delta: &VectorDelta) -> Result<Vec<u8>> {
|
||||
// First encode the delta
|
||||
let encoded = self.encoding.encode(delta)?;
|
||||
|
||||
// Check if compression is worthwhile
|
||||
if encoded.len() < self.config.min_size || self.config.codec == CompressionCodec::None {
|
||||
// Return uncompressed with header
|
||||
let header = CompressedHeader {
|
||||
codec: CompressionCodec::None,
|
||||
original_size: encoded.len() as u32,
|
||||
compressed_size: encoded.len() as u32,
|
||||
checksum: if self.config.enable_checksum {
|
||||
Some(fnv1a_hash(&encoded))
|
||||
} else {
|
||||
None
|
||||
},
|
||||
};
|
||||
|
||||
let mut result = header.to_bytes();
|
||||
result.extend_from_slice(&encoded);
|
||||
return Ok(result);
|
||||
}
|
||||
|
||||
// Compress based on codec
|
||||
let compressed = match self.config.codec {
|
||||
CompressionCodec::None => encoded.clone(),
|
||||
#[cfg(feature = "compression")]
|
||||
CompressionCodec::Lz4 => self.compress_lz4(&encoded)?,
|
||||
#[cfg(feature = "compression")]
|
||||
CompressionCodec::Zstd => self.compress_zstd(&encoded)?,
|
||||
CompressionCodec::DeltaOfDelta => self.compress_delta_of_delta(&encoded)?,
|
||||
CompressionCodec::Quantized => self.compress_quantized(&encoded)?,
|
||||
#[cfg(not(feature = "compression"))]
|
||||
CompressionCodec::Lz4 | CompressionCodec::Zstd => {
|
||||
return Err(DeltaError::CompressionError(
|
||||
"Compression feature not enabled".into(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
// Build result
|
||||
let header = CompressedHeader {
|
||||
codec: self.config.codec,
|
||||
original_size: encoded.len() as u32,
|
||||
compressed_size: compressed.len() as u32,
|
||||
checksum: if self.config.enable_checksum {
|
||||
Some(fnv1a_hash(&encoded))
|
||||
} else {
|
||||
None
|
||||
},
|
||||
};
|
||||
|
||||
let mut result = header.to_bytes();
|
||||
result.extend_from_slice(&compressed);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Decompress bytes to a delta
|
||||
pub fn decompress(&self, bytes: &[u8]) -> Result<VectorDelta> {
|
||||
let (header, header_size) = CompressedHeader::from_bytes(bytes)?;
|
||||
|
||||
let compressed_data = &bytes[header_size..];
|
||||
if compressed_data.len() < header.compressed_size as usize {
|
||||
return Err(DeltaError::DecompressionError(alloc::format!(
|
||||
"Insufficient data: expected {}, got {}",
|
||||
header.compressed_size,
|
||||
compressed_data.len()
|
||||
)));
|
||||
}
|
||||
|
||||
let compressed = &compressed_data[..header.compressed_size as usize];
|
||||
|
||||
// Decompress based on codec
|
||||
let decompressed = match header.codec {
|
||||
CompressionCodec::None => compressed.to_vec(),
|
||||
#[cfg(feature = "compression")]
|
||||
CompressionCodec::Lz4 => {
|
||||
self.decompress_lz4(compressed, header.original_size as usize)?
|
||||
}
|
||||
#[cfg(feature = "compression")]
|
||||
CompressionCodec::Zstd => self.decompress_zstd(compressed)?,
|
||||
CompressionCodec::DeltaOfDelta => {
|
||||
self.decompress_delta_of_delta(compressed, header.original_size as usize)?
|
||||
}
|
||||
CompressionCodec::Quantized => {
|
||||
self.decompress_quantized(compressed, header.original_size as usize)?
|
||||
}
|
||||
#[cfg(not(feature = "compression"))]
|
||||
CompressionCodec::Lz4 | CompressionCodec::Zstd => {
|
||||
return Err(DeltaError::DecompressionError(
|
||||
"Compression feature not enabled".into(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
// Verify checksum
|
||||
if let Some(expected_checksum) = header.checksum {
|
||||
let actual_checksum = fnv1a_hash(&decompressed);
|
||||
if expected_checksum != actual_checksum {
|
||||
return Err(DeltaError::ChecksumMismatch {
|
||||
expected: expected_checksum,
|
||||
actual: actual_checksum,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Decode
|
||||
self.encoding.decode(&decompressed)
|
||||
}
|
||||
|
||||
/// Get compression ratio for a compressed buffer
|
||||
pub fn compression_ratio(&self, compressed: &[u8]) -> Result<f64> {
|
||||
let (header, _) = CompressedHeader::from_bytes(compressed)?;
|
||||
|
||||
if header.compressed_size == 0 {
|
||||
return Ok(1.0);
|
||||
}
|
||||
|
||||
Ok(header.original_size as f64 / header.compressed_size as f64)
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
fn compress_lz4(&self, data: &[u8]) -> Result<Vec<u8>> {
|
||||
lz4_flex::compress_prepend_size(data)
|
||||
.map_err(|e| DeltaError::CompressionError(alloc::format!("LZ4 error: {}", e)))
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
fn decompress_lz4(&self, data: &[u8], _original_size: usize) -> Result<Vec<u8>> {
|
||||
lz4_flex::decompress_size_prepended(data)
|
||||
.map_err(|e| DeltaError::DecompressionError(alloc::format!("LZ4 error: {}", e)))
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
fn compress_zstd(&self, data: &[u8]) -> Result<Vec<u8>> {
|
||||
let level = match self.config.level {
|
||||
CompressionLevel::None => 0,
|
||||
CompressionLevel::Fast => 1,
|
||||
CompressionLevel::Balanced => 3,
|
||||
CompressionLevel::Best => 19,
|
||||
};
|
||||
|
||||
zstd::encode_all(data, level)
|
||||
.map_err(|e| DeltaError::CompressionError(alloc::format!("Zstd error: {}", e)))
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
fn decompress_zstd(&self, data: &[u8]) -> Result<Vec<u8>> {
|
||||
zstd::decode_all(data)
|
||||
.map_err(|e| DeltaError::DecompressionError(alloc::format!("Zstd error: {}", e)))
|
||||
}
|
||||
|
||||
/// Delta-of-delta encoding for sequential data
|
||||
fn compress_delta_of_delta(&self, data: &[u8]) -> Result<Vec<u8>> {
|
||||
if data.len() < 4 {
|
||||
return Ok(data.to_vec());
|
||||
}
|
||||
|
||||
// Treat as f32 array and compute delta-of-delta
|
||||
let float_count = data.len() / 4;
|
||||
let mut result = Vec::with_capacity(data.len());
|
||||
|
||||
// First value stored as-is
|
||||
result.extend_from_slice(&data[..4]);
|
||||
|
||||
if float_count < 2 {
|
||||
return Ok(result);
|
||||
}
|
||||
|
||||
// Second value: store delta
|
||||
let v0 = f32::from_le_bytes([data[0], data[1], data[2], data[3]]);
|
||||
let v1 = f32::from_le_bytes([data[4], data[5], data[6], data[7]]);
|
||||
let delta0 = v1 - v0;
|
||||
result.extend_from_slice(&delta0.to_le_bytes());
|
||||
|
||||
// Remaining: store delta-of-delta
|
||||
let mut prev_delta = delta0;
|
||||
for i in 2..float_count {
|
||||
let offset = i * 4;
|
||||
let curr = f32::from_le_bytes([
|
||||
data[offset],
|
||||
data[offset + 1],
|
||||
data[offset + 2],
|
||||
data[offset + 3],
|
||||
]);
|
||||
let prev_offset = (i - 1) * 4;
|
||||
let prev = f32::from_le_bytes([
|
||||
data[prev_offset],
|
||||
data[prev_offset + 1],
|
||||
data[prev_offset + 2],
|
||||
data[prev_offset + 3],
|
||||
]);
|
||||
|
||||
let curr_delta = curr - prev;
|
||||
let delta_of_delta = curr_delta - prev_delta;
|
||||
|
||||
result.extend_from_slice(&delta_of_delta.to_le_bytes());
|
||||
prev_delta = curr_delta;
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn decompress_delta_of_delta(&self, data: &[u8], original_size: usize) -> Result<Vec<u8>> {
|
||||
if data.len() < 4 {
|
||||
return Ok(data.to_vec());
|
||||
}
|
||||
|
||||
let float_count = original_size / 4;
|
||||
let mut result = Vec::with_capacity(original_size);
|
||||
|
||||
// First value
|
||||
result.extend_from_slice(&data[..4]);
|
||||
let mut prev = f32::from_le_bytes([data[0], data[1], data[2], data[3]]);
|
||||
|
||||
if float_count < 2 || data.len() < 8 {
|
||||
return Ok(result);
|
||||
}
|
||||
|
||||
// Second value from delta
|
||||
let delta0 = f32::from_le_bytes([data[4], data[5], data[6], data[7]]);
|
||||
let v1 = prev + delta0;
|
||||
result.extend_from_slice(&v1.to_le_bytes());
|
||||
|
||||
// Remaining from delta-of-delta
|
||||
let mut prev_delta = delta0;
|
||||
prev = v1;
|
||||
|
||||
for i in 2..float_count {
|
||||
let offset = i * 4;
|
||||
if offset + 4 > data.len() {
|
||||
break;
|
||||
}
|
||||
|
||||
let dod = f32::from_le_bytes([
|
||||
data[offset],
|
||||
data[offset + 1],
|
||||
data[offset + 2],
|
||||
data[offset + 3],
|
||||
]);
|
||||
|
||||
let curr_delta = prev_delta + dod;
|
||||
let curr = prev + curr_delta;
|
||||
|
||||
result.extend_from_slice(&curr.to_le_bytes());
|
||||
|
||||
prev_delta = curr_delta;
|
||||
prev = curr;
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Quantization-based compression (reduce f32 to f16)
|
||||
fn compress_quantized(&self, data: &[u8]) -> Result<Vec<u8>> {
|
||||
if data.len() < 4 {
|
||||
return Ok(data.to_vec());
|
||||
}
|
||||
|
||||
let float_count = data.len() / 4;
|
||||
let mut result = Vec::with_capacity(float_count * 2);
|
||||
|
||||
for i in 0..float_count {
|
||||
let offset = i * 4;
|
||||
let value = f32::from_le_bytes([
|
||||
data[offset],
|
||||
data[offset + 1],
|
||||
data[offset + 2],
|
||||
data[offset + 3],
|
||||
]);
|
||||
|
||||
// Convert to f16 representation (simplified)
|
||||
let f16_bits = f32_to_f16_bits(value);
|
||||
result.extend_from_slice(&f16_bits.to_le_bytes());
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn decompress_quantized(&self, data: &[u8], original_size: usize) -> Result<Vec<u8>> {
|
||||
let float_count = original_size / 4;
|
||||
let mut result = Vec::with_capacity(original_size);
|
||||
|
||||
for i in 0..float_count {
|
||||
let offset = i * 2;
|
||||
if offset + 2 > data.len() {
|
||||
break;
|
||||
}
|
||||
|
||||
let f16_bits = u16::from_le_bytes([data[offset], data[offset + 1]]);
|
||||
let value = f16_bits_to_f32(f16_bits);
|
||||
|
||||
result.extend_from_slice(&value.to_le_bytes());
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for DeltaCompressor {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// 64-bit FNV-1a hash, used as a lightweight payload checksum.
///
/// Not cryptographic — it only guards against accidental corruption.
fn fnv1a_hash(data: &[u8]) -> u64 {
    // Standard FNV-1a 64-bit parameters.
    const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const PRIME: u64 = 0x100000001b3;

    data.iter().fold(OFFSET_BASIS, |hash, &byte| {
        (hash ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
|
||||
|
||||
/// Convert an `f32` to its IEEE 754 half-precision (binary16) bit pattern.
///
/// Simplified conversion: the mantissa is truncated (round-toward-zero)
/// and values too small for a normal f16 are flushed to *signed* zero
/// rather than producing f16 subnormals. (The previous version returned
/// unsigned zero on underflow, losing the sign bit.)
fn f32_to_f16_bits(value: f32) -> u16 {
    let bits = value.to_bits();

    // Sign already positioned at the f16 sign bit (bit 15).
    let sign = ((bits >> 31) as u16) << 15;
    let exp = ((bits >> 23) & 0xff) as i32;
    let frac = bits & 0x007f_ffff;

    if exp == 0xff {
        // Inf or NaN; a set mantissa bit keeps NaN-ness in f16.
        return sign | 0x7c00 | ((frac != 0) as u16);
    }

    // Rebias the exponent from f32 (bias 127) to f16 (bias 15).
    let new_exp = exp - 127 + 15;

    if new_exp <= 0 {
        // Underflow: flush to zero, preserving the sign bit.
        sign
    } else if new_exp >= 31 {
        // Overflow to infinity.
        sign | 0x7c00
    } else {
        // Normal: truncate the 23-bit mantissa down to 10 bits.
        sign | ((new_exp as u16) << 10) | (frac >> 13) as u16
    }
}
|
||||
|
||||
/// Convert an IEEE 754 half-precision (binary16) bit pattern to `f32`.
///
/// Handles all classes exactly: signed zero, subnormals, normals,
/// infinities and NaN. (The previous subnormal path was off by one in the
/// exponent and failed to strip the implicit leading bit, so e.g. 0x0200
/// decoded to ~2^-16 instead of 2^-15.)
fn f16_bits_to_f32(bits: u16) -> f32 {
    let sign = ((bits >> 15) as u32) << 31;
    let exp = ((bits >> 10) & 0x1f) as i32;
    let frac = (bits & 0x3ff) as u32;

    if exp == 0 {
        if frac == 0 {
            // Signed zero.
            f32::from_bits(sign)
        } else {
            // f16 subnormal: value = frac * 2^-24. Normalize so the MSB of
            // `frac` becomes the implicit leading bit. frac <= 0x3ff, so
            // leading_zeros >= 22 and shift >= 1; after `frac << shift` the
            // MSB sits at bit 10.
            let shift = frac.leading_zeros() - 21;
            // MSB position p = 10 - shift, value = 1.m * 2^(p - 24), so the
            // biased f32 exponent is (p - 24) + 127 = 113 - shift.
            let new_exp = (113 - shift as i32) as u32;
            // Drop the implicit bit, then widen 10-bit mantissa to 23 bits.
            let new_frac = ((frac << shift) & 0x3ff) << 13;
            f32::from_bits(sign | (new_exp << 23) | new_frac)
        }
    } else if exp == 31 {
        if frac == 0 {
            // Infinity.
            f32::from_bits(sign | 0x7f80_0000)
        } else {
            // Canonical quiet NaN.
            f32::from_bits(sign | 0x7fc0_0000)
        }
    } else {
        // Normal: rebias exponent (15 -> 127) and widen the mantissa.
        let new_exp = ((exp - 15 + 127) as u32) << 23;
        f32::from_bits(sign | new_exp | (frac << 13))
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Codec `None`: the payload is framed but stored verbatim, so the
    // round-trip must reproduce the delta.
    #[test]
    fn test_compressor_roundtrip_none() {
        let config = CompressorConfig {
            codec: CompressionCodec::None,
            ..Default::default()
        };

        let compressor = DeltaCompressor::with_config(config);
        let delta = VectorDelta::from_dense(vec![1.0, 2.0, 3.0, 4.0]);

        let compressed = compressor.compress(&delta).unwrap();
        let decompressed = compressor.decompress(&compressed).unwrap();

        assert_eq!(delta.dimensions, decompressed.dimensions);
    }

    // Delta-of-delta is designed for smooth sequences; an arithmetic
    // progression has a constant first-order delta.
    #[test]
    fn test_compressor_delta_of_delta() {
        let config = CompressorConfig {
            codec: CompressionCodec::DeltaOfDelta,
            ..Default::default()
        };

        let compressor = DeltaCompressor::with_config(config);

        // Sequential data works well with delta-of-delta
        let delta = VectorDelta::from_dense(vec![1.0, 2.0, 3.0, 4.0, 5.0]);

        let compressed = compressor.compress(&delta).unwrap();
        let decompressed = compressor.decompress(&compressed).unwrap();

        assert_eq!(delta.dimensions, decompressed.dimensions);
    }

    // f16 quantization is lossy, so only structural equality (dimensions)
    // is asserted, not the recovered values.
    #[test]
    fn test_compressor_quantized() {
        let config = CompressorConfig {
            codec: CompressionCodec::Quantized,
            ..Default::default()
        };

        let compressor = DeltaCompressor::with_config(config);
        let delta = VectorDelta::from_dense(vec![1.0, 2.0, 3.0, 4.0]);

        let compressed = compressor.compress(&delta).unwrap();
        let decompressed = compressor.decompress(&compressed).unwrap();

        // Quantization loses precision, so just check dimensions
        assert_eq!(delta.dimensions, decompressed.dimensions);
    }

    // Flipping a payload byte (offset 30 is past the 23-byte header)
    // must surface as a decompression error via the FNV-1a checksum.
    #[test]
    fn test_checksum_verification() {
        let compressor = DeltaCompressor::new();
        let delta = VectorDelta::from_dense(vec![1.0, 2.0, 3.0]);

        let mut compressed = compressor.compress(&delta).unwrap();

        // Corrupt data
        if compressed.len() > 30 {
            compressed[30] ^= 0xff;
        }

        let result = compressor.decompress(&compressed);
        assert!(result.is_err());
    }

    // f32 -> f16 -> f32 round-trip stays within f16's ~0.1% relative
    // precision for values inside the representable range.
    #[test]
    fn test_f16_conversion() {
        let values = [0.0f32, 1.0, -1.0, 0.5, 2.5, 1000.0, -0.001];

        for &original in &values {
            let bits = f32_to_f16_bits(original);
            let recovered = f16_bits_to_f32(bits);

            // f16 has limited precision
            if original != 0.0 {
                let relative_error = ((recovered - original) / original).abs();
                assert!(
                    relative_error < 0.01,
                    "Failed for {}: got {}, error {}",
                    original,
                    recovered,
                    relative_error
                );
            }
        }
    }
}
|
||||
692
crates/ruvector-delta-core/src/delta.rs
Normal file
692
crates/ruvector-delta-core/src/delta.rs
Normal file
@@ -0,0 +1,692 @@
|
||||
//! Core delta types and the Delta trait
|
||||
//!
|
||||
//! This module provides the fundamental Delta trait and implementations
|
||||
//! for vector data structures.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use core::ops::{Add, Mul, Neg, Sub};
|
||||
use smallvec::SmallVec;
|
||||
|
||||
use crate::error::{DeltaError, Result};
|
||||
|
||||
/// The core Delta trait for computing and applying changes
///
/// A delta represents the difference between two states of a value.
/// Deltas can be computed, applied, composed, and inverted.
///
/// Expected algebraic behavior (upheld by implementors): applying
/// `compute(old, new)` to `old` should yield `new`, and applying a
/// delta followed by its `inverse` should restore the original value.
pub trait Delta: Sized + Send + Sync + Clone {
    /// The base type this delta operates on
    type Base;

    /// Error type for delta operations
    type Error;

    /// Compute the delta between old and new values
    fn compute(old: &Self::Base, new: &Self::Base) -> Self;

    /// Apply this delta to a base value
    fn apply(&self, base: &mut Self::Base) -> core::result::Result<(), Self::Error>;

    /// Compose this delta with another (this then other)
    fn compose(self, other: Self) -> Self;

    /// Compute the inverse delta (undo operation)
    fn inverse(&self) -> Self;

    /// Check if this delta is an identity (no change)
    fn is_identity(&self) -> bool;

    /// Get the size of this delta in bytes (for memory tracking)
    fn byte_size(&self) -> usize;
}
|
||||
|
||||
/// A single delta operation on a value at an index
///
/// Stores the additive change `new - old` at one position of a vector.
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DeltaOp<T> {
    /// Index where the change occurs
    pub index: u32,
    /// The change value (new - old)
    pub value: T,
}

impl<T: Default + PartialEq> DeltaOp<T> {
    /// Create a new delta operation
    pub fn new(index: u32, value: T) -> Self {
        Self { index, value }
    }

    /// Check if this operation is a no-op, i.e. its value equals
    /// `T::default()` (zero for numeric types).
    ///
    /// The `Default + PartialEq` bounds already live on the impl block;
    /// the previous version repeated them in a redundant `where` clause.
    pub fn is_zero(&self) -> bool {
        self.value == T::default()
    }
}
|
||||
|
||||
/// A delta value that can be sparse or dense
///
/// The variant is chosen by constructors/converters based on how many
/// entries are non-zero; all variants except `Replace` are *additive*
/// (applied by adding to the base), while `Replace` overwrites it.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum DeltaValue<T> {
    /// No change (identity)
    Identity,

    /// Sparse delta: only non-zero changes stored
    /// (inline up to 8 ops before spilling to the heap).
    Sparse(SmallVec<[DeltaOp<T>; 8]>),

    /// Dense delta: all values stored
    Dense(Vec<T>),

    /// Full replacement (for large changes)
    Replace(Vec<T>),
}
|
||||
|
||||
impl<T: Default + Clone + PartialEq> Default for DeltaValue<T> {
    /// The default delta is the identity (no change).
    fn default() -> Self {
        Self::Identity
    }
}
|
||||
|
||||
impl<T> DeltaValue<T>
where
    T: Default + Clone + PartialEq + Add<Output = T> + Sub<Output = T> + Neg<Output = T> + Copy,
{
    /// Convert to sparse representation if beneficial
    ///
    /// `threshold` is the minimum fraction of zero entries (in 0.0..=1.0)
    /// a `Dense` value must have before it is rewritten as `Sparse`.
    /// All other variants (and insufficiently sparse `Dense` values) are
    /// returned as clones of `self`.
    pub fn to_sparse(&self, threshold: f32) -> Self {
        match self {
            Self::Dense(values) => {
                let non_zero_count = values.iter().filter(|v| **v != T::default()).count();
                let sparsity = 1.0 - (non_zero_count as f32 / values.len() as f32);

                if sparsity > threshold {
                    let ops: SmallVec<[DeltaOp<T>; 8]> = values
                        .iter()
                        .enumerate()
                        .filter(|(_, v)| **v != T::default())
                        .map(|(i, v)| DeltaOp::new(i as u32, *v))
                        .collect();

                    // All-zero dense data collapses to the identity delta.
                    if ops.is_empty() {
                        Self::Identity
                    } else {
                        Self::Sparse(ops)
                    }
                } else {
                    self.clone()
                }
            }
            _ => self.clone(),
        }
    }

    /// Convert to dense representation
    ///
    /// `Identity` expands to `dimensions` default values; `Sparse` ops are
    /// scattered into place (ops with out-of-range indices are silently
    /// dropped); `Dense` and `Replace` are returned unchanged.
    pub fn to_dense(&self, dimensions: usize) -> Self {
        match self {
            Self::Identity => Self::Dense(vec![T::default(); dimensions]),
            Self::Sparse(ops) => {
                let mut values = vec![T::default(); dimensions];
                for op in ops {
                    if (op.index as usize) < dimensions {
                        values[op.index as usize] = op.value;
                    }
                }
                Self::Dense(values)
            }
            Self::Dense(_) | Self::Replace(_) => self.clone(),
        }
    }

    /// Count non-zero elements
    ///
    /// For `Sparse` this is just the op count — it assumes sparse ops
    /// never store zero values (upheld by the constructors in this crate).
    pub fn nnz(&self) -> usize {
        match self {
            Self::Identity => 0,
            Self::Sparse(ops) => ops.len(),
            Self::Dense(values) => values.iter().filter(|v| **v != T::default()).count(),
            Self::Replace(values) => values.iter().filter(|v| **v != T::default()).count(),
        }
    }
}
|
||||
|
||||
/// Delta for f32 vectors with sparse optimization
///
/// Wraps a [`DeltaValue<f32>`] together with the vector length it was
/// computed against, so `apply` can validate dimensions.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct VectorDelta {
    /// The delta value (sparse or dense)
    pub value: DeltaValue<f32>,
    /// Original dimensions
    pub dimensions: usize,
    /// Sparsity threshold for encoding decisions
    /// (fraction of zero entries above which sparse storage is used).
    pub sparsity_threshold: f32,
}
|
||||
|
||||
impl VectorDelta {
|
||||
/// Create a new empty vector delta
|
||||
pub fn new(dimensions: usize) -> Self {
|
||||
Self {
|
||||
value: DeltaValue::Identity,
|
||||
dimensions,
|
||||
sparsity_threshold: 0.7,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create from sparse operations
|
||||
pub fn from_sparse(ops: SmallVec<[DeltaOp<f32>; 8]>, dimensions: usize) -> Self {
|
||||
let value = if ops.is_empty() {
|
||||
DeltaValue::Identity
|
||||
} else {
|
||||
DeltaValue::Sparse(ops)
|
||||
};
|
||||
|
||||
Self {
|
||||
value,
|
||||
dimensions,
|
||||
sparsity_threshold: 0.7,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create from dense values
|
||||
pub fn from_dense(values: Vec<f32>) -> Self {
|
||||
let dimensions = values.len();
|
||||
let non_zero = values.iter().filter(|v| **v != 0.0).count();
|
||||
let sparsity = 1.0 - (non_zero as f32 / dimensions as f32);
|
||||
|
||||
let value = if non_zero == 0 {
|
||||
DeltaValue::Identity
|
||||
} else if sparsity > 0.7 {
|
||||
// Convert to sparse
|
||||
let ops: SmallVec<[DeltaOp<f32>; 8]> = values
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, v)| **v != 0.0)
|
||||
.map(|(i, v)| DeltaOp::new(i as u32, *v))
|
||||
.collect();
|
||||
DeltaValue::Sparse(ops)
|
||||
} else {
|
||||
DeltaValue::Dense(values)
|
||||
};
|
||||
|
||||
Self {
|
||||
value,
|
||||
dimensions,
|
||||
sparsity_threshold: 0.7,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the L2 norm of the delta
|
||||
pub fn l2_norm(&self) -> f32 {
|
||||
match &self.value {
|
||||
DeltaValue::Identity => 0.0,
|
||||
DeltaValue::Sparse(ops) => ops.iter().map(|op| op.value * op.value).sum::<f32>().sqrt(),
|
||||
DeltaValue::Dense(values) | DeltaValue::Replace(values) => {
|
||||
values.iter().map(|v| v * v).sum::<f32>().sqrt()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the L1 norm of the delta
|
||||
pub fn l1_norm(&self) -> f32 {
|
||||
match &self.value {
|
||||
DeltaValue::Identity => 0.0,
|
||||
DeltaValue::Sparse(ops) => ops.iter().map(|op| op.value.abs()).sum(),
|
||||
DeltaValue::Dense(values) | DeltaValue::Replace(values) => {
|
||||
values.iter().map(|v| v.abs()).sum()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Scale the delta by a factor
|
||||
pub fn scale(&self, factor: f32) -> Self {
|
||||
let value = match &self.value {
|
||||
DeltaValue::Identity => DeltaValue::Identity,
|
||||
DeltaValue::Sparse(ops) => {
|
||||
let scaled: SmallVec<[DeltaOp<f32>; 8]> = ops
|
||||
.iter()
|
||||
.map(|op| DeltaOp::new(op.index, op.value * factor))
|
||||
.collect();
|
||||
DeltaValue::Sparse(scaled)
|
||||
}
|
||||
DeltaValue::Dense(values) => {
|
||||
DeltaValue::Dense(values.iter().map(|v| v * factor).collect())
|
||||
}
|
||||
DeltaValue::Replace(values) => {
|
||||
DeltaValue::Replace(values.iter().map(|v| v * factor).collect())
|
||||
}
|
||||
};
|
||||
|
||||
Self {
|
||||
value,
|
||||
dimensions: self.dimensions,
|
||||
sparsity_threshold: self.sparsity_threshold,
|
||||
}
|
||||
}
|
||||
|
||||
/// Clip delta values to a range
|
||||
pub fn clip(&self, min: f32, max: f32) -> Self {
|
||||
let value = match &self.value {
|
||||
DeltaValue::Identity => DeltaValue::Identity,
|
||||
DeltaValue::Sparse(ops) => {
|
||||
let clipped: SmallVec<[DeltaOp<f32>; 8]> = ops
|
||||
.iter()
|
||||
.map(|op| DeltaOp::new(op.index, op.value.clamp(min, max)))
|
||||
.collect();
|
||||
DeltaValue::Sparse(clipped)
|
||||
}
|
||||
DeltaValue::Dense(values) => {
|
||||
DeltaValue::Dense(values.iter().map(|v| v.clamp(min, max)).collect())
|
||||
}
|
||||
DeltaValue::Replace(values) => {
|
||||
DeltaValue::Replace(values.iter().map(|v| v.clamp(min, max)).collect())
|
||||
}
|
||||
};
|
||||
|
||||
Self {
|
||||
value,
|
||||
dimensions: self.dimensions,
|
||||
sparsity_threshold: self.sparsity_threshold,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Delta for VectorDelta {
|
||||
type Base = Vec<f32>;
|
||||
type Error = DeltaError;
|
||||
|
||||
fn compute(old: &Vec<f32>, new: &Vec<f32>) -> Self {
|
||||
assert_eq!(old.len(), new.len(), "Vectors must have same dimensions");
|
||||
|
||||
let dimensions = old.len();
|
||||
|
||||
// Compute differences
|
||||
let diffs: Vec<f32> = old.iter().zip(new.iter()).map(|(o, n)| n - o).collect();
|
||||
|
||||
// Count non-zero differences (with epsilon)
|
||||
let epsilon = 1e-7;
|
||||
let non_zero: Vec<(usize, f32)> = diffs
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, d)| d.abs() > epsilon)
|
||||
.map(|(i, d)| (i, *d))
|
||||
.collect();
|
||||
|
||||
let value = if non_zero.is_empty() {
|
||||
DeltaValue::Identity
|
||||
} else {
|
||||
let sparsity = 1.0 - (non_zero.len() as f32 / dimensions as f32);
|
||||
|
||||
if sparsity > 0.7 {
|
||||
// Use sparse representation
|
||||
let ops: SmallVec<[DeltaOp<f32>; 8]> = non_zero
|
||||
.into_iter()
|
||||
.map(|(i, v)| DeltaOp::new(i as u32, v))
|
||||
.collect();
|
||||
DeltaValue::Sparse(ops)
|
||||
} else {
|
||||
// Use dense representation
|
||||
DeltaValue::Dense(diffs)
|
||||
}
|
||||
};
|
||||
|
||||
Self {
|
||||
value,
|
||||
dimensions,
|
||||
sparsity_threshold: 0.7,
|
||||
}
|
||||
}
|
||||
|
||||
fn apply(&self, base: &mut Vec<f32>) -> Result<()> {
|
||||
if base.len() != self.dimensions {
|
||||
return Err(DeltaError::DimensionMismatch {
|
||||
expected: self.dimensions,
|
||||
actual: base.len(),
|
||||
});
|
||||
}
|
||||
|
||||
match &self.value {
|
||||
DeltaValue::Identity => {
|
||||
// No change
|
||||
}
|
||||
DeltaValue::Sparse(ops) => {
|
||||
for op in ops {
|
||||
let idx = op.index as usize;
|
||||
if idx < base.len() {
|
||||
base[idx] += op.value;
|
||||
}
|
||||
}
|
||||
}
|
||||
DeltaValue::Dense(deltas) => {
|
||||
for (b, d) in base.iter_mut().zip(deltas.iter()) {
|
||||
*b += d;
|
||||
}
|
||||
}
|
||||
DeltaValue::Replace(new_values) => {
|
||||
base.clone_from(new_values);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compose(self, other: Self) -> Self {
|
||||
if self.dimensions != other.dimensions {
|
||||
panic!(
|
||||
"Cannot compose deltas of different dimensions: {} vs {}",
|
||||
self.dimensions, other.dimensions
|
||||
);
|
||||
}
|
||||
|
||||
let value = match (&self.value, &other.value) {
|
||||
(DeltaValue::Identity, _) => other.value.clone(),
|
||||
(_, DeltaValue::Identity) => self.value.clone(),
|
||||
|
||||
(DeltaValue::Replace(_), DeltaValue::Replace(new)) => DeltaValue::Replace(new.clone()),
|
||||
|
||||
(DeltaValue::Sparse(ops1), DeltaValue::Sparse(ops2)) => {
|
||||
// Merge sparse operations
|
||||
let mut merged: alloc::collections::BTreeMap<u32, f32> =
|
||||
alloc::collections::BTreeMap::new();
|
||||
|
||||
for op in ops1 {
|
||||
*merged.entry(op.index).or_default() += op.value;
|
||||
}
|
||||
for op in ops2 {
|
||||
*merged.entry(op.index).or_default() += op.value;
|
||||
}
|
||||
|
||||
let ops: SmallVec<[DeltaOp<f32>; 8]> = merged
|
||||
.into_iter()
|
||||
.filter(|(_, v)| v.abs() > 1e-7)
|
||||
.map(|(i, v)| DeltaOp::new(i, v))
|
||||
.collect();
|
||||
|
||||
if ops.is_empty() {
|
||||
DeltaValue::Identity
|
||||
} else {
|
||||
DeltaValue::Sparse(ops)
|
||||
}
|
||||
}
|
||||
|
||||
(DeltaValue::Dense(d1), DeltaValue::Dense(d2)) => {
|
||||
let combined: Vec<f32> = d1.iter().zip(d2.iter()).map(|(a, b)| a + b).collect();
|
||||
|
||||
// Check if result is identity
|
||||
if combined.iter().all(|v| v.abs() < 1e-7) {
|
||||
DeltaValue::Identity
|
||||
} else {
|
||||
DeltaValue::Dense(combined)
|
||||
}
|
||||
}
|
||||
|
||||
// Mixed cases: convert to dense and combine
|
||||
_ => {
|
||||
let d1 = self.value.to_dense(self.dimensions);
|
||||
let d2 = other.value.to_dense(other.dimensions);
|
||||
|
||||
if let (DeltaValue::Dense(v1), DeltaValue::Dense(v2)) = (d1, d2) {
|
||||
let combined: Vec<f32> = v1.iter().zip(v2.iter()).map(|(a, b)| a + b).collect();
|
||||
DeltaValue::Dense(combined)
|
||||
} else {
|
||||
DeltaValue::Identity
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Self {
|
||||
value,
|
||||
dimensions: self.dimensions,
|
||||
sparsity_threshold: self.sparsity_threshold,
|
||||
}
|
||||
}
|
||||
|
||||
fn inverse(&self) -> Self {
|
||||
let value = match &self.value {
|
||||
DeltaValue::Identity => DeltaValue::Identity,
|
||||
DeltaValue::Sparse(ops) => {
|
||||
let inverted: SmallVec<[DeltaOp<f32>; 8]> = ops
|
||||
.iter()
|
||||
.map(|op| DeltaOp::new(op.index, -op.value))
|
||||
.collect();
|
||||
DeltaValue::Sparse(inverted)
|
||||
}
|
||||
DeltaValue::Dense(values) => DeltaValue::Dense(values.iter().map(|v| -v).collect()),
|
||||
DeltaValue::Replace(_) => {
|
||||
// Cannot invert a replace without knowing original
|
||||
panic!("Cannot invert Replace delta without original value");
|
||||
}
|
||||
};
|
||||
|
||||
Self {
|
||||
value,
|
||||
dimensions: self.dimensions,
|
||||
sparsity_threshold: self.sparsity_threshold,
|
||||
}
|
||||
}
|
||||
|
||||
    /// A delta is the identity iff it holds the `Identity` variant; sparse
    /// or dense deltas whose stored values happen to all be zero are NOT
    /// detected here.
    fn is_identity(&self) -> bool {
        matches!(self.value, DeltaValue::Identity)
    }
|
||||
|
||||
fn byte_size(&self) -> usize {
|
||||
core::mem::size_of::<Self>()
|
||||
+ match &self.value {
|
||||
DeltaValue::Identity => 0,
|
||||
DeltaValue::Sparse(ops) => ops.len() * core::mem::size_of::<DeltaOp<f32>>(),
|
||||
DeltaValue::Dense(v) | DeltaValue::Replace(v) => v.len() * 4,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Sparse delta representation for high-dimensional vectors
///
/// Unlike a purely additive delta, each entry records both the old and the
/// new value at an index, so the delta can be inverted (see `inverse`)
/// without access to the original vector.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SparseDelta {
    /// Non-zero delta entries (index, old_value, new_value)
    pub entries: SmallVec<[(u32, f32, f32); 16]>,
    /// Total dimensions
    pub dimensions: usize,
}
|
||||
|
||||
impl SparseDelta {
|
||||
/// Create a new sparse delta
|
||||
pub fn new(dimensions: usize) -> Self {
|
||||
Self {
|
||||
entries: SmallVec::new(),
|
||||
dimensions,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add an entry to the delta
|
||||
pub fn add_entry(&mut self, index: u32, old_value: f32, new_value: f32) {
|
||||
if (old_value - new_value).abs() > 1e-7 {
|
||||
self.entries.push((index, old_value, new_value));
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the sparsity ratio (0.0 = dense, 1.0 = fully sparse)
|
||||
pub fn sparsity(&self) -> f32 {
|
||||
1.0 - (self.entries.len() as f32 / self.dimensions as f32)
|
||||
}
|
||||
|
||||
/// Convert to VectorDelta
|
||||
pub fn to_vector_delta(&self) -> VectorDelta {
|
||||
if self.entries.is_empty() {
|
||||
return VectorDelta::new(self.dimensions);
|
||||
}
|
||||
|
||||
let ops: SmallVec<[DeltaOp<f32>; 8]> = self
|
||||
.entries
|
||||
.iter()
|
||||
.map(|(idx, old, new)| DeltaOp::new(*idx, new - old))
|
||||
.collect();
|
||||
|
||||
VectorDelta::from_sparse(ops, self.dimensions)
|
||||
}
|
||||
}
|
||||
|
||||
impl Delta for SparseDelta {
    type Base = Vec<f32>;
    type Error = DeltaError;

    /// Record every component that differs between `old` and `new`.
    ///
    /// # Panics
    /// Panics when the two vectors have different lengths.
    fn compute(old: &Vec<f32>, new: &Vec<f32>) -> Self {
        assert_eq!(old.len(), new.len());

        let mut delta = Self::new(old.len());

        for (i, (o, n)) in old.iter().zip(new.iter()).enumerate() {
            // add_entry silently skips components whose change is below epsilon.
            delta.add_entry(i as u32, *o, *n);
        }

        delta
    }

    /// Overwrite the recorded indices of `base` with their stored new values.
    ///
    /// # Errors
    /// Returns `DimensionMismatch` when `base` has a different length than
    /// the delta was computed for.
    fn apply(&self, base: &mut Vec<f32>) -> Result<()> {
        if base.len() != self.dimensions {
            return Err(DeltaError::DimensionMismatch {
                expected: self.dimensions,
                actual: base.len(),
            });
        }

        for (idx, _, new_value) in &self.entries {
            let idx = *idx as usize;
            // Out-of-range indices are ignored rather than treated as errors.
            if idx < base.len() {
                base[idx] = *new_value;
            }
        }

        Ok(())
    }

    /// Sequence two deltas: for each index keep the earliest old value and
    /// the latest new value, dropping entries that end up unchanged.
    fn compose(self, other: Self) -> Self {
        // For sparse delta, composition keeps original old values and final new values
        let mut result = Self::new(self.dimensions);

        // Build maps for efficient lookup
        use alloc::collections::BTreeMap;
        let mut self_map: BTreeMap<u32, (f32, f32)> = BTreeMap::new();
        for (idx, old, new) in &self.entries {
            self_map.insert(*idx, (*old, *new));
        }

        let mut other_map: BTreeMap<u32, (f32, f32)> = BTreeMap::new();
        for (idx, old, new) in &other.entries {
            other_map.insert(*idx, (*old, *new));
        }

        // Merge: for each index, keep original old and final new
        for (idx, (old1, new1)) in &self_map {
            if let Some((_, new2)) = other_map.get(idx) {
                result.add_entry(*idx, *old1, *new2);
            } else {
                result.add_entry(*idx, *old1, *new1);
            }
        }

        // Indices only the second delta touched are carried over unchanged.
        for (idx, (old2, new2)) in &other_map {
            if !self_map.contains_key(idx) {
                result.add_entry(*idx, *old2, *new2);
            }
        }

        result
    }

    /// Swap old/new in every entry; applying the result undoes this delta.
    fn inverse(&self) -> Self {
        let mut result = Self::new(self.dimensions);

        for (idx, old, new) in &self.entries {
            result.add_entry(*idx, *new, *old);
        }

        result
    }

    /// A delta with no recorded entries changes nothing.
    fn is_identity(&self) -> bool {
        self.entries.is_empty()
    }

    /// Approximate in-memory footprint in bytes (struct + entry storage).
    fn byte_size(&self) -> usize {
        core::mem::size_of::<Self>() + self.entries.len() * core::mem::size_of::<(u32, f32, f32)>()
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Construction and zero-detection of a single sparse op.
    #[test]
    fn test_delta_op() {
        let op = DeltaOp::new(5, 1.5f32);
        assert_eq!(op.index, 5);
        assert_eq!(op.value, 1.5);
        assert!(!op.is_zero());

        let zero_op = DeltaOp::new(0, 0.0f32);
        assert!(zero_op.is_zero());
    }

    // One change out of ten dimensions selects the sparse representation,
    // and applying the delta must reconstruct the new vector.
    #[test]
    fn test_vector_delta_sparse() {
        let old = vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let new = vec![1.0f32, 2.5, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];

        let delta = VectorDelta::compute(&old, &new);

        // Should be sparse (only 1 change)
        assert!(matches!(delta.value, DeltaValue::Sparse(_)));

        let mut result = old.clone();
        delta.apply(&mut result).unwrap();

        for (a, b) in result.iter().zip(new.iter()) {
            assert!((a - b).abs() < 1e-6);
        }
    }

    // Changing every component selects the dense representation.
    #[test]
    fn test_vector_delta_dense() {
        let old = vec![1.0f32, 2.0, 3.0, 4.0];
        let new = vec![2.0f32, 3.0, 4.0, 5.0];

        let delta = VectorDelta::compute(&old, &new);

        // Should be dense (all changed)
        assert!(matches!(delta.value, DeltaValue::Dense(_)));
    }

    // 3-4-5 triangle: the L2 norm of (3, 4, 0, 0) is 5.
    #[test]
    fn test_vector_delta_l2_norm() {
        let delta = VectorDelta::from_dense(vec![3.0, 4.0, 0.0, 0.0]);
        assert!((delta.l2_norm() - 5.0).abs() < 1e-6);
    }

    // Scaling a dense delta multiplies every component.
    #[test]
    fn test_vector_delta_scale() {
        let delta = VectorDelta::from_dense(vec![1.0, 2.0, 3.0]);
        let scaled = delta.scale(2.0);

        if let DeltaValue::Dense(values) = scaled.value {
            assert!((values[0] - 2.0).abs() < 1e-6);
            assert!((values[1] - 4.0).abs() < 1e-6);
            assert!((values[2] - 6.0).abs() < 1e-6);
        }
    }

    // SparseDelta records exactly the touched indices and applies the
    // recorded new values on top of the base vector.
    #[test]
    fn test_sparse_delta() {
        let old = vec![1.0f32; 100];
        let mut new = old.clone();
        new[10] = 2.0;
        new[50] = 3.0;

        let delta = SparseDelta::compute(&old, &new);

        assert_eq!(delta.entries.len(), 2);
        assert!(delta.sparsity() > 0.9);

        let mut result = old.clone();
        delta.apply(&mut result).unwrap();

        assert!((result[10] - 2.0).abs() < 1e-6);
        assert!((result[50] - 3.0).abs() < 1e-6);
    }
}
|
||||
601
crates/ruvector-delta-core/src/encoding.rs
Normal file
601
crates/ruvector-delta-core/src/encoding.rs
Normal file
@@ -0,0 +1,601 @@
|
||||
//! Delta encoding strategies
|
||||
//!
|
||||
//! This module provides various encoding strategies for deltas,
|
||||
//! optimizing for different access patterns and sparsity levels.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use core::marker::PhantomData;
|
||||
|
||||
use crate::delta::{DeltaOp, DeltaValue, VectorDelta};
|
||||
use crate::error::{DeltaError, Result};
|
||||
|
||||
/// Trait for delta encoding strategies
///
/// Implementations are expected to round-trip: decoding an encoded delta
/// reproduces its dimensions and (densified) values.
pub trait DeltaEncoding: Send + Sync {
    /// Encode a delta to bytes
    ///
    /// # Errors
    /// Returns an error when the delta cannot be represented by this codec.
    fn encode(&self, delta: &VectorDelta) -> Result<Vec<u8>>;

    /// Decode bytes to a delta
    ///
    /// # Errors
    /// Returns `InvalidEncoding` on a wrong tag byte or truncated buffer.
    fn decode(&self, bytes: &[u8]) -> Result<VectorDelta>;

    /// Get the encoding type identifier
    fn encoding_type(&self) -> EncodingType;
}
|
||||
|
||||
/// Encoding type identifiers
///
/// The discriminant doubles as the first (tag) byte of every encoded buffer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum EncodingType {
    /// Dense encoding (all values stored)
    Dense = 0,
    /// Sparse encoding (only non-zero values)
    Sparse = 1,
    /// Run-length encoding
    RunLength = 2,
    /// Varint encoding (reserved; decoding currently reports "not yet implemented")
    Varint = 3,
    /// Hybrid encoding (automatic selection; never appears as an on-wire tag)
    Hybrid = 4,
}
|
||||
|
||||
impl TryFrom<u8> for EncodingType {
|
||||
type Error = DeltaError;
|
||||
|
||||
fn try_from(value: u8) -> Result<Self> {
|
||||
match value {
|
||||
0 => Ok(Self::Dense),
|
||||
1 => Ok(Self::Sparse),
|
||||
2 => Ok(Self::RunLength),
|
||||
3 => Ok(Self::Varint),
|
||||
4 => Ok(Self::Hybrid),
|
||||
_ => Err(DeltaError::InvalidEncoding(alloc::format!(
|
||||
"Unknown encoding type: {}",
|
||||
value
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Dense encoding - stores all values
///
/// Wire format: 1 tag byte, 4-byte little-endian dimension count, then
/// `dimensions` little-endian f32 values.
#[derive(Debug, Clone, Default)]
pub struct DenseEncoding;

impl DenseEncoding {
    /// Create a new dense encoding
    pub fn new() -> Self {
        Self
    }
}
|
||||
|
||||
impl DeltaEncoding for DenseEncoding {
|
||||
fn encode(&self, delta: &VectorDelta) -> Result<Vec<u8>> {
|
||||
let mut bytes = Vec::with_capacity(4 + 4 + delta.dimensions * 4);
|
||||
|
||||
// Header: encoding type (1 byte) + dimensions (4 bytes)
|
||||
bytes.push(EncodingType::Dense as u8);
|
||||
bytes.extend_from_slice(&(delta.dimensions as u32).to_le_bytes());
|
||||
|
||||
// Convert to dense and encode
|
||||
match &delta.value {
|
||||
DeltaValue::Identity => {
|
||||
// Write zeros
|
||||
bytes.extend(core::iter::repeat(0u8).take(delta.dimensions * 4));
|
||||
}
|
||||
DeltaValue::Sparse(ops) => {
|
||||
let mut values = vec![0.0f32; delta.dimensions];
|
||||
for op in ops {
|
||||
if (op.index as usize) < delta.dimensions {
|
||||
values[op.index as usize] = op.value;
|
||||
}
|
||||
}
|
||||
for v in values {
|
||||
bytes.extend_from_slice(&v.to_le_bytes());
|
||||
}
|
||||
}
|
||||
DeltaValue::Dense(values) | DeltaValue::Replace(values) => {
|
||||
for v in values {
|
||||
bytes.extend_from_slice(&v.to_le_bytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
fn decode(&self, bytes: &[u8]) -> Result<VectorDelta> {
|
||||
if bytes.len() < 5 {
|
||||
return Err(DeltaError::InvalidEncoding(
|
||||
"Buffer too small for header".into(),
|
||||
));
|
||||
}
|
||||
|
||||
let encoding_type = EncodingType::try_from(bytes[0])?;
|
||||
if encoding_type != EncodingType::Dense {
|
||||
return Err(DeltaError::InvalidEncoding("Not a dense encoding".into()));
|
||||
}
|
||||
|
||||
let dimensions = u32::from_le_bytes([bytes[1], bytes[2], bytes[3], bytes[4]]) as usize;
|
||||
|
||||
let expected_len = 5 + dimensions * 4;
|
||||
if bytes.len() < expected_len {
|
||||
return Err(DeltaError::InvalidEncoding(alloc::format!(
|
||||
"Buffer too small: expected {}, got {}",
|
||||
expected_len,
|
||||
bytes.len()
|
||||
)));
|
||||
}
|
||||
|
||||
let mut values = Vec::with_capacity(dimensions);
|
||||
for i in 0..dimensions {
|
||||
let offset = 5 + i * 4;
|
||||
let v = f32::from_le_bytes([
|
||||
bytes[offset],
|
||||
bytes[offset + 1],
|
||||
bytes[offset + 2],
|
||||
bytes[offset + 3],
|
||||
]);
|
||||
values.push(v);
|
||||
}
|
||||
|
||||
Ok(VectorDelta::from_dense(values))
|
||||
}
|
||||
|
||||
fn encoding_type(&self) -> EncodingType {
|
||||
EncodingType::Dense
|
||||
}
|
||||
}
|
||||
|
||||
/// Sparse encoding - stores only non-zero values with their indices
///
/// Wire format: 1 tag byte, 4-byte dimension count, 4-byte entry count,
/// then (u32 index, f32 value) little-endian pairs.
#[derive(Debug, Clone)]
pub struct SparseEncoding {
    /// Threshold for considering a value as zero
    pub epsilon: f32,
}

impl Default for SparseEncoding {
    /// Fix: the derived `Default` produced `epsilon: 0.0`, disagreeing with
    /// `new()`'s 1e-7 and making the zero test exact-equality only. Route
    /// `default()` through `new()` so both construction paths agree.
    fn default() -> Self {
        Self::new()
    }
}

impl SparseEncoding {
    /// Create a new sparse encoding with default epsilon
    pub fn new() -> Self {
        Self { epsilon: 1e-7 }
    }

    /// Create with custom epsilon
    pub fn with_epsilon(epsilon: f32) -> Self {
        Self { epsilon }
    }
}
|
||||
|
||||
impl DeltaEncoding for SparseEncoding {
|
||||
fn encode(&self, delta: &VectorDelta) -> Result<Vec<u8>> {
|
||||
// Header: encoding type (1) + dimensions (4) + count (4)
|
||||
let mut bytes = Vec::new();
|
||||
|
||||
bytes.push(EncodingType::Sparse as u8);
|
||||
bytes.extend_from_slice(&(delta.dimensions as u32).to_le_bytes());
|
||||
|
||||
match &delta.value {
|
||||
DeltaValue::Identity => {
|
||||
// Zero entries
|
||||
bytes.extend_from_slice(&0u32.to_le_bytes());
|
||||
}
|
||||
DeltaValue::Sparse(ops) => {
|
||||
bytes.extend_from_slice(&(ops.len() as u32).to_le_bytes());
|
||||
for op in ops {
|
||||
bytes.extend_from_slice(&op.index.to_le_bytes());
|
||||
bytes.extend_from_slice(&op.value.to_le_bytes());
|
||||
}
|
||||
}
|
||||
DeltaValue::Dense(values) | DeltaValue::Replace(values) => {
|
||||
let non_zero: Vec<_> = values
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, v)| v.abs() > self.epsilon)
|
||||
.collect();
|
||||
|
||||
bytes.extend_from_slice(&(non_zero.len() as u32).to_le_bytes());
|
||||
for (i, v) in non_zero {
|
||||
bytes.extend_from_slice(&(i as u32).to_le_bytes());
|
||||
bytes.extend_from_slice(&v.to_le_bytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
fn decode(&self, bytes: &[u8]) -> Result<VectorDelta> {
|
||||
if bytes.len() < 9 {
|
||||
return Err(DeltaError::InvalidEncoding(
|
||||
"Buffer too small for sparse header".into(),
|
||||
));
|
||||
}
|
||||
|
||||
let encoding_type = EncodingType::try_from(bytes[0])?;
|
||||
if encoding_type != EncodingType::Sparse {
|
||||
return Err(DeltaError::InvalidEncoding("Not a sparse encoding".into()));
|
||||
}
|
||||
|
||||
let dimensions = u32::from_le_bytes([bytes[1], bytes[2], bytes[3], bytes[4]]) as usize;
|
||||
let count = u32::from_le_bytes([bytes[5], bytes[6], bytes[7], bytes[8]]) as usize;
|
||||
|
||||
let expected_len = 9 + count * 8;
|
||||
if bytes.len() < expected_len {
|
||||
return Err(DeltaError::InvalidEncoding(alloc::format!(
|
||||
"Buffer too small: expected {}, got {}",
|
||||
expected_len,
|
||||
bytes.len()
|
||||
)));
|
||||
}
|
||||
|
||||
let mut ops = smallvec::SmallVec::new();
|
||||
for i in 0..count {
|
||||
let offset = 9 + i * 8;
|
||||
let index = u32::from_le_bytes([
|
||||
bytes[offset],
|
||||
bytes[offset + 1],
|
||||
bytes[offset + 2],
|
||||
bytes[offset + 3],
|
||||
]);
|
||||
let value = f32::from_le_bytes([
|
||||
bytes[offset + 4],
|
||||
bytes[offset + 5],
|
||||
bytes[offset + 6],
|
||||
bytes[offset + 7],
|
||||
]);
|
||||
ops.push(DeltaOp::new(index, value));
|
||||
}
|
||||
|
||||
Ok(VectorDelta::from_sparse(ops, dimensions))
|
||||
}
|
||||
|
||||
fn encoding_type(&self) -> EncodingType {
|
||||
EncodingType::Sparse
|
||||
}
|
||||
}
|
||||
|
||||
/// Run-length encoding for consecutive identical deltas
///
/// Wire format: 1 tag byte, 4-byte dimension count, 4-byte run count, then
/// (f32 value, u32 count) little-endian pairs.
#[derive(Debug, Clone)]
pub struct RunLengthEncoding {
    /// Threshold for considering values equal
    pub epsilon: f32,
}

impl Default for RunLengthEncoding {
    /// Fix: the derived `Default` produced `epsilon: 0.0`, disagreeing with
    /// `new()`'s 1e-7 and making run detection exact-equality only. Route
    /// `default()` through `new()` so both construction paths agree.
    fn default() -> Self {
        Self::new()
    }
}

impl RunLengthEncoding {
    /// Create a new run-length encoding
    pub fn new() -> Self {
        Self { epsilon: 1e-7 }
    }

    /// Create with custom epsilon
    pub fn with_epsilon(epsilon: f32) -> Self {
        Self { epsilon }
    }

    /// Check if two values are approximately equal (within epsilon).
    fn approx_eq(&self, a: f32, b: f32) -> bool {
        (a - b).abs() <= self.epsilon
    }
}

/// A single RLE run: `count` consecutive occurrences of `value`.
#[derive(Debug, Clone, Copy)]
struct Run {
    value: f32,
    count: u32,
}
|
||||
|
||||
impl DeltaEncoding for RunLengthEncoding {
|
||||
fn encode(&self, delta: &VectorDelta) -> Result<Vec<u8>> {
|
||||
let values = match &delta.value {
|
||||
DeltaValue::Identity => vec![0.0f32; delta.dimensions],
|
||||
DeltaValue::Sparse(ops) => {
|
||||
let mut v = vec![0.0f32; delta.dimensions];
|
||||
for op in ops {
|
||||
if (op.index as usize) < delta.dimensions {
|
||||
v[op.index as usize] = op.value;
|
||||
}
|
||||
}
|
||||
v
|
||||
}
|
||||
DeltaValue::Dense(v) | DeltaValue::Replace(v) => v.clone(),
|
||||
};
|
||||
|
||||
if values.is_empty() {
|
||||
let mut bytes = Vec::with_capacity(9);
|
||||
bytes.push(EncodingType::RunLength as u8);
|
||||
bytes.extend_from_slice(&(0u32).to_le_bytes());
|
||||
bytes.extend_from_slice(&(0u32).to_le_bytes());
|
||||
return Ok(bytes);
|
||||
}
|
||||
|
||||
// Build runs
|
||||
let mut runs: Vec<Run> = Vec::new();
|
||||
let mut current_value = values[0];
|
||||
let mut current_count = 1u32;
|
||||
|
||||
for &v in values.iter().skip(1) {
|
||||
if self.approx_eq(v, current_value) {
|
||||
current_count += 1;
|
||||
} else {
|
||||
runs.push(Run {
|
||||
value: current_value,
|
||||
count: current_count,
|
||||
});
|
||||
current_value = v;
|
||||
current_count = 1;
|
||||
}
|
||||
}
|
||||
runs.push(Run {
|
||||
value: current_value,
|
||||
count: current_count,
|
||||
});
|
||||
|
||||
// Encode
|
||||
let mut bytes = Vec::with_capacity(9 + runs.len() * 8);
|
||||
bytes.push(EncodingType::RunLength as u8);
|
||||
bytes.extend_from_slice(&(delta.dimensions as u32).to_le_bytes());
|
||||
bytes.extend_from_slice(&(runs.len() as u32).to_le_bytes());
|
||||
|
||||
for run in runs {
|
||||
bytes.extend_from_slice(&run.value.to_le_bytes());
|
||||
bytes.extend_from_slice(&run.count.to_le_bytes());
|
||||
}
|
||||
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
fn decode(&self, bytes: &[u8]) -> Result<VectorDelta> {
|
||||
if bytes.len() < 9 {
|
||||
return Err(DeltaError::InvalidEncoding(
|
||||
"Buffer too small for RLE header".into(),
|
||||
));
|
||||
}
|
||||
|
||||
let encoding_type = EncodingType::try_from(bytes[0])?;
|
||||
if encoding_type != EncodingType::RunLength {
|
||||
return Err(DeltaError::InvalidEncoding(
|
||||
"Not a run-length encoding".into(),
|
||||
));
|
||||
}
|
||||
|
||||
let dimensions = u32::from_le_bytes([bytes[1], bytes[2], bytes[3], bytes[4]]) as usize;
|
||||
let run_count = u32::from_le_bytes([bytes[5], bytes[6], bytes[7], bytes[8]]) as usize;
|
||||
|
||||
let expected_len = 9 + run_count * 8;
|
||||
if bytes.len() < expected_len {
|
||||
return Err(DeltaError::InvalidEncoding(alloc::format!(
|
||||
"Buffer too small: expected {}, got {}",
|
||||
expected_len,
|
||||
bytes.len()
|
||||
)));
|
||||
}
|
||||
|
||||
let mut values = Vec::with_capacity(dimensions);
|
||||
for i in 0..run_count {
|
||||
let offset = 9 + i * 8;
|
||||
let value = f32::from_le_bytes([
|
||||
bytes[offset],
|
||||
bytes[offset + 1],
|
||||
bytes[offset + 2],
|
||||
bytes[offset + 3],
|
||||
]);
|
||||
let count = u32::from_le_bytes([
|
||||
bytes[offset + 4],
|
||||
bytes[offset + 5],
|
||||
bytes[offset + 6],
|
||||
bytes[offset + 7],
|
||||
]) as usize;
|
||||
|
||||
for _ in 0..count {
|
||||
values.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
if values.len() != dimensions {
|
||||
return Err(DeltaError::InvalidEncoding(alloc::format!(
|
||||
"RLE decoded to {} values, expected {}",
|
||||
values.len(),
|
||||
dimensions
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(VectorDelta::from_dense(values))
|
||||
}
|
||||
|
||||
fn encoding_type(&self) -> EncodingType {
|
||||
EncodingType::RunLength
|
||||
}
|
||||
}
|
||||
|
||||
/// Hybrid encoding that automatically selects the best strategy
///
/// Chooses between sparse, run-length, and dense codecs per delta; the tag
/// byte of the chosen concrete codec is what appears on the wire.
#[derive(Debug, Clone)]
pub struct HybridEncoding {
    /// Sparsity threshold for choosing sparse encoding
    pub sparsity_threshold: f32,
    /// RLE benefit threshold
    pub rle_threshold: f32,
    /// Epsilon for float comparisons
    pub epsilon: f32,
}

impl Default for HybridEncoding {
    /// Defaults: go sparse above 70% zero components, go RLE when distinct
    /// runs number fewer than half the elements, 1e-7 zero/equality epsilon.
    fn default() -> Self {
        Self {
            sparsity_threshold: 0.7,
            rle_threshold: 0.5,
            epsilon: 1e-7,
        }
    }
}
|
||||
|
||||
impl HybridEncoding {
|
||||
/// Create a new hybrid encoding with default thresholds
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Create with custom thresholds
|
||||
pub fn with_thresholds(sparsity: f32, rle: f32) -> Self {
|
||||
Self {
|
||||
sparsity_threshold: sparsity,
|
||||
rle_threshold: rle,
|
||||
epsilon: 1e-7,
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine the best encoding for a delta
|
||||
pub fn select_encoding(&self, delta: &VectorDelta) -> EncodingType {
|
||||
match &delta.value {
|
||||
DeltaValue::Identity => EncodingType::Sparse,
|
||||
DeltaValue::Sparse(ops) => {
|
||||
let sparsity = 1.0 - (ops.len() as f32 / delta.dimensions as f32);
|
||||
if sparsity > self.sparsity_threshold {
|
||||
EncodingType::Sparse
|
||||
} else {
|
||||
EncodingType::Dense
|
||||
}
|
||||
}
|
||||
DeltaValue::Dense(values) | DeltaValue::Replace(values) => {
|
||||
// Check sparsity
|
||||
let non_zero = values.iter().filter(|v| v.abs() > self.epsilon).count();
|
||||
let sparsity = 1.0 - (non_zero as f32 / values.len() as f32);
|
||||
|
||||
if sparsity > self.sparsity_threshold {
|
||||
return EncodingType::Sparse;
|
||||
}
|
||||
|
||||
// Check RLE potential
|
||||
let mut runs = 1usize;
|
||||
let mut prev = values[0];
|
||||
for &v in values.iter().skip(1) {
|
||||
if (v - prev).abs() > self.epsilon {
|
||||
runs += 1;
|
||||
prev = v;
|
||||
}
|
||||
}
|
||||
|
||||
let rle_ratio = runs as f32 / values.len() as f32;
|
||||
if rle_ratio < self.rle_threshold {
|
||||
EncodingType::RunLength
|
||||
} else {
|
||||
EncodingType::Dense
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DeltaEncoding for HybridEncoding {
|
||||
fn encode(&self, delta: &VectorDelta) -> Result<Vec<u8>> {
|
||||
let selected = self.select_encoding(delta);
|
||||
|
||||
match selected {
|
||||
EncodingType::Dense => DenseEncoding.encode(delta),
|
||||
EncodingType::Sparse => SparseEncoding::with_epsilon(self.epsilon).encode(delta),
|
||||
EncodingType::RunLength => RunLengthEncoding::with_epsilon(self.epsilon).encode(delta),
|
||||
_ => DenseEncoding.encode(delta),
|
||||
}
|
||||
}
|
||||
|
||||
fn decode(&self, bytes: &[u8]) -> Result<VectorDelta> {
|
||||
if bytes.is_empty() {
|
||||
return Err(DeltaError::InvalidEncoding("Empty buffer".into()));
|
||||
}
|
||||
|
||||
let encoding_type = EncodingType::try_from(bytes[0])?;
|
||||
|
||||
match encoding_type {
|
||||
EncodingType::Dense => DenseEncoding.decode(bytes),
|
||||
EncodingType::Sparse => SparseEncoding::with_epsilon(self.epsilon).decode(bytes),
|
||||
EncodingType::RunLength => RunLengthEncoding::with_epsilon(self.epsilon).decode(bytes),
|
||||
EncodingType::Hybrid => Err(DeltaError::InvalidEncoding(
|
||||
"Hybrid type should not appear in encoded data".into(),
|
||||
)),
|
||||
EncodingType::Varint => Err(DeltaError::InvalidEncoding(
|
||||
"Varint encoding not yet implemented".into(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn encoding_type(&self) -> EncodingType {
|
||||
EncodingType::Hybrid
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::delta::Delta;
    use alloc::vec;

    // Dense round-trip preserves the dimension count.
    #[test]
    fn test_dense_encoding_roundtrip() {
        let delta = VectorDelta::from_dense(vec![1.0, 2.0, 3.0, 4.0]);

        let encoding = DenseEncoding::new();
        let bytes = encoding.encode(&delta).unwrap();
        let decoded = encoding.decode(&bytes).unwrap();

        assert_eq!(delta.dimensions, decoded.dimensions);
    }

    // Sparse round-trip preserves dimensions and the non-zero count.
    #[test]
    fn test_sparse_encoding_roundtrip() {
        let mut ops = smallvec::SmallVec::new();
        ops.push(DeltaOp::new(5, 1.5));
        ops.push(DeltaOp::new(10, 2.5));
        let delta = VectorDelta::from_sparse(ops, 100);

        let encoding = SparseEncoding::new();
        let bytes = encoding.encode(&delta).unwrap();
        let decoded = encoding.decode(&bytes).unwrap();

        assert_eq!(delta.dimensions, decoded.dimensions);
        assert_eq!(delta.value.nnz(), decoded.value.nnz());
    }

    // Run-length round-trip on data with obvious runs.
    #[test]
    fn test_rle_encoding_roundtrip() {
        // Create a delta with runs
        let values = vec![1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0];
        let delta = VectorDelta::from_dense(values.clone());

        let encoding = RunLengthEncoding::new();
        let bytes = encoding.encode(&delta).unwrap();
        let decoded = encoding.decode(&bytes).unwrap();

        assert_eq!(delta.dimensions, decoded.dimensions);
    }

    // A 1-in-1000 change should make the hybrid selector choose Sparse.
    #[test]
    fn test_hybrid_encoding_selects_sparse() {
        // Very sparse delta
        let mut ops = smallvec::SmallVec::new();
        ops.push(DeltaOp::new(5, 1.5));
        let delta = VectorDelta::from_sparse(ops, 1000);

        let encoding = HybridEncoding::new();
        assert_eq!(encoding.select_encoding(&delta), EncodingType::Sparse);
    }

    // End-to-end hybrid round-trip on a dense delta.
    #[test]
    fn test_hybrid_encoding_roundtrip() {
        let delta = VectorDelta::from_dense(vec![1.0, 2.0, 3.0, 4.0]);

        let encoding = HybridEncoding::new();
        let bytes = encoding.encode(&delta).unwrap();
        let decoded = encoding.decode(&bytes).unwrap();

        assert_eq!(delta.dimensions, decoded.dimensions);
    }

    // An identity delta must survive a sparse round-trip as identity.
    #[test]
    fn test_identity_encoding() {
        let delta = VectorDelta::new(100);
        assert!(delta.is_identity());

        let encoding = SparseEncoding::new();
        let bytes = encoding.encode(&delta).unwrap();
        let decoded = encoding.decode(&bytes).unwrap();

        assert!(decoded.is_identity());
    }
}
|
||||
119
crates/ruvector-delta-core/src/error.rs
Normal file
119
crates/ruvector-delta-core/src/error.rs
Normal file
@@ -0,0 +1,119 @@
|
||||
//! Error types for delta operations
|
||||
|
||||
use alloc::string::String;
|
||||
use core::fmt;
|
||||
|
||||
/// Result type for delta operations
|
||||
pub type Result<T> = core::result::Result<T, DeltaError>;
|
||||
|
||||
/// Errors that can occur during delta operations
///
/// All variants are cheaply cloneable; human-readable rendering is provided
/// by the `Display` impl below.
#[derive(Debug, Clone)]
pub enum DeltaError {
    /// Dimension mismatch between vectors
    DimensionMismatch {
        /// Expected dimension
        expected: usize,
        /// Actual dimension
        actual: usize,
    },

    /// Invalid delta encoding
    InvalidEncoding(String),

    /// Compression error
    CompressionError(String),

    /// Decompression error
    DecompressionError(String),

    /// Stream error
    StreamError(String),

    /// Window error
    WindowError(String),

    /// Serialization error
    SerializationError(String),

    /// Index out of bounds
    IndexOutOfBounds {
        /// The index that was accessed
        index: usize,
        /// The length of the collection
        length: usize,
    },

    /// Invalid operation
    InvalidOperation(String),

    /// Buffer overflow
    BufferOverflow {
        /// Required capacity
        required: usize,
        /// Available capacity
        available: usize,
    },

    /// Checksum mismatch
    ChecksumMismatch {
        /// Expected checksum
        expected: u64,
        /// Actual checksum
        actual: u64,
    },

    /// Version incompatibility
    VersionMismatch {
        /// Expected version
        expected: u32,
        /// Actual version
        actual: u32,
    },
}
|
||||
|
||||
impl fmt::Display for DeltaError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::DimensionMismatch { expected, actual } => {
|
||||
write!(
|
||||
f,
|
||||
"Dimension mismatch: expected {}, got {}",
|
||||
expected, actual
|
||||
)
|
||||
}
|
||||
Self::InvalidEncoding(msg) => write!(f, "Invalid encoding: {}", msg),
|
||||
Self::CompressionError(msg) => write!(f, "Compression error: {}", msg),
|
||||
Self::DecompressionError(msg) => write!(f, "Decompression error: {}", msg),
|
||||
Self::StreamError(msg) => write!(f, "Stream error: {}", msg),
|
||||
Self::WindowError(msg) => write!(f, "Window error: {}", msg),
|
||||
Self::SerializationError(msg) => write!(f, "Serialization error: {}", msg),
|
||||
Self::IndexOutOfBounds { index, length } => {
|
||||
write!(f, "Index out of bounds: {} (length: {})", index, length)
|
||||
}
|
||||
Self::InvalidOperation(msg) => write!(f, "Invalid operation: {}", msg),
|
||||
Self::BufferOverflow {
|
||||
required,
|
||||
available,
|
||||
} => {
|
||||
write!(
|
||||
f,
|
||||
"Buffer overflow: required {}, available {}",
|
||||
required, available
|
||||
)
|
||||
}
|
||||
Self::ChecksumMismatch { expected, actual } => {
|
||||
write!(
|
||||
f,
|
||||
"Checksum mismatch: expected {:016x}, got {:016x}",
|
||||
expected, actual
|
||||
)
|
||||
}
|
||||
Self::VersionMismatch { expected, actual } => {
|
||||
write!(f, "Version mismatch: expected {}, got {}", expected, actual)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
impl std::error::Error for DeltaError {}
|
||||
126
crates/ruvector-delta-core/src/lib.rs
Normal file
126
crates/ruvector-delta-core/src/lib.rs
Normal file
@@ -0,0 +1,126 @@
|
||||
//! # RuVector Delta Core
|
||||
//!
|
||||
//! Core delta types and traits for behavioral vector change tracking.
|
||||
//! This crate provides the fundamental abstractions for computing, applying,
|
||||
//! and composing deltas on vector data structures.
|
||||
//!
|
||||
//! ## Key Concepts
|
||||
//!
|
||||
//! - **Delta**: A representation of the change between two states
|
||||
//! - **DeltaStream**: An ordered sequence of deltas for event sourcing
|
||||
//! - **DeltaWindow**: Time-bounded aggregation of deltas
|
||||
//! - **Encoding**: Sparse and dense delta representations
|
||||
//! - **Compression**: Delta-specific compression strategies
|
||||
//!
|
||||
//! ## Example
|
||||
//!
|
||||
//! ```rust
|
||||
//! use ruvector_delta_core::{Delta, VectorDelta, DeltaStream};
|
||||
//!
|
||||
//! // Compute delta between two vectors
|
||||
//! let old = vec![1.0f32, 2.0, 3.0];
|
||||
//! let new = vec![1.1f32, 2.0, 3.5];
|
||||
//! let delta = VectorDelta::compute(&old, &new);
|
||||
//!
|
||||
//! // Apply delta to reconstruct
|
||||
//! let mut reconstructed = old.clone();
|
||||
//! delta.apply(&mut reconstructed).unwrap();
|
||||
//! assert_eq!(reconstructed, new);
|
||||
//! ```
|
||||
|
||||
#![cfg_attr(not(feature = "std"), no_std)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(clippy::all)]
|
||||
#![deny(unsafe_op_in_unsafe_fn)]
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
pub mod compression;
|
||||
pub mod delta;
|
||||
pub mod encoding;
|
||||
pub mod error;
|
||||
pub mod stream;
|
||||
pub mod window;
|
||||
|
||||
// Re-exports
|
||||
pub use compression::{CompressionCodec, CompressionLevel, DeltaCompressor};
|
||||
pub use delta::{Delta, DeltaOp, DeltaValue, SparseDelta, VectorDelta};
|
||||
pub use encoding::{
|
||||
DeltaEncoding, DenseEncoding, EncodingType, HybridEncoding, RunLengthEncoding, SparseEncoding,
|
||||
};
|
||||
pub use error::{DeltaError, Result};
|
||||
pub use stream::{DeltaStream, DeltaStreamConfig, StreamCheckpoint};
|
||||
pub use window::{DeltaWindow, WindowAggregator, WindowConfig, WindowResult, WindowType};
|
||||
|
||||
/// Prelude for convenient imports
///
/// Re-exports the most commonly used types so downstream code can write
/// `use ruvector_delta_core::prelude::*;` instead of importing each module.
pub mod prelude {
    pub use crate::compression::{CompressionCodec, DeltaCompressor};
    pub use crate::delta::{Delta, DeltaOp, DeltaValue, VectorDelta};
    pub use crate::encoding::{DeltaEncoding, DenseEncoding, SparseEncoding};
    pub use crate::error::Result;
    pub use crate::stream::{DeltaStream, StreamCheckpoint};
    pub use crate::window::{DeltaWindow, WindowAggregator};
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A computed delta applied to the old vector reconstructs the new one
    /// (within float tolerance).
    #[test]
    fn test_basic_delta() {
        let old = vec![1.0f32, 2.0, 3.0, 4.0];
        let new = vec![1.0f32, 2.5, 3.0, 4.5];

        let delta = VectorDelta::compute(&old, &new);

        let mut reconstructed = old.clone();
        delta.apply(&mut reconstructed).unwrap();

        // Element-wise comparison with a small epsilon instead of exact
        // equality, since the delta round-trips through f32 arithmetic.
        for (a, b) in reconstructed.iter().zip(new.iter()) {
            assert!((a - b).abs() < 1e-6);
        }
    }

    /// Composing delta(v1→v2) with delta(v2→v3) is equivalent to delta(v1→v3).
    #[test]
    fn test_delta_composition() {
        let v1 = vec![1.0f32, 2.0, 3.0];
        let v2 = vec![1.5f32, 2.0, 3.5];
        let v3 = vec![2.0f32, 2.5, 4.0];

        let delta1 = VectorDelta::compute(&v1, &v2);
        let delta2 = VectorDelta::compute(&v2, &v3);

        let composed = delta1.compose(delta2);

        let mut result = v1.clone();
        composed.apply(&mut result).unwrap();

        for (a, b) in result.iter().zip(v3.iter()) {
            assert!((a - b).abs() < 1e-6);
        }
    }

    /// The inverse of a delta undoes it: applying inverse(old→new) to `new`
    /// recovers `old`.
    #[test]
    fn test_delta_inverse() {
        let old = vec![1.0f32, 2.0, 3.0];
        let new = vec![1.5f32, 2.5, 3.5];

        let delta = VectorDelta::compute(&old, &new);
        let inverse = delta.inverse();

        let mut result = new.clone();
        inverse.apply(&mut result).unwrap();

        for (a, b) in result.iter().zip(old.iter()) {
            assert!((a - b).abs() < 1e-6);
        }
    }

    /// A delta between identical vectors is the identity delta.
    #[test]
    fn test_identity_delta() {
        let v = vec![1.0f32, 2.0, 3.0];
        let delta = VectorDelta::compute(&v, &v);

        assert!(delta.is_identity());
    }
}
|
||||
463
crates/ruvector-delta-core/src/stream.rs
Normal file
463
crates/ruvector-delta-core/src/stream.rs
Normal file
@@ -0,0 +1,463 @@
|
||||
//! Delta stream for event sourcing and temporal queries
|
||||
//!
|
||||
//! Provides ordered sequences of deltas with checkpointing,
|
||||
//! compaction, and replay capabilities.
|
||||
|
||||
use alloc::collections::VecDeque;
|
||||
use alloc::vec::Vec;
|
||||
use core::time::Duration;
|
||||
|
||||
use crate::delta::{Delta, VectorDelta};
|
||||
use crate::error::{DeltaError, Result};
|
||||
|
||||
/// Configuration for delta streams
#[derive(Debug, Clone)]
pub struct DeltaStreamConfig {
    /// Maximum number of deltas before automatic compaction
    pub max_deltas: usize,
    /// Checkpoint interval (in number of deltas)
    // NOTE(review): no code in this file reads this field — presumably
    // checkpointing on this interval is driven by a caller; confirm.
    pub checkpoint_interval: usize,
    /// Maximum memory usage before eviction
    pub max_memory_bytes: usize,
    /// Enable automatic compaction
    pub auto_compact: bool,
}

impl Default for DeltaStreamConfig {
    /// Defaults: 1000 deltas, checkpoint every 100, 64 MB cap, auto-compact on.
    fn default() -> Self {
        Self {
            max_deltas: 1000,
            checkpoint_interval: 100,
            max_memory_bytes: 64 * 1024 * 1024, // 64 MB
            auto_compact: true,
        }
    }
}

/// A checkpoint in the delta stream
///
/// Stores a full materialized base value so replay can start here instead of
/// from the beginning of the stream.
#[derive(Debug, Clone)]
pub struct StreamCheckpoint<T> {
    /// The base value at this checkpoint
    pub value: T,
    /// Sequence number of this checkpoint
    pub sequence: u64,
    /// Timestamp when created (nanoseconds since epoch)
    pub timestamp_ns: u64,
}

/// Entry in the delta stream
#[derive(Debug, Clone)]
struct StreamEntry<D: Clone> {
    /// The delta
    delta: D,
    /// Sequence number (monotonically increasing, starts at 1)
    sequence: u64,
    /// Timestamp (nanoseconds)
    timestamp_ns: u64,
}

/// A stream of deltas with event sourcing capabilities
#[derive(Debug, Clone)]
pub struct DeltaStream<D: Delta>
where
    D: Clone,
    D::Base: Clone,
{
    /// Configuration
    config: DeltaStreamConfig,
    /// Ordered deltas (oldest at the front)
    deltas: VecDeque<StreamEntry<D>>,
    /// Checkpoints (in creation order)
    checkpoints: Vec<StreamCheckpoint<D::Base>>,
    /// Current sequence number (last one assigned)
    current_sequence: u64,
    /// Memory usage estimate (sum of `byte_size()` over stored deltas)
    memory_usage: usize,
}
|
||||
|
||||
impl<D: Delta + Clone> DeltaStream<D>
|
||||
where
|
||||
D::Base: Clone,
|
||||
{
|
||||
    /// Create a new delta stream with default configuration
    pub fn new() -> Self {
        Self::with_config(DeltaStreamConfig::default())
    }

    /// Create with custom configuration
    pub fn with_config(config: DeltaStreamConfig) -> Self {
        Self {
            config,
            deltas: VecDeque::new(),
            checkpoints: Vec::new(),
            current_sequence: 0,
            memory_usage: 0,
        }
    }

    /// Get current configuration
    pub fn config(&self) -> &DeltaStreamConfig {
        &self.config
    }

    /// Get the current sequence number
    ///
    /// This is the sequence of the most recently pushed delta (0 if none).
    pub fn sequence(&self) -> u64 {
        self.current_sequence
    }

    /// Get the number of deltas in the stream
    pub fn len(&self) -> usize {
        self.deltas.len()
    }

    /// Check if the stream is empty
    pub fn is_empty(&self) -> bool {
        self.deltas.is_empty()
    }

    /// Get the number of checkpoints
    pub fn checkpoint_count(&self) -> usize {
        self.checkpoints.len()
    }

    /// Push a new delta to the stream
    ///
    /// The delta is stamped with the current wall-clock time (0 under no_std).
    pub fn push(&mut self, delta: D) {
        self.push_with_timestamp(delta, Self::current_timestamp_ns());
    }

    /// Push a delta with a specific timestamp
    ///
    /// Sequence numbers are assigned pre-incremented, so the first delta
    /// pushed gets sequence 1. May trigger automatic compaction.
    pub fn push_with_timestamp(&mut self, delta: D, timestamp_ns: u64) {
        self.current_sequence += 1;

        let entry = StreamEntry {
            delta,
            sequence: self.current_sequence,
            timestamp_ns,
        };

        self.memory_usage += entry.delta.byte_size();
        self.deltas.push_back(entry);

        // Check if compaction is needed
        if self.config.auto_compact && self.needs_compaction() {
            // Compaction failure is intentionally ignored here; push itself
            // cannot fail.
            let _ = self.compact();
        }
    }

    /// Create a checkpoint at the current position
    ///
    /// The caller supplies the materialized base value; this method does not
    /// verify that `value` matches the replayed state at this sequence.
    pub fn create_checkpoint(&mut self, value: D::Base) {
        let checkpoint = StreamCheckpoint {
            value,
            sequence: self.current_sequence,
            timestamp_ns: Self::current_timestamp_ns(),
        };
        self.checkpoints.push(checkpoint);
    }

    /// Replay from the beginning to reconstruct the current state
    ///
    /// Applies every stored delta, in order, to `initial`.
    pub fn replay(&self, initial: D::Base) -> core::result::Result<D::Base, D::Error> {
        let mut current = initial;
        for entry in &self.deltas {
            entry.delta.apply(&mut current)?;
        }
        Ok(current)
    }

    /// Replay from a specific checkpoint
    ///
    /// Returns `None` if the checkpoint index is out of bounds, otherwise
    /// returns the result of replaying deltas from that checkpoint.
    pub fn replay_from_checkpoint(
        &self,
        checkpoint_idx: usize,
    ) -> Option<core::result::Result<D::Base, D::Error>> {
        if checkpoint_idx >= self.checkpoints.len() {
            return None;
        }

        let checkpoint = &self.checkpoints[checkpoint_idx];
        let mut current = checkpoint.value.clone();

        // Find deltas after this checkpoint (linear scan; deltas at or
        // before the checkpoint are already folded into `value`).
        for entry in &self.deltas {
            if entry.sequence > checkpoint.sequence {
                if let Err(e) = entry.delta.apply(&mut current) {
                    return Some(Err(e));
                }
            }
        }

        Some(Ok(current))
    }

    /// Replay to a specific sequence number
    ///
    /// Applies deltas with sequence <= `target_sequence`; relies on entries
    /// being stored in ascending sequence order to stop early.
    pub fn replay_to_sequence(
        &self,
        initial: D::Base,
        target_sequence: u64,
    ) -> core::result::Result<D::Base, D::Error> {
        let mut current = initial;

        for entry in &self.deltas {
            if entry.sequence > target_sequence {
                break;
            }
            entry.delta.apply(&mut current)?;
        }

        Ok(current)
    }

    /// Get deltas in a sequence range (inclusive on both ends)
    pub fn get_range(&self, start: u64, end: u64) -> Vec<&D> {
        self.deltas
            .iter()
            .filter(|e| e.sequence >= start && e.sequence <= end)
            .map(|e| &e.delta)
            .collect()
    }

    /// Get deltas in a time range (inclusive on both ends)
    pub fn get_time_range(&self, start_ns: u64, end_ns: u64) -> Vec<&D> {
        self.deltas
            .iter()
            .filter(|e| e.timestamp_ns >= start_ns && e.timestamp_ns <= end_ns)
            .map(|e| &e.delta)
            .collect()
    }

    /// Check if compaction is needed
    ///
    /// True when either the delta count or the estimated memory footprint
    /// exceeds its configured limit.
    pub fn needs_compaction(&self) -> bool {
        self.deltas.len() > self.config.max_deltas
            || self.memory_usage > self.config.max_memory_bytes
    }
|
||||
|
||||
/// Compact the stream by composing consecutive deltas
|
||||
pub fn compact(&mut self) -> Result<usize> {
|
||||
if self.deltas.len() < 2 {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Find the latest checkpoint sequence
|
||||
let checkpoint_sequence = self.checkpoints.last().map(|c| c.sequence).unwrap_or(0);
|
||||
|
||||
// Only compact deltas after the latest checkpoint
|
||||
let mut compacted = 0;
|
||||
let mut new_deltas: VecDeque<StreamEntry<D>> = VecDeque::new();
|
||||
let mut pending: Option<StreamEntry<D>> = None;
|
||||
|
||||
for entry in self.deltas.drain(..) {
|
||||
if entry.sequence <= checkpoint_sequence {
|
||||
// Keep deltas at or before checkpoint as-is
|
||||
if let Some(p) = pending.take() {
|
||||
new_deltas.push_back(p);
|
||||
}
|
||||
new_deltas.push_back(entry);
|
||||
} else if let Some(p) = pending.take() {
|
||||
// Compose with pending
|
||||
let composed = p.delta.compose(entry.delta.clone());
|
||||
if composed.is_identity() {
|
||||
// They cancel out
|
||||
compacted += 2;
|
||||
} else {
|
||||
pending = Some(StreamEntry {
|
||||
delta: composed,
|
||||
sequence: entry.sequence,
|
||||
timestamp_ns: entry.timestamp_ns,
|
||||
});
|
||||
compacted += 1;
|
||||
}
|
||||
} else {
|
||||
pending = Some(entry);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(p) = pending {
|
||||
new_deltas.push_back(p);
|
||||
}
|
||||
|
||||
let old_len = self.deltas.len();
|
||||
self.deltas = new_deltas;
|
||||
|
||||
// Recalculate memory usage
|
||||
self.memory_usage = self.deltas.iter().map(|e| e.delta.byte_size()).sum();
|
||||
|
||||
Ok(old_len.saturating_sub(self.deltas.len()))
|
||||
}
|
||||
|
||||
    /// Trim deltas before a sequence number
    ///
    /// Drops deltas and checkpoints with sequence < `sequence` and keeps the
    /// memory-usage estimate in sync.
    pub fn trim_before(&mut self, sequence: u64) {
        while let Some(front) = self.deltas.front() {
            if front.sequence < sequence {
                if let Some(entry) = self.deltas.pop_front() {
                    // saturating_sub guards against the estimate drifting
                    // below zero.
                    self.memory_usage = self.memory_usage.saturating_sub(entry.delta.byte_size());
                }
            } else {
                // Entries are in ascending sequence order, so stop at the
                // first one we keep.
                break;
            }
        }

        // Also trim old checkpoints
        self.checkpoints.retain(|c| c.sequence >= sequence);
    }

    /// Clear all deltas and checkpoints
    ///
    /// Note: `current_sequence` is deliberately NOT reset, so sequence
    /// numbers keep increasing across a clear.
    pub fn clear(&mut self) {
        self.deltas.clear();
        self.checkpoints.clear();
        self.memory_usage = 0;
    }

    /// Get current timestamp in nanoseconds
    ///
    /// Under `std`: nanoseconds since the Unix epoch (0 if the clock is
    /// before the epoch). Without `std` there is no clock, so always 0.
    fn current_timestamp_ns() -> u64 {
        #[cfg(feature = "std")]
        {
            use std::time::SystemTime;
            SystemTime::now()
                .duration_since(SystemTime::UNIX_EPOCH)
                .map(|d| d.as_nanos() as u64)
                .unwrap_or(0)
        }
        #[cfg(not(feature = "std"))]
        {
            0
        }
    }
|
||||
}
|
||||
|
||||
impl<D: Delta> Default for DeltaStream<D>
|
||||
where
|
||||
D::Base: Clone,
|
||||
{
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
// Implement for VectorDelta specifically
|
||||
impl DeltaStream<VectorDelta> {
|
||||
/// Create a stream optimized for vector deltas
|
||||
pub fn for_vectors(dimensions: usize) -> Self {
|
||||
let estimated_delta_size = dimensions * 4; // Worst case: dense f32
|
||||
let max_deltas = (64 * 1024 * 1024) / estimated_delta_size;
|
||||
|
||||
Self::with_config(DeltaStreamConfig {
|
||||
max_deltas,
|
||||
checkpoint_interval: max_deltas / 10,
|
||||
max_memory_bytes: 64 * 1024 * 1024,
|
||||
auto_compact: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator over stream entries
///
/// Yields `(sequence, &delta)` pairs in stream order.
pub struct DeltaStreamIter<'a, D: Clone> {
    // Borrowed iterator over the stream's internal deque.
    inner: alloc::collections::vec_deque::Iter<'a, StreamEntry<D>>,
}

impl<'a, D: Clone> Iterator for DeltaStreamIter<'a, D> {
    type Item = (u64, &'a D);

    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next().map(|e| (e.sequence, &e.delta))
    }
}

impl<D: Delta + Clone> DeltaStream<D>
where
    D::Base: Clone,
{
    /// Iterate over deltas with their sequence numbers
    pub fn iter(&self) -> DeltaStreamIter<'_, D> {
        DeltaStreamIter {
            inner: self.deltas.iter(),
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::delta::VectorDelta;

    /// Replaying pushed deltas applies them in order on top of the initial
    /// vector.
    #[test]
    fn test_stream_push_replay() {
        let mut stream = DeltaStream::<VectorDelta>::new();

        let initial = vec![1.0f32, 2.0, 3.0];

        let delta1 = VectorDelta::from_dense(vec![0.5, 0.0, 0.5]);
        let delta2 = VectorDelta::from_dense(vec![0.0, 1.0, 0.0]);

        stream.push(delta1);
        stream.push(delta2);

        let result = stream.replay(initial.clone()).unwrap();

        assert!((result[0] - 1.5).abs() < 1e-6);
        assert!((result[1] - 3.0).abs() < 1e-6);
        assert!((result[2] - 3.5).abs() < 1e-6);
    }

    /// Replaying from a checkpoint applies only the deltas pushed after it.
    #[test]
    fn test_stream_checkpoint() {
        let mut stream = DeltaStream::<VectorDelta>::new();

        let initial = vec![0.0f32; 3];
        let delta1 = VectorDelta::from_dense(vec![1.0, 1.0, 1.0]);
        stream.push(delta1);

        let state_at_checkpoint = stream.replay(initial.clone()).unwrap();
        stream.create_checkpoint(state_at_checkpoint);

        let delta2 = VectorDelta::from_dense(vec![2.0, 2.0, 2.0]);
        stream.push(delta2);

        let from_checkpoint = stream.replay_from_checkpoint(0).unwrap().unwrap();

        // 1.0 (checkpointed) + 2.0 (post-checkpoint delta)
        assert!((from_checkpoint[0] - 3.0).abs() < 1e-6);
    }

    /// get_range is inclusive on both ends: [3, 7] yields 5 deltas.
    #[test]
    fn test_stream_sequence_range() {
        let mut stream = DeltaStream::<VectorDelta>::new();

        for i in 0..10 {
            let delta = VectorDelta::from_dense(vec![i as f32; 3]);
            stream.push(delta);
        }

        let range = stream.get_range(3, 7);
        assert_eq!(range.len(), 5);
    }

    /// replay_to_sequence stops after the target sequence (sequences start
    /// at 1, so target 2 applies the first two deltas only).
    #[test]
    fn test_replay_to_sequence() {
        let mut stream = DeltaStream::<VectorDelta>::new();
        let initial = vec![0.0f32; 3];

        stream.push(VectorDelta::from_dense(vec![1.0, 0.0, 0.0]));
        stream.push(VectorDelta::from_dense(vec![0.0, 1.0, 0.0]));
        stream.push(VectorDelta::from_dense(vec![0.0, 0.0, 1.0]));

        let at_seq_2 = stream.replay_to_sequence(initial, 2).unwrap();
        assert!((at_seq_2[0] - 1.0).abs() < 1e-6);
        assert!((at_seq_2[1] - 1.0).abs() < 1e-6);
        assert!((at_seq_2[2] - 0.0).abs() < 1e-6);
    }

    /// trim_before(5) drops sequences 1-4 and keeps 5-10.
    #[test]
    fn test_stream_trim() {
        let mut stream = DeltaStream::<VectorDelta>::new();

        for _ in 0..10 {
            let delta = VectorDelta::from_dense(vec![1.0; 3]);
            stream.push(delta);
        }

        assert_eq!(stream.len(), 10);

        stream.trim_before(5);
        assert_eq!(stream.len(), 6); // Sequences 5-10
    }
}
|
||||
510
crates/ruvector-delta-core/src/window.rs
Normal file
510
crates/ruvector-delta-core/src/window.rs
Normal file
@@ -0,0 +1,510 @@
|
||||
//! Delta window for time-bounded aggregation
|
||||
//!
|
||||
//! Provides sliding and tumbling windows for aggregating deltas
|
||||
//! over time or count-based boundaries.
|
||||
|
||||
use alloc::collections::VecDeque;
|
||||
use alloc::vec::Vec;
|
||||
use core::marker::PhantomData;
|
||||
|
||||
use crate::delta::{Delta, VectorDelta};
|
||||
use crate::error::{DeltaError, Result};
|
||||
|
||||
/// Configuration for delta windows
#[derive(Debug, Clone)]
pub struct WindowConfig {
    /// Window type
    pub window_type: WindowType,
    /// Window size (interpretation depends on type)
    ///
    /// Nanoseconds for time-based windows (Tumbling/Sliding/Session gap),
    /// element count for Count windows.
    pub size: usize,
    /// Slide amount for sliding windows (nanoseconds)
    pub slide: usize,
    /// Maximum items to keep (older entries are evicted)
    pub max_items: usize,
}

impl Default for WindowConfig {
    /// Defaults to a tumbling window of size 100 with a 10k item cap.
    fn default() -> Self {
        Self {
            window_type: WindowType::Tumbling,
            size: 100,
            slide: 1,
            max_items: 10_000,
        }
    }
}

/// Window types
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WindowType {
    /// Tumbling window (non-overlapping)
    Tumbling,
    /// Sliding window (overlapping)
    Sliding,
    /// Session window (gap-based)
    Session,
    /// Count-based window
    Count,
}

/// Entry in the window
#[derive(Debug, Clone)]
struct WindowEntry<D> {
    // The buffered delta.
    delta: D,
    // When it was added (nanoseconds).
    timestamp_ns: u64,
}

/// Aggregated window result
#[derive(Debug, Clone)]
pub struct WindowResult<D: Clone> {
    /// Composed delta for this window
    pub delta: D,
    /// Start timestamp (ns) — timestamp of the first entry in the window
    pub start_ns: u64,
    /// End timestamp (ns) — timestamp of the last entry in the window
    pub end_ns: u64,
    /// Number of deltas in window
    pub count: usize,
}

/// A delta window for time-bounded aggregation
#[derive(Debug)]
pub struct DeltaWindow<D: Delta> {
    // Window behavior (type, size, slide, cap).
    config: WindowConfig,
    // Buffered entries in insertion order.
    entries: VecDeque<WindowEntry<D>>,
    /// For tumbling/sliding: window boundaries
    window_start_ns: u64,
}
|
||||
|
||||
impl<D: Delta + Clone> DeltaWindow<D>
|
||||
where
|
||||
D::Base: Clone,
|
||||
{
|
||||
    /// Create a new delta window
    pub fn new(config: WindowConfig) -> Self {
        Self {
            config,
            entries: VecDeque::new(),
            window_start_ns: 0,
        }
    }

    /// Create a tumbling window of the given size (in nanoseconds)
    // NOTE(review): `size_ns as u64->usize` silently truncates on 32-bit
    // targets for sizes above u32::MAX ns (~4.3 s) — confirm target range.
    pub fn tumbling(size_ns: u64) -> Self {
        Self::new(WindowConfig {
            window_type: WindowType::Tumbling,
            size: size_ns as usize,
            slide: size_ns as usize,
            max_items: 10_000,
        })
    }

    /// Create a sliding window
    ///
    /// `size_ns` is the window width and `slide_ns` the advance per emit.
    pub fn sliding(size_ns: u64, slide_ns: u64) -> Self {
        Self::new(WindowConfig {
            window_type: WindowType::Sliding,
            size: size_ns as usize,
            slide: slide_ns as usize,
            max_items: 10_000,
        })
    }

    /// Create a count-based window
    ///
    /// Emits once `count` deltas have been buffered; keeps at most
    /// `count * 2` buffered.
    pub fn count_based(count: usize) -> Self {
        Self::new(WindowConfig {
            window_type: WindowType::Count,
            size: count,
            slide: count,
            max_items: count * 2,
        })
    }

    /// Add a delta to the window
    ///
    /// Evicts the oldest entries when the `max_items` cap is exceeded.
    pub fn add(&mut self, delta: D, timestamp_ns: u64) {
        // Initialize window start if first entry
        if self.entries.is_empty() {
            self.window_start_ns = timestamp_ns;
        }

        self.entries.push_back(WindowEntry {
            delta,
            timestamp_ns,
        });

        // Enforce max items
        while self.entries.len() > self.config.max_items {
            self.entries.pop_front();
        }
    }
|
||||
|
||||
/// Check if the current window is complete
|
||||
pub fn is_complete(&self, current_ns: u64) -> bool {
|
||||
match self.config.window_type {
|
||||
WindowType::Tumbling | WindowType::Sliding => {
|
||||
current_ns >= self.window_start_ns + self.config.size as u64
|
||||
}
|
||||
WindowType::Count => self.entries.len() >= self.config.size,
|
||||
WindowType::Session => {
|
||||
// Session window closes after a gap
|
||||
if let Some(last) = self.entries.back() {
|
||||
current_ns - last.timestamp_ns > self.config.size as u64
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
    /// Emit the current window and advance
    ///
    /// Returns `None` when there is nothing to emit. The `D: Default` bound
    /// supplies the neutral delta used by `compose_entries` for empty input.
    pub fn emit(&mut self) -> Option<WindowResult<D>>
    where
        D: Default,
    {
        if self.entries.is_empty() {
            return None;
        }

        match self.config.window_type {
            WindowType::Tumbling => self.emit_tumbling(),
            WindowType::Sliding => self.emit_sliding(),
            WindowType::Count => self.emit_count(),
            WindowType::Session => self.emit_session(),
        }
    }

    // Emit all entries before the current window's end, then advance the
    // window by exactly one window size.
    fn emit_tumbling(&mut self) -> Option<WindowResult<D>>
    where
        D: Default,
    {
        let window_end = self.window_start_ns + self.config.size as u64;

        // Collect entries in window (no lower bound: late entries older
        // than window_start are swept into this window too).
        let in_window: Vec<_> = self
            .entries
            .iter()
            .filter(|e| e.timestamp_ns < window_end)
            .cloned()
            .collect();

        // NOTE(review): if every buffered entry is >= window_end (a gap in
        // the data), this returns None WITHOUT advancing window_start_ns,
        // so the window can stall — confirm whether callers re-drive it.
        if in_window.is_empty() {
            return None;
        }

        // Compose all deltas
        let result = self.compose_entries(&in_window);

        // Remove processed entries
        self.entries.retain(|e| e.timestamp_ns >= window_end);

        // Advance window
        self.window_start_ns = window_end;

        Some(result)
    }

    // Emit entries inside [window_start, window_end), then slide the window
    // forward by `slide` ns, keeping entries that overlap the next window.
    fn emit_sliding(&mut self) -> Option<WindowResult<D>>
    where
        D: Default,
    {
        let window_end = self.window_start_ns + self.config.size as u64;

        // Collect entries in window
        let in_window: Vec<_> = self
            .entries
            .iter()
            .filter(|e| e.timestamp_ns >= self.window_start_ns && e.timestamp_ns < window_end)
            .cloned()
            .collect();

        if in_window.is_empty() {
            return None;
        }

        let result = self.compose_entries(&in_window);

        // Slide window
        let new_start = self.window_start_ns + self.config.slide as u64;

        // Remove entries before new window start
        self.entries.retain(|e| e.timestamp_ns >= new_start);

        self.window_start_ns = new_start;

        Some(result)
    }

    // Emit exactly `size` entries once enough have accumulated.
    fn emit_count(&mut self) -> Option<WindowResult<D>>
    where
        D: Default,
    {
        if self.entries.len() < self.config.size {
            return None;
        }

        let window_entries: Vec<_> = self.entries.drain(..self.config.size).collect();

        Some(self.compose_entries(&window_entries))
    }

    // A session emit flushes everything currently buffered.
    fn emit_session(&mut self) -> Option<WindowResult<D>>
    where
        D: Default,
    {
        if self.entries.is_empty() {
            return None;
        }

        let all_entries: Vec<_> = self.entries.drain(..).collect();
        Some(self.compose_entries(&all_entries))
    }

    // Fold a batch of entries into a single WindowResult by left-to-right
    // composition; an empty batch yields the default (neutral) delta.
    fn compose_entries(&self, entries: &[WindowEntry<D>]) -> WindowResult<D>
    where
        D: Default,
    {
        let start_ns = entries.first().map(|e| e.timestamp_ns).unwrap_or(0);
        let end_ns = entries.last().map(|e| e.timestamp_ns).unwrap_or(0);
        let count = entries.len();

        let delta = if entries.is_empty() {
            D::default()
        } else {
            let mut composed = entries[0].delta.clone();
            for entry in entries.iter().skip(1) {
                composed = composed.compose(entry.delta.clone());
            }
            composed
        };

        WindowResult {
            delta,
            start_ns,
            end_ns,
            count,
        }
    }
|
||||
|
||||
    /// Get the number of entries in the window
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// Check if the window is empty
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Clear all entries
    ///
    /// `window_start_ns` is left untouched; `add` re-initializes it on the
    /// next entry because the buffer is empty.
    pub fn clear(&mut self) {
        self.entries.clear();
    }
|
||||
}
|
||||
|
||||
impl Default for VectorDelta {
    /// A zero-dimension delta, used as the neutral value by the window
    /// aggregators when their input is empty.
    fn default() -> Self {
        Self::new(0)
    }
}
|
||||
|
||||
/// Trait for aggregating window results
pub trait WindowAggregator<D: Delta>: Send + Sync {
    /// Aggregate multiple window results into one
    fn aggregate(&self, results: &[WindowResult<D>]) -> WindowResult<D>;

    /// Get aggregation type name (e.g. "sum", "average", "ema")
    fn name(&self) -> &'static str;
}

/// Sum aggregator - composes all deltas
pub struct SumAggregator;
|
||||
|
||||
impl<D: Delta + Clone + Default> WindowAggregator<D> for SumAggregator {
|
||||
fn aggregate(&self, results: &[WindowResult<D>]) -> WindowResult<D> {
|
||||
if results.is_empty() {
|
||||
return WindowResult {
|
||||
delta: D::default(),
|
||||
start_ns: 0,
|
||||
end_ns: 0,
|
||||
count: 0,
|
||||
};
|
||||
}
|
||||
|
||||
let start_ns = results.first().map(|r| r.start_ns).unwrap_or(0);
|
||||
let end_ns = results.last().map(|r| r.end_ns).unwrap_or(0);
|
||||
let count: usize = results.iter().map(|r| r.count).sum();
|
||||
|
||||
let delta = if results.is_empty() {
|
||||
D::default()
|
||||
} else {
|
||||
let mut composed = results[0].delta.clone();
|
||||
for result in results.iter().skip(1) {
|
||||
composed = composed.compose(result.delta.clone());
|
||||
}
|
||||
composed
|
||||
};
|
||||
|
||||
WindowResult {
|
||||
delta,
|
||||
start_ns,
|
||||
end_ns,
|
||||
count,
|
||||
}
|
||||
}
|
||||
|
||||
fn name(&self) -> &'static str {
|
||||
"sum"
|
||||
}
|
||||
}
|
||||
|
||||
/// Average aggregator - scales composed delta by 1/count
pub struct AverageAggregator;

impl WindowAggregator<VectorDelta> for AverageAggregator {
    fn aggregate(&self, results: &[WindowResult<VectorDelta>]) -> WindowResult<VectorDelta> {
        if results.is_empty() {
            return WindowResult {
                delta: VectorDelta::default(),
                start_ns: 0,
                end_ns: 0,
                count: 0,
            };
        }

        // Delegate the composition to the sum aggregator, then scale.
        let sum_result = SumAggregator.aggregate(results);
        // max(1) prevents division by zero when every window was empty.
        let count = sum_result.count.max(1) as f32;

        WindowResult {
            delta: sum_result.delta.scale(1.0 / count),
            start_ns: sum_result.start_ns,
            end_ns: sum_result.end_ns,
            count: sum_result.count,
        }
    }

    fn name(&self) -> &'static str {
        "average"
    }
}

/// Exponential moving average aggregator
pub struct EmaAggregator {
    /// Smoothing factor (0 < alpha <= 1)
    pub alpha: f32,
}

impl EmaAggregator {
    /// Create with smoothing factor
    // NOTE(review): clamp admits alpha == 0.0 even though the field doc
    // says 0 < alpha — with alpha 0 every new window is ignored; confirm
    // whether that is intended.
    pub fn new(alpha: f32) -> Self {
        Self {
            alpha: alpha.clamp(0.0, 1.0),
        }
    }
}

impl WindowAggregator<VectorDelta> for EmaAggregator {
    fn aggregate(&self, results: &[WindowResult<VectorDelta>]) -> WindowResult<VectorDelta> {
        if results.is_empty() {
            return WindowResult {
                delta: VectorDelta::default(),
                start_ns: 0,
                end_ns: 0,
                count: 0,
            };
        }

        let start_ns = results.first().map(|r| r.start_ns).unwrap_or(0);
        let end_ns = results.last().map(|r| r.end_ns).unwrap_or(0);
        let count: usize = results.iter().map(|r| r.count).sum();

        // EMA: new_ema = alpha * current + (1 - alpha) * old_ema
        // (seeded with the first window's delta, unscaled)
        let mut ema = results[0].delta.clone();
        for result in results.iter().skip(1) {
            let scaled_current = result.delta.scale(self.alpha);
            let scaled_ema = ema.scale(1.0 - self.alpha);
            ema = scaled_current.compose(scaled_ema);
        }

        WindowResult {
            delta: ema,
            start_ns,
            end_ns,
            count,
        }
    }

    fn name(&self) -> &'static str {
        "ema"
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A tumbling window completes exactly at window_start + size and emits
    /// everything buffered before that boundary.
    #[test]
    fn test_tumbling_window() {
        let mut window = DeltaWindow::<VectorDelta>::tumbling(1_000_000); // 1ms

        // Add deltas at different times
        window.add(VectorDelta::from_dense(vec![1.0, 0.0, 0.0]), 0);
        window.add(VectorDelta::from_dense(vec![0.0, 1.0, 0.0]), 500_000);

        // Window not complete yet
        assert!(!window.is_complete(900_000));

        // Window complete
        assert!(window.is_complete(1_000_000));

        // Emit
        let result = window.emit().unwrap();
        assert_eq!(result.count, 2);
    }

    /// A count-based window emits nothing until it holds `count` entries.
    #[test]
    fn test_count_window() {
        let mut window = DeltaWindow::<VectorDelta>::count_based(3);

        window.add(VectorDelta::from_dense(vec![1.0]), 0);
        window.add(VectorDelta::from_dense(vec![1.0]), 1);

        assert!(window.emit().is_none()); // Not enough

        window.add(VectorDelta::from_dense(vec![1.0]), 2);

        let result = window.emit().unwrap();
        assert_eq!(result.count, 3);
    }

    /// A sliding window uses the same completion boundary as tumbling.
    #[test]
    fn test_sliding_window() {
        let mut window = DeltaWindow::<VectorDelta>::sliding(1_000_000, 500_000);

        window.add(VectorDelta::from_dense(vec![1.0]), 0);
        window.add(VectorDelta::from_dense(vec![2.0]), 250_000);
        window.add(VectorDelta::from_dense(vec![3.0]), 750_000);

        // Complete after 1ms
        assert!(window.is_complete(1_000_000));
    }

    /// SumAggregator sums per-window counts across the aggregated results.
    #[test]
    fn test_sum_aggregator() {
        let results = vec![
            WindowResult {
                delta: VectorDelta::from_dense(vec![1.0, 0.0]),
                start_ns: 0,
                end_ns: 100,
                count: 1,
            },
            WindowResult {
                delta: VectorDelta::from_dense(vec![0.0, 1.0]),
                start_ns: 100,
                end_ns: 200,
                count: 1,
            },
        ];

        let aggregated = SumAggregator.aggregate(&results);
        assert_eq!(aggregated.count, 2);
    }
}
|
||||
Reference in New Issue
Block a user