Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,680 @@
//! Delta compression strategies
//!
//! Provides specialized compression for delta data, leveraging
//! the statistical properties of change data.
use alloc::vec::Vec;
use crate::delta::VectorDelta;
use crate::encoding::{DeltaEncoding, HybridEncoding};
use crate::error::{DeltaError, Result};
/// Compression level settings
///
/// Selects the speed/ratio trade-off for the underlying codec. In this
/// module only the Zstd path maps levels onto codec parameters (see
/// `DeltaCompressor::compress_zstd`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionLevel {
    /// No compression
    None,
    /// Fast compression (lower ratio)
    Fast,
    /// Balanced compression
    Balanced,
    /// Best compression (slower)
    Best,
}
impl Default for CompressionLevel {
    /// Defaults to [`CompressionLevel::Balanced`].
    fn default() -> Self {
        Self::Balanced
    }
}
/// Compression codec types
///
/// The numeric discriminants are written into the serialized header
/// (see `CompressedHeader`), so existing values must never be renumbered.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum CompressionCodec {
    /// No compression
    None = 0,
    /// LZ4 compression
    Lz4 = 1,
    /// Zstandard compression
    Zstd = 2,
    /// Delta-of-delta encoding
    DeltaOfDelta = 3,
    /// Quantization-based compression
    Quantized = 4,
}
impl TryFrom<u8> for CompressionCodec {
    type Error = DeltaError;
    /// Map a serialized discriminant back to its codec variant.
    fn try_from(value: u8) -> Result<Self> {
        let codec = match value {
            0 => Self::None,
            1 => Self::Lz4,
            2 => Self::Zstd,
            3 => Self::DeltaOfDelta,
            4 => Self::Quantized,
            other => {
                return Err(DeltaError::InvalidEncoding(alloc::format!(
                    "Unknown codec: {}",
                    other
                )))
            }
        };
        Ok(codec)
    }
}
/// Delta compressor configuration
#[derive(Debug, Clone)]
pub struct CompressorConfig {
    /// Compression codec
    pub codec: CompressionCodec,
    /// Compression level
    pub level: CompressionLevel,
    /// Minimum size to compress (bytes)
    ///
    /// Encoded payloads smaller than this are stored uncompressed.
    pub min_size: usize,
    /// Enable checksums
    ///
    /// When set, an FNV-1a checksum of the encoded (pre-compression)
    /// bytes is stored in the header and verified on decompress.
    pub enable_checksum: bool,
}
impl Default for CompressorConfig {
    /// Defaults: LZ4 codec, balanced level, 64-byte minimum, checksums on.
    fn default() -> Self {
        Self {
            codec: CompressionCodec::Lz4,
            level: CompressionLevel::Balanced,
            min_size: 64,
            enable_checksum: true,
        }
    }
}
/// Compressed data header
///
/// Serialized little-endian ahead of the payload; see
/// `CompressedHeader::to_bytes` for the exact byte layout.
#[derive(Debug, Clone)]
struct CompressedHeader {
    /// Compression codec used
    codec: CompressionCodec,
    /// Original uncompressed size
    original_size: u32,
    /// Compressed size
    compressed_size: u32,
    /// Optional checksum (FNV-1a) of the uncompressed encoded bytes
    checksum: Option<u64>,
}
impl CompressedHeader {
    /// Magic number identifying a compressed delta payload ("DELT" in ASCII).
    const MAGIC: u32 = 0x44454C54; // "DELT"
    /// Current header format version.
    const VERSION: u8 = 1;
    /// Serialized header size without the optional checksum field.
    const BASE_SIZE: usize = 15;
    /// Serialized header size including the 8-byte checksum.
    const WITH_CHECKSUM_SIZE: usize = 23;
    /// Serialize the header to its little-endian wire format.
    ///
    /// Layout: magic (4) | version (1) | codec (1) | checksum flag (1) |
    /// original size (4) | compressed size (4) | checksum (8, optional).
    fn to_bytes(&self) -> Vec<u8> {
        // Reserve the exact serialized size (the previous literal 21 was
        // wrong: the header is 15 bytes, or 23 with a checksum).
        let capacity = if self.checksum.is_some() {
            Self::WITH_CHECKSUM_SIZE
        } else {
            Self::BASE_SIZE
        };
        let mut bytes = Vec::with_capacity(capacity);
        // Magic (4 bytes)
        bytes.extend_from_slice(&Self::MAGIC.to_le_bytes());
        // Version (1 byte)
        bytes.push(Self::VERSION);
        // Codec (1 byte)
        bytes.push(self.codec as u8);
        // Has checksum flag (1 byte)
        bytes.push(if self.checksum.is_some() { 1 } else { 0 });
        // Original size (4 bytes)
        bytes.extend_from_slice(&self.original_size.to_le_bytes());
        // Compressed size (4 bytes)
        bytes.extend_from_slice(&self.compressed_size.to_le_bytes());
        // Checksum (8 bytes if present)
        if let Some(cs) = self.checksum {
            bytes.extend_from_slice(&cs.to_le_bytes());
        }
        bytes
    }
    /// Parse a header from the front of `bytes`.
    ///
    /// Returns the parsed header and the number of header bytes consumed
    /// (so the caller knows where the payload begins).
    ///
    /// # Errors
    /// Returns `DecompressionError` for truncated input or a bad magic
    /// number, and `VersionMismatch` for an unsupported format version.
    fn from_bytes(bytes: &[u8]) -> Result<(Self, usize)> {
        if bytes.len() < Self::BASE_SIZE {
            return Err(DeltaError::DecompressionError("Header too small".into()));
        }
        let magic = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
        if magic != Self::MAGIC {
            return Err(DeltaError::DecompressionError(
                "Invalid magic number".into(),
            ));
        }
        let version = bytes[4];
        if version != Self::VERSION {
            return Err(DeltaError::VersionMismatch {
                expected: Self::VERSION as u32,
                actual: version as u32,
            });
        }
        let codec = CompressionCodec::try_from(bytes[5])?;
        let has_checksum = bytes[6] != 0;
        let original_size = u32::from_le_bytes([bytes[7], bytes[8], bytes[9], bytes[10]]);
        let compressed_size = u32::from_le_bytes([bytes[11], bytes[12], bytes[13], bytes[14]]);
        let (checksum, header_size) = if has_checksum {
            if bytes.len() < Self::WITH_CHECKSUM_SIZE {
                return Err(DeltaError::DecompressionError(
                    "Header too small for checksum".into(),
                ));
            }
            let cs = u64::from_le_bytes([
                bytes[15], bytes[16], bytes[17], bytes[18], bytes[19], bytes[20], bytes[21],
                bytes[22],
            ]);
            (Some(cs), Self::WITH_CHECKSUM_SIZE)
        } else {
            (None, Self::BASE_SIZE)
        };
        Ok((
            Self {
                codec,
                original_size,
                compressed_size,
                checksum,
            },
            header_size,
        ))
    }
}
/// Delta compressor for efficient storage
///
/// Serializes deltas with a `HybridEncoding` and then applies an optional
/// byte-level compression stage selected by `CompressorConfig`.
pub struct DeltaCompressor {
    /// Codec, level, minimum-size, and checksum settings.
    config: CompressorConfig,
    /// Encoding used to serialize deltas before/after compression.
    encoding: HybridEncoding,
}
impl DeltaCompressor {
    /// Create a new compressor with default configuration
    pub fn new() -> Self {
        Self {
            config: CompressorConfig::default(),
            encoding: HybridEncoding::default(),
        }
    }
    /// Create with custom configuration
    pub fn with_config(config: CompressorConfig) -> Self {
        Self {
            config,
            encoding: HybridEncoding::default(),
        }
    }
    /// Compress a delta
    ///
    /// The delta is first serialized via the hybrid encoding. If the
    /// encoded payload is smaller than `config.min_size`, or the codec is
    /// `None`, it is stored uncompressed; otherwise the configured codec
    /// is applied. The output is always prefixed with a
    /// `CompressedHeader`; the optional checksum covers the
    /// *uncompressed* encoded bytes.
    pub fn compress(&self, delta: &VectorDelta) -> Result<Vec<u8>> {
        // First encode the delta
        let encoded = self.encoding.encode(delta)?;
        // Check if compression is worthwhile
        if encoded.len() < self.config.min_size || self.config.codec == CompressionCodec::None {
            // Return uncompressed with header
            let header = CompressedHeader {
                codec: CompressionCodec::None,
                original_size: encoded.len() as u32,
                compressed_size: encoded.len() as u32,
                checksum: if self.config.enable_checksum {
                    Some(fnv1a_hash(&encoded))
                } else {
                    None
                },
            };
            let mut result = header.to_bytes();
            result.extend_from_slice(&encoded);
            return Ok(result);
        }
        // Compress based on codec
        let compressed = match self.config.codec {
            // Unreachable: the `None` codec takes the early return above.
            // Kept so the match stays exhaustive.
            CompressionCodec::None => encoded.clone(),
            #[cfg(feature = "compression")]
            CompressionCodec::Lz4 => self.compress_lz4(&encoded)?,
            #[cfg(feature = "compression")]
            CompressionCodec::Zstd => self.compress_zstd(&encoded)?,
            CompressionCodec::DeltaOfDelta => self.compress_delta_of_delta(&encoded)?,
            CompressionCodec::Quantized => self.compress_quantized(&encoded)?,
            #[cfg(not(feature = "compression"))]
            CompressionCodec::Lz4 | CompressionCodec::Zstd => {
                return Err(DeltaError::CompressionError(
                    "Compression feature not enabled".into(),
                ));
            }
        };
        // Build result
        let header = CompressedHeader {
            codec: self.config.codec,
            original_size: encoded.len() as u32,
            compressed_size: compressed.len() as u32,
            checksum: if self.config.enable_checksum {
                Some(fnv1a_hash(&encoded))
            } else {
                None
            },
        };
        let mut result = header.to_bytes();
        result.extend_from_slice(&compressed);
        Ok(result)
    }
    /// Decompress bytes to a delta
    ///
    /// # Errors
    /// Returns `DecompressionError` for truncated or corrupt input, and
    /// `ChecksumMismatch` if the stored checksum does not match the
    /// decompressed payload.
    pub fn decompress(&self, bytes: &[u8]) -> Result<VectorDelta> {
        let (header, header_size) = CompressedHeader::from_bytes(bytes)?;
        let compressed_data = &bytes[header_size..];
        if compressed_data.len() < header.compressed_size as usize {
            return Err(DeltaError::DecompressionError(alloc::format!(
                "Insufficient data: expected {}, got {}",
                header.compressed_size,
                compressed_data.len()
            )));
        }
        let compressed = &compressed_data[..header.compressed_size as usize];
        // Decompress based on codec
        let decompressed = match header.codec {
            CompressionCodec::None => compressed.to_vec(),
            #[cfg(feature = "compression")]
            CompressionCodec::Lz4 => {
                self.decompress_lz4(compressed, header.original_size as usize)?
            }
            #[cfg(feature = "compression")]
            CompressionCodec::Zstd => self.decompress_zstd(compressed)?,
            CompressionCodec::DeltaOfDelta => {
                self.decompress_delta_of_delta(compressed, header.original_size as usize)?
            }
            CompressionCodec::Quantized => {
                self.decompress_quantized(compressed, header.original_size as usize)?
            }
            #[cfg(not(feature = "compression"))]
            CompressionCodec::Lz4 | CompressionCodec::Zstd => {
                return Err(DeltaError::DecompressionError(
                    "Compression feature not enabled".into(),
                ));
            }
        };
        // Verify checksum (computed over the uncompressed encoded bytes)
        if let Some(expected_checksum) = header.checksum {
            let actual_checksum = fnv1a_hash(&decompressed);
            if expected_checksum != actual_checksum {
                return Err(DeltaError::ChecksumMismatch {
                    expected: expected_checksum,
                    actual: actual_checksum,
                });
            }
        }
        // Decode
        self.encoding.decode(&decompressed)
    }
    /// Get compression ratio for a compressed buffer
    /// (original size / compressed size; > 1.0 means the payload shrank).
    pub fn compression_ratio(&self, compressed: &[u8]) -> Result<f64> {
        let (header, _) = CompressedHeader::from_bytes(compressed)?;
        if header.compressed_size == 0 {
            return Ok(1.0);
        }
        Ok(header.original_size as f64 / header.compressed_size as f64)
    }
    #[cfg(feature = "compression")]
    fn compress_lz4(&self, data: &[u8]) -> Result<Vec<u8>> {
        // FIX: `lz4_flex::compress_prepend_size` is infallible and returns
        // `Vec<u8>` directly — the previous `.map_err(...)` on its result
        // did not compile when the "compression" feature was enabled.
        Ok(lz4_flex::compress_prepend_size(data))
    }
    #[cfg(feature = "compression")]
    fn decompress_lz4(&self, data: &[u8], _original_size: usize) -> Result<Vec<u8>> {
        // The decompressed size is read from the prepended prefix, so
        // `_original_size` is not needed here.
        lz4_flex::decompress_size_prepended(data)
            .map_err(|e| DeltaError::DecompressionError(alloc::format!("LZ4 error: {}", e)))
    }
    #[cfg(feature = "compression")]
    fn compress_zstd(&self, data: &[u8]) -> Result<Vec<u8>> {
        // Map the abstract level onto zstd's numeric levels.
        // NOTE(review): zstd interprets level 0 as "use the default level",
        // not "no compression" — confirm that is the intent for
        // `CompressionLevel::None`.
        let level = match self.config.level {
            CompressionLevel::None => 0,
            CompressionLevel::Fast => 1,
            CompressionLevel::Balanced => 3,
            CompressionLevel::Best => 19,
        };
        zstd::encode_all(data, level)
            .map_err(|e| DeltaError::CompressionError(alloc::format!("Zstd error: {}", e)))
    }
    #[cfg(feature = "compression")]
    fn decompress_zstd(&self, data: &[u8]) -> Result<Vec<u8>> {
        zstd::decode_all(data)
            .map_err(|e| DeltaError::DecompressionError(alloc::format!("Zstd error: {}", e)))
    }
    /// Delta-of-delta encoding for sequential data
    ///
    /// Interprets `data` as little-endian f32s: the first value is stored
    /// verbatim, the second as a delta, and the rest as deltas-of-deltas.
    /// Output length equals input length; any trailing bytes that do not
    /// form a full f32 are carried through unchanged.
    fn compress_delta_of_delta(&self, data: &[u8]) -> Result<Vec<u8>> {
        if data.len() < 4 {
            return Ok(data.to_vec());
        }
        // Treat as f32 array and compute delta-of-delta
        let float_count = data.len() / 4;
        // FIX: the previous version dropped `data.len() % 4` trailing
        // bytes, which the decompressor could never restore.
        let tail = &data[float_count * 4..];
        let mut result = Vec::with_capacity(data.len());
        // First value stored as-is
        result.extend_from_slice(&data[..4]);
        if float_count < 2 {
            result.extend_from_slice(tail);
            return Ok(result);
        }
        // Second value: store delta
        let v0 = f32::from_le_bytes([data[0], data[1], data[2], data[3]]);
        let v1 = f32::from_le_bytes([data[4], data[5], data[6], data[7]]);
        let delta0 = v1 - v0;
        result.extend_from_slice(&delta0.to_le_bytes());
        // Remaining: store delta-of-delta
        let mut prev_delta = delta0;
        for i in 2..float_count {
            let offset = i * 4;
            let curr = f32::from_le_bytes([
                data[offset],
                data[offset + 1],
                data[offset + 2],
                data[offset + 3],
            ]);
            let prev_offset = (i - 1) * 4;
            let prev = f32::from_le_bytes([
                data[prev_offset],
                data[prev_offset + 1],
                data[prev_offset + 2],
                data[prev_offset + 3],
            ]);
            let curr_delta = curr - prev;
            let delta_of_delta = curr_delta - prev_delta;
            result.extend_from_slice(&delta_of_delta.to_le_bytes());
            prev_delta = curr_delta;
        }
        // Carry non-f32 trailing bytes through unchanged.
        result.extend_from_slice(tail);
        Ok(result)
    }
    /// Inverse of `compress_delta_of_delta` (prefix sums of the deltas).
    fn decompress_delta_of_delta(&self, data: &[u8], original_size: usize) -> Result<Vec<u8>> {
        if data.len() < 4 {
            return Ok(data.to_vec());
        }
        let float_count = original_size / 4;
        let mut result = Vec::with_capacity(original_size);
        // First value
        result.extend_from_slice(&data[..4]);
        let mut prev = f32::from_le_bytes([data[0], data[1], data[2], data[3]]);
        if float_count < 2 || data.len() < 8 {
            // FIX: restore trailing bytes stored verbatim by the compressor.
            if data.len() > float_count * 4 {
                result.extend_from_slice(&data[float_count * 4..]);
            }
            return Ok(result);
        }
        // Second value from delta
        let delta0 = f32::from_le_bytes([data[4], data[5], data[6], data[7]]);
        let v1 = prev + delta0;
        result.extend_from_slice(&v1.to_le_bytes());
        // Remaining from delta-of-delta
        let mut prev_delta = delta0;
        prev = v1;
        for i in 2..float_count {
            let offset = i * 4;
            if offset + 4 > data.len() {
                break;
            }
            let dod = f32::from_le_bytes([
                data[offset],
                data[offset + 1],
                data[offset + 2],
                data[offset + 3],
            ]);
            let curr_delta = prev_delta + dod;
            let curr = prev + curr_delta;
            result.extend_from_slice(&curr.to_le_bytes());
            prev_delta = curr_delta;
            prev = curr;
        }
        // FIX: restore trailing bytes stored verbatim by the compressor.
        if data.len() > float_count * 4 {
            result.extend_from_slice(&data[float_count * 4..]);
        }
        Ok(result)
    }
    /// Quantization-based compression (reduce f32 to f16)
    ///
    /// Trailing bytes that do not form a full f32 are carried through
    /// unchanged so the decompressor can restore the exact byte length.
    fn compress_quantized(&self, data: &[u8]) -> Result<Vec<u8>> {
        if data.len() < 4 {
            return Ok(data.to_vec());
        }
        let float_count = data.len() / 4;
        let mut result = Vec::with_capacity(float_count * 2 + data.len() % 4);
        for i in 0..float_count {
            let offset = i * 4;
            let value = f32::from_le_bytes([
                data[offset],
                data[offset + 1],
                data[offset + 2],
                data[offset + 3],
            ]);
            // Convert to f16 representation (simplified)
            let f16_bits = f32_to_f16_bits(value);
            result.extend_from_slice(&f16_bits.to_le_bytes());
        }
        // FIX: the previous version dropped `data.len() % 4` trailing bytes.
        result.extend_from_slice(&data[float_count * 4..]);
        Ok(result)
    }
    /// Expand quantized f16 values back to f32 bytes (lossy round-trip).
    fn decompress_quantized(&self, data: &[u8], original_size: usize) -> Result<Vec<u8>> {
        let float_count = original_size / 4;
        let mut result = Vec::with_capacity(original_size);
        for i in 0..float_count {
            let offset = i * 2;
            if offset + 2 > data.len() {
                break;
            }
            let f16_bits = u16::from_le_bytes([data[offset], data[offset + 1]]);
            let value = f16_bits_to_f32(f16_bits);
            result.extend_from_slice(&value.to_le_bytes());
        }
        // FIX: restore trailing bytes stored verbatim by the compressor
        // (this also makes sub-4-byte payloads round-trip instead of
        // decompressing to an empty buffer).
        if data.len() > float_count * 2 {
            result.extend_from_slice(&data[float_count * 2..]);
        }
        Ok(result)
    }
}
impl Default for DeltaCompressor {
fn default() -> Self {
Self::new()
}
}
/// FNV-1a hash for checksums
///
/// 64-bit Fowler–Noll–Vo, variant 1a: XOR each byte into the state, then
/// multiply by the FNV prime with wrapping arithmetic.
fn fnv1a_hash(data: &[u8]) -> u64 {
    const FNV_OFFSET: u64 = 0xcbf29ce484222325;
    const FNV_PRIME: u64 = 0x100000001b3;
    data.iter()
        .fold(FNV_OFFSET, |acc, &byte| {
            (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
        })
}
/// Convert f32 to f16 bit representation
///
/// Uses truncation (round-toward-zero) for the mantissa. Values too large
/// for f16 become signed infinity; values below the f16 normal range are
/// encoded as f16 subnormals, and values below 2^-24 become a signed
/// zero. NaNs collapse to a NaN with a minimal payload.
fn f32_to_f16_bits(value: f32) -> u16 {
    let bits = value.to_bits();
    let sign = (bits >> 31) as u16;
    let exp = ((bits >> 23) & 0xff) as i32;
    let frac = bits & 0x7fffff;
    if exp == 0xff {
        // Inf or NaN: preserve sign, set all f16 exponent bits.
        return (sign << 15) | 0x7c00 | ((frac != 0) as u16);
    }
    // Rebias the exponent from f32 (bias 127) to f16 (bias 15).
    let new_exp = exp - 127 + 15;
    if new_exp >= 31 {
        // Overflow to signed infinity
        (sign << 15) | 0x7c00
    } else if new_exp > 0 {
        // Normal range: truncate the 23-bit mantissa to 10 bits.
        let new_frac = (frac >> 13) as u16;
        (sign << 15) | ((new_exp as u16) << 10) | new_frac
    } else if new_exp < -10 {
        // Too small even for an f16 subnormal: signed zero.
        // FIX: the previous version returned a bare 0 here, dropping the
        // sign bit of -0.0 and of tiny negative values.
        sign << 15
    } else {
        // f16 subnormal: restore the implicit leading bit and shift the
        // 24-bit significand into the 10-bit subnormal mantissa field.
        // FIX: the previous version returned 0, discarding all values in
        // the f16 subnormal range [2^-24, 2^-14).
        let mant = frac | 0x0080_0000;
        let shifted = (mant >> (14 - new_exp)) as u16;
        (sign << 15) | shifted
    }
}
/// Convert f16 bits to f32
///
/// The conversion is exact: every f16 value (including subnormals) is
/// representable as a normal f32.
fn f16_bits_to_f32(bits: u16) -> f32 {
    let sign = ((bits >> 15) as u32) << 31;
    let exp = ((bits >> 10) & 0x1f) as i32;
    let frac = (bits & 0x3ff) as u32;
    if exp == 0 {
        // Zero or subnormal
        if frac == 0 {
            // Signed zero
            f32::from_bits(sign)
        } else {
            // Subnormal f16 (value = frac * 2^-24) -> normalized f32.
            // The leading set bit of `frac` sits at position 31 - lz; it
            // becomes the implicit bit, so the f32 biased exponent is
            // (31 - lz) - 24 + 127 = 134 - lz.
            // FIX: the previous shift/exponent arithmetic was off by one,
            // decoding every f16 subnormal to roughly half its value with
            // a garbled mantissa (e.g. 0x0001 became ~2^-25, not 2^-24).
            let lz = frac.leading_zeros(); // 22..=31 for a 10-bit frac
            let new_exp = 134 - lz;
            let new_frac = (frac << (lz - 8)) & 0x007f_ffff;
            f32::from_bits(sign | (new_exp << 23) | new_frac)
        }
    } else if exp == 31 {
        // Inf or NaN
        if frac == 0 {
            f32::from_bits(sign | 0x7f800000)
        } else {
            f32::from_bits(sign | 0x7fc00000)
        }
    } else {
        // Normal value: rebias exponent 15 -> 127 and widen the mantissa.
        let new_exp = ((exp - 15 + 127) as u32) << 23;
        let new_frac = frac << 13;
        f32::from_bits(sign | new_exp | new_frac)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trip with the `None` codec: data passes through untouched.
    #[test]
    fn test_compressor_roundtrip_none() {
        let cfg = CompressorConfig {
            codec: CompressionCodec::None,
            ..Default::default()
        };
        let comp = DeltaCompressor::with_config(cfg);
        let original = VectorDelta::from_dense(vec![1.0, 2.0, 3.0, 4.0]);
        let bytes = comp.compress(&original).unwrap();
        let restored = comp.decompress(&bytes).unwrap();
        assert_eq!(original.dimensions, restored.dimensions);
    }

    /// Delta-of-delta round-trip on sequential data (its best case).
    #[test]
    fn test_compressor_delta_of_delta() {
        let cfg = CompressorConfig {
            codec: CompressionCodec::DeltaOfDelta,
            ..Default::default()
        };
        let comp = DeltaCompressor::with_config(cfg);
        let original = VectorDelta::from_dense(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let bytes = comp.compress(&original).unwrap();
        let restored = comp.decompress(&bytes).unwrap();
        assert_eq!(original.dimensions, restored.dimensions);
    }

    /// Quantized round-trip: precision is lost, dimensions must survive.
    #[test]
    fn test_compressor_quantized() {
        let cfg = CompressorConfig {
            codec: CompressionCodec::Quantized,
            ..Default::default()
        };
        let comp = DeltaCompressor::with_config(cfg);
        let original = VectorDelta::from_dense(vec![1.0, 2.0, 3.0, 4.0]);
        let bytes = comp.compress(&original).unwrap();
        let restored = comp.decompress(&bytes).unwrap();
        assert_eq!(original.dimensions, restored.dimensions);
    }

    /// Flipping a payload byte must be caught by the checksum.
    #[test]
    fn test_checksum_verification() {
        let comp = DeltaCompressor::new();
        let delta = VectorDelta::from_dense(vec![1.0, 2.0, 3.0]);
        let mut bytes = comp.compress(&delta).unwrap();
        // Corrupt a byte inside the payload region.
        if bytes.len() > 30 {
            bytes[30] ^= 0xff;
        }
        assert!(comp.decompress(&bytes).is_err());
    }

    /// f32 -> f16 -> f32 keeps relative error under 1% for normal values.
    #[test]
    fn test_f16_conversion() {
        for &original in &[0.0f32, 1.0, -1.0, 0.5, 2.5, 1000.0, -0.001] {
            let recovered = f16_bits_to_f32(f32_to_f16_bits(original));
            if original != 0.0 {
                let relative_error = ((recovered - original) / original).abs();
                assert!(
                    relative_error < 0.01,
                    "Failed for {}: got {}, error {}",
                    original,
                    recovered,
                    relative_error
                );
            }
        }
    }
}

View File

@@ -0,0 +1,692 @@
//! Core delta types and the Delta trait
//!
//! This module provides the fundamental Delta trait and implementations
//! for vector data structures.
use alloc::vec::Vec;
use core::ops::{Add, Mul, Neg, Sub};
use smallvec::SmallVec;
use crate::error::{DeltaError, Result};
/// The core Delta trait for computing and applying changes
///
/// A delta represents the difference between two states of a value.
/// Deltas can be computed, applied, composed, and inverted.
pub trait Delta: Sized + Send + Sync + Clone {
    /// The base type this delta operates on
    type Base;
    /// Error type for delta operations
    type Error;
    /// Compute the delta between old and new values
    fn compute(old: &Self::Base, new: &Self::Base) -> Self;
    /// Apply this delta to a base value
    ///
    /// # Errors
    /// Returns `Self::Error` when the delta cannot be applied to `base`
    /// (e.g. a dimension mismatch in the vector implementations below).
    fn apply(&self, base: &mut Self::Base) -> core::result::Result<(), Self::Error>;
    /// Compose this delta with another (this then other)
    fn compose(self, other: Self) -> Self;
    /// Compute the inverse delta (undo operation)
    fn inverse(&self) -> Self;
    /// Check if this delta is an identity (no change)
    fn is_identity(&self) -> bool;
    /// Get the size of this delta in bytes (for memory tracking)
    fn byte_size(&self) -> usize;
}
/// A single delta operation on a value at an index
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DeltaOp<T> {
    /// Index where the change occurs
    pub index: u32,
    /// The change value (new - old)
    pub value: T,
}
impl<T> DeltaOp<T> {
    /// Create a new delta operation
    ///
    /// Construction requires no trait bounds; the previous
    /// `Default + PartialEq` bounds were only needed by
    /// [`DeltaOp::is_zero`], so `new` now works for any `T`.
    pub fn new(index: u32, value: T) -> Self {
        Self { index, value }
    }
}
impl<T: Default + PartialEq> DeltaOp<T> {
    /// Check if this operation is a no-op (the change equals `T::default()`)
    pub fn is_zero(&self) -> bool {
        self.value == T::default()
    }
}
/// A delta value that can be sparse or dense
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum DeltaValue<T> {
    /// No change (identity)
    Identity,
    /// Sparse delta: only non-zero changes stored
    Sparse(SmallVec<[DeltaOp<T>; 8]>),
    /// Dense delta: all values stored
    Dense(Vec<T>),
    /// Full replacement (for large changes)
    Replace(Vec<T>),
}
impl<T> Default for DeltaValue<T> {
    /// The default delta is [`DeltaValue::Identity`] (no change).
    ///
    /// No bounds on `T` are needed to construct `Identity`; the previous
    /// `Default + Clone + PartialEq` bounds were unnecessarily
    /// restrictive.
    fn default() -> Self {
        Self::Identity
    }
}
impl<T> DeltaValue<T>
where
    T: Default + Clone + PartialEq + Add<Output = T> + Sub<Output = T> + Neg<Output = T> + Copy,
{
    /// Convert to sparse representation if beneficial
    ///
    /// Only `Dense` values are converted, and only when the fraction of
    /// default-valued ("zero") entries exceeds `threshold`; every other
    /// case returns a clone of `self`.
    pub fn to_sparse(&self, threshold: f32) -> Self {
        match self {
            Self::Dense(entries) => {
                let nonzero = entries.iter().filter(|v| **v != T::default()).count();
                let sparsity = 1.0 - (nonzero as f32 / entries.len() as f32);
                if sparsity > threshold {
                    let ops: SmallVec<[DeltaOp<T>; 8]> = entries
                        .iter()
                        .enumerate()
                        .filter(|(_, v)| **v != T::default())
                        .map(|(idx, v)| DeltaOp::new(idx as u32, *v))
                        .collect();
                    // An all-zero dense vector collapses to the identity.
                    match ops.is_empty() {
                        true => Self::Identity,
                        false => Self::Sparse(ops),
                    }
                } else {
                    self.clone()
                }
            }
            _ => self.clone(),
        }
    }
    /// Convert to dense representation
    ///
    /// `Identity` becomes an all-default vector; sparse ops are scattered
    /// into a default-filled buffer (out-of-range indices are ignored).
    pub fn to_dense(&self, dimensions: usize) -> Self {
        match self {
            Self::Dense(_) | Self::Replace(_) => self.clone(),
            Self::Identity => Self::Dense(vec![T::default(); dimensions]),
            Self::Sparse(ops) => {
                let mut buf = vec![T::default(); dimensions];
                for op in ops.iter() {
                    let idx = op.index as usize;
                    if idx < dimensions {
                        buf[idx] = op.value;
                    }
                }
                Self::Dense(buf)
            }
        }
    }
    /// Count non-zero elements
    pub fn nnz(&self) -> usize {
        match self {
            Self::Identity => 0,
            Self::Sparse(ops) => ops.len(),
            Self::Dense(values) | Self::Replace(values) => {
                values.iter().filter(|v| **v != T::default()).count()
            }
        }
    }
}
/// Delta for f32 vectors with sparse optimization
///
/// The stored representation (identity/sparse/dense/replace) is chosen by
/// the constructors and by `Delta::compute` based on how many entries
/// actually change.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct VectorDelta {
    /// The delta value (sparse or dense)
    pub value: DeltaValue<f32>,
    /// Original dimensions
    pub dimensions: usize,
    /// Sparsity threshold for encoding decisions
    pub sparsity_threshold: f32,
}
impl VectorDelta {
    /// Default sparsity threshold shared by all constructors.
    ///
    /// Centralizes the 0.7 literal previously repeated in `new`,
    /// `from_sparse`, and `from_dense`.
    const DEFAULT_SPARSITY_THRESHOLD: f32 = 0.7;
    /// Create a new empty (identity) vector delta
    pub fn new(dimensions: usize) -> Self {
        Self {
            value: DeltaValue::Identity,
            dimensions,
            sparsity_threshold: Self::DEFAULT_SPARSITY_THRESHOLD,
        }
    }
    /// Create from sparse operations
    ///
    /// An empty op list collapses to the identity delta.
    pub fn from_sparse(ops: SmallVec<[DeltaOp<f32>; 8]>, dimensions: usize) -> Self {
        let value = if ops.is_empty() {
            DeltaValue::Identity
        } else {
            DeltaValue::Sparse(ops)
        };
        Self {
            value,
            dimensions,
            sparsity_threshold: Self::DEFAULT_SPARSITY_THRESHOLD,
        }
    }
    /// Create from dense values
    ///
    /// Chooses Identity / Sparse / Dense based on how many entries are
    /// exactly zero: all-zero becomes Identity, and more than 70% zeros
    /// switches to the sparse representation.
    pub fn from_dense(values: Vec<f32>) -> Self {
        let dimensions = values.len();
        let non_zero = values.iter().filter(|v| **v != 0.0).count();
        let sparsity = 1.0 - (non_zero as f32 / dimensions as f32);
        let value = if non_zero == 0 {
            DeltaValue::Identity
        } else if sparsity > Self::DEFAULT_SPARSITY_THRESHOLD {
            // Convert to sparse
            let ops: SmallVec<[DeltaOp<f32>; 8]> = values
                .iter()
                .enumerate()
                .filter(|(_, v)| **v != 0.0)
                .map(|(i, v)| DeltaOp::new(i as u32, *v))
                .collect();
            DeltaValue::Sparse(ops)
        } else {
            DeltaValue::Dense(values)
        };
        Self {
            value,
            dimensions,
            sparsity_threshold: Self::DEFAULT_SPARSITY_THRESHOLD,
        }
    }
    /// Get the L2 (Euclidean) norm of the delta
    pub fn l2_norm(&self) -> f32 {
        match &self.value {
            DeltaValue::Identity => 0.0,
            DeltaValue::Sparse(ops) => ops.iter().map(|op| op.value * op.value).sum::<f32>().sqrt(),
            DeltaValue::Dense(values) | DeltaValue::Replace(values) => {
                values.iter().map(|v| v * v).sum::<f32>().sqrt()
            }
        }
    }
    /// Get the L1 (sum of absolute values) norm of the delta
    pub fn l1_norm(&self) -> f32 {
        match &self.value {
            DeltaValue::Identity => 0.0,
            DeltaValue::Sparse(ops) => ops.iter().map(|op| op.value.abs()).sum(),
            DeltaValue::Dense(values) | DeltaValue::Replace(values) => {
                values.iter().map(|v| v.abs()).sum()
            }
        }
    }
    /// Scale the delta by a factor, returning a new delta
    pub fn scale(&self, factor: f32) -> Self {
        let value = match &self.value {
            DeltaValue::Identity => DeltaValue::Identity,
            DeltaValue::Sparse(ops) => {
                let scaled: SmallVec<[DeltaOp<f32>; 8]> = ops
                    .iter()
                    .map(|op| DeltaOp::new(op.index, op.value * factor))
                    .collect();
                DeltaValue::Sparse(scaled)
            }
            DeltaValue::Dense(values) => {
                DeltaValue::Dense(values.iter().map(|v| v * factor).collect())
            }
            DeltaValue::Replace(values) => {
                DeltaValue::Replace(values.iter().map(|v| v * factor).collect())
            }
        };
        Self {
            value,
            dimensions: self.dimensions,
            sparsity_threshold: self.sparsity_threshold,
        }
    }
    /// Clip delta values to `[min, max]`, returning a new delta
    pub fn clip(&self, min: f32, max: f32) -> Self {
        let value = match &self.value {
            DeltaValue::Identity => DeltaValue::Identity,
            DeltaValue::Sparse(ops) => {
                let clipped: SmallVec<[DeltaOp<f32>; 8]> = ops
                    .iter()
                    .map(|op| DeltaOp::new(op.index, op.value.clamp(min, max)))
                    .collect();
                DeltaValue::Sparse(clipped)
            }
            DeltaValue::Dense(values) => {
                DeltaValue::Dense(values.iter().map(|v| v.clamp(min, max)).collect())
            }
            DeltaValue::Replace(values) => {
                DeltaValue::Replace(values.iter().map(|v| v.clamp(min, max)).collect())
            }
        };
        Self {
            value,
            dimensions: self.dimensions,
            sparsity_threshold: self.sparsity_threshold,
        }
    }
}
impl Delta for VectorDelta {
    type Base = Vec<f32>;
    type Error = DeltaError;
    /// Compute the element-wise difference `new - old`.
    ///
    /// Differences with magnitude <= 1e-7 count as "no change". If more
    /// than 70% of the elements are unchanged the delta is stored
    /// sparsely; otherwise the full dense difference vector is kept.
    ///
    /// # Panics
    /// Panics if the two vectors differ in length.
    fn compute(old: &Vec<f32>, new: &Vec<f32>) -> Self {
        assert_eq!(old.len(), new.len(), "Vectors must have same dimensions");
        let dimensions = old.len();
        // Compute differences
        let diffs: Vec<f32> = old.iter().zip(new.iter()).map(|(o, n)| n - o).collect();
        // Count non-zero differences (with epsilon)
        let epsilon = 1e-7;
        let non_zero: Vec<(usize, f32)> = diffs
            .iter()
            .enumerate()
            .filter(|(_, d)| d.abs() > epsilon)
            .map(|(i, d)| (i, *d))
            .collect();
        let value = if non_zero.is_empty() {
            DeltaValue::Identity
        } else {
            let sparsity = 1.0 - (non_zero.len() as f32 / dimensions as f32);
            if sparsity > 0.7 {
                // Use sparse representation
                let ops: SmallVec<[DeltaOp<f32>; 8]> = non_zero
                    .into_iter()
                    .map(|(i, v)| DeltaOp::new(i as u32, v))
                    .collect();
                DeltaValue::Sparse(ops)
            } else {
                // Use dense representation
                // NOTE: the dense path stores raw `diffs`, including
                // sub-epsilon entries the sparse path would have dropped.
                DeltaValue::Dense(diffs)
            }
        };
        Self {
            value,
            dimensions,
            sparsity_threshold: 0.7,
        }
    }
    /// Apply the delta in place by adding the stored differences to
    /// `base` (or replacing it wholesale for `Replace` deltas).
    ///
    /// # Errors
    /// Returns `DimensionMismatch` if `base.len()` differs from the
    /// delta's dimensions.
    fn apply(&self, base: &mut Vec<f32>) -> Result<()> {
        if base.len() != self.dimensions {
            return Err(DeltaError::DimensionMismatch {
                expected: self.dimensions,
                actual: base.len(),
            });
        }
        match &self.value {
            DeltaValue::Identity => {
                // No change
            }
            DeltaValue::Sparse(ops) => {
                for op in ops {
                    let idx = op.index as usize;
                    // Out-of-range indices are silently skipped.
                    if idx < base.len() {
                        base[idx] += op.value;
                    }
                }
            }
            DeltaValue::Dense(deltas) => {
                for (b, d) in base.iter_mut().zip(deltas.iter()) {
                    *b += d;
                }
            }
            DeltaValue::Replace(new_values) => {
                // Reuses base's allocation where possible.
                base.clone_from(new_values);
            }
        }
        Ok(())
    }
    /// Compose two deltas into one equivalent to applying `self` first,
    /// then `other`.
    ///
    /// # Panics
    /// Panics if the two deltas have different dimensions.
    fn compose(self, other: Self) -> Self {
        if self.dimensions != other.dimensions {
            panic!(
                "Cannot compose deltas of different dimensions: {} vs {}",
                self.dimensions, other.dimensions
            );
        }
        let value = match (&self.value, &other.value) {
            (DeltaValue::Identity, _) => other.value.clone(),
            (_, DeltaValue::Identity) => self.value.clone(),
            // A later Replace overrides any earlier Replace entirely.
            (DeltaValue::Replace(_), DeltaValue::Replace(new)) => DeltaValue::Replace(new.clone()),
            (DeltaValue::Sparse(ops1), DeltaValue::Sparse(ops2)) => {
                // Merge sparse operations (per-index sums, ordered by index)
                let mut merged: alloc::collections::BTreeMap<u32, f32> =
                    alloc::collections::BTreeMap::new();
                for op in ops1 {
                    *merged.entry(op.index).or_default() += op.value;
                }
                for op in ops2 {
                    *merged.entry(op.index).or_default() += op.value;
                }
                // Drop entries that cancelled out to (near-)zero.
                let ops: SmallVec<[DeltaOp<f32>; 8]> = merged
                    .into_iter()
                    .filter(|(_, v)| v.abs() > 1e-7)
                    .map(|(i, v)| DeltaOp::new(i, v))
                    .collect();
                if ops.is_empty() {
                    DeltaValue::Identity
                } else {
                    DeltaValue::Sparse(ops)
                }
            }
            (DeltaValue::Dense(d1), DeltaValue::Dense(d2)) => {
                let combined: Vec<f32> = d1.iter().zip(d2.iter()).map(|(a, b)| a + b).collect();
                // Check if result is identity
                if combined.iter().all(|v| v.abs() < 1e-7) {
                    DeltaValue::Identity
                } else {
                    DeltaValue::Dense(combined)
                }
            }
            // Mixed cases: convert to dense and combine
            _ => {
                let d1 = self.value.to_dense(self.dimensions);
                let d2 = other.value.to_dense(other.dimensions);
                if let (DeltaValue::Dense(v1), DeltaValue::Dense(v2)) = (d1, d2) {
                    let combined: Vec<f32> = v1.iter().zip(v2.iter()).map(|(a, b)| a + b).collect();
                    DeltaValue::Dense(combined)
                } else {
                    // Unreachable: to_dense only yields Dense/Replace, and
                    // both inputs here were not Replace.
                    DeltaValue::Identity
                }
            }
        };
        Self {
            value,
            dimensions: self.dimensions,
            sparsity_threshold: self.sparsity_threshold,
        }
    }
    /// Negate the delta so that applying it undoes the original.
    ///
    /// # Panics
    /// Panics for `Replace` deltas: the pre-replacement value is not
    /// stored, so the inverse cannot be reconstructed.
    fn inverse(&self) -> Self {
        let value = match &self.value {
            DeltaValue::Identity => DeltaValue::Identity,
            DeltaValue::Sparse(ops) => {
                let inverted: SmallVec<[DeltaOp<f32>; 8]> = ops
                    .iter()
                    .map(|op| DeltaOp::new(op.index, -op.value))
                    .collect();
                DeltaValue::Sparse(inverted)
            }
            DeltaValue::Dense(values) => DeltaValue::Dense(values.iter().map(|v| -v).collect()),
            DeltaValue::Replace(_) => {
                // Cannot invert a replace without knowing original
                panic!("Cannot invert Replace delta without original value");
            }
        };
        Self {
            value,
            dimensions: self.dimensions,
            sparsity_threshold: self.sparsity_threshold,
        }
    }
    fn is_identity(&self) -> bool {
        matches!(self.value, DeltaValue::Identity)
    }
    /// Approximate stack + heap footprint in bytes.
    fn byte_size(&self) -> usize {
        core::mem::size_of::<Self>()
            + match &self.value {
                DeltaValue::Identity => 0,
                DeltaValue::Sparse(ops) => ops.len() * core::mem::size_of::<DeltaOp<f32>>(),
                DeltaValue::Dense(v) | DeltaValue::Replace(v) => v.len() * 4,
            }
    }
}
/// Sparse delta representation for high-dimensional vectors
///
/// Unlike `VectorDelta`, each entry keeps both the old and the new value,
/// which makes `inverse` exact and `apply` an overwrite rather than an
/// additive patch.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SparseDelta {
    /// Non-zero delta entries (index, old_value, new_value)
    pub entries: SmallVec<[(u32, f32, f32); 16]>,
    /// Total dimensions
    pub dimensions: usize,
}
impl SparseDelta {
    /// Create a new, empty sparse delta for `dimensions` elements.
    pub fn new(dimensions: usize) -> Self {
        Self {
            entries: SmallVec::new(),
            dimensions,
        }
    }
    /// Record a change at `index`.
    ///
    /// Changes with magnitude <= 1e-7 are considered noise and dropped.
    pub fn add_entry(&mut self, index: u32, old_value: f32, new_value: f32) {
        let changed = (old_value - new_value).abs() > 1e-7;
        if changed {
            self.entries.push((index, old_value, new_value));
        }
    }
    /// Get the sparsity ratio (0.0 = dense, 1.0 = fully sparse)
    pub fn sparsity(&self) -> f32 {
        let density = self.entries.len() as f32 / self.dimensions as f32;
        1.0 - density
    }
    /// Convert to a `VectorDelta` whose ops hold `new - old` differences.
    pub fn to_vector_delta(&self) -> VectorDelta {
        if self.entries.is_empty() {
            return VectorDelta::new(self.dimensions);
        }
        let ops: SmallVec<[DeltaOp<f32>; 8]> = self
            .entries
            .iter()
            .map(|&(idx, old, new)| DeltaOp::new(idx, new - old))
            .collect();
        VectorDelta::from_sparse(ops, self.dimensions)
    }
}
impl Delta for SparseDelta {
    type Base = Vec<f32>;
    type Error = DeltaError;
    /// Record every element whose value changed between `old` and `new`.
    ///
    /// # Panics
    /// Panics if the two vectors differ in length.
    fn compute(old: &Vec<f32>, new: &Vec<f32>) -> Self {
        assert_eq!(old.len(), new.len());
        let mut delta = Self::new(old.len());
        for (i, (&before, &after)) in old.iter().zip(new.iter()).enumerate() {
            delta.add_entry(i as u32, before, after);
        }
        delta
    }
    /// Overwrite each changed index in `base` with the stored new value.
    ///
    /// # Errors
    /// Returns `DimensionMismatch` if `base.len()` differs from the
    /// delta's dimensions.
    fn apply(&self, base: &mut Vec<f32>) -> Result<()> {
        if base.len() != self.dimensions {
            return Err(DeltaError::DimensionMismatch {
                expected: self.dimensions,
                actual: base.len(),
            });
        }
        for &(idx, _, new_value) in &self.entries {
            if let Some(slot) = base.get_mut(idx as usize) {
                *slot = new_value;
            }
        }
        Ok(())
    }
    /// Compose two deltas: each index keeps its original old value and
    /// its final new value.
    fn compose(self, other: Self) -> Self {
        use alloc::collections::BTreeMap;
        let mut result = Self::new(self.dimensions);
        // Index both entry lists for merging.
        let self_map: BTreeMap<u32, (f32, f32)> = self
            .entries
            .iter()
            .map(|&(idx, old, new)| (idx, (old, new)))
            .collect();
        let other_map: BTreeMap<u32, (f32, f32)> = other
            .entries
            .iter()
            .map(|&(idx, old, new)| (idx, (old, new)))
            .collect();
        // Indices touched by `self`: if `other` touched them too, its new
        // value wins; either way the old value from `self` is kept.
        for (&idx, &(old1, new1)) in &self_map {
            match other_map.get(&idx) {
                Some(&(_, new2)) => result.add_entry(idx, old1, new2),
                None => result.add_entry(idx, old1, new1),
            }
        }
        // Indices touched only by `other` pass through unchanged.
        for (&idx, &(old2, new2)) in &other_map {
            if !self_map.contains_key(&idx) {
                result.add_entry(idx, old2, new2);
            }
        }
        result
    }
    /// Invert by swapping old and new values at every entry.
    fn inverse(&self) -> Self {
        let mut result = Self::new(self.dimensions);
        for &(idx, old, new) in &self.entries {
            result.add_entry(idx, new, old);
        }
        result
    }
    fn is_identity(&self) -> bool {
        self.entries.is_empty()
    }
    /// Approximate stack + heap footprint in bytes.
    fn byte_size(&self) -> usize {
        core::mem::size_of::<Self>() + self.entries.len() * core::mem::size_of::<(u32, f32, f32)>()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Basic accessors and the zero check on DeltaOp.
    #[test]
    fn test_delta_op() {
        let op = DeltaOp::new(5, 1.5f32);
        assert_eq!(op.index, 5);
        assert_eq!(op.value, 1.5);
        assert!(!op.is_zero());
        assert!(DeltaOp::new(0, 0.0f32).is_zero());
    }

    /// One changed element out of ten yields a sparse delta that
    /// round-trips through apply().
    #[test]
    fn test_vector_delta_sparse() {
        let before = vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let mut after = before.clone();
        after[1] = 2.5;
        let delta = VectorDelta::compute(&before, &after);
        // Should be sparse (only 1 change)
        assert!(matches!(delta.value, DeltaValue::Sparse(_)));
        let mut patched = before.clone();
        delta.apply(&mut patched).unwrap();
        for (got, want) in patched.iter().zip(after.iter()) {
            assert!((got - want).abs() < 1e-6);
        }
    }

    /// When every element changes, the dense representation is chosen.
    #[test]
    fn test_vector_delta_dense() {
        let before = vec![1.0f32, 2.0, 3.0, 4.0];
        let after = vec![2.0f32, 3.0, 4.0, 5.0];
        let delta = VectorDelta::compute(&before, &after);
        assert!(matches!(delta.value, DeltaValue::Dense(_)));
    }

    /// 3-4-5 triangle: the L2 norm of (3, 4, 0, 0) is 5.
    #[test]
    fn test_vector_delta_l2_norm() {
        let delta = VectorDelta::from_dense(vec![3.0, 4.0, 0.0, 0.0]);
        assert!((delta.l2_norm() - 5.0).abs() < 1e-6);
    }

    /// Scaling by 2 doubles every stored component.
    #[test]
    fn test_vector_delta_scale() {
        let scaled = VectorDelta::from_dense(vec![1.0, 2.0, 3.0]).scale(2.0);
        if let DeltaValue::Dense(values) = scaled.value {
            for (got, want) in values.iter().zip([2.0f32, 4.0, 6.0].iter()) {
                assert!((got - want).abs() < 1e-6);
            }
        }
    }

    /// SparseDelta records exactly the touched indices and applies them.
    #[test]
    fn test_sparse_delta() {
        let before = vec![1.0f32; 100];
        let mut after = before.clone();
        after[10] = 2.0;
        after[50] = 3.0;
        let delta = SparseDelta::compute(&before, &after);
        assert_eq!(delta.entries.len(), 2);
        assert!(delta.sparsity() > 0.9);
        let mut patched = before.clone();
        delta.apply(&mut patched).unwrap();
        assert!((patched[10] - 2.0).abs() < 1e-6);
        assert!((patched[50] - 3.0).abs() < 1e-6);
    }
}

View File

@@ -0,0 +1,601 @@
//! Delta encoding strategies
//!
//! This module provides various encoding strategies for deltas,
//! optimizing for different access patterns and sparsity levels.
use alloc::vec::Vec;
use core::marker::PhantomData;
use crate::delta::{DeltaOp, DeltaValue, VectorDelta};
use crate::error::{DeltaError, Result};
/// Trait for delta encoding strategies
///
/// Each strategy defines a self-describing wire format: the first byte of
/// every encoded buffer is its [`EncodingType`] tag, which `decode`
/// validates before reading the payload.
pub trait DeltaEncoding: Send + Sync {
    /// Encode a delta to bytes
    fn encode(&self, delta: &VectorDelta) -> Result<Vec<u8>>;
    /// Decode bytes to a delta
    ///
    /// Returns `DeltaError::InvalidEncoding` when the buffer is truncated or
    /// its leading tag byte does not match this strategy.
    fn decode(&self, bytes: &[u8]) -> Result<VectorDelta>;
    /// Get the encoding type identifier
    fn encoding_type(&self) -> EncodingType;
}
/// Encoding type identifiers
///
/// The explicit discriminants are serialized as the first byte of every
/// encoded buffer, so they are part of the wire format and must not be
/// renumbered.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum EncodingType {
    /// Dense encoding (all values stored)
    Dense = 0,
    /// Sparse encoding (only non-zero values)
    Sparse = 1,
    /// Run-length encoding
    RunLength = 2,
    /// Varint encoding
    Varint = 3,
    /// Hybrid encoding (automatic selection)
    Hybrid = 4,
}
impl TryFrom<u8> for EncodingType {
type Error = DeltaError;
fn try_from(value: u8) -> Result<Self> {
match value {
0 => Ok(Self::Dense),
1 => Ok(Self::Sparse),
2 => Ok(Self::RunLength),
3 => Ok(Self::Varint),
4 => Ok(Self::Hybrid),
_ => Err(DeltaError::InvalidEncoding(alloc::format!(
"Unknown encoding type: {}",
value
))),
}
}
}
/// Dense encoding - stores all values
///
/// Stateless marker type; the encoder always writes one `f32` per dimension
/// regardless of how sparse the delta actually is.
#[derive(Debug, Clone, Default)]
pub struct DenseEncoding;
impl DenseEncoding {
    /// Create a new dense encoding
    pub fn new() -> Self {
        Self
    }
}
impl DeltaEncoding for DenseEncoding {
    /// Wire layout: `[tag:1][dimensions: u32 LE]` followed by
    /// `dimensions` little-endian `f32` values.
    fn encode(&self, delta: &VectorDelta) -> Result<Vec<u8>> {
        // Capacity estimate over-reserves by 3 bytes (header is 1 + 4, not
        // 4 + 4); harmless, but worth noting.
        let mut bytes = Vec::with_capacity(4 + 4 + delta.dimensions * 4);
        // Header: encoding type (1 byte) + dimensions (4 bytes)
        bytes.push(EncodingType::Dense as u8);
        bytes.extend_from_slice(&(delta.dimensions as u32).to_le_bytes());
        // Convert to dense and encode
        match &delta.value {
            DeltaValue::Identity => {
                // Write zeros
                bytes.extend(core::iter::repeat(0u8).take(delta.dimensions * 4));
            }
            DeltaValue::Sparse(ops) => {
                // Scatter sparse ops into a zeroed dense buffer; out-of-range
                // indices are silently dropped.
                // NOTE(review): bare `vec!` relies on the macro being in scope
                // under no_std builds — verify the no_std feature compiles.
                let mut values = vec![0.0f32; delta.dimensions];
                for op in ops {
                    if (op.index as usize) < delta.dimensions {
                        values[op.index as usize] = op.value;
                    }
                }
                for v in values {
                    bytes.extend_from_slice(&v.to_le_bytes());
                }
            }
            DeltaValue::Dense(values) | DeltaValue::Replace(values) => {
                // NOTE(review): `Replace` is serialized identically to `Dense`
                // and `decode` rebuilds it as `Dense` — if `Replace` has
                // overwrite (non-additive) semantics, that distinction does
                // not round-trip. Confirm against `VectorDelta::apply`.
                for v in values {
                    bytes.extend_from_slice(&v.to_le_bytes());
                }
            }
        }
        Ok(bytes)
    }
    /// Validates the tag and declared length, then reads back `dimensions`
    /// little-endian `f32`s. Always returns a `Dense`-variant delta.
    fn decode(&self, bytes: &[u8]) -> Result<VectorDelta> {
        if bytes.len() < 5 {
            return Err(DeltaError::InvalidEncoding(
                "Buffer too small for header".into(),
            ));
        }
        let encoding_type = EncodingType::try_from(bytes[0])?;
        if encoding_type != EncodingType::Dense {
            return Err(DeltaError::InvalidEncoding("Not a dense encoding".into()));
        }
        let dimensions = u32::from_le_bytes([bytes[1], bytes[2], bytes[3], bytes[4]]) as usize;
        let expected_len = 5 + dimensions * 4;
        if bytes.len() < expected_len {
            return Err(DeltaError::InvalidEncoding(alloc::format!(
                "Buffer too small: expected {}, got {}",
                expected_len,
                bytes.len()
            )));
        }
        let mut values = Vec::with_capacity(dimensions);
        for i in 0..dimensions {
            let offset = 5 + i * 4;
            let v = f32::from_le_bytes([
                bytes[offset],
                bytes[offset + 1],
                bytes[offset + 2],
                bytes[offset + 3],
            ]);
            values.push(v);
        }
        Ok(VectorDelta::from_dense(values))
    }
    fn encoding_type(&self) -> EncodingType {
        EncodingType::Dense
    }
}
/// Sparse encoding - stores only non-zero values with their indices
///
/// Pays 8 bytes per stored entry (u32 index + f32 value), so it wins over
/// dense encoding whenever fewer than half the coordinates are non-zero.
#[derive(Debug, Clone, Default)]
pub struct SparseEncoding {
    /// Threshold below which a magnitude is treated as zero and dropped
    pub epsilon: f32,
}
impl SparseEncoding {
    /// Create a new sparse encoding with default epsilon (1e-7)
    pub fn new() -> Self {
        Self { epsilon: 1e-7 }
    }
    /// Create with custom epsilon
    pub fn with_epsilon(epsilon: f32) -> Self {
        Self { epsilon }
    }
}
impl DeltaEncoding for SparseEncoding {
    /// Wire layout: `[tag:1][dimensions: u32 LE][count: u32 LE]` followed by
    /// `count` pairs of `[index: u32 LE][value: f32 LE]`.
    fn encode(&self, delta: &VectorDelta) -> Result<Vec<u8>> {
        // Header: encoding type (1) + dimensions (4) + count (4)
        let mut bytes = Vec::new();
        bytes.push(EncodingType::Sparse as u8);
        bytes.extend_from_slice(&(delta.dimensions as u32).to_le_bytes());
        match &delta.value {
            DeltaValue::Identity => {
                // Zero entries
                bytes.extend_from_slice(&0u32.to_le_bytes());
            }
            DeltaValue::Sparse(ops) => {
                // Already sparse: write the ops verbatim (no epsilon filter).
                bytes.extend_from_slice(&(ops.len() as u32).to_le_bytes());
                for op in ops {
                    bytes.extend_from_slice(&op.index.to_le_bytes());
                    bytes.extend_from_slice(&op.value.to_le_bytes());
                }
            }
            DeltaValue::Dense(values) | DeltaValue::Replace(values) => {
                // Dense input: keep only entries whose magnitude exceeds
                // epsilon. NOTE(review): as with DenseEncoding, a `Replace`
                // payload decodes back as a sparse (additive?) delta — the
                // variant does not round-trip.
                let non_zero: Vec<_> = values
                    .iter()
                    .enumerate()
                    .filter(|(_, v)| v.abs() > self.epsilon)
                    .collect();
                bytes.extend_from_slice(&(non_zero.len() as u32).to_le_bytes());
                for (i, v) in non_zero {
                    bytes.extend_from_slice(&(i as u32).to_le_bytes());
                    bytes.extend_from_slice(&v.to_le_bytes());
                }
            }
        }
        Ok(bytes)
    }
    /// Validates the tag and that the buffer holds all `count` declared
    /// entries, then rebuilds a sparse delta. `count` is implicitly bounded
    /// by the buffer length via the `expected_len` check.
    fn decode(&self, bytes: &[u8]) -> Result<VectorDelta> {
        if bytes.len() < 9 {
            return Err(DeltaError::InvalidEncoding(
                "Buffer too small for sparse header".into(),
            ));
        }
        let encoding_type = EncodingType::try_from(bytes[0])?;
        if encoding_type != EncodingType::Sparse {
            return Err(DeltaError::InvalidEncoding("Not a sparse encoding".into()));
        }
        let dimensions = u32::from_le_bytes([bytes[1], bytes[2], bytes[3], bytes[4]]) as usize;
        let count = u32::from_le_bytes([bytes[5], bytes[6], bytes[7], bytes[8]]) as usize;
        let expected_len = 9 + count * 8;
        if bytes.len() < expected_len {
            return Err(DeltaError::InvalidEncoding(alloc::format!(
                "Buffer too small: expected {}, got {}",
                expected_len,
                bytes.len()
            )));
        }
        let mut ops = smallvec::SmallVec::new();
        for i in 0..count {
            let offset = 9 + i * 8;
            let index = u32::from_le_bytes([
                bytes[offset],
                bytes[offset + 1],
                bytes[offset + 2],
                bytes[offset + 3],
            ]);
            let value = f32::from_le_bytes([
                bytes[offset + 4],
                bytes[offset + 5],
                bytes[offset + 6],
                bytes[offset + 7],
            ]);
            ops.push(DeltaOp::new(index, value));
        }
        Ok(VectorDelta::from_sparse(ops, dimensions))
    }
    fn encoding_type(&self) -> EncodingType {
        EncodingType::Sparse
    }
}
/// Run-length encoding for consecutive identical deltas
///
/// Effective when the dense view of a delta contains long stretches of
/// (approximately) equal values; each run costs 8 bytes.
#[derive(Debug, Clone, Default)]
pub struct RunLengthEncoding {
    /// Threshold for considering values equal when merging runs
    pub epsilon: f32,
}
impl RunLengthEncoding {
    /// Create a new run-length encoding with default epsilon (1e-7)
    pub fn new() -> Self {
        Self { epsilon: 1e-7 }
    }
    /// Create with custom epsilon
    pub fn with_epsilon(epsilon: f32) -> Self {
        Self { epsilon }
    }
    /// Check if two values are approximately equal
    ///
    /// Note this is an absolute (not relative) tolerance, and the comparison
    /// is against the run's *first* value, so long runs can drift by epsilon.
    fn approx_eq(&self, a: f32, b: f32) -> bool {
        (a - b).abs() <= self.epsilon
    }
}
/// A run in RLE encoding
#[derive(Debug, Clone, Copy)]
struct Run {
    /// Representative value of the run (the first value encountered)
    value: f32,
    /// Number of consecutive positions covered by this run
    count: u32,
}
impl DeltaEncoding for RunLengthEncoding {
    /// Wire layout: `[tag:1][dimensions: u32 LE][run_count: u32 LE]`
    /// followed by `run_count` pairs of `[value: f32 LE][count: u32 LE]`.
    fn encode(&self, delta: &VectorDelta) -> Result<Vec<u8>> {
        // Materialize a dense view first; RLE operates on positions.
        let values = match &delta.value {
            DeltaValue::Identity => vec![0.0f32; delta.dimensions],
            DeltaValue::Sparse(ops) => {
                let mut v = vec![0.0f32; delta.dimensions];
                for op in ops {
                    if (op.index as usize) < delta.dimensions {
                        v[op.index as usize] = op.value;
                    }
                }
                v
            }
            DeltaValue::Dense(v) | DeltaValue::Replace(v) => v.clone(),
        };
        if values.is_empty() {
            // Zero-dimensional delta: header only, zero runs.
            let mut bytes = Vec::with_capacity(9);
            bytes.push(EncodingType::RunLength as u8);
            bytes.extend_from_slice(&(0u32).to_le_bytes());
            bytes.extend_from_slice(&(0u32).to_le_bytes());
            return Ok(bytes);
        }
        // Build runs of approximately-equal consecutive values.
        let mut runs: Vec<Run> = Vec::new();
        let mut current_value = values[0];
        let mut current_count = 1u32;
        for &v in values.iter().skip(1) {
            if self.approx_eq(v, current_value) {
                current_count += 1;
            } else {
                runs.push(Run {
                    value: current_value,
                    count: current_count,
                });
                current_value = v;
                current_count = 1;
            }
        }
        runs.push(Run {
            value: current_value,
            count: current_count,
        });
        // Encode header followed by (value, count) pairs.
        let mut bytes = Vec::with_capacity(9 + runs.len() * 8);
        bytes.push(EncodingType::RunLength as u8);
        bytes.extend_from_slice(&(delta.dimensions as u32).to_le_bytes());
        bytes.extend_from_slice(&(runs.len() as u32).to_le_bytes());
        for run in runs {
            bytes.extend_from_slice(&run.value.to_le_bytes());
            bytes.extend_from_slice(&run.count.to_le_bytes());
        }
        Ok(bytes)
    }
    /// Validates the tag and declared run table, then expands each run.
    ///
    /// Each run's `count` is checked against the declared dimensionality
    /// *before* expansion: a corrupt 17-byte buffer could otherwise declare a
    /// single run of `u32::MAX` elements and force a multi-gigabyte
    /// allocation. The final exact-length check still catches undershoot.
    fn decode(&self, bytes: &[u8]) -> Result<VectorDelta> {
        if bytes.len() < 9 {
            return Err(DeltaError::InvalidEncoding(
                "Buffer too small for RLE header".into(),
            ));
        }
        let encoding_type = EncodingType::try_from(bytes[0])?;
        if encoding_type != EncodingType::RunLength {
            return Err(DeltaError::InvalidEncoding(
                "Not a run-length encoding".into(),
            ));
        }
        let dimensions = u32::from_le_bytes([bytes[1], bytes[2], bytes[3], bytes[4]]) as usize;
        let run_count = u32::from_le_bytes([bytes[5], bytes[6], bytes[7], bytes[8]]) as usize;
        let expected_len = 9 + run_count * 8;
        if bytes.len() < expected_len {
            return Err(DeltaError::InvalidEncoding(alloc::format!(
                "Buffer too small: expected {}, got {}",
                expected_len,
                bytes.len()
            )));
        }
        let mut values = Vec::with_capacity(dimensions);
        for i in 0..run_count {
            let offset = 9 + i * 8;
            let value = f32::from_le_bytes([
                bytes[offset],
                bytes[offset + 1],
                bytes[offset + 2],
                bytes[offset + 3],
            ]);
            let count = u32::from_le_bytes([
                bytes[offset + 4],
                bytes[offset + 5],
                bytes[offset + 6],
                bytes[offset + 7],
            ]) as usize;
            // Bound check first (subtraction form avoids usize overflow on
            // 32-bit targets); `values.len() <= dimensions` is an invariant.
            if count > dimensions - values.len() {
                return Err(DeltaError::InvalidEncoding(alloc::format!(
                    "RLE run overflows declared dimensions {}",
                    dimensions
                )));
            }
            values.extend(core::iter::repeat(value).take(count));
        }
        if values.len() != dimensions {
            return Err(DeltaError::InvalidEncoding(alloc::format!(
                "RLE decoded to {} values, expected {}",
                values.len(),
                dimensions
            )));
        }
        Ok(VectorDelta::from_dense(values))
    }
    fn encoding_type(&self) -> EncodingType {
        EncodingType::RunLength
    }
}
/// Hybrid encoding that automatically selects the best strategy
///
/// Encodes via whichever of sparse / run-length / dense the heuristics in
/// [`HybridEncoding::select_encoding`] pick; decoding dispatches on the tag
/// byte, so any of the three formats can be read back.
#[derive(Debug, Clone)]
pub struct HybridEncoding {
    /// Sparsity threshold for choosing sparse encoding (fraction of zeros)
    pub sparsity_threshold: f32,
    /// RLE benefit threshold (runs-to-values ratio below which RLE is used)
    pub rle_threshold: f32,
    /// Epsilon for float comparisons
    pub epsilon: f32,
}
impl Default for HybridEncoding {
    /// Defaults: sparse when >70% of coordinates are (near-)zero, RLE when
    /// runs compress the payload by more than half, else dense.
    fn default() -> Self {
        Self {
            sparsity_threshold: 0.7,
            rle_threshold: 0.5,
            epsilon: 1e-7,
        }
    }
}
impl HybridEncoding {
    /// Create a new hybrid encoding with default thresholds
    pub fn new() -> Self {
        Self::default()
    }
    /// Create with custom thresholds
    pub fn with_thresholds(sparsity: f32, rle: f32) -> Self {
        Self {
            sparsity_threshold: sparsity,
            rle_threshold: rle,
            epsilon: 1e-7,
        }
    }
    /// Determine the best encoding for a delta
    ///
    /// Heuristic order: prefer sparse when most coordinates are (near-)zero,
    /// then run-length when long constant runs dominate, otherwise dense.
    pub fn select_encoding(&self, delta: &VectorDelta) -> EncodingType {
        match &delta.value {
            DeltaValue::Identity => EncodingType::Sparse,
            DeltaValue::Sparse(ops) => {
                let sparsity = 1.0 - (ops.len() as f32 / delta.dimensions as f32);
                if sparsity > self.sparsity_threshold {
                    EncodingType::Sparse
                } else {
                    EncodingType::Dense
                }
            }
            DeltaValue::Dense(values) | DeltaValue::Replace(values) => {
                // Guard the degenerate empty payload: the sparsity ratio
                // below would be NaN (0/0) and `values[0]` would panic.
                // Dense is the natural zero-cost choice for it.
                if values.is_empty() {
                    return EncodingType::Dense;
                }
                // Check sparsity
                let non_zero = values.iter().filter(|v| v.abs() > self.epsilon).count();
                let sparsity = 1.0 - (non_zero as f32 / values.len() as f32);
                if sparsity > self.sparsity_threshold {
                    return EncodingType::Sparse;
                }
                // Check RLE potential: count the runs a run-length pass
                // would produce; few runs relative to length means RLE wins.
                let mut runs = 1usize;
                let mut prev = values[0];
                for &v in values.iter().skip(1) {
                    if (v - prev).abs() > self.epsilon {
                        runs += 1;
                        prev = v;
                    }
                }
                let rle_ratio = runs as f32 / values.len() as f32;
                if rle_ratio < self.rle_threshold {
                    EncodingType::RunLength
                } else {
                    EncodingType::Dense
                }
            }
        }
    }
}
impl DeltaEncoding for HybridEncoding {
    /// Encode with whichever concrete strategy `select_encoding` picks;
    /// the output carries that strategy's own tag byte, never `Hybrid`.
    fn encode(&self, delta: &VectorDelta) -> Result<Vec<u8>> {
        let selected = self.select_encoding(delta);
        match selected {
            EncodingType::Dense => DenseEncoding.encode(delta),
            EncodingType::Sparse => SparseEncoding::with_epsilon(self.epsilon).encode(delta),
            EncodingType::RunLength => RunLengthEncoding::with_epsilon(self.epsilon).encode(delta),
            // Defensive fallback: select_encoding never returns these.
            _ => DenseEncoding.encode(delta),
        }
    }
    /// Dispatch on the leading tag byte to the matching concrete decoder.
    fn decode(&self, bytes: &[u8]) -> Result<VectorDelta> {
        if bytes.is_empty() {
            return Err(DeltaError::InvalidEncoding("Empty buffer".into()));
        }
        let encoding_type = EncodingType::try_from(bytes[0])?;
        match encoding_type {
            EncodingType::Dense => DenseEncoding.decode(bytes),
            EncodingType::Sparse => SparseEncoding::with_epsilon(self.epsilon).decode(bytes),
            EncodingType::RunLength => RunLengthEncoding::with_epsilon(self.epsilon).decode(bytes),
            EncodingType::Hybrid => Err(DeltaError::InvalidEncoding(
                "Hybrid type should not appear in encoded data".into(),
            )),
            EncodingType::Varint => Err(DeltaError::InvalidEncoding(
                "Varint encoding not yet implemented".into(),
            )),
        }
    }
    fn encoding_type(&self) -> EncodingType {
        EncodingType::Hybrid
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::delta::Delta;
    use alloc::vec;
    // Dense: encode/decode preserves dimensionality.
    #[test]
    fn test_dense_encoding_roundtrip() {
        let delta = VectorDelta::from_dense(vec![1.0, 2.0, 3.0, 4.0]);
        let encoding = DenseEncoding::new();
        let bytes = encoding.encode(&delta).unwrap();
        let decoded = encoding.decode(&bytes).unwrap();
        assert_eq!(delta.dimensions, decoded.dimensions);
    }
    // Sparse: dimensionality and non-zero count survive a round trip.
    #[test]
    fn test_sparse_encoding_roundtrip() {
        let mut ops = smallvec::SmallVec::new();
        ops.push(DeltaOp::new(5, 1.5));
        ops.push(DeltaOp::new(10, 2.5));
        let delta = VectorDelta::from_sparse(ops, 100);
        let encoding = SparseEncoding::new();
        let bytes = encoding.encode(&delta).unwrap();
        let decoded = encoding.decode(&bytes).unwrap();
        assert_eq!(delta.dimensions, decoded.dimensions);
        assert_eq!(delta.value.nnz(), decoded.value.nnz());
    }
    // RLE: a payload with repeated stretches round-trips.
    #[test]
    fn test_rle_encoding_roundtrip() {
        // Create a delta with runs
        let values = vec![1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0];
        let delta = VectorDelta::from_dense(values.clone());
        let encoding = RunLengthEncoding::new();
        let bytes = encoding.encode(&delta).unwrap();
        let decoded = encoding.decode(&bytes).unwrap();
        assert_eq!(delta.dimensions, decoded.dimensions);
    }
    // Hybrid heuristic: 1 non-zero out of 1000 must pick sparse.
    #[test]
    fn test_hybrid_encoding_selects_sparse() {
        // Very sparse delta
        let mut ops = smallvec::SmallVec::new();
        ops.push(DeltaOp::new(5, 1.5));
        let delta = VectorDelta::from_sparse(ops, 1000);
        let encoding = HybridEncoding::new();
        assert_eq!(encoding.select_encoding(&delta), EncodingType::Sparse);
    }
    // Hybrid: whichever strategy gets picked, decode recovers the shape.
    #[test]
    fn test_hybrid_encoding_roundtrip() {
        let delta = VectorDelta::from_dense(vec![1.0, 2.0, 3.0, 4.0]);
        let encoding = HybridEncoding::new();
        let bytes = encoding.encode(&delta).unwrap();
        let decoded = encoding.decode(&bytes).unwrap();
        assert_eq!(delta.dimensions, decoded.dimensions);
    }
    // The identity delta encodes as zero sparse entries and decodes back
    // to the identity.
    #[test]
    fn test_identity_encoding() {
        let delta = VectorDelta::new(100);
        assert!(delta.is_identity());
        let encoding = SparseEncoding::new();
        let bytes = encoding.encode(&delta).unwrap();
        let decoded = encoding.decode(&bytes).unwrap();
        assert!(decoded.is_identity());
    }
}

View File

@@ -0,0 +1,119 @@
//! Error types for delta operations
use alloc::string::String;
use core::fmt;
/// Result type for delta operations
///
/// Crate-wide alias over [`DeltaError`] to keep signatures short.
pub type Result<T> = core::result::Result<T, DeltaError>;
/// Errors that can occur during delta operations
///
/// The type is `Clone` so errors can be duplicated across retry or
/// fan-out paths; `Display` renders a single human-readable line.
#[derive(Debug, Clone)]
pub enum DeltaError {
    /// Dimension mismatch between vectors
    DimensionMismatch {
        /// Expected dimension
        expected: usize,
        /// Actual dimension
        actual: usize,
    },
    /// Invalid delta encoding (bad tag byte, truncated or corrupt buffer)
    InvalidEncoding(String),
    /// Compression error
    CompressionError(String),
    /// Decompression error
    DecompressionError(String),
    /// Stream error
    StreamError(String),
    /// Window error
    WindowError(String),
    /// Serialization error
    SerializationError(String),
    /// Index out of bounds
    IndexOutOfBounds {
        /// The index that was accessed
        index: usize,
        /// The length of the collection
        length: usize,
    },
    /// Invalid operation
    InvalidOperation(String),
    /// Buffer overflow
    BufferOverflow {
        /// Required capacity
        required: usize,
        /// Available capacity
        available: usize,
    },
    /// Checksum mismatch
    ChecksumMismatch {
        /// Expected checksum
        expected: u64,
        /// Actual checksum
        actual: u64,
    },
    /// Version incompatibility
    VersionMismatch {
        /// Expected version
        expected: u32,
        /// Actual version
        actual: u32,
    },
}
impl fmt::Display for DeltaError {
    // One human-readable line per variant; structured fields remain
    // available through `Debug` and pattern matching.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::DimensionMismatch { expected, actual } => {
                write!(
                    f,
                    "Dimension mismatch: expected {}, got {}",
                    expected, actual
                )
            }
            Self::InvalidEncoding(msg) => write!(f, "Invalid encoding: {}", msg),
            Self::CompressionError(msg) => write!(f, "Compression error: {}", msg),
            Self::DecompressionError(msg) => write!(f, "Decompression error: {}", msg),
            Self::StreamError(msg) => write!(f, "Stream error: {}", msg),
            Self::WindowError(msg) => write!(f, "Window error: {}", msg),
            Self::SerializationError(msg) => write!(f, "Serialization error: {}", msg),
            Self::IndexOutOfBounds { index, length } => {
                write!(f, "Index out of bounds: {} (length: {})", index, length)
            }
            Self::InvalidOperation(msg) => write!(f, "Invalid operation: {}", msg),
            Self::BufferOverflow {
                required,
                available,
            } => {
                write!(
                    f,
                    "Buffer overflow: required {}, available {}",
                    required, available
                )
            }
            // Checksums are printed as zero-padded 16-digit hex for easy
            // visual diffing.
            Self::ChecksumMismatch { expected, actual } => {
                write!(
                    f,
                    "Checksum mismatch: expected {:016x}, got {:016x}",
                    expected, actual
                )
            }
            Self::VersionMismatch { expected, actual } => {
                write!(f, "Version mismatch: expected {}, got {}", expected, actual)
            }
        }
    }
}
// `std::error::Error` is only available with the `std` feature (the crate is
// no_std otherwise); default methods suffice since `Display`/`Debug` carry
// all the detail.
#[cfg(feature = "std")]
impl std::error::Error for DeltaError {}

View File

@@ -0,0 +1,126 @@
//! # RuVector Delta Core
//!
//! Core delta types and traits for behavioral vector change tracking.
//! This crate provides the fundamental abstractions for computing, applying,
//! and composing deltas on vector data structures.
//!
//! ## Key Concepts
//!
//! - **Delta**: A representation of the change between two states
//! - **DeltaStream**: An ordered sequence of deltas for event sourcing
//! - **DeltaWindow**: Time-bounded aggregation of deltas
//! - **Encoding**: Sparse and dense delta representations
//! - **Compression**: Delta-specific compression strategies
//!
//! ## Example
//!
//! ```rust
//! use ruvector_delta_core::{Delta, VectorDelta, DeltaStream};
//!
//! // Compute delta between two vectors
//! let old = vec![1.0f32, 2.0, 3.0];
//! let new = vec![1.1f32, 2.0, 3.5];
//! let delta = VectorDelta::compute(&old, &new);
//!
//! // Apply delta to reconstruct
//! let mut reconstructed = old.clone();
//! delta.apply(&mut reconstructed).unwrap();
//! assert_eq!(reconstructed, new);
//! ```
#![cfg_attr(not(feature = "std"), no_std)]
#![warn(missing_docs)]
#![warn(clippy::all)]
#![deny(unsafe_op_in_unsafe_fn)]
extern crate alloc;
pub mod compression;
pub mod delta;
pub mod encoding;
pub mod error;
pub mod stream;
pub mod window;
// Re-exports
pub use compression::{CompressionCodec, CompressionLevel, DeltaCompressor};
pub use delta::{Delta, DeltaOp, DeltaValue, SparseDelta, VectorDelta};
pub use encoding::{
DeltaEncoding, DenseEncoding, EncodingType, HybridEncoding, RunLengthEncoding, SparseEncoding,
};
pub use error::{DeltaError, Result};
pub use stream::{DeltaStream, DeltaStreamConfig, StreamCheckpoint};
pub use window::{DeltaWindow, WindowAggregator, WindowConfig, WindowResult, WindowType};
/// Prelude for convenient imports
///
/// `use ruvector_delta_core::prelude::*;` brings in the types needed for
/// typical workflows: compute/apply deltas, encode, compress, stream, window.
pub mod prelude {
    pub use crate::compression::{CompressionCodec, DeltaCompressor};
    pub use crate::delta::{Delta, DeltaOp, DeltaValue, VectorDelta};
    pub use crate::encoding::{DeltaEncoding, DenseEncoding, SparseEncoding};
    pub use crate::error::Result;
    pub use crate::stream::{DeltaStream, StreamCheckpoint};
    pub use crate::window::{DeltaWindow, WindowAggregator};
}
#[cfg(test)]
mod tests {
    use super::*;
    // compute + apply reconstructs the new state within float tolerance.
    #[test]
    fn test_basic_delta() {
        let old = vec![1.0f32, 2.0, 3.0, 4.0];
        let new = vec![1.0f32, 2.5, 3.0, 4.5];
        let delta = VectorDelta::compute(&old, &new);
        let mut reconstructed = old.clone();
        delta.apply(&mut reconstructed).unwrap();
        for (a, b) in reconstructed.iter().zip(new.iter()) {
            assert!((a - b).abs() < 1e-6);
        }
    }
    // (v1→v2) composed with (v2→v3) applied to v1 must land on v3.
    #[test]
    fn test_delta_composition() {
        let v1 = vec![1.0f32, 2.0, 3.0];
        let v2 = vec![1.5f32, 2.0, 3.5];
        let v3 = vec![2.0f32, 2.5, 4.0];
        let delta1 = VectorDelta::compute(&v1, &v2);
        let delta2 = VectorDelta::compute(&v2, &v3);
        let composed = delta1.compose(delta2);
        let mut result = v1.clone();
        composed.apply(&mut result).unwrap();
        for (a, b) in result.iter().zip(v3.iter()) {
            assert!((a - b).abs() < 1e-6);
        }
    }
    // The inverse delta undoes the forward delta: new + inverse == old.
    #[test]
    fn test_delta_inverse() {
        let old = vec![1.0f32, 2.0, 3.0];
        let new = vec![1.5f32, 2.5, 3.5];
        let delta = VectorDelta::compute(&old, &new);
        let inverse = delta.inverse();
        let mut result = new.clone();
        inverse.apply(&mut result).unwrap();
        for (a, b) in result.iter().zip(old.iter()) {
            assert!((a - b).abs() < 1e-6);
        }
    }
    // Comparing a vector with itself yields the identity delta.
    #[test]
    fn test_identity_delta() {
        let v = vec![1.0f32, 2.0, 3.0];
        let delta = VectorDelta::compute(&v, &v);
        assert!(delta.is_identity());
    }
}

View File

@@ -0,0 +1,463 @@
//! Delta stream for event sourcing and temporal queries
//!
//! Provides ordered sequences of deltas with checkpointing,
//! compaction, and replay capabilities.
use alloc::collections::VecDeque;
use alloc::vec::Vec;
use core::time::Duration;
use crate::delta::{Delta, VectorDelta};
use crate::error::{DeltaError, Result};
/// Configuration for delta streams
#[derive(Debug, Clone)]
pub struct DeltaStreamConfig {
    /// Maximum number of deltas before automatic compaction
    pub max_deltas: usize,
    /// Checkpoint interval (in number of deltas)
    ///
    /// NOTE(review): this field appears unused by `DeltaStream` itself —
    /// checkpoints are created via `create_checkpoint`. Confirm whether
    /// interval-driven checkpointing is meant to be caller-driven.
    pub checkpoint_interval: usize,
    /// Maximum memory usage before eviction (estimated via `Delta::byte_size`)
    pub max_memory_bytes: usize,
    /// Enable automatic compaction on push when limits are exceeded
    pub auto_compact: bool,
}
impl Default for DeltaStreamConfig {
    /// Defaults: compact past 1000 deltas or 64 MB of estimated delta memory.
    fn default() -> Self {
        Self {
            max_deltas: 1000,
            checkpoint_interval: 100,
            max_memory_bytes: 64 * 1024 * 1024, // 64 MB
            auto_compact: true,
        }
    }
}
/// A checkpoint in the delta stream
///
/// Stores a fully-materialized base value so replays can start here instead
/// of from the beginning of the stream.
#[derive(Debug, Clone)]
pub struct StreamCheckpoint<T> {
    /// The base value at this checkpoint
    pub value: T,
    /// Sequence number of the last delta covered by this checkpoint
    pub sequence: u64,
    /// Timestamp when created (nanoseconds since epoch)
    pub timestamp_ns: u64,
}
/// Entry in the delta stream
#[derive(Debug, Clone)]
struct StreamEntry<D: Clone> {
    /// The delta
    delta: D,
    /// Sequence number (1-based, assigned by the stream at push time)
    sequence: u64,
    /// Timestamp (nanoseconds since epoch; 0 in no_std builds)
    timestamp_ns: u64,
}
/// A stream of deltas with event sourcing capabilities
///
/// Deltas are kept in push order; any past state can be reconstructed by
/// replaying from an initial value or from a stored checkpoint.
#[derive(Debug, Clone)]
pub struct DeltaStream<D: Delta>
where
    D: Clone,
    D::Base: Clone,
{
    /// Configuration
    config: DeltaStreamConfig,
    /// Ordered deltas (front = oldest)
    deltas: VecDeque<StreamEntry<D>>,
    /// Checkpoints, in creation order
    checkpoints: Vec<StreamCheckpoint<D::Base>>,
    /// Current sequence number (monotonically increasing, never reset)
    current_sequence: u64,
    /// Memory usage estimate, maintained from `Delta::byte_size`
    memory_usage: usize,
}
impl<D: Delta + Clone> DeltaStream<D>
where
    D::Base: Clone,
{
    /// Create a new delta stream with default configuration
    pub fn new() -> Self {
        Self::with_config(DeltaStreamConfig::default())
    }
    /// Create with custom configuration
    pub fn with_config(config: DeltaStreamConfig) -> Self {
        Self {
            config,
            deltas: VecDeque::new(),
            checkpoints: Vec::new(),
            current_sequence: 0,
            memory_usage: 0,
        }
    }
    /// Get current configuration
    pub fn config(&self) -> &DeltaStreamConfig {
        &self.config
    }
    /// Get the current sequence number (0 until the first push)
    pub fn sequence(&self) -> u64 {
        self.current_sequence
    }
    /// Get the number of deltas in the stream
    pub fn len(&self) -> usize {
        self.deltas.len()
    }
    /// Check if the stream is empty
    pub fn is_empty(&self) -> bool {
        self.deltas.is_empty()
    }
    /// Get the number of checkpoints
    pub fn checkpoint_count(&self) -> usize {
        self.checkpoints.len()
    }
    /// Push a new delta, stamped with the current wall-clock time
    /// (0 when built without the `std` feature).
    pub fn push(&mut self, delta: D) {
        self.push_with_timestamp(delta, Self::current_timestamp_ns());
    }
    /// Push a delta with a specific timestamp
    ///
    /// Sequence numbers start at 1 and increase monotonically. May trigger
    /// automatic compaction when configured limits are exceeded.
    pub fn push_with_timestamp(&mut self, delta: D, timestamp_ns: u64) {
        self.current_sequence += 1;
        let entry = StreamEntry {
            delta,
            sequence: self.current_sequence,
            timestamp_ns,
        };
        self.memory_usage += entry.delta.byte_size();
        self.deltas.push_back(entry);
        // Check if compaction is needed
        if self.config.auto_compact && self.needs_compaction() {
            let _ = self.compact();
        }
    }
    /// Create a checkpoint at the current position
    ///
    /// `value` should be the fully-materialized state at the current
    /// sequence; replays from this checkpoint apply only later deltas.
    pub fn create_checkpoint(&mut self, value: D::Base) {
        let checkpoint = StreamCheckpoint {
            value,
            sequence: self.current_sequence,
            timestamp_ns: Self::current_timestamp_ns(),
        };
        self.checkpoints.push(checkpoint);
    }
    /// Replay from the beginning to reconstruct the current state
    pub fn replay(&self, initial: D::Base) -> core::result::Result<D::Base, D::Error> {
        let mut current = initial;
        for entry in &self.deltas {
            entry.delta.apply(&mut current)?;
        }
        Ok(current)
    }
    /// Replay from a specific checkpoint
    ///
    /// Returns `None` if the checkpoint index is out of bounds, otherwise
    /// returns the result of replaying deltas from that checkpoint.
    pub fn replay_from_checkpoint(
        &self,
        checkpoint_idx: usize,
    ) -> Option<core::result::Result<D::Base, D::Error>> {
        let checkpoint = self.checkpoints.get(checkpoint_idx)?;
        let mut current = checkpoint.value.clone();
        // Apply only deltas recorded after the checkpoint was taken.
        for entry in &self.deltas {
            if entry.sequence > checkpoint.sequence {
                if let Err(e) = entry.delta.apply(&mut current) {
                    return Some(Err(e));
                }
            }
        }
        Some(Ok(current))
    }
    /// Replay to a specific sequence number (inclusive)
    pub fn replay_to_sequence(
        &self,
        initial: D::Base,
        target_sequence: u64,
    ) -> core::result::Result<D::Base, D::Error> {
        let mut current = initial;
        for entry in &self.deltas {
            if entry.sequence > target_sequence {
                break;
            }
            entry.delta.apply(&mut current)?;
        }
        Ok(current)
    }
    /// Get deltas in a sequence range (inclusive on both ends)
    pub fn get_range(&self, start: u64, end: u64) -> Vec<&D> {
        self.deltas
            .iter()
            .filter(|e| e.sequence >= start && e.sequence <= end)
            .map(|e| &e.delta)
            .collect()
    }
    /// Get deltas in a time range (inclusive on both ends)
    pub fn get_time_range(&self, start_ns: u64, end_ns: u64) -> Vec<&D> {
        self.deltas
            .iter()
            .filter(|e| e.timestamp_ns >= start_ns && e.timestamp_ns <= end_ns)
            .map(|e| &e.delta)
            .collect()
    }
    /// Check if compaction is needed
    pub fn needs_compaction(&self) -> bool {
        self.deltas.len() > self.config.max_deltas
            || self.memory_usage > self.config.max_memory_bytes
    }
    /// Compact the stream by composing consecutive deltas
    ///
    /// Deltas at or before the latest checkpoint are preserved verbatim (so
    /// checkpoint replays stay valid); later deltas are folded together,
    /// dropping compositions that cancel to the identity.
    ///
    /// Returns the number of deltas removed from the stream.
    pub fn compact(&mut self) -> Result<usize> {
        if self.deltas.len() < 2 {
            return Ok(0);
        }
        // Capture the length *before* draining: `self.deltas` is empty once
        // `drain(..)` has run, so measuring afterwards would always report 0
        // removed (the bug this fixes).
        let old_len = self.deltas.len();
        // Find the latest checkpoint sequence
        let checkpoint_sequence = self.checkpoints.last().map(|c| c.sequence).unwrap_or(0);
        // Only compact deltas after the latest checkpoint
        let mut new_deltas: VecDeque<StreamEntry<D>> = VecDeque::new();
        let mut pending: Option<StreamEntry<D>> = None;
        for entry in self.deltas.drain(..) {
            if entry.sequence <= checkpoint_sequence {
                // Keep deltas at or before checkpoint as-is
                if let Some(p) = pending.take() {
                    new_deltas.push_back(p);
                }
                new_deltas.push_back(entry);
            } else if let Some(p) = pending.take() {
                // Fold into the pending accumulator; an identity composition
                // means the pair cancels and both entries are dropped.
                let composed = p.delta.compose(entry.delta.clone());
                if !composed.is_identity() {
                    pending = Some(StreamEntry {
                        delta: composed,
                        sequence: entry.sequence,
                        timestamp_ns: entry.timestamp_ns,
                    });
                }
            } else {
                pending = Some(entry);
            }
        }
        if let Some(p) = pending {
            new_deltas.push_back(p);
        }
        self.deltas = new_deltas;
        // Recalculate memory usage from the surviving entries.
        self.memory_usage = self.deltas.iter().map(|e| e.delta.byte_size()).sum();
        Ok(old_len.saturating_sub(self.deltas.len()))
    }
    /// Trim deltas before a sequence number
    ///
    /// Also drops checkpoints older than `sequence`.
    pub fn trim_before(&mut self, sequence: u64) {
        while let Some(front) = self.deltas.front() {
            if front.sequence < sequence {
                if let Some(entry) = self.deltas.pop_front() {
                    self.memory_usage = self.memory_usage.saturating_sub(entry.delta.byte_size());
                }
            } else {
                break;
            }
        }
        // Also trim old checkpoints
        self.checkpoints.retain(|c| c.sequence >= sequence);
    }
    /// Clear all deltas and checkpoints
    ///
    /// The sequence counter is deliberately left untouched so later pushes
    /// continue the numbering.
    pub fn clear(&mut self) {
        self.deltas.clear();
        self.checkpoints.clear();
        self.memory_usage = 0;
    }
    /// Get current timestamp in nanoseconds since the Unix epoch
    /// (0 in no_std builds, where no clock is available)
    fn current_timestamp_ns() -> u64 {
        #[cfg(feature = "std")]
        {
            use std::time::SystemTime;
            SystemTime::now()
                .duration_since(SystemTime::UNIX_EPOCH)
                .map(|d| d.as_nanos() as u64)
                .unwrap_or(0)
        }
        #[cfg(not(feature = "std"))]
        {
            0
        }
    }
}
impl<D: Delta> Default for DeltaStream<D>
where
    D::Base: Clone,
{
    /// Equivalent to [`DeltaStream::new`] (default configuration).
    fn default() -> Self {
        Self::new()
    }
}
// Implement for VectorDelta specifically
impl DeltaStream<VectorDelta> {
    /// Create a stream optimized for vector deltas
    ///
    /// Sizes the compaction threshold so a worst-case (fully dense, 4 bytes
    /// per component) backlog of deltas fits in roughly 64 MB.
    pub fn for_vectors(dimensions: usize) -> Self {
        // Guard `dimensions == 0`: the size estimate would be zero and the
        // division below would panic. Both `.max(1)` calls also keep the
        // limits sane for pathologically large vectors.
        let estimated_delta_size = (dimensions * 4).max(1); // Worst case: dense f32
        let max_deltas = ((64 * 1024 * 1024) / estimated_delta_size).max(1);
        Self::with_config(DeltaStreamConfig {
            max_deltas,
            checkpoint_interval: (max_deltas / 10).max(1),
            max_memory_bytes: 64 * 1024 * 1024,
            auto_compact: true,
        })
    }
}
/// Iterator over stream entries
///
/// Yields `(sequence, &delta)` pairs in push order; see [`DeltaStream::iter`].
pub struct DeltaStreamIter<'a, D: Clone> {
    inner: alloc::collections::vec_deque::Iter<'a, StreamEntry<D>>,
}
impl<'a, D: Clone> Iterator for DeltaStreamIter<'a, D> {
    type Item = (u64, &'a D);
    // Project each internal entry down to its public (sequence, delta) view.
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next().map(|e| (e.sequence, &e.delta))
    }
}
impl<D: Delta + Clone> DeltaStream<D>
where
    D::Base: Clone,
{
    /// Iterate over deltas with their sequence numbers, oldest first
    pub fn iter(&self) -> DeltaStreamIter<'_, D> {
        DeltaStreamIter {
            inner: self.deltas.iter(),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::delta::VectorDelta;
    // Replaying two pushed deltas over the initial state accumulates both.
    #[test]
    fn test_stream_push_replay() {
        let mut stream = DeltaStream::<VectorDelta>::new();
        let initial = vec![1.0f32, 2.0, 3.0];
        let delta1 = VectorDelta::from_dense(vec![0.5, 0.0, 0.5]);
        let delta2 = VectorDelta::from_dense(vec![0.0, 1.0, 0.0]);
        stream.push(delta1);
        stream.push(delta2);
        let result = stream.replay(initial.clone()).unwrap();
        assert!((result[0] - 1.5).abs() < 1e-6);
        assert!((result[1] - 3.0).abs() < 1e-6);
        assert!((result[2] - 3.5).abs() < 1e-6);
    }
    // Replay from a checkpoint applies only deltas pushed after it:
    // checkpointed state (1.0) + later delta (2.0) = 3.0.
    #[test]
    fn test_stream_checkpoint() {
        let mut stream = DeltaStream::<VectorDelta>::new();
        let initial = vec![0.0f32; 3];
        let delta1 = VectorDelta::from_dense(vec![1.0, 1.0, 1.0]);
        stream.push(delta1);
        let state_at_checkpoint = stream.replay(initial.clone()).unwrap();
        stream.create_checkpoint(state_at_checkpoint);
        let delta2 = VectorDelta::from_dense(vec![2.0, 2.0, 2.0]);
        stream.push(delta2);
        let from_checkpoint = stream.replay_from_checkpoint(0).unwrap().unwrap();
        assert!((from_checkpoint[0] - 3.0).abs() < 1e-6);
    }
    // get_range is inclusive on both ends: [3, 7] covers 5 sequences.
    #[test]
    fn test_stream_sequence_range() {
        let mut stream = DeltaStream::<VectorDelta>::new();
        for i in 0..10 {
            let delta = VectorDelta::from_dense(vec![i as f32; 3]);
            stream.push(delta);
        }
        let range = stream.get_range(3, 7);
        assert_eq!(range.len(), 5);
    }
    // Replaying up to sequence 2 applies the first two deltas only.
    #[test]
    fn test_replay_to_sequence() {
        let mut stream = DeltaStream::<VectorDelta>::new();
        let initial = vec![0.0f32; 3];
        stream.push(VectorDelta::from_dense(vec![1.0, 0.0, 0.0]));
        stream.push(VectorDelta::from_dense(vec![0.0, 1.0, 0.0]));
        stream.push(VectorDelta::from_dense(vec![0.0, 0.0, 1.0]));
        let at_seq_2 = stream.replay_to_sequence(initial, 2).unwrap();
        assert!((at_seq_2[0] - 1.0).abs() < 1e-6);
        assert!((at_seq_2[1] - 1.0).abs() < 1e-6);
        assert!((at_seq_2[2] - 0.0).abs() < 1e-6);
    }
    // trim_before(5) drops sequences 1-4 (trim is exclusive of `sequence`).
    #[test]
    fn test_stream_trim() {
        let mut stream = DeltaStream::<VectorDelta>::new();
        for _ in 0..10 {
            let delta = VectorDelta::from_dense(vec![1.0; 3]);
            stream.push(delta);
        }
        assert_eq!(stream.len(), 10);
        stream.trim_before(5);
        assert_eq!(stream.len(), 6); // Sequences 5-10
    }
}

View File

@@ -0,0 +1,510 @@
//! Delta window for time-bounded aggregation
//!
//! Provides sliding and tumbling windows for aggregating deltas
//! over time or count-based boundaries.
use alloc::collections::VecDeque;
use alloc::vec::Vec;
use core::marker::PhantomData;
use crate::delta::{Delta, VectorDelta};
use crate::error::{DeltaError, Result};
/// Configuration for delta windows
#[derive(Debug, Clone)]
pub struct WindowConfig {
    /// Window type
    pub window_type: WindowType,
    /// Window size (interpretation depends on type: nanoseconds for
    /// tumbling/sliding, gap nanoseconds for session, item count for count)
    pub size: usize,
    /// Slide amount for sliding windows (nanoseconds)
    pub slide: usize,
    /// Maximum items to keep; oldest entries are evicted beyond this
    pub max_items: usize,
}
impl Default for WindowConfig {
    /// Default: a tumbling window of size 100 (units depend on window type),
    /// capped at 10,000 retained entries.
    fn default() -> Self {
        Self {
            window_type: WindowType::Tumbling,
            size: 100,
            slide: 1,
            max_items: 10_000,
        }
    }
}
/// Window types
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WindowType {
    /// Tumbling window (non-overlapping, fixed time span)
    Tumbling,
    /// Sliding window (overlapping, advances by `slide`)
    Sliding,
    /// Session window (closes after an inactivity gap of `size` ns)
    Session,
    /// Count-based window (closes after `size` entries)
    Count,
}
/// Entry in the window
#[derive(Debug, Clone)]
struct WindowEntry<D> {
    /// The buffered delta
    delta: D,
    /// Arrival timestamp in nanoseconds, as supplied by the caller
    timestamp_ns: u64,
}
/// Aggregated window result
#[derive(Debug, Clone)]
pub struct WindowResult<D: Clone> {
    /// Composed delta for this window
    pub delta: D,
    /// Start timestamp (ns, inclusive)
    pub start_ns: u64,
    /// End timestamp (ns)
    pub end_ns: u64,
    /// Number of deltas in window
    pub count: usize,
}
/// A delta window for time-bounded aggregation.
///
/// Buffers deltas together with caller-supplied timestamps and emits
/// composed [`WindowResult`]s according to the configured window type.
#[derive(Debug)]
pub struct DeltaWindow<D: Delta> {
    // Window behavior configuration.
    config: WindowConfig,
    // Buffered entries in insertion order (oldest at the front).
    entries: VecDeque<WindowEntry<D>>,
    /// For tumbling/sliding: window boundaries
    window_start_ns: u64,
}
impl<D: Delta + Clone> DeltaWindow<D>
where
    D::Base: Clone,
{
    /// Create a new delta window with the given configuration
    pub fn new(config: WindowConfig) -> Self {
        Self {
            config,
            entries: VecDeque::new(),
            window_start_ns: 0,
        }
    }

    /// Create a tumbling window of the given size (in nanoseconds)
    ///
    /// Tumbling windows are non-overlapping: the slide equals the size.
    pub fn tumbling(size_ns: u64) -> Self {
        Self::new(WindowConfig {
            window_type: WindowType::Tumbling,
            // NOTE(review): `as usize` truncates on targets where usize < 64 bits.
            size: size_ns as usize,
            slide: size_ns as usize,
            max_items: 10_000,
        })
    }

    /// Create a sliding window with the given size and slide (in nanoseconds)
    pub fn sliding(size_ns: u64, slide_ns: u64) -> Self {
        Self::new(WindowConfig {
            window_type: WindowType::Sliding,
            size: size_ns as usize,
            slide: slide_ns as usize,
            max_items: 10_000,
        })
    }

    /// Create a count-based window that emits every `count` deltas
    pub fn count_based(count: usize) -> Self {
        Self::new(WindowConfig {
            window_type: WindowType::Count,
            size: count,
            slide: count,
            // Headroom for one full window beyond the emit threshold.
            max_items: count * 2,
        })
    }

    /// Add a delta to the window
    ///
    /// Timestamps are expected in non-decreasing order: `compose_entries`
    /// reads the first/last entry as the window span.
    pub fn add(&mut self, delta: D, timestamp_ns: u64) {
        // Initialize window start if first entry
        if self.entries.is_empty() {
            self.window_start_ns = timestamp_ns;
        }
        self.entries.push_back(WindowEntry {
            delta,
            timestamp_ns,
        });
        // Enforce max items by evicting the oldest entries.
        while self.entries.len() > self.config.max_items {
            self.entries.pop_front();
        }
    }

    /// Check if the current window is complete as of `current_ns`
    pub fn is_complete(&self, current_ns: u64) -> bool {
        match self.config.window_type {
            WindowType::Tumbling | WindowType::Sliding => {
                current_ns >= self.window_start_ns + self.config.size as u64
            }
            WindowType::Count => self.entries.len() >= self.config.size,
            WindowType::Session => {
                // Session window closes after a quiet gap longer than `size`.
                if let Some(last) = self.entries.back() {
                    // saturating_sub: plain `-` underflows (panicking in debug
                    // builds) when `current_ns` precedes the last entry's
                    // timestamp, e.g. with out-of-order clocks.
                    current_ns.saturating_sub(last.timestamp_ns) > self.config.size as u64
                } else {
                    false
                }
            }
        }
    }

    /// Emit the current window (if non-empty) and advance the window state
    ///
    /// Returns `None` when there is nothing to emit for the current window.
    pub fn emit(&mut self) -> Option<WindowResult<D>>
    where
        D: Default,
    {
        if self.entries.is_empty() {
            return None;
        }
        match self.config.window_type {
            WindowType::Tumbling => self.emit_tumbling(),
            WindowType::Sliding => self.emit_sliding(),
            WindowType::Count => self.emit_count(),
            WindowType::Session => self.emit_session(),
        }
    }

    /// Compose everything before the current boundary, then advance the
    /// boundary by exactly one window size.
    fn emit_tumbling(&mut self) -> Option<WindowResult<D>>
    where
        D: Default,
    {
        let window_end = self.window_start_ns + self.config.size as u64;
        // Collect entries in window
        let in_window: Vec<_> = self
            .entries
            .iter()
            .filter(|e| e.timestamp_ns < window_end)
            .cloned()
            .collect();
        if in_window.is_empty() {
            return None;
        }
        // Compose all deltas
        let result = self.compose_entries(&in_window);
        // Remove processed entries
        self.entries.retain(|e| e.timestamp_ns >= window_end);
        // Advance window
        self.window_start_ns = window_end;
        Some(result)
    }

    /// Compose the current window, then slide forward by `config.slide`
    fn emit_sliding(&mut self) -> Option<WindowResult<D>>
    where
        D: Default,
    {
        let window_end = self.window_start_ns + self.config.size as u64;
        // Collect entries inside [window_start, window_end)
        let in_window: Vec<_> = self
            .entries
            .iter()
            .filter(|e| e.timestamp_ns >= self.window_start_ns && e.timestamp_ns < window_end)
            .cloned()
            .collect();
        if in_window.is_empty() {
            return None;
        }
        let result = self.compose_entries(&in_window);
        // Slide window
        let new_start = self.window_start_ns + self.config.slide as u64;
        // Entries before the new start can never appear in a later window.
        self.entries.retain(|e| e.timestamp_ns >= new_start);
        self.window_start_ns = new_start;
        Some(result)
    }

    /// Compose exactly `config.size` entries once enough have accumulated
    fn emit_count(&mut self) -> Option<WindowResult<D>>
    where
        D: Default,
    {
        if self.entries.len() < self.config.size {
            return None;
        }
        // Drain the oldest `size` entries; later entries stay buffered.
        let window_entries: Vec<_> = self.entries.drain(..self.config.size).collect();
        Some(self.compose_entries(&window_entries))
    }

    /// Compose the entire buffered session (callers decide when the session
    /// has closed via [`Self::is_complete`])
    fn emit_session(&mut self) -> Option<WindowResult<D>>
    where
        D: Default,
    {
        if self.entries.is_empty() {
            return None;
        }
        let all_entries: Vec<_> = self.entries.drain(..).collect();
        Some(self.compose_entries(&all_entries))
    }

    /// Fold a batch of entries into a single [`WindowResult`]
    ///
    /// The span is taken from the first/last entry timestamps, so entries
    /// are assumed to be in time order.
    fn compose_entries(&self, entries: &[WindowEntry<D>]) -> WindowResult<D>
    where
        D: Default,
    {
        let start_ns = entries.first().map(|e| e.timestamp_ns).unwrap_or(0);
        let end_ns = entries.last().map(|e| e.timestamp_ns).unwrap_or(0);
        let count = entries.len();
        let delta = if entries.is_empty() {
            D::default()
        } else {
            let mut composed = entries[0].delta.clone();
            for entry in entries.iter().skip(1) {
                composed = composed.compose(entry.delta.clone());
            }
            composed
        };
        WindowResult {
            delta,
            start_ns,
            end_ns,
            count,
        }
    }

    /// Get the number of buffered entries
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// Check if the window has no buffered entries
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Clear all buffered entries (the window start is left unchanged)
    pub fn clear(&mut self) {
        self.entries.clear();
    }
}
impl Default for VectorDelta {
    /// Default is `VectorDelta::new(0)` — presumably an empty delta over a
    /// zero-dimensional vector; confirm `new`'s semantics in `delta.rs`.
    fn default() -> Self {
        Self::new(0)
    }
}
/// Trait for aggregating window results
pub trait WindowAggregator<D: Delta>: Send + Sync {
    /// Aggregate multiple window results into a single combined result
    fn aggregate(&self, results: &[WindowResult<D>]) -> WindowResult<D>;
    /// Get aggregation type name (short static identifier, e.g. "sum")
    fn name(&self) -> &'static str;
}
/// Sum aggregator - composes all deltas
pub struct SumAggregator;

impl<D: Delta + Clone + Default> WindowAggregator<D> for SumAggregator {
    /// Folds every result's delta together with `compose` and sums the counts.
    ///
    /// The span covers the first result's `start_ns` through the last
    /// result's `end_ns`; an empty slice yields a zeroed result with a
    /// default delta.
    fn aggregate(&self, results: &[WindowResult<D>]) -> WindowResult<D> {
        if results.is_empty() {
            return WindowResult {
                delta: D::default(),
                start_ns: 0,
                end_ns: 0,
                count: 0,
            };
        }
        let start_ns = results.first().map(|r| r.start_ns).unwrap_or(0);
        let end_ns = results.last().map(|r| r.end_ns).unwrap_or(0);
        let count: usize = results.iter().map(|r| r.count).sum();
        // Fold all deltas into one. (The original re-checked `is_empty` here,
        // which was unreachable after the early return above.)
        let delta = results[1..]
            .iter()
            .fold(results[0].delta.clone(), |acc, r| acc.compose(r.delta.clone()));
        WindowResult {
            delta,
            start_ns,
            end_ns,
            count,
        }
    }

    fn name(&self) -> &'static str {
        "sum"
    }
}
/// Average aggregator - scales composed delta by 1/count
pub struct AverageAggregator;

impl WindowAggregator<VectorDelta> for AverageAggregator {
    /// Composes all results via [`SumAggregator`], then rescales the sum by
    /// the reciprocal of the total delta count.
    fn aggregate(&self, results: &[WindowResult<VectorDelta>]) -> WindowResult<VectorDelta> {
        if results.is_empty() {
            return WindowResult {
                delta: VectorDelta::default(),
                start_ns: 0,
                end_ns: 0,
                count: 0,
            };
        }
        // Sum first, then divide; `max(1)` guards against division by zero.
        let summed = SumAggregator.aggregate(results);
        let divisor = summed.count.max(1) as f32;
        let averaged = summed.delta.scale(1.0 / divisor);
        WindowResult {
            delta: averaged,
            start_ns: summed.start_ns,
            end_ns: summed.end_ns,
            count: summed.count,
        }
    }

    fn name(&self) -> &'static str {
        "average"
    }
}
/// Exponential moving average aggregator
pub struct EmaAggregator {
    /// Smoothing factor (0 < alpha <= 1)
    pub alpha: f32,
}

impl EmaAggregator {
    /// Create with smoothing factor, bounded into the [0, 1] range.
    pub fn new(alpha: f32) -> Self {
        // Explicit bounds check; NaN falls through unchanged, which matches
        // `f32::clamp(0.0, 1.0)` behavior.
        let bounded = if alpha < 0.0 {
            0.0
        } else if alpha > 1.0 {
            1.0
        } else {
            alpha
        };
        Self { alpha: bounded }
    }
}
impl WindowAggregator<VectorDelta> for EmaAggregator {
    /// Folds the results into an exponential moving average:
    /// `ema = alpha * current + (1 - alpha) * previous_ema`,
    /// seeded with the first result's delta.
    fn aggregate(&self, results: &[WindowResult<VectorDelta>]) -> WindowResult<VectorDelta> {
        if results.is_empty() {
            return WindowResult {
                delta: VectorDelta::default(),
                start_ns: 0,
                end_ns: 0,
                count: 0,
            };
        }
        let span_start = results.first().map(|r| r.start_ns).unwrap_or(0);
        let span_end = results.last().map(|r| r.end_ns).unwrap_or(0);
        let total: usize = results.iter().map(|r| r.count).sum();
        // Seed with the first delta, then blend each subsequent one in.
        let ema = results[1..].iter().fold(results[0].delta.clone(), |prev, r| {
            r.delta.scale(self.alpha).compose(prev.scale(1.0 - self.alpha))
        });
        WindowResult {
            delta: ema,
            start_ns: span_start,
            end_ns: span_end,
            count: total,
        }
    }

    fn name(&self) -> &'static str {
        "ema"
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_tumbling_window() {
        let mut window = DeltaWindow::<VectorDelta>::tumbling(1_000_000); // 1ms
        // Add deltas at different times within the first window
        window.add(VectorDelta::from_dense(vec![1.0, 0.0, 0.0]), 0);
        window.add(VectorDelta::from_dense(vec![0.0, 1.0, 0.0]), 500_000);
        // Window not complete yet
        assert!(!window.is_complete(900_000));
        // Window complete exactly at start + size (boundary is inclusive)
        assert!(window.is_complete(1_000_000));
        // Emit composes both buffered deltas into a single result
        let result = window.emit().unwrap();
        assert_eq!(result.count, 2);
    }
    #[test]
    fn test_count_window() {
        let mut window = DeltaWindow::<VectorDelta>::count_based(3);
        window.add(VectorDelta::from_dense(vec![1.0]), 0);
        window.add(VectorDelta::from_dense(vec![1.0]), 1);
        assert!(window.emit().is_none()); // Not enough
        window.add(VectorDelta::from_dense(vec![1.0]), 2);
        // Third delta reaches the configured count, so emit succeeds
        let result = window.emit().unwrap();
        assert_eq!(result.count, 3);
    }
    #[test]
    fn test_sliding_window() {
        let mut window = DeltaWindow::<VectorDelta>::sliding(1_000_000, 500_000);
        window.add(VectorDelta::from_dense(vec![1.0]), 0);
        window.add(VectorDelta::from_dense(vec![2.0]), 250_000);
        window.add(VectorDelta::from_dense(vec![3.0]), 750_000);
        // Complete after 1ms
        assert!(window.is_complete(1_000_000));
    }
    #[test]
    fn test_sum_aggregator() {
        // Two single-delta windows covering adjacent time spans
        let results = vec![
            WindowResult {
                delta: VectorDelta::from_dense(vec![1.0, 0.0]),
                start_ns: 0,
                end_ns: 100,
                count: 1,
            },
            WindowResult {
                delta: VectorDelta::from_dense(vec![0.0, 1.0]),
                start_ns: 100,
                end_ns: 200,
                count: 1,
            },
        ];
        let aggregated = SumAggregator.aggregate(&results);
        // Counts are summed across windows
        assert_eq!(aggregated.count, 2);
    }
}