Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,24 @@
[package]
name = "rvf-quant"
version = "0.1.0"
edition = "2021"
description = "RuVector Format temperature-tiered vector quantization (f32/f16/u8/binary)"
license = "MIT OR Apache-2.0"
repository = "https://github.com/ruvnet/ruvector"
homepage = "https://github.com/ruvnet/ruvector"
readme = "README.md"
categories = ["algorithms", "compression"]
keywords = ["vector", "quantization", "compression", "embedding", "rvf"]
rust-version = "1.87"
[features]
default = ["std"]
std = []
simd = []
[dependencies]
rvf-types = { version = "0.2.0", path = "../rvf-types" }
[dev-dependencies]
rand = "0.8"
approx = "0.5"

View File

@@ -0,0 +1,29 @@
# rvf-quant
Temperature-tiered vector quantization for RuVector Format.
## Overview
`rvf-quant` provides quantization codecs that reduce vector storage size based on access temperature:
- **f32** -- full precision for hot vectors
- **f16** -- half precision for warm vectors
- **u8** -- scalar quantization for cool vectors
- **binary** -- 1-bit quantization for cold/archive vectors
- **Automatic tiering** -- promote/demote vectors based on access patterns
## Usage
```toml
[dependencies]
rvf-quant = "0.1"
```
## Features
- `std` (default) -- enable `std` support
- `simd` -- enable SIMD-accelerated quantization
## License
MIT OR Apache-2.0

View File

@@ -0,0 +1,168 @@
//! Binary Quantization — 32x compression (1 bit per dimension).
//!
//! Used for the **Cold** (Tier 2) tier. Encodes only the sign of each
//! dimension and uses Hamming distance for comparison.
use alloc::vec;
use alloc::vec::Vec;
/// Encode a float vector to binary: 1 bit per dimension (sign bit).
///
/// Bit layout: dimension `d` maps to bit `d % 8` of byte `d / 8`.
/// A non-negative value (>= 0.0) sets the bit; a negative value leaves it clear.
pub fn encode_binary(vector: &[f32]) -> Vec<u8> {
    let mut bits = vec![0u8; vector.len().div_ceil(8)];
    // Pack eight dimensions per output byte.
    for (byte_idx, group) in vector.chunks(8).enumerate() {
        let mut packed = 0u8;
        for (bit_idx, &val) in group.iter().enumerate() {
            if val >= 0.0 {
                packed |= 1 << bit_idx;
            }
        }
        bits[byte_idx] = packed;
    }
    bits
}
/// Decode binary codes back to an approximate float vector.
///
/// Each bit is decoded to +1.0 (set) or -1.0 (unset). Dimensions past the
/// end of `bits` decode as -1.0, mirroring an unset bit.
pub fn decode_binary(bits: &[u8], dim: usize) -> Vec<f32> {
    (0..dim)
        .map(|d| {
            let set = bits
                .get(d / 8)
                .map_or(false, |byte| (byte >> (d % 8)) & 1 == 1);
            if set { 1.0 } else { -1.0 }
        })
        .collect()
}
/// Compute the Hamming distance between two binary-encoded vectors.
///
/// Processes data in u64 chunks (8 bytes at a time) using `count_ones()`,
/// which maps to hardware POPCNT on supported platforms, then finishes the
/// tail byte-by-byte. Rewritten over `chunks_exact(8)` so the hot loop has
/// no manual index arithmetic and no per-index bounds checks.
///
/// # Panics
///
/// Panics if `a` and `b` have different lengths.
pub fn hamming_distance(a: &[u8], b: &[u8]) -> u32 {
    assert_eq!(a.len(), b.len(), "binary vectors must have equal length");
    let a_chunks = a.chunks_exact(8);
    let b_chunks = b.chunks_exact(8);
    // Tail bytes that do not fill a whole u64 (`remainder` borrows from the
    // underlying slice, so it stays valid after the iterators are consumed).
    let tail: u32 = a_chunks
        .remainder()
        .iter()
        .zip(b_chunks.remainder())
        .map(|(&x, &y)| (x ^ y).count_ones())
        .sum();
    // Process 8 bytes at a time using u64 popcount.
    let head: u32 = a_chunks
        .zip(b_chunks)
        .map(|(ca, cb)| {
            // chunks_exact(8) guarantees 8-byte slices, so try_into cannot fail.
            let xa = u64::from_le_bytes(ca.try_into().unwrap());
            let xb = u64::from_le_bytes(cb.try_into().unwrap());
            (xa ^ xb).count_ones()
        })
        .sum();
    head + tail
}
/// SIMD-accelerated Hamming distance (stub; falls back to scalar
/// when the `simd` feature is not enabled or unavailable).
///
/// Currently identical in behavior to [`hamming_distance`]; kept as a
/// separate entry point so callers can opt into the `simd` code path
/// once a dedicated implementation lands.
#[cfg(feature = "simd")]
pub fn hamming_distance_simd(a: &[u8], b: &[u8]) -> u32 {
    // Future: VPOPCNTDQ / CNT implementation.
    hamming_distance(a, b)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trip must preserve the sign of every dimension;
    /// 0.0 counts as non-negative and decodes to +1.0.
    #[test]
    fn encode_decode_round_trip() {
        let v = vec![1.0, -0.5, 0.3, -2.0, 0.0, 0.1, -0.1, 0.9];
        let bits = encode_binary(&v);
        let decoded = decode_binary(&bits, v.len());
        // Check sign preservation
        for (d, (&orig, &dec)) in v.iter().zip(decoded.iter()).enumerate() {
            if orig >= 0.0 {
                assert_eq!(dec, 1.0, "dim {d}: expected +1 for val {orig}");
            } else {
                assert_eq!(dec, -1.0, "dim {d}: expected -1 for val {orig}");
            }
        }
    }

    /// A code XORed with itself has no set bits.
    #[test]
    fn hamming_self_is_zero() {
        let v = vec![1.0, -1.0, 0.5, -0.5, 0.0, 1.0, -1.0, 0.5];
        let bits = encode_binary(&v);
        assert_eq!(hamming_distance(&bits, &bits), 0);
    }

    /// Fully opposite sign patterns differ in all 8 bits.
    #[test]
    fn hamming_opposite_is_max() {
        let v1 = vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0];
        let v2 = vec![-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0];
        let b1 = encode_binary(&v1);
        let b2 = encode_binary(&v2);
        assert_eq!(hamming_distance(&b1, &b2), 8);
    }

    /// Cross-check the popcount path against a naive per-dimension
    /// sign comparison.
    #[test]
    fn hamming_matches_naive() {
        let v1 = vec![
            1.0, -1.0, 0.5, -0.5, 0.1, -0.1, 0.9, -0.9, 0.3, -0.3, 0.7, -0.7, 0.2, -0.2, 0.8, -0.8,
        ];
        let v2 = vec![
            -1.0, 1.0, -0.5, 0.5, -0.1, 0.1, -0.9, 0.9, -0.3, 0.3, -0.7, 0.7, -0.2, 0.2, -0.8, 0.8,
        ];
        let b1 = encode_binary(&v1);
        let b2 = encode_binary(&v2);
        // All signs are flipped -> hamming distance = 16
        assert_eq!(hamming_distance(&b1, &b2), 16);
        // Naive computation for verification
        let mut naive_dist = 0u32;
        for d in 0..16 {
            let s1 = if v1[d] >= 0.0 { 1 } else { 0 };
            let s2 = if v2[d] >= 0.0 { 1 } else { 0 };
            if s1 != s2 {
                naive_dist += 1;
            }
        }
        assert_eq!(hamming_distance(&b1, &b2), naive_dist);
    }

    /// Dimensions that don't fill a whole byte still round-trip.
    #[test]
    fn non_multiple_of_8_dimensions() {
        let v = vec![1.0, -1.0, 0.5, -0.5, 0.1]; // 5 dims
        let bits = encode_binary(&v);
        assert_eq!(bits.len(), 1); // ceil(5/8) = 1
        let decoded = decode_binary(&bits, 5);
        assert_eq!(decoded.len(), 5);
        assert_eq!(decoded[0], 1.0);
        assert_eq!(decoded[1], -1.0);
        assert_eq!(decoded[4], 1.0); // 0.1 >= 0
    }
}

View File

@@ -0,0 +1,410 @@
//! QUANT_SEG and SKETCH_SEG wire format codec.
//!
//! Serializes / deserializes quantizer parameters and Count-Min Sketch
//! data to the binary layout defined in the RVF wire spec.
use alloc::boxed::Box;
use alloc::vec;
use alloc::vec::Vec;
use crate::binary;
use crate::product::ProductQuantizer;
use crate::scalar::ScalarQuantizer;
use crate::sketch::CountMinSketch;
use crate::traits::Quantizer;
// ---------------------------------------------------------------------------
// QUANT_SEG codec
// ---------------------------------------------------------------------------
/// Quantization type tags matching the QUANT_SEG wire spec.
const QUANT_TYPE_SCALAR: u8 = 0;
const QUANT_TYPE_PRODUCT: u8 = 1;
const QUANT_TYPE_BINARY: u8 = 2;
/// Encode a quantizer into the QUANT_SEG binary payload.
///
/// Layout:
/// ```text
/// [quant_type: u8] [tier: u8] [dim: u16 LE] [padding: 60 bytes to 64B]
/// [type-specific data ...]
/// ```
///
/// NOTE(review): for tiers 0 and 1 this delegates to the trait-based
/// helpers, which currently emit ONLY the 64-byte header — the scalar
/// min/max and PQ codebook parameters cannot be recovered through
/// `&dyn Quantizer` (no downcast). Output for those tiers is therefore
/// not round-trippable through `decode_quant_seg`; prefer
/// `encode_scalar_quantizer` / `encode_product_quantizer`.
pub fn encode_quant_seg(quantizer: &dyn Quantizer) -> Vec<u8> {
    let tier = quantizer.tier() as u8;
    // NOTE(review): `as u16` silently truncates dims > 65535.
    let dim = quantizer.dim() as u16;
    // Downcast to determine the concrete type.
    // We use the tier as a proxy since each tier maps to exactly one quantizer type.
    match tier {
        0 => encode_scalar_quant_seg(quantizer, dim),
        1 => encode_product_quant_seg(quantizer, dim),
        2 => encode_binary_quant_seg(dim),
        _ => panic!("unknown quantizer tier"),
    }
}
/// Decode a QUANT_SEG binary payload into a boxed [`Quantizer`].
///
/// # Panics
///
/// Panics if the payload is shorter than the 64-byte header or the
/// type tag is unknown.
pub fn decode_quant_seg(data: &[u8]) -> Box<dyn Quantizer> {
    assert!(data.len() >= 64, "QUANT_SEG header too short");
    let quant_type = data[0];
    // Byte 1 carries the tier; it is implied by quant_type, so ignore it.
    let _tier = data[1];
    let dim = usize::from(u16::from_le_bytes([data[2], data[3]]));
    // Type-specific parameters follow the 64-byte aligned header.
    let body = &data[64..];
    match quant_type {
        QUANT_TYPE_SCALAR => Box::new(decode_scalar(body, dim)),
        QUANT_TYPE_PRODUCT => Box::new(decode_product(body, dim)),
        QUANT_TYPE_BINARY => Box::new(BinaryQuantizerWrapper { dim }),
        _ => panic!("unknown quant_type {quant_type}"),
    }
}
// ---------------------------------------------------------------------------
// Scalar
// ---------------------------------------------------------------------------
/// Header-only QUANT_SEG encoding for a scalar quantizer reached via
/// `&dyn Quantizer`.
///
/// NOTE(review): this writes ONLY the 64-byte header. The per-dimension
/// min/max parameters cannot be recovered through the `Quantizer` trait
/// (no downcast available), so the body is left empty and
/// `decode_quant_seg` will reject the output ("scalar quant data too
/// short"). Prefer `encode_scalar_quantizer`, which serializes the
/// concrete type with its parameters.
fn encode_scalar_quant_seg(quantizer: &dyn Quantizer, dim: u16) -> Vec<u8> {
    // Header (64 bytes)
    let mut buf = vec![0u8; 64];
    buf[0] = QUANT_TYPE_SCALAR;
    buf[1] = quantizer.tier() as u8;
    buf[2..4].copy_from_slice(&dim.to_le_bytes());
    buf
}
/// Encode a [`ScalarQuantizer`] directly (preferred over trait-based encoding).
///
/// Layout: 64-byte header (`[type=0][tier=0][dim: u16 LE][padding]`)
/// followed by `dim` f32 LE min values, then `dim` f32 LE max values.
///
/// # Panics
///
/// Panics if `sq.dim` does not fit in the wire format's u16 dim field
/// (previously this truncated silently).
pub fn encode_scalar_quantizer(sq: &ScalarQuantizer) -> Vec<u8> {
    assert!(sq.dim <= u16::MAX as usize, "dim must fit in u16");
    let dim = sq.dim as u16;
    // Preallocate: 64-byte header + dim f32 mins + dim f32 maxes.
    let mut buf = Vec::with_capacity(64 + sq.dim * 8);
    buf.resize(64, 0u8);
    buf[0] = QUANT_TYPE_SCALAR;
    buf[1] = 0; // Hot tier
    buf[2..4].copy_from_slice(&dim.to_le_bytes());
    // min[dim], then max[dim], each as f32 little-endian.
    for &v in sq.min_vals.iter().chain(sq.max_vals.iter()) {
        buf.extend_from_slice(&v.to_le_bytes());
    }
    buf
}
/// Decode scalar quantizer parameters from a QUANT_SEG body:
/// `dim` f32 LE min values followed by `dim` f32 LE max values.
///
/// # Panics
///
/// Panics if `body` holds fewer than `2 * dim` f32 values.
fn decode_scalar(body: &[u8], dim: usize) -> ScalarQuantizer {
    let float_bytes = dim * 4;
    assert!(body.len() >= float_bytes * 2, "scalar quant data too short");
    // Parse 2*dim consecutive little-endian f32s, then split into the
    // min half (first dim values) and max half (remaining dim values).
    let mut vals: Vec<f32> = body[..float_bytes * 2]
        .chunks_exact(4)
        .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
        .collect();
    let max_vals = vals.split_off(dim);
    ScalarQuantizer {
        min_vals: vals,
        max_vals,
        dim,
    }
}
// ---------------------------------------------------------------------------
// Product
// ---------------------------------------------------------------------------
/// Header-only QUANT_SEG encoding for a product quantizer reached via
/// `&dyn Quantizer`.
///
/// NOTE(review): like `encode_scalar_quant_seg`, this emits only the
/// 64-byte header without the M/K/sub_dim fields or codebook data, so
/// `decode_quant_seg` will reject the output ("PQ header too short").
/// Prefer `encode_product_quantizer` for a round-trippable payload.
fn encode_product_quant_seg(quantizer: &dyn Quantizer, dim: u16) -> Vec<u8> {
    let mut buf = vec![0u8; 64];
    buf[0] = QUANT_TYPE_PRODUCT;
    buf[1] = quantizer.tier() as u8;
    buf[2..4].copy_from_slice(&dim.to_le_bytes());
    buf
}
/// Encode a [`ProductQuantizer`] directly.
///
/// Layout: 64-byte header (`[type=1][tier=1][dim: u16 LE][padding]`),
/// then `m`, `k`, `sub_dim` as u16 LE, then the codebook as
/// `m * k * sub_dim` f32 LE values in (subspace, centroid, dim) order.
///
/// # Panics
///
/// Panics if `m`, `k`, `sub_dim`, or the total dimensionality exceeds
/// u16 range (previously these truncated silently).
pub fn encode_product_quantizer(pq: &ProductQuantizer) -> Vec<u8> {
    let total_dim = pq.m * pq.sub_dim;
    // Guard against silent truncation of the wire format's u16 fields.
    assert!(total_dim <= u16::MAX as usize, "dim must fit in u16");
    assert!(
        pq.m <= u16::MAX as usize && pq.k <= u16::MAX as usize && pq.sub_dim <= u16::MAX as usize,
        "PQ parameters must fit in u16"
    );
    let dim = total_dim as u16;
    // Preallocate: header + 3 u16 params + full codebook.
    let mut buf = Vec::with_capacity(64 + 6 + pq.m * pq.k * pq.sub_dim * 4);
    buf.resize(64, 0u8);
    buf[0] = QUANT_TYPE_PRODUCT;
    buf[1] = 1; // Warm tier
    buf[2..4].copy_from_slice(&dim.to_le_bytes());
    // PQ header: M, K, sub_dim (each as u16 LE), after the 64-byte header.
    buf.extend_from_slice(&(pq.m as u16).to_le_bytes());
    buf.extend_from_slice(&(pq.k as u16).to_le_bytes());
    buf.extend_from_slice(&(pq.sub_dim as u16).to_le_bytes());
    // Codebook: M * K * sub_dim floats
    for sub_book in &pq.codebooks {
        for centroid in sub_book {
            for &val in centroid {
                buf.extend_from_slice(&val.to_le_bytes());
            }
        }
    }
    buf
}
/// Decode product quantizer parameters and codebooks from a QUANT_SEG body.
///
/// Expects `m`, `k`, `sub_dim` as u16 LE, then `m * k * sub_dim` f32 LE
/// codebook values in (subspace, centroid, dim) order.
///
/// # Panics
///
/// Panics if the body is shorter than the declared codebook.
fn decode_product(body: &[u8], _dim: usize) -> ProductQuantizer {
    assert!(body.len() >= 6, "PQ header too short");
    let m = u16::from_le_bytes([body[0], body[1]]) as usize;
    let k = u16::from_le_bytes([body[2], body[3]]) as usize;
    let sub_dim = u16::from_le_bytes([body[4], body[5]]) as usize;
    let codebook_bytes = m * k * sub_dim * 4;
    assert!(
        body.len() >= 6 + codebook_bytes,
        "PQ codebook data too short"
    );
    // Stream of f32 LE values, consumed in (subspace, centroid, dim) order.
    let mut floats = body[6..6 + codebook_bytes]
        .chunks_exact(4)
        .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]));
    let codebooks: Vec<Vec<Vec<f32>>> = (0..m)
        .map(|_| {
            (0..k)
                .map(|_| {
                    // `floats` holds exactly m*k*sub_dim values (asserted
                    // above), so `next()` cannot return None here.
                    (0..sub_dim).map(|_| floats.next().unwrap()).collect()
                })
                .collect()
        })
        .collect();
    ProductQuantizer {
        m,
        k,
        sub_dim,
        codebooks,
    }
}
// ---------------------------------------------------------------------------
// Binary
// ---------------------------------------------------------------------------
/// Build the QUANT_SEG payload for binary quantization.
///
/// Sign-bit quantization has no learned parameters, so the payload is
/// exactly the 64-byte header.
fn encode_binary_quant_seg(dim: u16) -> Vec<u8> {
    let mut buf = vec![0u8; 64];
    buf[0] = QUANT_TYPE_BINARY;
    buf[1] = 2; // Cold tier
    buf[2..4].copy_from_slice(&dim.to_le_bytes());
    // Binary quantization has no additional parameters (sign-based).
    buf
}
/// Wrapper to implement `Quantizer` for binary quantization.
///
/// Sign-bit quantization has no learned parameters; only the
/// dimensionality is stored (needed to decode back to floats).
struct BinaryQuantizerWrapper {
    // Vector dimensionality of the encoded space.
    dim: usize,
}
impl Quantizer for BinaryQuantizerWrapper {
    /// Delegates to the free function: one sign bit per dimension.
    fn encode(&self, vector: &[f32]) -> Vec<u8> {
        binary::encode_binary(vector)
    }
    /// Delegates to the free function: each bit decodes to +1.0 / -1.0.
    fn decode(&self, codes: &[u8]) -> Vec<f32> {
        binary::decode_binary(codes, self.dim)
    }
    /// Binary quantization serves the Cold tier.
    fn tier(&self) -> crate::tier::TemperatureTier {
        crate::tier::TemperatureTier::Cold
    }
    fn dim(&self) -> usize {
        self.dim
    }
}
// ---------------------------------------------------------------------------
// SKETCH_SEG codec
// ---------------------------------------------------------------------------
/// Encode a CountMinSketch into the SKETCH_SEG binary payload.
///
/// Layout:
/// ```text
/// [width: u32 LE] [depth: u32 LE] [total_accesses: u64 LE] [padding: 48 bytes to 64B]
/// [counters: depth * width bytes]
/// ```
pub fn encode_sketch_seg(sketch: &CountMinSketch) -> Vec<u8> {
    // Preallocate the exact output size: 64-byte aligned header plus
    // the row-major counter matrix (avoids repeated grow-and-copy).
    let mut buf = Vec::with_capacity(64 + sketch.depth * sketch.width);
    buf.resize(64, 0u8);
    buf[0..4].copy_from_slice(&(sketch.width as u32).to_le_bytes());
    buf[4..8].copy_from_slice(&(sketch.depth as u32).to_le_bytes());
    buf[8..16].copy_from_slice(&sketch.total_accesses.to_le_bytes());
    // Counter data: row-major
    for row in &sketch.counters {
        buf.extend_from_slice(row);
    }
    buf
}
/// Decode a SKETCH_SEG binary payload into a CountMinSketch.
///
/// # Panics
///
/// Panics if the payload is shorter than the 64-byte header or the
/// declared `depth * width` counter matrix.
pub fn decode_sketch_seg(data: &[u8]) -> CountMinSketch {
    assert!(data.len() >= 64, "SKETCH_SEG header too short");
    // Header fields are fixed-offset little-endian integers; the slice
    // lengths make every try_into infallible.
    let width = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize;
    let depth = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize;
    let total_accesses = u64::from_le_bytes(data[8..16].try_into().unwrap());
    let body = &data[64..];
    assert!(body.len() >= width * depth, "SKETCH_SEG counter data too short");
    // Counter rows are stored row-major, `width` bytes per row.
    let counters: Vec<Vec<u8>> = (0..depth)
        .map(|row| body[row * width..(row + 1) * width].to_vec())
        .collect();
    CountMinSketch {
        counters,
        width,
        depth,
        total_accesses,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Scalar params survive encode -> decode and the decoded trait
    /// object quantizes identically to the original.
    #[test]
    fn scalar_quant_seg_round_trip() {
        let sq = ScalarQuantizer {
            min_vals: vec![-1.0, -2.0, -0.5, 0.0],
            max_vals: vec![1.0, 2.0, 0.5, 1.0],
            dim: 4,
        };
        let encoded = encode_scalar_quantizer(&sq);
        let decoded = decode_quant_seg(&encoded);
        assert_eq!(decoded.dim(), 4);
        assert_eq!(decoded.tier(), crate::tier::TemperatureTier::Hot);
        // Verify round-trip: encode a test vector, check similar output
        let test_vec = vec![0.5, 1.0, 0.0, 0.5];
        let codes_orig = sq.encode_vec(&test_vec);
        let codes_decoded = decoded.encode(&test_vec);
        assert_eq!(codes_orig, codes_decoded);
    }

    /// A hand-built PQ codebook survives the wire format byte-for-byte.
    #[test]
    fn product_quant_seg_round_trip() {
        // Build a small PQ manually
        let pq = ProductQuantizer {
            m: 2,
            k: 4,
            sub_dim: 2,
            codebooks: vec![
                vec![
                    vec![0.0, 0.1],
                    vec![0.2, 0.3],
                    vec![0.4, 0.5],
                    vec![0.6, 0.7],
                ],
                vec![
                    vec![0.8, 0.9],
                    vec![1.0, 1.1],
                    vec![1.2, 1.3],
                    vec![1.4, 1.5],
                ],
            ],
        };
        let encoded = encode_product_quantizer(&pq);
        let decoded = decode_quant_seg(&encoded);
        assert_eq!(decoded.dim(), 4);
        assert_eq!(decoded.tier(), crate::tier::TemperatureTier::Warm);
        let test_vec = vec![0.1, 0.2, 0.9, 1.0];
        let codes_orig = pq.encode_vec(&test_vec);
        let codes_decoded = decoded.encode(&test_vec);
        assert_eq!(codes_orig, codes_decoded);
    }

    /// The parameter-free binary header decodes to a working Cold-tier
    /// quantizer.
    #[test]
    fn binary_quant_seg_round_trip() {
        let dim: u16 = 16;
        let encoded = encode_binary_quant_seg(dim);
        let decoded = decode_quant_seg(&encoded);
        assert_eq!(decoded.dim(), 16);
        assert_eq!(decoded.tier(), crate::tier::TemperatureTier::Cold);
        let test_vec: Vec<f32> = (0..16)
            .map(|i| if i % 2 == 0 { 1.0 } else { -1.0 })
            .collect();
        let codes = decoded.encode(&test_vec);
        let recon = decoded.decode(&codes);
        assert_eq!(recon.len(), 16);
    }

    /// Sketch header, counters, and per-block estimates all survive
    /// serialization.
    #[test]
    fn sketch_seg_round_trip() {
        let mut sketch = CountMinSketch::new(64, 4);
        // block_id k is incremented k+1 times, giving distinct counts.
        for block_id in 0..20u64 {
            for _ in 0..(block_id + 1) {
                sketch.increment(block_id);
            }
        }
        let encoded = encode_sketch_seg(&sketch);
        let decoded = decode_sketch_seg(&encoded);
        assert_eq!(decoded.width, sketch.width);
        assert_eq!(decoded.depth, sketch.depth);
        assert_eq!(decoded.total_accesses, sketch.total_accesses);
        // Verify estimates match
        for block_id in 0..20u64 {
            assert_eq!(decoded.estimate(block_id), sketch.estimate(block_id));
        }
    }
}

View File

@@ -0,0 +1,31 @@
//! Temperature-tiered vector quantization for the RuVector Format (RVF).
//!
//! Provides three quantization levels mapped to temperature tiers:
//!
//! | Tier | Quantization | Compression |
//! |------|-------------|-------------|
//! | Hot | Scalar (int8) | 4x |
//! | Warm | Product (PQ) | 8-16x |
//! | Cold | Binary (1-bit)| 32x |
//!
//! A Count-Min Sketch tracks per-block access frequency to drive
//! promotion/demotion decisions.
#![cfg_attr(not(feature = "std"), no_std)]
extern crate alloc;
pub mod binary;
pub mod codec;
pub mod product;
pub mod scalar;
pub mod sketch;
pub mod tier;
pub mod traits;
pub use binary::{decode_binary, encode_binary, hamming_distance};
pub use product::ProductQuantizer;
pub use scalar::ScalarQuantizer;
pub use sketch::CountMinSketch;
pub use tier::TemperatureTier;
pub use traits::Quantizer;

View File

@@ -0,0 +1,333 @@
//! Product Quantization (PQ) — 8-16x compression.
//!
//! Splits a vector into M subspaces, learns K centroids per subspace
//! via k-means, and encodes each sub-vector as a centroid index (1 byte
//! when K <= 256).
//!
//! Used for the **Warm** (Tier 1) tier.
use crate::tier::TemperatureTier;
use crate::traits::Quantizer;
use alloc::vec;
use alloc::vec::Vec;
/// Product quantizer parameters and codebooks.
///
/// Invariants (established by [`ProductQuantizer::train`]): `codebooks`
/// has `m` subspace codebooks, each holding `k` centroids (padded with
/// duplicates when the training set is smaller than `k`), and each
/// centroid has exactly `sub_dim` floats.
#[derive(Clone, Debug)]
pub struct ProductQuantizer {
    /// Number of subspaces.
    pub m: usize,
    /// Number of centroids per subspace.
    pub k: usize,
    /// Dimensions per subspace.
    pub sub_dim: usize,
    /// Codebooks: `codebooks[subspace][centroid]` is a `Vec<f32>` of length `sub_dim`.
    pub codebooks: Vec<Vec<Vec<f32>>>,
}
impl ProductQuantizer {
    /// Train a product quantizer using k-means clustering per subspace.
    ///
    /// # Arguments
    ///
    /// - `vectors`: Training vectors (all must have the same dimensionality).
    /// - `m`: Number of subspaces.
    /// - `k`: Number of centroids per subspace (typically 64, 128, or 256).
    /// - `iterations`: Number of k-means iterations.
    ///
    /// # Panics
    ///
    /// Panics if the vector dimensionality is not divisible by `m`, if `vectors`
    /// is empty, or if `k` or `m` is zero.
    pub fn train(vectors: &[&[f32]], m: usize, k: usize, iterations: usize) -> Self {
        assert!(!vectors.is_empty(), "need training data");
        assert!(m > 0 && k > 0, "m and k must be > 0");
        let dim = vectors[0].len();
        assert!(
            dim.is_multiple_of(m),
            "dim ({dim}) must be divisible by m ({m})"
        );
        let sub_dim = dim / m;
        let mut codebooks = Vec::with_capacity(m);
        for sub in 0..m {
            let start = sub * sub_dim;
            let end = start + sub_dim;
            // Extract sub-vectors for this subspace and cluster them.
            let sub_vecs: Vec<&[f32]> = vectors.iter().map(|v| &v[start..end]).collect();
            let centroids = kmeans(&sub_vecs, k, sub_dim, iterations);
            codebooks.push(centroids);
        }
        Self {
            m,
            k,
            sub_dim,
            codebooks,
        }
    }

    /// Encode a vector: for each subspace, find the nearest centroid index.
    ///
    /// # Panics
    ///
    /// Panics if `vector.len() != m * sub_dim`.
    pub fn encode_vec(&self, vector: &[f32]) -> Vec<u8> {
        assert_eq!(vector.len(), self.m * self.sub_dim);
        let mut codes = Vec::with_capacity(self.m);
        for sub in 0..self.m {
            let start = sub * self.sub_dim;
            let sub_vec = &vector[start..start + self.sub_dim];
            let idx = nearest_centroid(sub_vec, &self.codebooks[sub]);
            // k <= 256 is assumed by the 1-byte code; larger k would truncate.
            codes.push(idx as u8);
        }
        codes
    }

    /// Decode codes back to an approximate vector by concatenating centroids.
    ///
    /// # Panics
    ///
    /// Panics if `codes.len() != m` or any code indexes past its codebook.
    pub fn decode_vec(&self, codes: &[u8]) -> Vec<f32> {
        assert_eq!(codes.len(), self.m);
        let mut vector = Vec::with_capacity(self.m * self.sub_dim);
        for (sub, &code) in codes.iter().enumerate() {
            vector.extend_from_slice(&self.codebooks[sub][code as usize]);
        }
        vector
    }

    /// Precompute distance tables for Asymmetric Distance Computation (ADC).
    ///
    /// Returns a table `[subspace][centroid]` where entry (s, c) is the
    /// squared L2 distance from the query sub-vector s to centroid c.
    pub fn compute_distance_tables(&self, query: &[f32]) -> Vec<Vec<f32>> {
        assert_eq!(query.len(), self.m * self.sub_dim);
        let mut tables = Vec::with_capacity(self.m);
        for sub in 0..self.m {
            let start = sub * self.sub_dim;
            let q_sub = &query[start..start + self.sub_dim];
            let mut table = Vec::with_capacity(self.k);
            for centroid in &self.codebooks[sub] {
                table.push(l2_squared(q_sub, centroid));
            }
            tables.push(table);
        }
        tables
    }

    /// Compute the ADC distance using precomputed tables.
    ///
    /// Sum of table lookups: `dist = sum over s of tables[s][codes[s]]`.
    ///
    /// # Panics
    ///
    /// Panics if `tables.len() != codes.len()` or any code is out of range
    /// for its table. Safe indexing replaces the previous
    /// `get_unchecked`: `codes` is a caller-supplied public-API slice, so
    /// an out-of-range code must panic rather than be undefined behavior
    /// in release builds (the old `debug_assert!` vanished there).
    pub fn distance_adc(tables: &[Vec<f32>], codes: &[u8]) -> f32 {
        assert_eq!(tables.len(), codes.len());
        let mut dist = 0.0f32;
        for (table, &code) in tables.iter().zip(codes.iter()) {
            dist += table[code as usize];
        }
        dist
    }
}
impl Quantizer for ProductQuantizer {
    /// Delegates to [`ProductQuantizer::encode_vec`]: one code byte per subspace.
    fn encode(&self, vector: &[f32]) -> Vec<u8> {
        self.encode_vec(vector)
    }
    /// Delegates to [`ProductQuantizer::decode_vec`]: concatenated centroids.
    fn decode(&self, codes: &[u8]) -> Vec<f32> {
        self.decode_vec(codes)
    }
    /// Product quantization serves the Warm tier.
    fn tier(&self) -> TemperatureTier {
        TemperatureTier::Warm
    }
    /// Full dimensionality = subspaces * dims-per-subspace.
    fn dim(&self) -> usize {
        self.m * self.sub_dim
    }
}
// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------
/// Squared Euclidean (L2) distance between two equal-length slices.
fn l2_squared(a: &[f32], b: &[f32]) -> f32 {
    let mut acc = 0.0f32;
    for (&x, &y) in a.iter().zip(b.iter()) {
        let diff = x - y;
        acc += diff * diff;
    }
    acc
}

/// Find the index of the nearest centroid to `point` (squared L2).
///
/// Ties keep the earliest index; an empty `centroids` slice yields 0.
fn nearest_centroid(point: &[f32], centroids: &[Vec<f32>]) -> usize {
    centroids
        .iter()
        .enumerate()
        .fold((0usize, f32::INFINITY), |(best_i, best_d), (i, c)| {
            let d = l2_squared(point, c);
            // Strict `<` preserves the first-seen index on ties.
            if d < best_d { (i, d) } else { (best_i, best_d) }
        })
        .0
}
/// Simple k-means clustering (Lloyd's algorithm).
///
/// Centroids are seeded from the first `min(k, n)` training points, then
/// refined for `iterations` assignment/update rounds. Clusters that end a
/// round empty keep their previous centroid. If `k` exceeds the number of
/// data points, the result is padded by repeating the last centroid.
fn kmeans(data: &[&[f32]], k: usize, sub_dim: usize, iterations: usize) -> Vec<Vec<f32>> {
    let n = data.len();
    // Cannot have more distinct centroids than data points.
    let actual_k = k.min(n);
    let mut centroids: Vec<Vec<f32>> = data.iter().take(actual_k).map(|p| p.to_vec()).collect();
    let mut assignments = vec![0usize; n];
    let mut counts = vec![0usize; actual_k];
    let mut sums = vec![vec![0.0f32; sub_dim]; actual_k];
    for _ in 0..iterations {
        // Assignment step: nearest centroid per point.
        for (slot, point) in assignments.iter_mut().zip(data.iter()) {
            *slot = nearest_centroid(point, &centroids);
        }
        // Update step: recompute cluster means from scratch.
        counts.fill(0);
        sums.iter_mut().for_each(|s| s.fill(0.0));
        for (point, &c) in data.iter().zip(assignments.iter()) {
            counts[c] += 1;
            for (acc, &val) in sums[c].iter_mut().zip(point.iter()) {
                *acc += val;
            }
        }
        for ((centroid, sum), &count) in centroids.iter_mut().zip(&sums).zip(&counts) {
            if count > 0 {
                for (cd, &sd) in centroid.iter_mut().zip(sum) {
                    *cd = sd / count as f32;
                }
            }
        }
    }
    // If we need more centroids than data points, duplicate the last centroid.
    while centroids.len() < k {
        centroids.push(centroids[centroids.len() - 1].clone());
    }
    centroids
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Deterministic pseudo-random training set: 50 vectors of dim 16
    /// with values in [-1, 1).
    fn make_pq_data() -> Vec<Vec<f32>> {
        // 50 vectors of dim 16
        let mut vecs = Vec::new();
        for i in 0..50 {
            let v: Vec<f32> = (0..16)
                .map(|d| ((i * 7 + d * 13 + 3) % 200) as f32 / 100.0 - 1.0)
                .collect();
            vecs.push(v);
        }
        vecs
    }

    /// Training yields the declared shape; codes are valid centroid
    /// indices and decode back to a full-dimension vector.
    #[test]
    fn train_and_encode_decode() {
        let data = make_pq_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let pq = ProductQuantizer::train(&refs, 4, 8, 10);
        assert_eq!(pq.m, 4);
        assert_eq!(pq.k, 8);
        assert_eq!(pq.sub_dim, 4);
        assert_eq!(pq.codebooks.len(), 4);
        let codes = pq.encode_vec(&data[0]);
        assert_eq!(codes.len(), 4);
        for &c in &codes {
            assert!((c as usize) < 8);
        }
        let recon = pq.decode_vec(&codes);
        assert_eq!(recon.len(), 16);
    }

    /// ADC lookups are non-negative and the query is at least as close
    /// to itself as to another vector.
    #[test]
    fn adc_distance() {
        let data = make_pq_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let pq = ProductQuantizer::train(&refs, 4, 8, 10);
        let query = &data[0];
        let tables = pq.compute_distance_tables(query);
        assert_eq!(tables.len(), 4);
        let codes = pq.encode_vec(&data[1]);
        let dist = ProductQuantizer::distance_adc(&tables, &codes);
        assert!(dist >= 0.0);
        // Distance to self should be very small
        let self_codes = pq.encode_vec(query);
        let self_dist = ProductQuantizer::distance_adc(&tables, &self_codes);
        assert!(self_dist < dist || dist == 0.0);
    }

    /// Lloyd iterations should monotonically (weakly) reduce total
    /// reconstruction error.
    #[test]
    fn pq_convergence() {
        // After training, reconstruction error should decrease with more iterations.
        let data = make_pq_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let pq_1 = ProductQuantizer::train(&refs, 4, 8, 1);
        let pq_20 = ProductQuantizer::train(&refs, 4, 8, 20);
        let error_1: f32 = data
            .iter()
            .map(|v| {
                let codes = pq_1.encode_vec(v);
                let recon = pq_1.decode_vec(&codes);
                l2_squared(v, &recon)
            })
            .sum();
        let error_20: f32 = data
            .iter()
            .map(|v| {
                let codes = pq_20.encode_vec(v);
                let recon = pq_20.decode_vec(&codes);
                l2_squared(v, &recon)
            })
            .sum();
        assert!(
            error_20 <= error_1 + f32::EPSILON,
            "more iterations should not increase error: {error_1} vs {error_20}"
        );
    }

    /// The trait surface reports the Warm tier and full dimensionality.
    #[test]
    fn quantizer_trait() {
        let data = make_pq_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let pq = ProductQuantizer::train(&refs, 4, 8, 5);
        assert_eq!(pq.tier(), TemperatureTier::Warm);
        assert_eq!(pq.dim(), 16);
    }
}

View File

@@ -0,0 +1,218 @@
//! Scalar Quantization (SQ) — fp32 to u8, 4x compression.
//!
//! Each dimension is independently mapped from [min, max] to [0, 255].
//! This is the quantization used for the **Hot** (Tier 0) tier.
use crate::tier::TemperatureTier;
use crate::traits::Quantizer;
use alloc::vec;
use alloc::vec::Vec;
/// Scalar quantizer parameters: per-dimension min/max ranges.
///
/// Each dimension is independently mapped from `[min, max]` to `[0, 255]`.
#[derive(Clone, Debug)]
pub struct ScalarQuantizer {
    /// Minimum value per dimension (training set).
    pub min_vals: Vec<f32>,
    /// Maximum value per dimension (training set).
    pub max_vals: Vec<f32>,
    /// Vector dimensionality.
    pub dim: usize,
}
impl ScalarQuantizer {
    /// Train a scalar quantizer by computing per-dimension min/max over
    /// a set of training vectors.
    ///
    /// Degenerate (zero-range) dimensions are widened by 1.0 so encoding
    /// never divides by zero.
    ///
    /// # Panics
    ///
    /// Panics if `vectors` is empty or any vector has inconsistent dimensionality.
    pub fn train(vectors: &[&[f32]]) -> Self {
        assert!(!vectors.is_empty(), "need at least one training vector");
        let dim = vectors[0].len();
        assert!(dim > 0, "vector dimensionality must be > 0");
        let mut min_vals = vec![f32::INFINITY; dim];
        let mut max_vals = vec![f32::NEG_INFINITY; dim];
        for v in vectors {
            assert_eq!(v.len(), dim, "dimension mismatch in training data");
            for (d, &val) in v.iter().enumerate() {
                if val < min_vals[d] {
                    min_vals[d] = val;
                }
                if val > max_vals[d] {
                    max_vals[d] = val;
                }
            }
        }
        // Avoid zero-range dimensions (would cause division by zero).
        for d in 0..dim {
            if (max_vals[d] - min_vals[d]).abs() < f32::EPSILON {
                max_vals[d] = min_vals[d] + 1.0;
            }
        }
        Self {
            min_vals,
            max_vals,
            dim,
        }
    }

    /// Quantize a float vector to u8 codes.
    ///
    /// `q[d] = round((v[d] - min[d]) / (max[d] - min[d]) * 255)`, with the
    /// normalized value clamped to `[0, 1]` so out-of-range inputs
    /// saturate rather than wrap.
    ///
    /// # Panics
    ///
    /// Panics if `vector.len() != self.dim`.
    pub fn encode_vec(&self, vector: &[f32]) -> Vec<u8> {
        assert_eq!(vector.len(), self.dim);
        let mut codes = Vec::with_capacity(self.dim);
        // The assert above pins the length; no `.take(self.dim)` needed.
        for (d, &val) in vector.iter().enumerate() {
            let range = self.max_vals[d] - self.min_vals[d];
            let normalized = (val - self.min_vals[d]) / range;
            let clamped = normalized.clamp(0.0, 1.0);
            codes.push((clamped * 255.0).round() as u8);
        }
        codes
    }

    /// Dequantize u8 codes back to approximate float values.
    ///
    /// `v[d] = q[d] / 255 * (max[d] - min[d]) + min[d]`
    ///
    /// # Panics
    ///
    /// Panics if `codes.len() != self.dim`.
    pub fn decode_vec(&self, codes: &[u8]) -> Vec<f32> {
        assert_eq!(codes.len(), self.dim);
        let mut vector = Vec::with_capacity(self.dim);
        for (d, &code) in codes.iter().enumerate() {
            let range = self.max_vals[d] - self.min_vals[d];
            let val = (code as f32 / 255.0) * range + self.min_vals[d];
            vector.push(val);
        }
        vector
    }

    /// Compute approximate squared L2 distance between two quantized vectors.
    ///
    /// Since a dequantized value is `code / 255 * range + min`, the
    /// per-dimension difference is `(a_code - b_code) / 255 * range`, so each
    /// squared term is `(a_code - b_code)^2 * range^2 / 255^2`. The code
    /// difference is squared in i32 before a single f32 conversion per
    /// dimension, and the constant `1 / 255^2` factor is hoisted out.
    /// (Previous comment claimed the whole accumulation was integer; only
    /// the per-dimension difference is.)
    ///
    /// # Panics
    ///
    /// Panics if either slice length differs from `self.dim`.
    pub fn distance_l2_quantized(&self, a: &[u8], b: &[u8]) -> f32 {
        assert_eq!(a.len(), self.dim);
        assert_eq!(b.len(), self.dim);
        let inv_255_sq = 1.0f32 / (255.0 * 255.0);
        let mut dist = 0.0f32;
        for d in 0..self.dim {
            let diff = a[d] as i32 - b[d] as i32;
            let range = self.max_vals[d] - self.min_vals[d];
            dist += (diff * diff) as f32 * (range * range) * inv_255_sq;
        }
        dist
    }

    /// SIMD-accelerated L2 distance (stub; falls back to scalar when
    /// the `simd` feature is not enabled).
    #[cfg(feature = "simd")]
    pub fn distance_l2_simd(&self, a: &[u8], b: &[u8]) -> f32 {
        // Future: AVX-512 / NEON implementation.
        self.distance_l2_quantized(a, b)
    }
}
impl Quantizer for ScalarQuantizer {
    /// Delegates to [`ScalarQuantizer::encode_vec`]: one u8 code per dimension.
    fn encode(&self, vector: &[f32]) -> Vec<u8> {
        self.encode_vec(vector)
    }
    /// Delegates to [`ScalarQuantizer::decode_vec`]: per-dimension dequantize.
    fn decode(&self, codes: &[u8]) -> Vec<f32> {
        self.decode_vec(codes)
    }
    /// Scalar quantization serves the Hot tier.
    fn tier(&self) -> TemperatureTier {
        TemperatureTier::Hot
    }
    fn dim(&self) -> usize {
        self.dim
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Deterministic training set: 10 vectors of dim 8 with values in [-1, 1).
    fn make_training_data() -> Vec<Vec<f32>> {
        // 10 vectors of dim 8 in [-1, 1]
        let mut vecs = Vec::new();
        for i in 0..10 {
            let v: Vec<f32> = (0..8)
                .map(|d| ((i * 7 + d * 13) % 200) as f32 / 100.0 - 1.0)
                .collect();
            vecs.push(v);
        }
        vecs
    }

    /// Reconstruction error per dimension is bounded by one quantization
    /// step of the widest dimension range.
    #[test]
    fn round_trip_low_error() {
        let data = make_training_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let sq = ScalarQuantizer::train(&refs);
        for v in &data {
            let codes = sq.encode_vec(v);
            let reconstructed = sq.decode_vec(&codes);
            assert_eq!(reconstructed.len(), v.len());
            // Check reconstruction error per dimension
            for (orig, recon) in v.iter().zip(reconstructed.iter()) {
                // Widest per-dimension range / 255 = max quantization step.
                // (Loop-invariant; recomputed per element here, fine for a test.)
                let max_error = (sq
                    .max_vals
                    .iter()
                    .zip(sq.min_vals.iter())
                    .map(|(mx, mn)| mx - mn)
                    .fold(0.0f32, f32::max))
                    / 255.0;
                assert!(
                    (orig - recon).abs() <= max_error + f32::EPSILON,
                    "reconstruction error too large: orig={orig}, recon={recon}"
                );
            }
        }
    }

    /// Quantized distances are never negative.
    #[test]
    fn quantized_distance_nonnegative() {
        let data = make_training_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let sq = ScalarQuantizer::train(&refs);
        let a = sq.encode_vec(&data[0]);
        let b = sq.encode_vec(&data[1]);
        let dist = sq.distance_l2_quantized(&a, &b);
        assert!(dist >= 0.0);
    }

    /// A code vector is at distance zero from itself.
    #[test]
    fn identical_vectors_zero_distance() {
        let data = make_training_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let sq = ScalarQuantizer::train(&refs);
        let a = sq.encode_vec(&data[0]);
        let dist = sq.distance_l2_quantized(&a, &a);
        assert!(dist.abs() < f32::EPSILON);
    }

    /// The trait surface reports the Hot tier and the trained dimensionality.
    #[test]
    fn quantizer_trait() {
        let data = make_training_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let sq = ScalarQuantizer::train(&refs);
        assert_eq!(sq.tier(), TemperatureTier::Hot);
        assert_eq!(sq.dim(), 8);
    }
}

View File

@@ -0,0 +1,228 @@
//! Count-Min Sketch for temperature tracking.
//!
//! Tracks per-block access frequency to drive tier promotion/demotion.
//! Spec defaults: width=1024, depth=4, 8-bit saturating counters.
use alloc::vec;
use alloc::vec::Vec;
/// Count-Min Sketch for access frequency estimation.
#[derive(Clone, Debug)]
pub struct CountMinSketch {
/// Counter matrix: `counters[row][col]`, each row uses a different hash.
pub counters: Vec<Vec<u8>>,
/// Number of counters per row.
pub width: usize,
/// Number of hash functions (rows).
pub depth: usize,
/// Total number of increment operations (for aging decisions).
pub total_accesses: u64,
}
/// Default width (counters per row), per the spec default of 1024.
pub const DEFAULT_WIDTH: usize = 1024;
/// Default depth (hash functions / rows), per the spec default of 4.
pub const DEFAULT_DEPTH: usize = 4;
/// Aging trigger: halve all counters every 2^16 (65 536) accesses.
const AGING_INTERVAL: u64 = 1 << 16;
impl CountMinSketch {
    /// Create a new sketch with the given width and depth.
    ///
    /// # Panics
    ///
    /// Panics if `width` or `depth` is zero. A zero width would make the
    /// `% width` index reduction divide by zero on the first access, and a
    /// zero depth would make [`estimate`](Self::estimate) report `u8::MAX`
    /// even for blocks that were never incremented.
    pub fn new(width: usize, depth: usize) -> Self {
        assert!(width > 0, "CountMinSketch width must be non-zero");
        assert!(depth > 0, "CountMinSketch depth must be non-zero");
        Self {
            counters: vec![vec![0u8; width]; depth],
            width,
            depth,
            total_accesses: 0,
        }
    }

    /// Create a sketch with default parameters (w=1024, d=4).
    pub fn default_sketch() -> Self {
        Self::new(DEFAULT_WIDTH, DEFAULT_DEPTH)
    }

    /// Increment the count for `block_id` using saturating addition.
    ///
    /// Updates all `depth` hash rows with `min(counter + 1, 255)`. The
    /// global access total wraps (rather than saturates) so aging decisions
    /// keep firing over arbitrarily long lifetimes.
    pub fn increment(&mut self, block_id: u64) {
        for row in 0..self.depth {
            let idx = self.index(block_id, row);
            self.counters[row][idx] = self.counters[row][idx].saturating_add(1);
        }
        self.total_accesses = self.total_accesses.wrapping_add(1);
    }

    /// Estimate the access count for `block_id`.
    ///
    /// Returns the minimum across all hash rows (Count-Min guarantee:
    /// estimate >= true count, with bounded overestimation).
    pub fn estimate(&self, block_id: u64) -> u8 {
        let mut min_val = u8::MAX;
        for row in 0..self.depth {
            min_val = min_val.min(self.counters[row][self.index(block_id, row)]);
        }
        min_val
    }

    /// Age (decay) all counters by right-shifting by 1 (halving).
    ///
    /// This ensures the sketch tracks *recent* access patterns rather
    /// than cumulative history.
    pub fn age(&mut self) {
        for row in &mut self.counters {
            for counter in row.iter_mut() {
                *counter >>= 1;
            }
        }
    }

    /// Returns true if aging should be triggered (every 2^16 accesses).
    pub fn should_age(&self) -> bool {
        self.total_accesses > 0 && self.total_accesses.is_multiple_of(AGING_INTERVAL)
    }

    /// Memory footprint in bytes (counters only, excluding struct overhead).
    pub fn memory_bytes(&self) -> usize {
        self.width * self.depth
    }

    /// Counter index for `block_id` in hash row `row`.
    ///
    /// Shared by `increment` and `estimate` so both always agree on
    /// counter placement.
    #[inline]
    fn index(&self, block_id: u64, row: usize) -> usize {
        self.hash(block_id, row) % self.width
    }

    /// Hash function using FNV-1a style multiplicative hashing.
    ///
    /// Each row uses a different seed to produce independent hash values.
    /// NOTE(review): only 8 seeds exist, and rows index them modulo
    /// `HASH_SEEDS.len()`, so sketches deeper than 8 rows reuse seeds and
    /// lose row independence.
    fn hash(&self, block_id: u64, row: usize) -> usize {
        // FNV-1a inspired: mix block_id with a row-dependent seed.
        const FNV_OFFSET: u64 = 0xcbf29ce484222325;
        const FNV_PRIME: u64 = 0x100000001b3;
        let seed = HASH_SEEDS[row % HASH_SEEDS.len()];
        let mut h = FNV_OFFSET ^ seed;
        for &b in &block_id.to_le_bytes() {
            h ^= b as u64;
            h = h.wrapping_mul(FNV_PRIME);
        }
        h as usize
    }
}
/// Seeds for hash functions (one per row).
///
/// Supports up to 8 independent rows; `hash` selects a seed with
/// `row % HASH_SEEDS.len()`, so deeper sketches reuse seeds.
const HASH_SEEDS: [u64; 8] = [
    0x517cc1b727220a95,
    0x6c62272e07bb0142,
    0x44c6b90e0f294e41,
    0x3b9f7a3e2d8f1c5b,
    0x7e4a1b3c5d6f8a9e,
    0x1a2b3c4d5e6f7089,
    0x9f8e7d6c5b4a3210,
    0xdeadbeefcafebabe,
];
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn new_sketch_all_zeros() {
        // A fresh sketch holds no counts and has seen no accesses.
        let sketch = CountMinSketch::new(64, 4);
        let all_zero = sketch
            .counters
            .iter()
            .all(|row| row.iter().all(|&c| c == 0));
        assert!(all_zero);
        assert_eq!(sketch.total_accesses, 0);
    }

    #[test]
    fn estimate_ge_true_count() {
        // Count-Min guarantee: the estimate never undercounts.
        let mut sketch = CountMinSketch::new(256, 4);
        for _ in 0..10 {
            sketch.increment(42);
        }
        let est = sketch.estimate(42);
        assert!(est >= 10, "estimate {est} should be >= true count 10");
    }

    #[test]
    fn increment_saturates_at_255() {
        // The 8-bit counters clamp at 255 instead of wrapping.
        let mut sketch = CountMinSketch::new(64, 2);
        for _ in 0..300 {
            sketch.increment(1);
        }
        assert_eq!(sketch.estimate(1), 255);
    }

    #[test]
    fn aging_halves_counters() {
        let mut sketch = CountMinSketch::new(64, 2);
        for _ in 0..100 {
            sketch.increment(7);
        }
        let before = sketch.estimate(7);
        sketch.age();
        let after = sketch.estimate(7);
        // Aging must shrink (or at worst preserve) counts, roughly halving.
        assert!(after <= before, "aging should not increase count");
        assert!(
            after >= before / 2 - 1,
            "aging should halve: before={before}, after={after}"
        );
    }

    #[test]
    fn should_age_at_power_of_two() {
        let mut sketch = CountMinSketch::new(64, 2);
        // Off the 2^16 boundary: no aging.
        sketch.total_accesses = 100;
        assert!(!sketch.should_age());
        // Exactly on multiples of 2^16: aging fires.
        for boundary in [1u64 << 16, 2 << 16] {
            sketch.total_accesses = boundary;
            assert!(sketch.should_age());
        }
    }

    #[test]
    fn different_blocks_independent() {
        // Distinct block ids keep distinct (lower-bounded) estimates.
        let mut sketch = CountMinSketch::new(1024, 4);
        for _ in 0..50 {
            sketch.increment(100);
        }
        for _ in 0..10 {
            sketch.increment(200);
        }
        let est_hot = sketch.estimate(100);
        let est_cool = sketch.estimate(200);
        assert!(est_hot >= 50);
        assert!(est_cool >= 10);
        assert!(est_hot > est_cool);
    }

    #[test]
    fn memory_bytes() {
        // 1024 counters x 4 rows x 1 byte each.
        let sketch = CountMinSketch::new(1024, 4);
        assert_eq!(sketch.memory_bytes(), 4096);
    }

    #[test]
    fn unseen_block_is_zero() {
        let sketch = CountMinSketch::new(1024, 4);
        assert_eq!(sketch.estimate(999), 0);
    }
}

View File

@@ -0,0 +1,95 @@
//! Temperature tier assignment for vector blocks.
/// Access count above which a block is considered "hot" (Tier 0).
pub const HOT_THRESHOLD: u8 = 128;
/// Access count above which a block is considered "warm" (Tier 1).
/// Below this threshold, a block is "cold" (Tier 2).
pub const WARM_THRESHOLD: u8 = 16;
/// Temperature tier for a vector block.
///
/// Determines the quantization level and storage layout.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum TemperatureTier {
    /// Frequently accessed. Scalar quantized (int8), interleaved layout.
    Hot = 0,
    /// Moderately accessed. Product quantized, columnar layout.
    Warm = 1,
    /// Rarely accessed. Binary quantized, columnar + heavy compression.
    Cold = 2,
}
/// Assign a temperature tier based on the estimated access count.
///
/// Thresholds follow the RVF spec and are strict (`>`): a count strictly
/// above [`HOT_THRESHOLD`] is Hot, strictly above [`WARM_THRESHOLD`] is
/// Warm, and everything else — the threshold values included — is Cold
/// or Warm respectively.
pub fn assign_tier(access_count: u8) -> TemperatureTier {
    match access_count {
        c if c > HOT_THRESHOLD => TemperatureTier::Hot,
        c if c > WARM_THRESHOLD => TemperatureTier::Warm,
        _ => TemperatureTier::Cold,
    }
}
impl TemperatureTier {
    /// Returns the wire representation (0, 1, or 2).
    #[inline]
    pub const fn as_u8(self) -> u8 {
        self as u8
    }
}
impl TryFrom<u8> for TemperatureTier {
    type Error = u8;

    /// Decode a wire byte; any value outside 0..=2 is handed back as the error.
    fn try_from(value: u8) -> Result<Self, Self::Error> {
        match value {
            0 => Ok(Self::Hot),
            1 => Ok(Self::Warm),
            2 => Ok(Self::Cold),
            other => Err(other),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tier_assignment() {
        // Comparisons are strict, so threshold values themselves land in
        // the next-cooler tier (128 -> Warm, 16 -> Cold).
        let cases: [(u8, TemperatureTier); 8] = [
            (255, TemperatureTier::Hot),
            (129, TemperatureTier::Hot),
            (128, TemperatureTier::Warm),
            (64, TemperatureTier::Warm),
            (17, TemperatureTier::Warm),
            (16, TemperatureTier::Cold),
            (1, TemperatureTier::Cold),
            (0, TemperatureTier::Cold),
        ];
        for (count, expected) in cases {
            assert_eq!(assign_tier(count), expected);
        }
    }

    #[test]
    fn round_trip() {
        // Every valid wire byte decodes and re-encodes to itself.
        for raw in 0..=2u8 {
            assert_eq!(TemperatureTier::try_from(raw).unwrap().as_u8(), raw);
        }
    }

    #[test]
    fn invalid_tier() {
        assert_eq!(TemperatureTier::try_from(3), Err(3));
    }

    #[test]
    fn ordering() {
        // Hotter tiers sort before cooler ones.
        assert!(TemperatureTier::Hot < TemperatureTier::Warm);
        assert!(TemperatureTier::Warm < TemperatureTier::Cold);
    }
}

View File

@@ -0,0 +1,22 @@
//! Common quantization trait shared by all quantizer types.
use crate::tier::TemperatureTier;
use alloc::vec::Vec;
/// Trait for vector quantization codecs.
///
/// Every quantizer can encode a float vector into a compact byte representation
/// and decode it back to an approximate float vector. Quantization is lossy:
/// `decode(encode(v))` approximates `v` rather than reproducing it exactly.
pub trait Quantizer {
    /// Encode a float vector into compact codes.
    ///
    /// NOTE(review): implementors likely expect `vector.len() == self.dim()`;
    /// behavior for mismatched lengths is implementor-defined — confirm.
    fn encode(&self, vector: &[f32]) -> Vec<u8>;
    /// Decode compact codes back to an approximate float vector.
    fn decode(&self, codes: &[u8]) -> Vec<f32>;
    /// The temperature tier this quantizer is designed for.
    fn tier(&self) -> TemperatureTier;
    /// The dimensionality this quantizer was trained for.
    fn dim(&self) -> usize;
}