Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,24 @@
[package]
name = "rvf-quant"
version = "0.1.0"
edition = "2021"
description = "RuVector Format temperature-tiered vector quantization (f32/f16/u8/binary)"
license = "MIT OR Apache-2.0"
repository = "https://github.com/ruvnet/ruvector"
homepage = "https://github.com/ruvnet/ruvector"
readme = "README.md"
categories = ["algorithms", "compression"]
keywords = ["vector", "quantization", "compression", "embedding", "rvf"]
rust-version = "1.87"
[features]
default = ["std"]
std = []
simd = []
[dependencies]
rvf-types = { version = "0.2.0", path = "../rvf-types" }
[dev-dependencies]
rand = "0.8"
approx = "0.5"

View File

@@ -0,0 +1,29 @@
# rvf-quant
Temperature-tiered vector quantization for RuVector Format.
## Overview
`rvf-quant` provides quantization codecs that reduce vector storage size based on access temperature:
- **f32** -- full precision for hot vectors
- **f16** -- half precision for warm vectors
- **u8** -- scalar quantization for cool vectors
- **binary** -- 1-bit quantization for cold/archive vectors
- **Automatic tiering** -- promote/demote vectors based on access patterns
## Usage
```toml
[dependencies]
rvf-quant = "0.1"
```
## Features
- `std` (default) -- enable `std` support
- `simd` -- enable SIMD-accelerated quantization
## License
MIT OR Apache-2.0

View File

@@ -0,0 +1,168 @@
//! Binary Quantization — 32x compression (1 bit per dimension).
//!
//! Used for the **Cold** (Tier 2) tier. Encodes only the sign of each
//! dimension and uses Hamming distance for comparison.
use alloc::vec;
use alloc::vec::Vec;
/// Encode a float vector to binary: 1 bit per dimension (sign bit).
///
/// Bit layout: dimension `d` maps to bit `d % 8` of byte `d / 8`.
/// A non-negative value (>= 0.0) sets the bit; a negative value leaves it clear.
pub fn encode_binary(vector: &[f32]) -> Vec<u8> {
    let mut bits = vec![0u8; vector.len().div_ceil(8)];
    // Pack eight dimensions per output byte.
    for (byte_idx, group) in vector.chunks(8).enumerate() {
        let mut packed = 0u8;
        for (bit_idx, &val) in group.iter().enumerate() {
            if val >= 0.0 {
                packed |= 1 << bit_idx;
            }
        }
        bits[byte_idx] = packed;
    }
    bits
}
/// Decode binary codes back to an approximate float vector.
///
/// Each bit is decoded to +1.0 (set) or -1.0 (unset). Dimensions past the
/// end of `bits` decode as -1.0, mirroring an unset bit.
pub fn decode_binary(bits: &[u8], dim: usize) -> Vec<f32> {
    (0..dim)
        .map(|d| {
            let set = bits
                .get(d / 8)
                .map_or(false, |byte| (byte >> (d % 8)) & 1 == 1);
            if set { 1.0 } else { -1.0 }
        })
        .collect()
}
/// Compute the Hamming distance between two binary-encoded vectors.
///
/// Processes data in u64 chunks (8 bytes at a time) using `count_ones()`,
/// which maps to hardware POPCNT on supported platforms, then finishes the
/// tail byte-by-byte. Rewritten over `chunks_exact(8)` so the hot loop has
/// no manual index arithmetic and no per-index bounds checks.
///
/// # Panics
///
/// Panics if `a` and `b` have different lengths.
pub fn hamming_distance(a: &[u8], b: &[u8]) -> u32 {
    assert_eq!(a.len(), b.len(), "binary vectors must have equal length");
    let a_chunks = a.chunks_exact(8);
    let b_chunks = b.chunks_exact(8);
    // Tail bytes that do not fill a whole u64 (`remainder` borrows from the
    // underlying slice, so it stays valid after the iterators are consumed).
    let tail: u32 = a_chunks
        .remainder()
        .iter()
        .zip(b_chunks.remainder())
        .map(|(&x, &y)| (x ^ y).count_ones())
        .sum();
    // Process 8 bytes at a time using u64 popcount.
    let head: u32 = a_chunks
        .zip(b_chunks)
        .map(|(ca, cb)| {
            // chunks_exact(8) guarantees 8-byte slices, so try_into cannot fail.
            let xa = u64::from_le_bytes(ca.try_into().unwrap());
            let xb = u64::from_le_bytes(cb.try_into().unwrap());
            (xa ^ xb).count_ones()
        })
        .sum();
    head + tail
}
/// SIMD-accelerated Hamming distance (stub; falls back to scalar
/// when the `simd` feature is not enabled or unavailable).
///
/// Currently identical in behavior to [`hamming_distance`]; kept as a
/// separate entry point so callers can opt into the `simd` code path
/// once a dedicated implementation lands.
#[cfg(feature = "simd")]
pub fn hamming_distance_simd(a: &[u8], b: &[u8]) -> u32 {
    // Future: VPOPCNTDQ / CNT implementation.
    hamming_distance(a, b)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trip must preserve the sign of every dimension;
    /// 0.0 counts as non-negative and decodes to +1.0.
    #[test]
    fn encode_decode_round_trip() {
        let v = vec![1.0, -0.5, 0.3, -2.0, 0.0, 0.1, -0.1, 0.9];
        let bits = encode_binary(&v);
        let decoded = decode_binary(&bits, v.len());
        // Check sign preservation
        for (d, (&orig, &dec)) in v.iter().zip(decoded.iter()).enumerate() {
            if orig >= 0.0 {
                assert_eq!(dec, 1.0, "dim {d}: expected +1 for val {orig}");
            } else {
                assert_eq!(dec, -1.0, "dim {d}: expected -1 for val {orig}");
            }
        }
    }

    /// A code XORed with itself has no set bits.
    #[test]
    fn hamming_self_is_zero() {
        let v = vec![1.0, -1.0, 0.5, -0.5, 0.0, 1.0, -1.0, 0.5];
        let bits = encode_binary(&v);
        assert_eq!(hamming_distance(&bits, &bits), 0);
    }

    /// Fully opposite sign patterns differ in all 8 bits.
    #[test]
    fn hamming_opposite_is_max() {
        let v1 = vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0];
        let v2 = vec![-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0];
        let b1 = encode_binary(&v1);
        let b2 = encode_binary(&v2);
        assert_eq!(hamming_distance(&b1, &b2), 8);
    }

    /// Cross-check the popcount path against a naive per-dimension
    /// sign comparison.
    #[test]
    fn hamming_matches_naive() {
        let v1 = vec![
            1.0, -1.0, 0.5, -0.5, 0.1, -0.1, 0.9, -0.9, 0.3, -0.3, 0.7, -0.7, 0.2, -0.2, 0.8, -0.8,
        ];
        let v2 = vec![
            -1.0, 1.0, -0.5, 0.5, -0.1, 0.1, -0.9, 0.9, -0.3, 0.3, -0.7, 0.7, -0.2, 0.2, -0.8, 0.8,
        ];
        let b1 = encode_binary(&v1);
        let b2 = encode_binary(&v2);
        // All signs are flipped -> hamming distance = 16
        assert_eq!(hamming_distance(&b1, &b2), 16);
        // Naive computation for verification
        let mut naive_dist = 0u32;
        for d in 0..16 {
            let s1 = if v1[d] >= 0.0 { 1 } else { 0 };
            let s2 = if v2[d] >= 0.0 { 1 } else { 0 };
            if s1 != s2 {
                naive_dist += 1;
            }
        }
        assert_eq!(hamming_distance(&b1, &b2), naive_dist);
    }

    /// Dimensions that don't fill a whole byte still round-trip.
    #[test]
    fn non_multiple_of_8_dimensions() {
        let v = vec![1.0, -1.0, 0.5, -0.5, 0.1]; // 5 dims
        let bits = encode_binary(&v);
        assert_eq!(bits.len(), 1); // ceil(5/8) = 1
        let decoded = decode_binary(&bits, 5);
        assert_eq!(decoded.len(), 5);
        assert_eq!(decoded[0], 1.0);
        assert_eq!(decoded[1], -1.0);
        assert_eq!(decoded[4], 1.0); // 0.1 >= 0
    }
}

View File

@@ -0,0 +1,410 @@
//! QUANT_SEG and SKETCH_SEG wire format codec.
//!
//! Serializes / deserializes quantizer parameters and Count-Min Sketch
//! data to the binary layout defined in the RVF wire spec.
use alloc::boxed::Box;
use alloc::vec;
use alloc::vec::Vec;
use crate::binary;
use crate::product::ProductQuantizer;
use crate::scalar::ScalarQuantizer;
use crate::sketch::CountMinSketch;
use crate::traits::Quantizer;
// ---------------------------------------------------------------------------
// QUANT_SEG codec
// ---------------------------------------------------------------------------
/// Quantization type tags matching the QUANT_SEG wire spec.
const QUANT_TYPE_SCALAR: u8 = 0;
const QUANT_TYPE_PRODUCT: u8 = 1;
const QUANT_TYPE_BINARY: u8 = 2;
/// Encode a quantizer into the QUANT_SEG binary payload.
///
/// Layout:
/// ```text
/// [quant_type: u8] [tier: u8] [dim: u16 LE] [padding: 60 bytes to 64B]
/// [type-specific data ...]
/// ```
///
/// NOTE(review): for tiers 0 and 1 this delegates to the trait-based
/// helpers, which currently emit ONLY the 64-byte header — the scalar
/// min/max and PQ codebook parameters cannot be recovered through
/// `&dyn Quantizer` (no downcast). Output for those tiers is therefore
/// not round-trippable through `decode_quant_seg`; prefer
/// `encode_scalar_quantizer` / `encode_product_quantizer`.
pub fn encode_quant_seg(quantizer: &dyn Quantizer) -> Vec<u8> {
    let tier = quantizer.tier() as u8;
    // NOTE(review): `as u16` silently truncates dims > 65535.
    let dim = quantizer.dim() as u16;
    // Downcast to determine the concrete type.
    // We use the tier as a proxy since each tier maps to exactly one quantizer type.
    match tier {
        0 => encode_scalar_quant_seg(quantizer, dim),
        1 => encode_product_quant_seg(quantizer, dim),
        2 => encode_binary_quant_seg(dim),
        _ => panic!("unknown quantizer tier"),
    }
}
/// Decode a QUANT_SEG binary payload into a boxed [`Quantizer`].
///
/// # Panics
///
/// Panics if the payload is shorter than the 64-byte header or the
/// type tag is unknown.
pub fn decode_quant_seg(data: &[u8]) -> Box<dyn Quantizer> {
    assert!(data.len() >= 64, "QUANT_SEG header too short");
    let quant_type = data[0];
    // Byte 1 carries the tier; it is implied by quant_type, so ignore it.
    let _tier = data[1];
    let dim = usize::from(u16::from_le_bytes([data[2], data[3]]));
    // Type-specific parameters follow the 64-byte aligned header.
    let body = &data[64..];
    match quant_type {
        QUANT_TYPE_SCALAR => Box::new(decode_scalar(body, dim)),
        QUANT_TYPE_PRODUCT => Box::new(decode_product(body, dim)),
        QUANT_TYPE_BINARY => Box::new(BinaryQuantizerWrapper { dim }),
        _ => panic!("unknown quant_type {quant_type}"),
    }
}
// ---------------------------------------------------------------------------
// Scalar
// ---------------------------------------------------------------------------
/// Header-only QUANT_SEG encoding for a scalar quantizer reached via
/// `&dyn Quantizer`.
///
/// NOTE(review): this writes ONLY the 64-byte header. The per-dimension
/// min/max parameters cannot be recovered through the `Quantizer` trait
/// (no downcast available), so the body is left empty and
/// `decode_quant_seg` will reject the output ("scalar quant data too
/// short"). Prefer `encode_scalar_quantizer`, which serializes the
/// concrete type with its parameters.
fn encode_scalar_quant_seg(quantizer: &dyn Quantizer, dim: u16) -> Vec<u8> {
    // Header (64 bytes)
    let mut buf = vec![0u8; 64];
    buf[0] = QUANT_TYPE_SCALAR;
    buf[1] = quantizer.tier() as u8;
    buf[2..4].copy_from_slice(&dim.to_le_bytes());
    buf
}
/// Encode a [`ScalarQuantizer`] directly (preferred over trait-based encoding).
///
/// Layout: 64-byte header (`[type=0][tier=0][dim: u16 LE][padding]`)
/// followed by `dim` f32 LE min values, then `dim` f32 LE max values.
///
/// # Panics
///
/// Panics if `sq.dim` does not fit in the wire format's u16 dim field
/// (previously this truncated silently).
pub fn encode_scalar_quantizer(sq: &ScalarQuantizer) -> Vec<u8> {
    assert!(sq.dim <= u16::MAX as usize, "dim must fit in u16");
    let dim = sq.dim as u16;
    // Preallocate: 64-byte header + dim f32 mins + dim f32 maxes.
    let mut buf = Vec::with_capacity(64 + sq.dim * 8);
    buf.resize(64, 0u8);
    buf[0] = QUANT_TYPE_SCALAR;
    buf[1] = 0; // Hot tier
    buf[2..4].copy_from_slice(&dim.to_le_bytes());
    // min[dim], then max[dim], each as f32 little-endian.
    for &v in sq.min_vals.iter().chain(sq.max_vals.iter()) {
        buf.extend_from_slice(&v.to_le_bytes());
    }
    buf
}
/// Decode scalar quantizer parameters from a QUANT_SEG body:
/// `dim` f32 LE min values followed by `dim` f32 LE max values.
///
/// # Panics
///
/// Panics if `body` holds fewer than `2 * dim` f32 values.
fn decode_scalar(body: &[u8], dim: usize) -> ScalarQuantizer {
    let float_bytes = dim * 4;
    assert!(body.len() >= float_bytes * 2, "scalar quant data too short");
    // Parse 2*dim consecutive little-endian f32s, then split into the
    // min half (first dim values) and max half (remaining dim values).
    let mut vals: Vec<f32> = body[..float_bytes * 2]
        .chunks_exact(4)
        .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
        .collect();
    let max_vals = vals.split_off(dim);
    ScalarQuantizer {
        min_vals: vals,
        max_vals,
        dim,
    }
}
// ---------------------------------------------------------------------------
// Product
// ---------------------------------------------------------------------------
/// Header-only QUANT_SEG encoding for a product quantizer reached via
/// `&dyn Quantizer`.
///
/// NOTE(review): like `encode_scalar_quant_seg`, this emits only the
/// 64-byte header without the M/K/sub_dim fields or codebook data, so
/// `decode_quant_seg` will reject the output ("PQ header too short").
/// Prefer `encode_product_quantizer` for a round-trippable payload.
fn encode_product_quant_seg(quantizer: &dyn Quantizer, dim: u16) -> Vec<u8> {
    let mut buf = vec![0u8; 64];
    buf[0] = QUANT_TYPE_PRODUCT;
    buf[1] = quantizer.tier() as u8;
    buf[2..4].copy_from_slice(&dim.to_le_bytes());
    buf
}
/// Encode a [`ProductQuantizer`] directly.
///
/// Layout: 64-byte header (`[type=1][tier=1][dim: u16 LE][padding]`),
/// then `m`, `k`, `sub_dim` as u16 LE, then the codebook as
/// `m * k * sub_dim` f32 LE values in (subspace, centroid, dim) order.
///
/// # Panics
///
/// Panics if `m`, `k`, `sub_dim`, or the total dimensionality exceeds
/// u16 range (previously these truncated silently).
pub fn encode_product_quantizer(pq: &ProductQuantizer) -> Vec<u8> {
    let total_dim = pq.m * pq.sub_dim;
    // Guard against silent truncation of the wire format's u16 fields.
    assert!(total_dim <= u16::MAX as usize, "dim must fit in u16");
    assert!(
        pq.m <= u16::MAX as usize && pq.k <= u16::MAX as usize && pq.sub_dim <= u16::MAX as usize,
        "PQ parameters must fit in u16"
    );
    let dim = total_dim as u16;
    // Preallocate: header + 3 u16 params + full codebook.
    let mut buf = Vec::with_capacity(64 + 6 + pq.m * pq.k * pq.sub_dim * 4);
    buf.resize(64, 0u8);
    buf[0] = QUANT_TYPE_PRODUCT;
    buf[1] = 1; // Warm tier
    buf[2..4].copy_from_slice(&dim.to_le_bytes());
    // PQ header: M, K, sub_dim (each as u16 LE), after the 64-byte header.
    buf.extend_from_slice(&(pq.m as u16).to_le_bytes());
    buf.extend_from_slice(&(pq.k as u16).to_le_bytes());
    buf.extend_from_slice(&(pq.sub_dim as u16).to_le_bytes());
    // Codebook: M * K * sub_dim floats
    for sub_book in &pq.codebooks {
        for centroid in sub_book {
            for &val in centroid {
                buf.extend_from_slice(&val.to_le_bytes());
            }
        }
    }
    buf
}
/// Decode product quantizer parameters and codebooks from a QUANT_SEG body.
///
/// Expects `m`, `k`, `sub_dim` as u16 LE, then `m * k * sub_dim` f32 LE
/// codebook values in (subspace, centroid, dim) order.
///
/// # Panics
///
/// Panics if the body is shorter than the declared codebook.
fn decode_product(body: &[u8], _dim: usize) -> ProductQuantizer {
    assert!(body.len() >= 6, "PQ header too short");
    let m = u16::from_le_bytes([body[0], body[1]]) as usize;
    let k = u16::from_le_bytes([body[2], body[3]]) as usize;
    let sub_dim = u16::from_le_bytes([body[4], body[5]]) as usize;
    let codebook_bytes = m * k * sub_dim * 4;
    assert!(
        body.len() >= 6 + codebook_bytes,
        "PQ codebook data too short"
    );
    // Stream of f32 LE values, consumed in (subspace, centroid, dim) order.
    let mut floats = body[6..6 + codebook_bytes]
        .chunks_exact(4)
        .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]));
    let codebooks: Vec<Vec<Vec<f32>>> = (0..m)
        .map(|_| {
            (0..k)
                .map(|_| {
                    // `floats` holds exactly m*k*sub_dim values (asserted
                    // above), so `next()` cannot return None here.
                    (0..sub_dim).map(|_| floats.next().unwrap()).collect()
                })
                .collect()
        })
        .collect();
    ProductQuantizer {
        m,
        k,
        sub_dim,
        codebooks,
    }
}
// ---------------------------------------------------------------------------
// Binary
// ---------------------------------------------------------------------------
/// Build the QUANT_SEG payload for binary quantization.
///
/// Sign-bit quantization has no learned parameters, so the payload is
/// exactly the 64-byte header.
fn encode_binary_quant_seg(dim: u16) -> Vec<u8> {
    let mut buf = vec![0u8; 64];
    buf[0] = QUANT_TYPE_BINARY;
    buf[1] = 2; // Cold tier
    buf[2..4].copy_from_slice(&dim.to_le_bytes());
    // Binary quantization has no additional parameters (sign-based).
    buf
}
/// Wrapper to implement `Quantizer` for binary quantization.
///
/// Sign-bit quantization has no learned parameters; only the
/// dimensionality is stored (needed to decode back to floats).
struct BinaryQuantizerWrapper {
    // Vector dimensionality of the encoded space.
    dim: usize,
}
impl Quantizer for BinaryQuantizerWrapper {
    /// Delegates to the free function: one sign bit per dimension.
    fn encode(&self, vector: &[f32]) -> Vec<u8> {
        binary::encode_binary(vector)
    }
    /// Delegates to the free function: each bit decodes to +1.0 / -1.0.
    fn decode(&self, codes: &[u8]) -> Vec<f32> {
        binary::decode_binary(codes, self.dim)
    }
    /// Binary quantization serves the Cold tier.
    fn tier(&self) -> crate::tier::TemperatureTier {
        crate::tier::TemperatureTier::Cold
    }
    fn dim(&self) -> usize {
        self.dim
    }
}
// ---------------------------------------------------------------------------
// SKETCH_SEG codec
// ---------------------------------------------------------------------------
/// Encode a CountMinSketch into the SKETCH_SEG binary payload.
///
/// Layout:
/// ```text
/// [width: u32 LE] [depth: u32 LE] [total_accesses: u64 LE] [padding: 48 bytes to 64B]
/// [counters: depth * width bytes]
/// ```
pub fn encode_sketch_seg(sketch: &CountMinSketch) -> Vec<u8> {
    // Preallocate the exact output size: 64-byte aligned header plus
    // the row-major counter matrix (avoids repeated grow-and-copy).
    let mut buf = Vec::with_capacity(64 + sketch.depth * sketch.width);
    buf.resize(64, 0u8);
    buf[0..4].copy_from_slice(&(sketch.width as u32).to_le_bytes());
    buf[4..8].copy_from_slice(&(sketch.depth as u32).to_le_bytes());
    buf[8..16].copy_from_slice(&sketch.total_accesses.to_le_bytes());
    // Counter data: row-major
    for row in &sketch.counters {
        buf.extend_from_slice(row);
    }
    buf
}
/// Decode a SKETCH_SEG binary payload into a CountMinSketch.
///
/// # Panics
///
/// Panics if the payload is shorter than the 64-byte header or the
/// declared `depth * width` counter matrix.
pub fn decode_sketch_seg(data: &[u8]) -> CountMinSketch {
    assert!(data.len() >= 64, "SKETCH_SEG header too short");
    // Header fields are fixed-offset little-endian integers; the slice
    // lengths make every try_into infallible.
    let width = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize;
    let depth = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize;
    let total_accesses = u64::from_le_bytes(data[8..16].try_into().unwrap());
    let body = &data[64..];
    assert!(body.len() >= width * depth, "SKETCH_SEG counter data too short");
    // Counter rows are stored row-major, `width` bytes per row.
    let counters: Vec<Vec<u8>> = (0..depth)
        .map(|row| body[row * width..(row + 1) * width].to_vec())
        .collect();
    CountMinSketch {
        counters,
        width,
        depth,
        total_accesses,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Scalar params survive encode -> decode and the decoded trait
    /// object quantizes identically to the original.
    #[test]
    fn scalar_quant_seg_round_trip() {
        let sq = ScalarQuantizer {
            min_vals: vec![-1.0, -2.0, -0.5, 0.0],
            max_vals: vec![1.0, 2.0, 0.5, 1.0],
            dim: 4,
        };
        let encoded = encode_scalar_quantizer(&sq);
        let decoded = decode_quant_seg(&encoded);
        assert_eq!(decoded.dim(), 4);
        assert_eq!(decoded.tier(), crate::tier::TemperatureTier::Hot);
        // Verify round-trip: encode a test vector, check similar output
        let test_vec = vec![0.5, 1.0, 0.0, 0.5];
        let codes_orig = sq.encode_vec(&test_vec);
        let codes_decoded = decoded.encode(&test_vec);
        assert_eq!(codes_orig, codes_decoded);
    }

    /// A hand-built PQ codebook survives the wire format byte-for-byte.
    #[test]
    fn product_quant_seg_round_trip() {
        // Build a small PQ manually
        let pq = ProductQuantizer {
            m: 2,
            k: 4,
            sub_dim: 2,
            codebooks: vec![
                vec![
                    vec![0.0, 0.1],
                    vec![0.2, 0.3],
                    vec![0.4, 0.5],
                    vec![0.6, 0.7],
                ],
                vec![
                    vec![0.8, 0.9],
                    vec![1.0, 1.1],
                    vec![1.2, 1.3],
                    vec![1.4, 1.5],
                ],
            ],
        };
        let encoded = encode_product_quantizer(&pq);
        let decoded = decode_quant_seg(&encoded);
        assert_eq!(decoded.dim(), 4);
        assert_eq!(decoded.tier(), crate::tier::TemperatureTier::Warm);
        let test_vec = vec![0.1, 0.2, 0.9, 1.0];
        let codes_orig = pq.encode_vec(&test_vec);
        let codes_decoded = decoded.encode(&test_vec);
        assert_eq!(codes_orig, codes_decoded);
    }

    /// The parameter-free binary header decodes to a working Cold-tier
    /// quantizer.
    #[test]
    fn binary_quant_seg_round_trip() {
        let dim: u16 = 16;
        let encoded = encode_binary_quant_seg(dim);
        let decoded = decode_quant_seg(&encoded);
        assert_eq!(decoded.dim(), 16);
        assert_eq!(decoded.tier(), crate::tier::TemperatureTier::Cold);
        let test_vec: Vec<f32> = (0..16)
            .map(|i| if i % 2 == 0 { 1.0 } else { -1.0 })
            .collect();
        let codes = decoded.encode(&test_vec);
        let recon = decoded.decode(&codes);
        assert_eq!(recon.len(), 16);
    }

    /// Sketch header, counters, and per-block estimates all survive
    /// serialization.
    #[test]
    fn sketch_seg_round_trip() {
        let mut sketch = CountMinSketch::new(64, 4);
        // block_id k is incremented k+1 times, giving distinct counts.
        for block_id in 0..20u64 {
            for _ in 0..(block_id + 1) {
                sketch.increment(block_id);
            }
        }
        let encoded = encode_sketch_seg(&sketch);
        let decoded = decode_sketch_seg(&encoded);
        assert_eq!(decoded.width, sketch.width);
        assert_eq!(decoded.depth, sketch.depth);
        assert_eq!(decoded.total_accesses, sketch.total_accesses);
        // Verify estimates match
        for block_id in 0..20u64 {
            assert_eq!(decoded.estimate(block_id), sketch.estimate(block_id));
        }
    }
}

View File

@@ -0,0 +1,31 @@
//! Temperature-tiered vector quantization for the RuVector Format (RVF).
//!
//! Provides three quantization levels mapped to temperature tiers:
//!
//! | Tier | Quantization | Compression |
//! |------|-------------|-------------|
//! | Hot | Scalar (int8) | 4x |
//! | Warm | Product (PQ) | 8-16x |
//! | Cold | Binary (1-bit)| 32x |
//!
//! A Count-Min Sketch tracks per-block access frequency to drive
//! promotion/demotion decisions.
#![cfg_attr(not(feature = "std"), no_std)]
extern crate alloc;
pub mod binary;
pub mod codec;
pub mod product;
pub mod scalar;
pub mod sketch;
pub mod tier;
pub mod traits;
pub use binary::{decode_binary, encode_binary, hamming_distance};
pub use product::ProductQuantizer;
pub use scalar::ScalarQuantizer;
pub use sketch::CountMinSketch;
pub use tier::TemperatureTier;
pub use traits::Quantizer;

View File

@@ -0,0 +1,333 @@
//! Product Quantization (PQ) — 8-16x compression.
//!
//! Splits a vector into M subspaces, learns K centroids per subspace
//! via k-means, and encodes each sub-vector as a centroid index (1 byte
//! when K <= 256).
//!
//! Used for the **Warm** (Tier 1) tier.
use crate::tier::TemperatureTier;
use crate::traits::Quantizer;
use alloc::vec;
use alloc::vec::Vec;
/// Product quantizer parameters and codebooks.
///
/// Invariants (established by [`ProductQuantizer::train`]): `codebooks`
/// has `m` subspace codebooks, each holding `k` centroids (padded with
/// duplicates when the training set is smaller than `k`), and each
/// centroid has exactly `sub_dim` floats.
#[derive(Clone, Debug)]
pub struct ProductQuantizer {
    /// Number of subspaces.
    pub m: usize,
    /// Number of centroids per subspace.
    pub k: usize,
    /// Dimensions per subspace.
    pub sub_dim: usize,
    /// Codebooks: `codebooks[subspace][centroid]` is a `Vec<f32>` of length `sub_dim`.
    pub codebooks: Vec<Vec<Vec<f32>>>,
}
impl ProductQuantizer {
    /// Train a product quantizer using k-means clustering per subspace.
    ///
    /// # Arguments
    ///
    /// - `vectors`: Training vectors (all must have the same dimensionality).
    /// - `m`: Number of subspaces.
    /// - `k`: Number of centroids per subspace (typically 64, 128, or 256).
    /// - `iterations`: Number of k-means iterations.
    ///
    /// # Panics
    ///
    /// Panics if the vector dimensionality is not divisible by `m`, if `vectors`
    /// is empty, or if `k` or `m` is zero.
    pub fn train(vectors: &[&[f32]], m: usize, k: usize, iterations: usize) -> Self {
        assert!(!vectors.is_empty(), "need training data");
        assert!(m > 0 && k > 0, "m and k must be > 0");
        let dim = vectors[0].len();
        assert!(
            dim.is_multiple_of(m),
            "dim ({dim}) must be divisible by m ({m})"
        );
        let sub_dim = dim / m;
        let mut codebooks = Vec::with_capacity(m);
        for sub in 0..m {
            let start = sub * sub_dim;
            let end = start + sub_dim;
            // Extract sub-vectors for this subspace and cluster them.
            let sub_vecs: Vec<&[f32]> = vectors.iter().map(|v| &v[start..end]).collect();
            let centroids = kmeans(&sub_vecs, k, sub_dim, iterations);
            codebooks.push(centroids);
        }
        Self {
            m,
            k,
            sub_dim,
            codebooks,
        }
    }

    /// Encode a vector: for each subspace, find the nearest centroid index.
    ///
    /// # Panics
    ///
    /// Panics if `vector.len() != m * sub_dim`.
    pub fn encode_vec(&self, vector: &[f32]) -> Vec<u8> {
        assert_eq!(vector.len(), self.m * self.sub_dim);
        let mut codes = Vec::with_capacity(self.m);
        for sub in 0..self.m {
            let start = sub * self.sub_dim;
            let sub_vec = &vector[start..start + self.sub_dim];
            let idx = nearest_centroid(sub_vec, &self.codebooks[sub]);
            // k <= 256 is assumed by the 1-byte code; larger k would truncate.
            codes.push(idx as u8);
        }
        codes
    }

    /// Decode codes back to an approximate vector by concatenating centroids.
    ///
    /// # Panics
    ///
    /// Panics if `codes.len() != m` or any code indexes past its codebook.
    pub fn decode_vec(&self, codes: &[u8]) -> Vec<f32> {
        assert_eq!(codes.len(), self.m);
        let mut vector = Vec::with_capacity(self.m * self.sub_dim);
        for (sub, &code) in codes.iter().enumerate() {
            vector.extend_from_slice(&self.codebooks[sub][code as usize]);
        }
        vector
    }

    /// Precompute distance tables for Asymmetric Distance Computation (ADC).
    ///
    /// Returns a table `[subspace][centroid]` where entry (s, c) is the
    /// squared L2 distance from the query sub-vector s to centroid c.
    pub fn compute_distance_tables(&self, query: &[f32]) -> Vec<Vec<f32>> {
        assert_eq!(query.len(), self.m * self.sub_dim);
        let mut tables = Vec::with_capacity(self.m);
        for sub in 0..self.m {
            let start = sub * self.sub_dim;
            let q_sub = &query[start..start + self.sub_dim];
            let mut table = Vec::with_capacity(self.k);
            for centroid in &self.codebooks[sub] {
                table.push(l2_squared(q_sub, centroid));
            }
            tables.push(table);
        }
        tables
    }

    /// Compute the ADC distance using precomputed tables.
    ///
    /// Sum of table lookups: `dist = sum over s of tables[s][codes[s]]`.
    ///
    /// # Panics
    ///
    /// Panics if `tables.len() != codes.len()` or any code is out of range
    /// for its table. Safe indexing replaces the previous
    /// `get_unchecked`: `codes` is a caller-supplied public-API slice, so
    /// an out-of-range code must panic rather than be undefined behavior
    /// in release builds (the old `debug_assert!` vanished there).
    pub fn distance_adc(tables: &[Vec<f32>], codes: &[u8]) -> f32 {
        assert_eq!(tables.len(), codes.len());
        let mut dist = 0.0f32;
        for (table, &code) in tables.iter().zip(codes.iter()) {
            dist += table[code as usize];
        }
        dist
    }
}
impl Quantizer for ProductQuantizer {
    /// Delegates to [`ProductQuantizer::encode_vec`]: one code byte per subspace.
    fn encode(&self, vector: &[f32]) -> Vec<u8> {
        self.encode_vec(vector)
    }
    /// Delegates to [`ProductQuantizer::decode_vec`]: concatenated centroids.
    fn decode(&self, codes: &[u8]) -> Vec<f32> {
        self.decode_vec(codes)
    }
    /// Product quantization serves the Warm tier.
    fn tier(&self) -> TemperatureTier {
        TemperatureTier::Warm
    }
    /// Full dimensionality = subspaces * dims-per-subspace.
    fn dim(&self) -> usize {
        self.m * self.sub_dim
    }
}
// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------
/// Squared Euclidean (L2) distance between two equal-length slices.
fn l2_squared(a: &[f32], b: &[f32]) -> f32 {
    let mut acc = 0.0f32;
    for (&x, &y) in a.iter().zip(b.iter()) {
        let diff = x - y;
        acc += diff * diff;
    }
    acc
}

/// Find the index of the nearest centroid to `point` (squared L2).
///
/// Ties keep the earliest index; an empty `centroids` slice yields 0.
fn nearest_centroid(point: &[f32], centroids: &[Vec<f32>]) -> usize {
    centroids
        .iter()
        .enumerate()
        .fold((0usize, f32::INFINITY), |(best_i, best_d), (i, c)| {
            let d = l2_squared(point, c);
            // Strict `<` preserves the first-seen index on ties.
            if d < best_d { (i, d) } else { (best_i, best_d) }
        })
        .0
}
/// Simple k-means clustering (Lloyd's algorithm).
///
/// Centroids are seeded from the first `min(k, n)` training points, then
/// refined for `iterations` assignment/update rounds. Clusters that end a
/// round empty keep their previous centroid. If `k` exceeds the number of
/// data points, the result is padded by repeating the last centroid.
fn kmeans(data: &[&[f32]], k: usize, sub_dim: usize, iterations: usize) -> Vec<Vec<f32>> {
    let n = data.len();
    // Cannot have more distinct centroids than data points.
    let actual_k = k.min(n);
    let mut centroids: Vec<Vec<f32>> = data.iter().take(actual_k).map(|p| p.to_vec()).collect();
    let mut assignments = vec![0usize; n];
    let mut counts = vec![0usize; actual_k];
    let mut sums = vec![vec![0.0f32; sub_dim]; actual_k];
    for _ in 0..iterations {
        // Assignment step: nearest centroid per point.
        for (slot, point) in assignments.iter_mut().zip(data.iter()) {
            *slot = nearest_centroid(point, &centroids);
        }
        // Update step: recompute cluster means from scratch.
        counts.fill(0);
        sums.iter_mut().for_each(|s| s.fill(0.0));
        for (point, &c) in data.iter().zip(assignments.iter()) {
            counts[c] += 1;
            for (acc, &val) in sums[c].iter_mut().zip(point.iter()) {
                *acc += val;
            }
        }
        for ((centroid, sum), &count) in centroids.iter_mut().zip(&sums).zip(&counts) {
            if count > 0 {
                for (cd, &sd) in centroid.iter_mut().zip(sum) {
                    *cd = sd / count as f32;
                }
            }
        }
    }
    // If we need more centroids than data points, duplicate the last centroid.
    while centroids.len() < k {
        centroids.push(centroids[centroids.len() - 1].clone());
    }
    centroids
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Deterministic pseudo-random training set: 50 vectors of dim 16
    /// with values in [-1, 1).
    fn make_pq_data() -> Vec<Vec<f32>> {
        // 50 vectors of dim 16
        let mut vecs = Vec::new();
        for i in 0..50 {
            let v: Vec<f32> = (0..16)
                .map(|d| ((i * 7 + d * 13 + 3) % 200) as f32 / 100.0 - 1.0)
                .collect();
            vecs.push(v);
        }
        vecs
    }

    /// Training yields the declared shape; codes are valid centroid
    /// indices and decode back to a full-dimension vector.
    #[test]
    fn train_and_encode_decode() {
        let data = make_pq_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let pq = ProductQuantizer::train(&refs, 4, 8, 10);
        assert_eq!(pq.m, 4);
        assert_eq!(pq.k, 8);
        assert_eq!(pq.sub_dim, 4);
        assert_eq!(pq.codebooks.len(), 4);
        let codes = pq.encode_vec(&data[0]);
        assert_eq!(codes.len(), 4);
        for &c in &codes {
            assert!((c as usize) < 8);
        }
        let recon = pq.decode_vec(&codes);
        assert_eq!(recon.len(), 16);
    }

    /// ADC lookups are non-negative and the query is at least as close
    /// to itself as to another vector.
    #[test]
    fn adc_distance() {
        let data = make_pq_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let pq = ProductQuantizer::train(&refs, 4, 8, 10);
        let query = &data[0];
        let tables = pq.compute_distance_tables(query);
        assert_eq!(tables.len(), 4);
        let codes = pq.encode_vec(&data[1]);
        let dist = ProductQuantizer::distance_adc(&tables, &codes);
        assert!(dist >= 0.0);
        // Distance to self should be very small
        let self_codes = pq.encode_vec(query);
        let self_dist = ProductQuantizer::distance_adc(&tables, &self_codes);
        assert!(self_dist < dist || dist == 0.0);
    }

    /// Lloyd iterations should monotonically (weakly) reduce total
    /// reconstruction error.
    #[test]
    fn pq_convergence() {
        // After training, reconstruction error should decrease with more iterations.
        let data = make_pq_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let pq_1 = ProductQuantizer::train(&refs, 4, 8, 1);
        let pq_20 = ProductQuantizer::train(&refs, 4, 8, 20);
        let error_1: f32 = data
            .iter()
            .map(|v| {
                let codes = pq_1.encode_vec(v);
                let recon = pq_1.decode_vec(&codes);
                l2_squared(v, &recon)
            })
            .sum();
        let error_20: f32 = data
            .iter()
            .map(|v| {
                let codes = pq_20.encode_vec(v);
                let recon = pq_20.decode_vec(&codes);
                l2_squared(v, &recon)
            })
            .sum();
        assert!(
            error_20 <= error_1 + f32::EPSILON,
            "more iterations should not increase error: {error_1} vs {error_20}"
        );
    }

    /// The trait surface reports the Warm tier and full dimensionality.
    #[test]
    fn quantizer_trait() {
        let data = make_pq_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let pq = ProductQuantizer::train(&refs, 4, 8, 5);
        assert_eq!(pq.tier(), TemperatureTier::Warm);
        assert_eq!(pq.dim(), 16);
    }
}

View File

@@ -0,0 +1,218 @@
//! Scalar Quantization (SQ) — fp32 to u8, 4x compression.
//!
//! Each dimension is independently mapped from [min, max] to [0, 255].
//! This is the quantization used for the **Hot** (Tier 0) tier.
use crate::tier::TemperatureTier;
use crate::traits::Quantizer;
use alloc::vec;
use alloc::vec::Vec;
/// Scalar quantizer parameters: per-dimension min/max ranges.
///
/// Each dimension is independently mapped from `[min, max]` to `[0, 255]`.
#[derive(Clone, Debug)]
pub struct ScalarQuantizer {
    /// Minimum value per dimension (training set).
    pub min_vals: Vec<f32>,
    /// Maximum value per dimension (training set).
    pub max_vals: Vec<f32>,
    /// Vector dimensionality.
    pub dim: usize,
}
impl ScalarQuantizer {
    /// Train a scalar quantizer by computing per-dimension min/max over
    /// a set of training vectors.
    ///
    /// Degenerate (zero-range) dimensions are widened by 1.0 so encoding
    /// never divides by zero.
    ///
    /// # Panics
    ///
    /// Panics if `vectors` is empty or any vector has inconsistent dimensionality.
    pub fn train(vectors: &[&[f32]]) -> Self {
        assert!(!vectors.is_empty(), "need at least one training vector");
        let dim = vectors[0].len();
        assert!(dim > 0, "vector dimensionality must be > 0");
        let mut min_vals = vec![f32::INFINITY; dim];
        let mut max_vals = vec![f32::NEG_INFINITY; dim];
        for v in vectors {
            assert_eq!(v.len(), dim, "dimension mismatch in training data");
            for (d, &val) in v.iter().enumerate() {
                if val < min_vals[d] {
                    min_vals[d] = val;
                }
                if val > max_vals[d] {
                    max_vals[d] = val;
                }
            }
        }
        // Avoid zero-range dimensions (would cause division by zero).
        for d in 0..dim {
            if (max_vals[d] - min_vals[d]).abs() < f32::EPSILON {
                max_vals[d] = min_vals[d] + 1.0;
            }
        }
        Self {
            min_vals,
            max_vals,
            dim,
        }
    }

    /// Quantize a float vector to u8 codes.
    ///
    /// `q[d] = round((v[d] - min[d]) / (max[d] - min[d]) * 255)`, with the
    /// normalized value clamped to `[0, 1]` so out-of-range inputs
    /// saturate rather than wrap.
    ///
    /// # Panics
    ///
    /// Panics if `vector.len() != self.dim`.
    pub fn encode_vec(&self, vector: &[f32]) -> Vec<u8> {
        assert_eq!(vector.len(), self.dim);
        let mut codes = Vec::with_capacity(self.dim);
        // The assert above pins the length; no `.take(self.dim)` needed.
        for (d, &val) in vector.iter().enumerate() {
            let range = self.max_vals[d] - self.min_vals[d];
            let normalized = (val - self.min_vals[d]) / range;
            let clamped = normalized.clamp(0.0, 1.0);
            codes.push((clamped * 255.0).round() as u8);
        }
        codes
    }

    /// Dequantize u8 codes back to approximate float values.
    ///
    /// `v[d] = q[d] / 255 * (max[d] - min[d]) + min[d]`
    ///
    /// # Panics
    ///
    /// Panics if `codes.len() != self.dim`.
    pub fn decode_vec(&self, codes: &[u8]) -> Vec<f32> {
        assert_eq!(codes.len(), self.dim);
        let mut vector = Vec::with_capacity(self.dim);
        for (d, &code) in codes.iter().enumerate() {
            let range = self.max_vals[d] - self.min_vals[d];
            let val = (code as f32 / 255.0) * range + self.min_vals[d];
            vector.push(val);
        }
        vector
    }

    /// Compute approximate squared L2 distance between two quantized vectors.
    ///
    /// Since a dequantized value is `code / 255 * range + min`, the
    /// per-dimension difference is `(a_code - b_code) / 255 * range`, so each
    /// squared term is `(a_code - b_code)^2 * range^2 / 255^2`. The code
    /// difference is squared in i32 before a single f32 conversion per
    /// dimension, and the constant `1 / 255^2` factor is hoisted out.
    /// (Previous comment claimed the whole accumulation was integer; only
    /// the per-dimension difference is.)
    ///
    /// # Panics
    ///
    /// Panics if either slice length differs from `self.dim`.
    pub fn distance_l2_quantized(&self, a: &[u8], b: &[u8]) -> f32 {
        assert_eq!(a.len(), self.dim);
        assert_eq!(b.len(), self.dim);
        let inv_255_sq = 1.0f32 / (255.0 * 255.0);
        let mut dist = 0.0f32;
        for d in 0..self.dim {
            let diff = a[d] as i32 - b[d] as i32;
            let range = self.max_vals[d] - self.min_vals[d];
            dist += (diff * diff) as f32 * (range * range) * inv_255_sq;
        }
        dist
    }

    /// SIMD-accelerated L2 distance (stub; falls back to scalar when
    /// the `simd` feature is not enabled).
    #[cfg(feature = "simd")]
    pub fn distance_l2_simd(&self, a: &[u8], b: &[u8]) -> f32 {
        // Future: AVX-512 / NEON implementation.
        self.distance_l2_quantized(a, b)
    }
}
impl Quantizer for ScalarQuantizer {
    /// Delegates to [`ScalarQuantizer::encode_vec`]: one u8 code per dimension.
    fn encode(&self, vector: &[f32]) -> Vec<u8> {
        self.encode_vec(vector)
    }
    /// Delegates to [`ScalarQuantizer::decode_vec`]: per-dimension dequantize.
    fn decode(&self, codes: &[u8]) -> Vec<f32> {
        self.decode_vec(codes)
    }
    /// Scalar quantization serves the Hot tier.
    fn tier(&self) -> TemperatureTier {
        TemperatureTier::Hot
    }
    fn dim(&self) -> usize {
        self.dim
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Deterministic training set: 10 vectors of dim 8 with values in [-1, 1).
    fn make_training_data() -> Vec<Vec<f32>> {
        // 10 vectors of dim 8 in [-1, 1]
        let mut vecs = Vec::new();
        for i in 0..10 {
            let v: Vec<f32> = (0..8)
                .map(|d| ((i * 7 + d * 13) % 200) as f32 / 100.0 - 1.0)
                .collect();
            vecs.push(v);
        }
        vecs
    }

    /// Reconstruction error per dimension is bounded by one quantization
    /// step of the widest dimension range.
    #[test]
    fn round_trip_low_error() {
        let data = make_training_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let sq = ScalarQuantizer::train(&refs);
        for v in &data {
            let codes = sq.encode_vec(v);
            let reconstructed = sq.decode_vec(&codes);
            assert_eq!(reconstructed.len(), v.len());
            // Check reconstruction error per dimension
            for (orig, recon) in v.iter().zip(reconstructed.iter()) {
                // Widest per-dimension range / 255 = max quantization step.
                // (Loop-invariant; recomputed per element here, fine for a test.)
                let max_error = (sq
                    .max_vals
                    .iter()
                    .zip(sq.min_vals.iter())
                    .map(|(mx, mn)| mx - mn)
                    .fold(0.0f32, f32::max))
                    / 255.0;
                assert!(
                    (orig - recon).abs() <= max_error + f32::EPSILON,
                    "reconstruction error too large: orig={orig}, recon={recon}"
                );
            }
        }
    }

    /// Quantized distances are never negative.
    #[test]
    fn quantized_distance_nonnegative() {
        let data = make_training_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let sq = ScalarQuantizer::train(&refs);
        let a = sq.encode_vec(&data[0]);
        let b = sq.encode_vec(&data[1]);
        let dist = sq.distance_l2_quantized(&a, &b);
        assert!(dist >= 0.0);
    }

    /// A code vector is at distance zero from itself.
    #[test]
    fn identical_vectors_zero_distance() {
        let data = make_training_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let sq = ScalarQuantizer::train(&refs);
        let a = sq.encode_vec(&data[0]);
        let dist = sq.distance_l2_quantized(&a, &a);
        assert!(dist.abs() < f32::EPSILON);
    }

    /// The trait surface reports the Hot tier and the trained dimensionality.
    #[test]
    fn quantizer_trait() {
        let data = make_training_data();
        let refs: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
        let sq = ScalarQuantizer::train(&refs);
        assert_eq!(sq.tier(), TemperatureTier::Hot);
        assert_eq!(sq.dim(), 8);
    }
}

View File

@@ -0,0 +1,228 @@
//! Count-Min Sketch for temperature tracking.
//!
//! Tracks per-block access frequency to drive tier promotion/demotion.
//! Spec defaults: width=1024, depth=4, 8-bit saturating counters.
use alloc::vec;
use alloc::vec::Vec;
/// Count-Min Sketch for access frequency estimation.
#[derive(Clone, Debug)]
pub struct CountMinSketch {
/// Counter matrix: `counters[row][col]`, each row uses a different hash.
pub counters: Vec<Vec<u8>>,
/// Number of counters per row.
pub width: usize,
/// Number of hash functions (rows).
pub depth: usize,
/// Total number of increment operations (for aging decisions).
pub total_accesses: u64,
}
/// Default width (counters per row), per the spec default of 1024.
pub const DEFAULT_WIDTH: usize = 1024;
/// Default depth (hash functions / rows), per the spec default of 4.
pub const DEFAULT_DEPTH: usize = 4;
/// Aging trigger: halve all counters every 2^16 (65 536) accesses.
const AGING_INTERVAL: u64 = 1 << 16;
impl CountMinSketch {
    /// Create a new sketch with the given width and depth.
    ///
    /// # Panics
    ///
    /// Panics if `width` or `depth` is zero. A zero width would make the
    /// `% width` index reduction divide by zero on the first access, and a
    /// zero depth would make [`estimate`](Self::estimate) report `u8::MAX`
    /// even for blocks that were never incremented.
    pub fn new(width: usize, depth: usize) -> Self {
        assert!(width > 0, "CountMinSketch width must be non-zero");
        assert!(depth > 0, "CountMinSketch depth must be non-zero");
        Self {
            counters: vec![vec![0u8; width]; depth],
            width,
            depth,
            total_accesses: 0,
        }
    }

    /// Create a sketch with default parameters (w=1024, d=4).
    pub fn default_sketch() -> Self {
        Self::new(DEFAULT_WIDTH, DEFAULT_DEPTH)
    }

    /// Increment the count for `block_id` using saturating addition.
    ///
    /// Updates all `depth` hash rows with `min(counter + 1, 255)`. The
    /// global access total wraps (rather than saturates) so aging decisions
    /// keep firing over arbitrarily long lifetimes.
    pub fn increment(&mut self, block_id: u64) {
        for row in 0..self.depth {
            let idx = self.index(block_id, row);
            self.counters[row][idx] = self.counters[row][idx].saturating_add(1);
        }
        self.total_accesses = self.total_accesses.wrapping_add(1);
    }

    /// Estimate the access count for `block_id`.
    ///
    /// Returns the minimum across all hash rows (Count-Min guarantee:
    /// estimate >= true count, with bounded overestimation).
    pub fn estimate(&self, block_id: u64) -> u8 {
        let mut min_val = u8::MAX;
        for row in 0..self.depth {
            min_val = min_val.min(self.counters[row][self.index(block_id, row)]);
        }
        min_val
    }

    /// Age (decay) all counters by right-shifting by 1 (halving).
    ///
    /// This ensures the sketch tracks *recent* access patterns rather
    /// than cumulative history.
    pub fn age(&mut self) {
        for row in &mut self.counters {
            for counter in row.iter_mut() {
                *counter >>= 1;
            }
        }
    }

    /// Returns true if aging should be triggered (every 2^16 accesses).
    pub fn should_age(&self) -> bool {
        self.total_accesses > 0 && self.total_accesses.is_multiple_of(AGING_INTERVAL)
    }

    /// Memory footprint in bytes (counters only, excluding struct overhead).
    pub fn memory_bytes(&self) -> usize {
        self.width * self.depth
    }

    /// Counter index for `block_id` in hash row `row`.
    ///
    /// Shared by `increment` and `estimate` so both always agree on
    /// counter placement.
    #[inline]
    fn index(&self, block_id: u64, row: usize) -> usize {
        self.hash(block_id, row) % self.width
    }

    /// Hash function using FNV-1a style multiplicative hashing.
    ///
    /// Each row uses a different seed to produce independent hash values.
    /// NOTE(review): only 8 seeds exist, and rows index them modulo
    /// `HASH_SEEDS.len()`, so sketches deeper than 8 rows reuse seeds and
    /// lose row independence.
    fn hash(&self, block_id: u64, row: usize) -> usize {
        // FNV-1a inspired: mix block_id with a row-dependent seed.
        const FNV_OFFSET: u64 = 0xcbf29ce484222325;
        const FNV_PRIME: u64 = 0x100000001b3;
        let seed = HASH_SEEDS[row % HASH_SEEDS.len()];
        let mut h = FNV_OFFSET ^ seed;
        for &b in &block_id.to_le_bytes() {
            h ^= b as u64;
            h = h.wrapping_mul(FNV_PRIME);
        }
        h as usize
    }
}
/// Seeds for hash functions (one per row).
///
/// Supports up to 8 independent rows; `hash` selects a seed with
/// `row % HASH_SEEDS.len()`, so deeper sketches reuse seeds.
const HASH_SEEDS: [u64; 8] = [
    0x517cc1b727220a95,
    0x6c62272e07bb0142,
    0x44c6b90e0f294e41,
    0x3b9f7a3e2d8f1c5b,
    0x7e4a1b3c5d6f8a9e,
    0x1a2b3c4d5e6f7089,
    0x9f8e7d6c5b4a3210,
    0xdeadbeefcafebabe,
];
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn new_sketch_all_zeros() {
        // A fresh sketch holds no counts and has seen no accesses.
        let sketch = CountMinSketch::new(64, 4);
        let all_zero = sketch
            .counters
            .iter()
            .all(|row| row.iter().all(|&c| c == 0));
        assert!(all_zero);
        assert_eq!(sketch.total_accesses, 0);
    }

    #[test]
    fn estimate_ge_true_count() {
        // Count-Min guarantee: the estimate never undercounts.
        let mut sketch = CountMinSketch::new(256, 4);
        for _ in 0..10 {
            sketch.increment(42);
        }
        let est = sketch.estimate(42);
        assert!(est >= 10, "estimate {est} should be >= true count 10");
    }

    #[test]
    fn increment_saturates_at_255() {
        // The 8-bit counters clamp at 255 instead of wrapping.
        let mut sketch = CountMinSketch::new(64, 2);
        for _ in 0..300 {
            sketch.increment(1);
        }
        assert_eq!(sketch.estimate(1), 255);
    }

    #[test]
    fn aging_halves_counters() {
        let mut sketch = CountMinSketch::new(64, 2);
        for _ in 0..100 {
            sketch.increment(7);
        }
        let before = sketch.estimate(7);
        sketch.age();
        let after = sketch.estimate(7);
        // Aging must shrink (or at worst preserve) counts, roughly halving.
        assert!(after <= before, "aging should not increase count");
        assert!(
            after >= before / 2 - 1,
            "aging should halve: before={before}, after={after}"
        );
    }

    #[test]
    fn should_age_at_power_of_two() {
        let mut sketch = CountMinSketch::new(64, 2);
        // Off the 2^16 boundary: no aging.
        sketch.total_accesses = 100;
        assert!(!sketch.should_age());
        // Exactly on multiples of 2^16: aging fires.
        for boundary in [1u64 << 16, 2 << 16] {
            sketch.total_accesses = boundary;
            assert!(sketch.should_age());
        }
    }

    #[test]
    fn different_blocks_independent() {
        // Distinct block ids keep distinct (lower-bounded) estimates.
        let mut sketch = CountMinSketch::new(1024, 4);
        for _ in 0..50 {
            sketch.increment(100);
        }
        for _ in 0..10 {
            sketch.increment(200);
        }
        let est_hot = sketch.estimate(100);
        let est_cool = sketch.estimate(200);
        assert!(est_hot >= 50);
        assert!(est_cool >= 10);
        assert!(est_hot > est_cool);
    }

    #[test]
    fn memory_bytes() {
        // 1024 counters x 4 rows x 1 byte each.
        let sketch = CountMinSketch::new(1024, 4);
        assert_eq!(sketch.memory_bytes(), 4096);
    }

    #[test]
    fn unseen_block_is_zero() {
        let sketch = CountMinSketch::new(1024, 4);
        assert_eq!(sketch.estimate(999), 0);
    }
}

View File

@@ -0,0 +1,95 @@
//! Temperature tier assignment for vector blocks.
/// Access count above which a block is considered "hot" (Tier 0).
pub const HOT_THRESHOLD: u8 = 128;
/// Access count above which a block is considered "warm" (Tier 1).
/// Below this threshold, a block is "cold" (Tier 2).
pub const WARM_THRESHOLD: u8 = 16;
/// Temperature tier for a vector block.
///
/// Determines the quantization level and storage layout.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum TemperatureTier {
    /// Frequently accessed. Scalar quantized (int8), interleaved layout.
    Hot = 0,
    /// Moderately accessed. Product quantized, columnar layout.
    Warm = 1,
    /// Rarely accessed. Binary quantized, columnar + heavy compression.
    Cold = 2,
}
/// Assign a temperature tier based on the estimated access count.
///
/// Thresholds follow the RVF spec and are strict (`>`): a count strictly
/// above [`HOT_THRESHOLD`] is Hot, strictly above [`WARM_THRESHOLD`] is
/// Warm, and everything else — the threshold values included — is Cold
/// or Warm respectively.
pub fn assign_tier(access_count: u8) -> TemperatureTier {
    match access_count {
        c if c > HOT_THRESHOLD => TemperatureTier::Hot,
        c if c > WARM_THRESHOLD => TemperatureTier::Warm,
        _ => TemperatureTier::Cold,
    }
}
impl TemperatureTier {
    /// Returns the wire representation (0, 1, or 2).
    #[inline]
    pub const fn as_u8(self) -> u8 {
        self as u8
    }
}
impl TryFrom<u8> for TemperatureTier {
    type Error = u8;

    /// Decode a wire byte; any value outside 0..=2 is handed back as the error.
    fn try_from(value: u8) -> Result<Self, Self::Error> {
        match value {
            0 => Ok(Self::Hot),
            1 => Ok(Self::Warm),
            2 => Ok(Self::Cold),
            other => Err(other),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tier_assignment() {
        // Comparisons are strict, so threshold values themselves land in
        // the next-cooler tier (128 -> Warm, 16 -> Cold).
        let cases: [(u8, TemperatureTier); 8] = [
            (255, TemperatureTier::Hot),
            (129, TemperatureTier::Hot),
            (128, TemperatureTier::Warm),
            (64, TemperatureTier::Warm),
            (17, TemperatureTier::Warm),
            (16, TemperatureTier::Cold),
            (1, TemperatureTier::Cold),
            (0, TemperatureTier::Cold),
        ];
        for (count, expected) in cases {
            assert_eq!(assign_tier(count), expected);
        }
    }

    #[test]
    fn round_trip() {
        // Every valid wire byte decodes and re-encodes to itself.
        for raw in 0..=2u8 {
            assert_eq!(TemperatureTier::try_from(raw).unwrap().as_u8(), raw);
        }
    }

    #[test]
    fn invalid_tier() {
        assert_eq!(TemperatureTier::try_from(3), Err(3));
    }

    #[test]
    fn ordering() {
        // Hotter tiers sort before cooler ones.
        assert!(TemperatureTier::Hot < TemperatureTier::Warm);
        assert!(TemperatureTier::Warm < TemperatureTier::Cold);
    }
}

View File

@@ -0,0 +1,22 @@
//! Common quantization trait shared by all quantizer types.
use crate::tier::TemperatureTier;
use alloc::vec::Vec;
/// Trait for vector quantization codecs.
///
/// Every quantizer can encode a float vector into a compact byte representation
/// and decode it back to an approximate float vector. Quantization is lossy:
/// `decode(encode(v))` approximates `v` rather than reproducing it exactly.
pub trait Quantizer {
    /// Encode a float vector into compact codes.
    ///
    /// NOTE(review): implementors likely expect `vector.len() == self.dim()`;
    /// behavior for mismatched lengths is implementor-defined — confirm.
    fn encode(&self, vector: &[f32]) -> Vec<u8>;
    /// Decode compact codes back to an approximate float vector.
    fn decode(&self, codes: &[u8]) -> Vec<f32>;
    /// The temperature tier this quantizer is designed for.
    fn tier(&self) -> TemperatureTier;
    /// The dimensionality this quantizer was trained for.
    fn dim(&self) -> usize;
}