Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

2026-02-28 14:39:40 -05:00
parent 7885bf6278 d803bfe2b1
commit cd5943df23
7854 changed files with 3522914 additions and 0 deletions
--- a/vendor/ruvector/crates/ruvector-dither/src/channel.rs
+++ b/vendor/ruvector/crates/ruvector-dither/src/channel.rs
@@ -0,0 +1,92 @@
+//! Per-channel and per-layer dither management.
+//!
+//! `ChannelDither` bundles one `GoldenRatioDither` state per channel,
+//! seeded from `(layer_id, channel_id)` pairs so every channel is
+//! structurally decorrelated without any RNG.
+
+use crate::{DitherSource, GoldenRatioDither};
+
+/// Per-channel dither pool seeded from `(layer_id, channel_id)` pairs.
+///
+/// Holds one `GoldenRatioDither` per channel, plus the bit-width and dither
+/// amplitude shared by all channels; each state advances only with its channel.
+pub struct ChannelDither {
+    channels: Vec<GoldenRatioDither>, // one dither state per channel, indexed by channel id
+    bits: u32, // quantizer bit-width; checked to be in [2, 31] when quantizing
+    eps: f32, // dither amplitude in LSB units (multiplied by the LSB size before use)
+}
+
+impl ChannelDither {
+    /// Build a pool of `n_channels` dithers for `layer_id`; `bits` / `eps` are stored for later use.
+    pub fn new(layer_id: u32, n_channels: usize, bits: u32, eps: f32) -> Self {
+        let channels = (0..n_channels)
+            .map(|ch| GoldenRatioDither::from_ids(layer_id, ch as u32)) // channel index doubles as the seed's channel id
+            .collect();
+        Self {
+            channels,
+            bits,
+            eps,
+        }
+    }
+
+    /// Quantize `activations` in-place; element `i` uses the dither state of
+    /// channel `i % n_channels`, so each column has its own sequence.
+    ///
+    /// `activations` is a flat row-major tensor of shape `[batch, channels]`.
+    /// A trailing partial row of `r` elements is processed with channels `0..r`.
+    /// Panics if the pool has no channels or `bits` is outside `[2, 31]`.
+    pub fn quantize_batch(&mut self, activations: &mut [f32]) {
+        assert!(
+            !self.channels.is_empty(),
+            "ChannelDither must have >= 1 channel"
+        );
+        assert!(self.bits >= 2 && self.bits <= 31, "bits must be in [2, 31]");
+        let nc = self.channels.len();
+        let qmax = ((1u32 << (self.bits - 1)) - 1) as f32; // largest code magnitude: 2^(bits-1) - 1
+        let lsb = 1.0 / qmax; // one quantization step on the normalised scale
+        for (i, x) in activations.iter_mut().enumerate() {
+            let ch = i % nc; // flat index modulo channel count selects the channel
+            let d = self.channels[ch].next(self.eps * lsb);
+            *x = ((*x + d) * qmax).round().clamp(-qmax, qmax) / qmax; // dither, round to a code, clamp, rescale
+        }
+    }
+
+    /// Number of channels in this pool.
+    pub fn n_channels(&self) -> usize {
+        self.channels.len()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn channel_dither_correct_count() {
+        let cd = ChannelDither::new(0, 16, 8, 0.5); // layer 0, 16 channels, 8 bits, eps 0.5
+        assert_eq!(cd.n_channels(), 16);
+    }
+
+    #[test]
+    fn channel_dither_in_bounds() {
+        let mut cd = ChannelDither::new(1, 8, 5, 0.5); // 8 channels, so 64 inputs form 8 full rows
+        let mut acts: Vec<f32> = (0..64).map(|i| (i as f32 / 63.0) * 2.0 - 1.0).collect();
+        cd.quantize_batch(&mut acts);
+        for v in acts {
+            assert!(v >= -1.0 && v <= 1.0, "out of bounds: {v}");
+        }
+    }
+
+    #[test]
+    fn different_layers_produce_different_outputs() {
+        let input: Vec<f32> = vec![0.5; 16]; // constant input: any difference comes from the dither
+        let mut buf0 = input.clone();
+        let mut buf1 = input.clone();
+        ChannelDither::new(0, 8, 8, 0.5).quantize_batch(&mut buf0);
+        ChannelDither::new(99, 8, 8, 0.5).quantize_batch(&mut buf1);
+        assert_ne!(
+            buf0, buf1,
+            "different layer_ids must yield different dithered outputs"
+        );
+    }
+}
--- a/vendor/ruvector/crates/ruvector-dither/src/golden.rs
+++ b/vendor/ruvector/crates/ruvector-dither/src/golden.rs
@@ -0,0 +1,100 @@
+//! Golden-ratio quasi-random dither sequence.
+//!
+//! State update: `state = frac(state + φ)` where φ = (√5−1)/2 ≈ 0.618…
+//!
+//! This is an additive (Kronecker) recurrence with step φ, not a Halton
+//! sequence; φ is the classic step choice for low 1-D discrepancy.
+
+use crate::DitherSource;
+
+/// Additive golden-ratio dither with zero-mean output in `[-0.5, 0.5]`.
+///
+/// φ is irrational, so the ideal sequence never repeats (`f32` rounding
+/// aside); differently seeded instances keep a constant phase offset.
+#[derive(Clone, Debug)]
+pub struct GoldenRatioDither {
+    state: f32, // current phase; stays in [0, 1) for finite seeds
+}
+
+/// φ = (√5 − 1) / 2 ≈ 0.618, the per-step increment of the sequence.
+const PHI: f32 = 0.618_033_98_f32;
+
+impl GoldenRatioDither {
+    /// Create a sequence seeded at `initial_state`, folded into [0, 1) by `abs().fract()`.
+    ///
+    /// For per-layer / per-channel decorrelation, prefer [`Self::from_ids`],
+    /// which seeds with `frac(layer_id × φ + channel_id × φ²)`.
+    #[inline]
+    pub fn new(initial_state: f32) -> Self {
+        Self {
+            state: initial_state.abs().fract(), // sign is dropped, integer part discarded
+        }
+    }
+
+    /// Construct from a `(layer_id, channel_id)` pair: seed = `frac(layer_id × φ + channel_id × φ²)`.
+    #[inline]
+    pub fn from_ids(layer_id: u32, channel_id: u32) -> Self {
+        let s = ((layer_id as f32) * PHI + (channel_id as f32) * PHI * PHI).fract();
+        Self { state: s } // NOTE(review): computed in f32; very large ids may lose fractional precision — confirm id ranges
+    }
+
+    /// Current phase in [0, 1); passing it to [`Self::new`] resumes the sequence (checkpointing).
+    #[inline]
+    pub fn state(&self) -> f32 {
+        self.state
+    }
+}
+
+impl DitherSource for GoldenRatioDither {
+    /// Advance the phase by φ (mod 1) and return it re-centred to `[-0.5, 0.5)`.
+    #[inline]
+    fn next_unit(&mut self) -> f32 {
+        self.state = (self.state + PHI).fract(); // wrap back into [0, 1)
+        self.state - 0.5
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::DitherSource;
+
+    #[test]
+    fn output_is_in_range() {
+        let mut d = GoldenRatioDither::new(0.0);
+        for _ in 0..10_000 {
+            let v = d.next_unit();
+            assert!(v >= -0.5 && v <= 0.5, "out of range: {v}");
+        }
+    }
+
+    #[test]
+    fn mean_is_near_zero() {
+        let mut d = GoldenRatioDither::new(0.0);
+        let n = 100_000;
+        let mean: f32 = (0..n).map(|_| d.next_unit()).sum::<f32>() / n as f32; // sample mean over n steps
+        assert!(mean.abs() < 0.01, "mean too large: {mean}");
+    }
+
+    #[test]
+    fn from_ids_decorrelates() {
+        let mut d0 = GoldenRatioDither::from_ids(0, 0);
+        let mut d1 = GoldenRatioDither::from_ids(1, 7);
+        // The first outputs must differ, i.e. the two id pairs seed different phases
+        let v0 = d0.next_unit();
+        let v1 = d1.next_unit();
+        assert!(
+            (v0 - v1).abs() > 1e-4,
+            "distinct seeds should produce distinct first values"
+        );
+    }
+
+    #[test]
+    fn deterministic_across_calls() {
+        let mut d1 = GoldenRatioDither::new(0.123);
+        let mut d2 = GoldenRatioDither::new(0.123);
+        for _ in 0..1000 {
+            assert_eq!(d1.next_unit(), d2.next_unit()); // same seed must give bit-identical outputs
+        }
+    }
+}
--- a/vendor/ruvector/crates/ruvector-dither/src/lib.rs
+++ b/vendor/ruvector/crates/ruvector-dither/src/lib.rs
@@ -0,0 +1,63 @@
+//! # ruvector-dither
+//!
+//! Deterministic, low-discrepancy **pre-quantization dithering** for low-bit
+//! inference on tiny devices (WASM, Seed, STM32).
+//!
+//! ## Why dither?
+//!
+//! Quantizers at 3 / 5 / 7 bits can align with power-of-two boundaries and
+//! produce idle tones / limit cycles — sticky activations and periodic errors
+//! that degrade accuracy.  A sub-LSB pre-quantization offset:
+//!
+//! - Decorrelates the signal from grid boundaries.
+//! - Pushes quantization error toward high frequencies (blue-noise-like),
+//!   which average out downstream.
+//! - Uses **no RNG** — outputs are deterministic, reproducible across
+//!   platforms (WASM / x86 / ARM), and cache-friendly.
+//!
+//! ## Sequences
+//!
+//! | Type | State update | Properties |
+//! |------|-------------|------------|
+//! | [`GoldenRatioDither`] | frac(state + φ) | Best 1-D equidistribution |
+//! | [`PiDither`] | table of π bytes | Reproducible, period = 256 |
+//!
+//! ## Quick start
+//!
+//! ```
+//! use ruvector_dither::{GoldenRatioDither, PiDither, quantize_dithered};
+//!
+//! // Quantize with golden-ratio dither, 8-bit, ε = 0.5 LSB
+//! let mut gr = GoldenRatioDither::new(0.0);
+//! let q = quantize_dithered(0.314, 8, 0.5, &mut gr);
+//! assert!(q >= -1.0 && q <= 1.0);
+//!
+//! // Quantize with π-digit dither
+//! let mut pi = PiDither::new(0);
+//! let q2 = quantize_dithered(0.271, 5, 0.5, &mut pi);
+//! assert!(q2 >= -1.0 && q2 <= 1.0);
+//! ```
+
+#![cfg_attr(feature = "no_std", no_std)] // NOTE(review): channel.rs uses `Vec` without importing `alloc` — confirm the no_std build works
+
+pub mod channel;
+pub mod golden;
+pub mod pi;
+pub mod quantize;
+
+pub use channel::ChannelDither;
+pub use golden::GoldenRatioDither;
+pub use pi::PiDither;
+pub use quantize::{quantize_dithered, quantize_slice_dithered};
+
+/// Trait implemented by any deterministic dither source.
+pub trait DitherSource {
+    /// Advance the sequence and return the next zero-mean offset in `[-0.5, +0.5]`.
+    fn next_unit(&mut self) -> f32;
+
+    /// Advance the sequence and return the next offset scaled by `eps_lsb` (ε × LSB).
+    #[inline]
+    fn next(&mut self, eps_lsb: f32) -> f32 {
+        self.next_unit() * eps_lsb // magnitude is at most |eps_lsb| / 2 when next_unit honours its range
+    }
+}
--- a/vendor/ruvector/crates/ruvector-dither/src/pi.rs
+++ b/vendor/ruvector/crates/ruvector-dither/src/pi.rs
@@ -0,0 +1,110 @@
+//! π-digit dither: cyclic table of 256 bytes derived from the hex digits of π, scaled to [-0.5, 0.5].
+//!
+//! Period = 256.  Each entry is an independent offset making the sequence
+//! suitable for small buffers where you want exact reproducibility from a
+//! named tensor / layer rather than a stateful RNG.
+
+use crate::DitherSource;
+
+/// 256-byte table derived from the hex expansion of π (3.243F6A8885A308D3…).
+/// NOTE(review): some entries appear to diverge from the published digits — confirm if exactness matters.
+/// Each byte spans [0, 255]; we map to [-0.5, 0.5] by `(b as f32 / 255.0) - 0.5`.
+#[rustfmt::skip]
+const PI_BYTES: [u8; 256] = [
+    0x32, 0x43, 0xF6, 0xA8, 0x88, 0x5A, 0x30, 0x8D, 0x31, 0x31, 0x98, 0xA2,
+    0xE0, 0x37, 0x07, 0x34, 0x4A, 0x40, 0x93, 0x82, 0x22, 0x99, 0xF3, 0x1D,
+    0x00, 0x82, 0xEF, 0xA9, 0x8E, 0xC4, 0xE6, 0xC8, 0x94, 0x52, 0x21, 0xE6,
+    0x38, 0xD0, 0x13, 0x77, 0xBE, 0x54, 0x66, 0xCF, 0x34, 0xE9, 0x0C, 0x6C,
+    0xC0, 0xAC, 0x29, 0xB7, 0xC9, 0x7C, 0x50, 0xDD, 0x3F, 0x84, 0xD5, 0xB5,
+    0xB5, 0x47, 0x09, 0x17, 0x92, 0x16, 0xD5, 0xD9, 0x89, 0x79, 0xFB, 0x1B,
+    0xD1, 0x31, 0x0B, 0xA6, 0x98, 0xDF, 0xB5, 0xAC, 0x2F, 0xFD, 0x72, 0xDB,
+    0xD0, 0x1A, 0xDF, 0xB7, 0xB8, 0xE1, 0xAF, 0xED, 0x6A, 0x26, 0x7E, 0x96,
+    0xBA, 0x7C, 0x90, 0x45, 0xF1, 0x2C, 0x7F, 0x99, 0x24, 0xA1, 0x99, 0x47,
+    0xB3, 0x91, 0x6C, 0xF7, 0x08, 0x01, 0xF2, 0xE2, 0x85, 0x8E, 0xFC, 0x16,
+    0x63, 0x69, 0x20, 0xD8, 0x71, 0x57, 0x4E, 0x69, 0xA4, 0x58, 0xFE, 0xA3,
+    0xF4, 0x93, 0x3D, 0x7E, 0x0D, 0x95, 0x74, 0x8F, 0x72, 0x8E, 0xB6, 0x58,
+    0x71, 0x8B, 0xCD, 0x58, 0x82, 0x15, 0x4A, 0xEE, 0x7B, 0x54, 0xA4, 0x1D,
+    0xC2, 0x5A, 0x59, 0xB5, 0x9C, 0x30, 0xD5, 0x39, 0x2A, 0xF2, 0x60, 0x13,
+    0xC5, 0xD1, 0xB0, 0x23, 0x28, 0x60, 0x85, 0xF0, 0xCA, 0x41, 0x79, 0x18,
+    0xB8, 0xDB, 0x38, 0xEF, 0x8E, 0x79, 0xDC, 0xB0, 0x60, 0x3A, 0x18, 0x0E,
+    0x6C, 0x9E, 0xD0, 0xE8, 0x9D, 0x44, 0x8F, 0x39, 0xF9, 0x93, 0xDB, 0x07,
+    0x3A, 0xA3, 0x45, 0x22, 0x7E, 0xD8, 0xAC, 0x87, 0x2F, 0x85, 0x5D, 0x28,
+    0x55, 0xB0, 0x89, 0x73, 0x36, 0xF3, 0xEB, 0xCD, 0xF6, 0x00, 0x4A, 0xDB,
+    0x36, 0x47, 0xDB, 0xF7, 0x82, 0x48, 0xDB, 0xF3, 0xD3, 0x7C, 0x45, 0x10,
+    0xC6, 0x7A, 0x70, 0xAA, 0x56, 0x78, 0x5A, 0xC6, 0x37, 0x10, 0xA2, 0x44,
+    0x32, 0x34, 0xFE, 0x08,
+];
+
+/// Cyclic π-digit dither.  Period = 256; the `u8` index wraps via `wrapping_add`.
+#[derive(Clone, Debug)]
+pub struct PiDither {
+    idx: u8, // position of the next table entry to emit
+}
+
+impl PiDither {
+    /// Create a new instance starting at `offset` (0–255).
+    #[inline]
+    pub fn new(offset: u8) -> Self {
+        Self { idx: offset }
+    }
+
+    /// Construct from a tensor/layer identifier for structural reproducibility.
+    #[inline]
+    pub fn from_tensor_id(tensor_id: u32) -> Self {
+        // Hash the id and keep the low byte as the start offset (only 256 slots, so ids can collide)
+        let mixed = tensor_id
+            .wrapping_mul(0x9E37_79B9)
+            .wrapping_add(tensor_id >> 16);
+        Self {
+            idx: (mixed & 0xFF) as u8, // NOTE(review): low byte depends only on id bits 0..7 and 16..23 (ids 0 and 256 both give 0)
+        }
+    }
+}
+
+impl DitherSource for PiDither {
+    /// Return the table entry at the current index mapped to `[-0.5, 0.5]`, then advance.
+    #[inline]
+    fn next_unit(&mut self) -> f32 {
+        let b = PI_BYTES[self.idx as usize]; // u8 index into a 256-entry table: always in bounds
+        self.idx = self.idx.wrapping_add(1); // 255 wraps to 0, giving the 256-step period
+        (b as f32 / 255.0) - 0.5
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::DitherSource;
+
+    #[test]
+    fn output_is_in_range() {
+        let mut d = PiDither::new(0);
+        for _ in 0..256 * 4 {
+            let v = d.next_unit();
+            assert!(v >= -0.5 && v <= 0.5, "out of range: {v}");
+        }
+    }
+
+    #[test]
+    fn period_is_256() {
+        let mut d = PiDither::new(0);
+        let first: Vec<f32> = (0..256).map(|_| d.next_unit()).collect(); // one full pass over the table
+        let second: Vec<f32> = (0..256).map(|_| d.next_unit()).collect(); // the pass after the index wraps
+        assert_eq!(first, second);
+    }
+
+    #[test]
+    fn mean_is_near_zero() {
+        let mut d = PiDither::new(0);
+        let sum: f32 = (0..256).map(|_| d.next_unit()).sum();
+        let mean = sum / 256.0; // mean over exactly one period
+        assert!(mean.abs() < 0.05, "π-digit mean too large: {mean}");
+    }
+
+    #[test]
+    fn from_tensor_id_gives_distinct_offsets() {
+        let d0 = PiDither::from_tensor_id(0);
+        let d1 = PiDither::from_tensor_id(1);
+        assert_ne!(d0.idx, d1.idx); // only checks ids 0 and 1
+    }
+}
--- a/vendor/ruvector/crates/ruvector-dither/src/quantize.rs
+++ b/vendor/ruvector/crates/ruvector-dither/src/quantize.rs
@@ -0,0 +1,134 @@
+//! Drop-in quantization helpers that apply dither before rounding.
+
+use crate::DitherSource;
+
+/// Quantize a single value with deterministic dither.
+///
+/// # Arguments
+/// - `x`      – input activation in `[-1.0, 1.0]`
+/// - `bits`   – quantizer bit-width (e.g. 3, 5, 7, 8)
+/// - `eps`    – dither amplitude in LSB units (0.0 = no dither, 0.5 = half-LSB recommended)
+/// - `source` – stateful dither sequence
+///
+/// Returns the quantized value in `[-1.0, 1.0]`; panics if `bits` is outside `[2, 31]`.
+///
+/// # Example
+/// ```
+/// use ruvector_dither::{GoldenRatioDither, quantize_dithered};
+/// let mut d = GoldenRatioDither::new(0.0);
+/// let q = quantize_dithered(0.314, 8, 0.5, &mut d);
+/// assert!(q >= -1.0 && q <= 1.0);
+/// ```
+#[inline]
+pub fn quantize_dithered(x: f32, bits: u32, eps: f32, source: &mut impl DitherSource) -> f32 {
+    assert!(bits >= 2 && bits <= 31, "bits must be in [2, 31]");
+    let qmax = ((1u32 << (bits - 1)) - 1) as f32; // largest code magnitude: 2^(bits-1) - 1
+    let lsb = 1.0 / qmax; // one quantization step on the [-1, 1] scale
+    let dither = source.next(eps * lsb); // peak-to-peak span is eps LSB when the source honours [-0.5, 0.5]
+    let shifted = (x + dither) * qmax; // dithered input expressed in code units
+    let rounded = shifted.round().clamp(-qmax, qmax); // symmetric clamp keeps the result within [-1, 1]
+    rounded / qmax
+}
+
+/// Quantize a slice in-place with deterministic dither.
+///
+/// Each element consumes the next sample from `source`, in slice order; panics if `bits` is outside `[2, 31]`.
+///
+/// # Example
+/// ```
+/// use ruvector_dither::{GoldenRatioDither, quantize_slice_dithered};
+/// let mut vals = vec![0.1_f32, 0.5, -0.3, 0.9, -0.8];
+/// let mut d = GoldenRatioDither::new(0.0);
+/// quantize_slice_dithered(&mut vals, 5, 0.5, &mut d);
+/// for &v in &vals {
+///     assert!(v >= -1.0 && v <= 1.0);
+/// }
+/// ```
+pub fn quantize_slice_dithered(
+    xs: &mut [f32],
+    bits: u32,
+    eps: f32,
+    source: &mut impl DitherSource,
+) {
+    assert!(bits >= 2 && bits <= 31, "bits must be in [2, 31]");
+    let qmax = ((1u32 << (bits - 1)) - 1) as f32; // largest code magnitude: 2^(bits-1) - 1
+    let lsb = 1.0 / qmax; // one quantization step on the [-1, 1] scale
+    for x in xs.iter_mut() {
+        let dither = source.next(eps * lsb);
+        let shifted = (*x + dither) * qmax; // dithered input expressed in code units
+        *x = shifted.round().clamp(-qmax, qmax) / qmax; // same round / clamp / rescale as quantize_dithered
+    }
+}
+
+/// Quantize to a raw signed integer code in `[-(2^(bits-1) - 1), 2^(bits-1) - 1]`.
+///
+/// Returns the code itself rather than a re-scaled float; panics if `bits` is outside `[2, 31]`.
+#[inline]
+pub fn quantize_to_code(x: f32, bits: u32, eps: f32, source: &mut impl DitherSource) -> i32 {
+    assert!(bits >= 2 && bits <= 31, "bits must be in [2, 31]");
+    let qmax = ((1u32 << (bits - 1)) - 1) as f32; // largest code magnitude: 2^(bits-1) - 1
+    let lsb = 1.0 / qmax; // one quantization step on the [-1, 1] scale
+    let dither = source.next(eps * lsb);
+    ((x + dither) * qmax).round().clamp(-qmax, qmax) as i32 // clamp is symmetric, so -2^(bits-1) is never produced
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::{GoldenRatioDither, PiDither};
+
+    #[test]
+    fn output_in_unit_range() {
+        let mut d = GoldenRatioDither::new(0.0); // one source shared across all bit-widths and inputs
+        for bits in [3u32, 5, 7, 8] {
+            for &x in &[-1.0_f32, -0.5, 0.0, 0.5, 1.0] {
+                let q = quantize_dithered(x, bits, 0.5, &mut d);
+                assert!(q >= -1.0 && q <= 1.0, "bits={bits}, x={x}, q={q}");
+            }
+        }
+    }
+
+    #[test]
+    fn dither_reduces_idle_tones() {
+        // A constant signal at exactly 0.5 * LSB without dither quantizes
+        // to the same code every time (idle tone).  With dither the code
+        // should vary, so more than one distinct code must be observed.
+        let bits = 5u32;
+        let qmax = ((1u32 << (bits - 1)) - 1) as f32;
+        let lsb = 1.0 / qmax;
+        let x = 0.5 * lsb; // exactly half an LSB
+
+        let mut codes_with: Vec<i32> = Vec::with_capacity(256);
+        let mut d = GoldenRatioDither::new(0.0);
+        for _ in 0..256 {
+            codes_with.push(quantize_to_code(x, bits, 0.5, &mut d));
+        }
+        let unique: std::collections::HashSet<i32> = codes_with.iter().copied().collect();
+        assert!(
+            unique.len() > 1,
+            "dithered signal must produce >1 unique code"
+        );
+    }
+
+    #[test]
+    fn slice_quantize_in_bounds() {
+        let mut vals: Vec<f32> = (-50..=50).map(|i| i as f32 * 0.02).collect(); // 101 values from -1.0 to 1.0
+        let mut pi = PiDither::new(0);
+        quantize_slice_dithered(&mut vals, 7, 0.5, &mut pi);
+        for v in vals {
+            assert!(v >= -1.0 && v <= 1.0, "out of range: {v}");
+        }
+    }
+
+    #[test]
+    fn deterministic_with_same_seed() {
+        let input = vec![0.1_f32, 0.4, -0.7, 0.9];
+        let quantize = |input: &[f32]| {
+            let mut buf = input.to_vec();
+            let mut d = GoldenRatioDither::new(0.5); // fresh source per call, same seed each time
+            quantize_slice_dithered(&mut buf, 8, 0.5, &mut d);
+            buf
+        };
+        assert_eq!(quantize(&input), quantize(&input));
+    }
+}