Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,92 @@
//! Per-channel and per-layer dither management.
//!
//! `ChannelDither` bundles one `GoldenRatioDither` state per channel,
//! seeded from `(layer_id, channel_id)` pairs so every channel is
//! structurally decorrelated without any RNG.
use crate::{DitherSource, GoldenRatioDither};
/// Pool of per-channel dither generators for a single layer.
///
/// Holds one `GoldenRatioDither` per channel, each built from its own
/// `(layer_id, channel_id)` pair and advanced only when an element of that
/// channel is quantized.
pub struct ChannelDither {
    /// One generator per channel, indexed by channel id.
    channels: Vec<GoldenRatioDither>,
    /// Quantizer bit-width; validated on every `quantize_batch` call.
    bits: u32,
    /// Dither amplitude in LSB units.
    eps: f32,
}

impl ChannelDither {
    /// Create a pool of `n_channels` generators for layer `layer_id`.
    ///
    /// `bits` and `eps` are stored as given; they are validated / applied by
    /// [`ChannelDither::quantize_batch`], not here.
    pub fn new(layer_id: u32, n_channels: usize, bits: u32, eps: f32) -> Self {
        let mut channels = Vec::with_capacity(n_channels);
        for channel_id in 0..n_channels {
            // The channel index is handed to `from_ids` as a `u32`.
            channels.push(GoldenRatioDither::from_ids(layer_id, channel_id as u32));
        }
        Self {
            channels,
            bits,
            eps,
        }
    }

    /// Quantize `activations` in place, one dither generator per column.
    ///
    /// `activations` is read as a flat row-major `[batch, channels]` tensor:
    /// element `i` is dithered by channel `i % n_channels`. If the length is
    /// not a multiple of `n_channels`, the trailing partial row simply uses
    /// the first `len % n_channels` channels.
    ///
    /// # Panics
    /// Panics if the pool has no channels or if `bits` is outside `[2, 31]`.
    pub fn quantize_batch(&mut self, activations: &mut [f32]) {
        assert!(
            !self.channels.is_empty(),
            "ChannelDither must have >= 1 channel"
        );
        assert!((2..=31).contains(&self.bits), "bits must be in [2, 31]");

        let row_width = self.channels.len();
        // Largest positive code of a symmetric signed quantizer.
        let full_scale = ((1u32 << (self.bits - 1)) - 1) as f32;
        let step = 1.0 / full_scale;
        let amplitude = self.eps * step;

        for row in activations.chunks_mut(row_width) {
            // `zip` stops at the shorter side, so a short final row only
            // advances the channels it actually covers.
            for (value, dither) in row.iter_mut().zip(self.channels.iter_mut()) {
                let offset = dither.next(amplitude);
                let scaled = (*value + offset) * full_scale;
                *value = scaled.round().clamp(-full_scale, full_scale) / full_scale;
            }
        }
    }

    /// Number of channels in this pool.
    pub fn n_channels(&self) -> usize {
        self.channels.len()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The pool reports exactly the number of channels it was built with.
    #[test]
    fn channel_dither_correct_count() {
        let pool = ChannelDither::new(0, 16, 8, 0.5);
        assert_eq!(pool.n_channels(), 16);
    }

    /// Inputs spanning `[-1, 1]` must stay inside `[-1, 1]` after quantizing.
    #[test]
    fn channel_dither_in_bounds() {
        let mut pool = ChannelDither::new(1, 8, 5, 0.5);
        let mut acts: Vec<f32> = (0..64).map(|i| (i as f32 / 63.0) * 2.0 - 1.0).collect();
        pool.quantize_batch(&mut acts);
        for v in acts {
            assert!((-1.0..=1.0).contains(&v), "out of bounds: {v}");
        }
    }

    /// Two layers quantizing the same constant input must not agree everywhere.
    #[test]
    fn different_layers_produce_different_outputs() {
        let quantize_for_layer = |layer_id: u32| -> Vec<f32> {
            let mut buf: Vec<f32> = vec![0.5; 16];
            ChannelDither::new(layer_id, 8, 8, 0.5).quantize_batch(&mut buf);
            buf
        };
        let buf0 = quantize_for_layer(0);
        let buf1 = quantize_for_layer(99);
        assert_ne!(
            buf0, buf1,
            "different layer_ids must yield different dithered outputs"
        );
    }
}

View File

@@ -0,0 +1,100 @@
//! Golden-ratio quasi-random dither sequence.
//!
//! State update: `state = frac(state + φ)` where φ = (√5 − 1)/2 ≈ 0.618…
//!
//! This is the 1-D additive-recurrence (Kronecker) sequence with step φ —
//! the golden ratio gives the best equidistribution among sequences of this form.
use crate::DitherSource;
/// Additive golden-ratio dither with zero-mean output in `[-0.5, 0.5]`.
///
/// The state advances by the irrational step φ modulo 1, so in exact
/// arithmetic the sequence never repeats (the `f32` state only approximates
/// this). Two instances started from different seeds keep a constant phase
/// offset from each other because both advance by the same step.
#[derive(Clone, Debug)]
pub struct GoldenRatioDither {
    /// Current phase of the sequence; the constructors initialise it in `[0, 1)`.
    state: f32,
}

/// φ = (√5 − 1) / 2
const PHI: f32 = 0.618_033_98_f32;

impl GoldenRatioDither {
    /// Create a new sequence seeded at `initial_state` ∈ [0, 1).
    ///
    /// Finite values outside that range are folded into it via
    /// `frac(|initial_state|)`. Non-finite seeds (NaN, ±∞) fall back to `0.0`;
    /// otherwise the state, and therefore every later sample, would be NaN.
    ///
    /// For per-layer / per-channel decorrelation, seed with
    /// `frac(layer_id × φ + channel_id × φ²)` (see [`GoldenRatioDither::from_ids`]).
    #[inline]
    pub fn new(initial_state: f32) -> Self {
        let folded = initial_state.abs().fract();
        Self {
            // `fract()` of NaN or ∞ is NaN; never let that poison the sequence.
            state: if folded.is_finite() { folded } else { 0.0 },
        }
    }

    /// Construct from a `(layer_id, channel_id)` pair for structural decorrelation.
    ///
    /// The seed is `frac(layer_id × φ + channel_id × φ²)`, evaluated in `f32`.
    ///
    /// NOTE(review): an `f32` has a 24-bit significand, so for ids in the
    /// millions the products no longer carry fractional bits and distinct ids
    /// can collapse onto the same seed — confirm callers keep ids small.
    #[inline]
    pub fn from_ids(layer_id: u32, channel_id: u32) -> Self {
        let s = ((layer_id as f32) * PHI + (channel_id as f32) * PHI * PHI).fract();
        Self { state: s }
    }

    /// Current state (useful for serialisation / checkpointing).
    #[inline]
    pub fn state(&self) -> f32 {
        self.state
    }
}
impl DitherSource for GoldenRatioDither {
    /// Step the phase by φ (mod 1) and return it re-centred around zero,
    /// i.e. a value in `[-0.5, 0.5]`.
    #[inline]
    fn next_unit(&mut self) -> f32 {
        let advanced = (self.state + PHI).fract();
        self.state = advanced;
        advanced - 0.5
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::DitherSource;

    /// Every sample must lie inside the documented `[-0.5, 0.5]` interval.
    #[test]
    fn output_is_in_range() {
        let mut dither = GoldenRatioDither::new(0.0);
        for _ in 0..10_000 {
            let v = dither.next_unit();
            assert!((-0.5..=0.5).contains(&v), "out of range: {v}");
        }
    }

    /// Averaged over many samples the sequence should be close to zero-mean.
    #[test]
    fn mean_is_near_zero() {
        let n = 100_000;
        let mut dither = GoldenRatioDither::new(0.0);
        let total = (0..n).map(|_| dither.next_unit()).sum::<f32>();
        let mean: f32 = total / n as f32;
        assert!(mean.abs() < 0.01, "mean too large: {mean}");
    }

    /// Generators built from different id pairs must not emit the same first value.
    #[test]
    fn from_ids_decorrelates() {
        let v0 = GoldenRatioDither::from_ids(0, 0).next_unit();
        let v1 = GoldenRatioDither::from_ids(1, 7).next_unit();
        assert!(
            (v0 - v1).abs() > 1e-4,
            "distinct seeds should produce distinct first values"
        );
    }

    /// Two generators with the same seed must produce identical sequences.
    #[test]
    fn deterministic_across_calls() {
        let mut first = GoldenRatioDither::new(0.123);
        let mut second = GoldenRatioDither::new(0.123);
        for _ in 0..1000 {
            let (a, b) = (first.next_unit(), second.next_unit());
            assert_eq!(a, b);
        }
    }
}

View File

@@ -0,0 +1,63 @@
//! # ruvector-dither
//!
//! Deterministic, low-discrepancy **pre-quantization dithering** for low-bit
//! inference on tiny devices (WASM, Seed, STM32).
//!
//! ## Why dither?
//!
//! Quantizers at 3 / 5 / 7 bits can align with power-of-two boundaries and
//! produce idle tones / limit cycles — sticky activations and periodic errors
//! that degrade accuracy. A sub-LSB pre-quantization offset:
//!
//! - Decorrelates the signal from grid boundaries.
//! - Pushes quantization error toward high frequencies (blue-noise-like),
//! which average out downstream.
//! - Uses **no RNG** — outputs are deterministic, reproducible across
//! platforms (WASM / x86 / ARM), and cache-friendly.
//!
//! ## Sequences
//!
//! | Type | State update | Properties |
//! |------|-------------|------------|
//! | [`GoldenRatioDither`] | frac(state + φ) | Best 1-D equidistribution |
//! | [`PiDither`] | table of π bytes | Reproducible, period = 256 |
//!
//! ## Quick start
//!
//! ```
//! use ruvector_dither::{GoldenRatioDither, PiDither, quantize_dithered};
//!
//! // Quantize with golden-ratio dither, 8-bit, ε = 0.5 LSB
//! let mut gr = GoldenRatioDither::new(0.0);
//! let q = quantize_dithered(0.314, 8, 0.5, &mut gr);
//! assert!(q >= -1.0 && q <= 1.0);
//!
//! // Quantize with π-digit dither
//! let mut pi = PiDither::new(0);
//! let q2 = quantize_dithered(0.271, 5, 0.5, &mut pi);
//! assert!(q2 >= -1.0 && q2 <= 1.0);
//! ```
#![cfg_attr(feature = "no_std", no_std)]
pub mod channel;
pub mod golden;
pub mod pi;
pub mod quantize;
pub use channel::ChannelDither;
pub use golden::GoldenRatioDither;
pub use pi::PiDither;
pub use quantize::{quantize_dithered, quantize_slice_dithered};
/// A deterministic source of dither offsets.
pub trait DitherSource {
    /// Step the sequence once and return the next offset; implementors are
    /// expected to keep it zero-mean and within `[-0.5, +0.5]`.
    fn next_unit(&mut self) -> f32;

    /// Step the sequence once and return the offset scaled by `eps_lsb`,
    /// the desired amplitude (ε × LSB).
    #[inline]
    fn next(&mut self, eps_lsb: f32) -> f32 {
        let unit = self.next_unit();
        unit * eps_lsb
    }
}

View File

@@ -0,0 +1,110 @@
//! π-digit dither: cyclic table of the first 256 bytes of π's hexadecimal expansion, scaled to [-0.5, 0.5].
//!
//! Period = 256. Each entry is an independent offset making the sequence
//! suitable for small buffers where you want exact reproducibility from a
//! named tensor / layer rather than a stateful RNG.
use crate::DitherSource;
/// First 256 bytes of π (hex digits 3.243F6A8885A308D3…).
///
/// Two hexadecimal digits are packed per byte, starting with the integer
/// digit `3` (so the table opens `0x32, 0x43, 0xF6, …`).
///
/// Each byte spans [0, 255]; we map to [-0.5, 0.5] by `(b as f32 / 255.0) - 0.5`.
// `rustfmt::skip` keeps the 12-bytes-per-row layout of the table.
#[rustfmt::skip]
const PI_BYTES: [u8; 256] = [
0x32, 0x43, 0xF6, 0xA8, 0x88, 0x5A, 0x30, 0x8D, 0x31, 0x31, 0x98, 0xA2,
0xE0, 0x37, 0x07, 0x34, 0x4A, 0x40, 0x93, 0x82, 0x22, 0x99, 0xF3, 0x1D,
0x00, 0x82, 0xEF, 0xA9, 0x8E, 0xC4, 0xE6, 0xC8, 0x94, 0x52, 0x21, 0xE6,
0x38, 0xD0, 0x13, 0x77, 0xBE, 0x54, 0x66, 0xCF, 0x34, 0xE9, 0x0C, 0x6C,
0xC0, 0xAC, 0x29, 0xB7, 0xC9, 0x7C, 0x50, 0xDD, 0x3F, 0x84, 0xD5, 0xB5,
0xB5, 0x47, 0x09, 0x17, 0x92, 0x16, 0xD5, 0xD9, 0x89, 0x79, 0xFB, 0x1B,
0xD1, 0x31, 0x0B, 0xA6, 0x98, 0xDF, 0xB5, 0xAC, 0x2F, 0xFD, 0x72, 0xDB,
0xD0, 0x1A, 0xDF, 0xB7, 0xB8, 0xE1, 0xAF, 0xED, 0x6A, 0x26, 0x7E, 0x96,
0xBA, 0x7C, 0x90, 0x45, 0xF1, 0x2C, 0x7F, 0x99, 0x24, 0xA1, 0x99, 0x47,
0xB3, 0x91, 0x6C, 0xF7, 0x08, 0x01, 0xF2, 0xE2, 0x85, 0x8E, 0xFC, 0x16,
0x63, 0x69, 0x20, 0xD8, 0x71, 0x57, 0x4E, 0x69, 0xA4, 0x58, 0xFE, 0xA3,
0xF4, 0x93, 0x3D, 0x7E, 0x0D, 0x95, 0x74, 0x8F, 0x72, 0x8E, 0xB6, 0x58,
0x71, 0x8B, 0xCD, 0x58, 0x82, 0x15, 0x4A, 0xEE, 0x7B, 0x54, 0xA4, 0x1D,
0xC2, 0x5A, 0x59, 0xB5, 0x9C, 0x30, 0xD5, 0x39, 0x2A, 0xF2, 0x60, 0x13,
0xC5, 0xD1, 0xB0, 0x23, 0x28, 0x60, 0x85, 0xF0, 0xCA, 0x41, 0x79, 0x18,
0xB8, 0xDB, 0x38, 0xEF, 0x8E, 0x79, 0xDC, 0xB0, 0x60, 0x3A, 0x18, 0x0E,
0x6C, 0x9E, 0xD0, 0xE8, 0x9D, 0x44, 0x8F, 0x39, 0xF9, 0x93, 0xDB, 0x07,
0x3A, 0xA3, 0x45, 0x22, 0x7E, 0xD8, 0xAC, 0x87, 0x2F, 0x85, 0x5D, 0x28,
0x55, 0xB0, 0x89, 0x73, 0x36, 0xF3, 0xEB, 0xCD, 0xF6, 0x00, 0x4A, 0xDB,
0x36, 0x47, 0xDB, 0xF7, 0x82, 0x48, 0xDB, 0xF3, 0xD3, 0x7C, 0x45, 0x10,
0xC6, 0x7A, 0x70, 0xAA, 0x56, 0x78, 0x5A, 0xC6, 0x37, 0x10, 0xA2, 0x44,
0x32, 0x34, 0xFE, 0x08,
];
/// Cyclic π-digit dither with period 256.
///
/// The position is stored as a `u8`, so advancing it with wrapping addition
/// walks the 256-entry table in an endless loop.
#[derive(Clone, Debug)]
pub struct PiDither {
    /// Index of the next table entry to emit.
    idx: u8,
}

impl PiDither {
    /// Create a new instance starting at table position `offset` (0–255).
    #[inline]
    pub fn new(offset: u8) -> Self {
        PiDither { idx: offset }
    }

    /// Construct from a tensor/layer identifier for structural reproducibility.
    ///
    /// The id is scrambled and reduced to one of the 256 starting offsets, so
    /// the same id always yields the same sequence. With only 256 offsets,
    /// distinct ids can share a starting position.
    #[inline]
    pub fn from_tensor_id(tensor_id: u32) -> Self {
        // Multiply by a fixed odd constant, then fold in the upper half of the id.
        let scrambled = tensor_id.wrapping_mul(0x9E37_79B9);
        let folded = scrambled.wrapping_add(tensor_id >> 16);
        // Truncating to the low byte is intentional: it picks the table offset.
        Self::new(folded as u8)
    }
}
impl DitherSource for PiDither {
    /// Emit the table entry at the current position, mapped from `0..=255`
    /// to `[-0.5, 0.5]`, and move one position forward (wrapping after 255).
    #[inline]
    fn next_unit(&mut self) -> f32 {
        let byte = PI_BYTES[usize::from(self.idx)];
        self.idx = self.idx.wrapping_add(1);
        f32::from(byte) / 255.0 - 0.5
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::DitherSource;

    /// Four full passes over the table must stay inside `[-0.5, 0.5]`.
    #[test]
    fn output_is_in_range() {
        let mut dither = PiDither::new(0);
        for _ in 0..256 * 4 {
            let v = dither.next_unit();
            assert!((-0.5..=0.5).contains(&v), "out of range: {v}");
        }
    }

    /// The second block of 256 samples must repeat the first exactly.
    #[test]
    fn period_is_256() {
        let mut dither = PiDither::new(0);
        let mut one_period = || -> Vec<f32> { (0..256).map(|_| dither.next_unit()).collect() };
        let first = one_period();
        let second = one_period();
        assert_eq!(first, second);
    }

    /// The mean over one full period should be close to zero.
    #[test]
    fn mean_is_near_zero() {
        let mut dither = PiDither::new(0);
        let mean = (0..256).map(|_| dither.next_unit()).sum::<f32>() / 256.0;
        assert!(mean.abs() < 0.05, "π-digit mean too large: {mean}");
    }

    /// Tensor ids 0 and 1 must map to different starting offsets.
    #[test]
    fn from_tensor_id_gives_distinct_offsets() {
        let offset0 = PiDither::from_tensor_id(0).idx;
        let offset1 = PiDither::from_tensor_id(1).idx;
        assert_ne!(offset0, offset1);
    }
}

View File

@@ -0,0 +1,134 @@
//! Drop-in quantization helpers that apply dither before rounding.
use crate::DitherSource;
/// Quantize one value after adding a deterministic dither offset.
///
/// The value is shifted by one sample from `source`, scaled to the signed
/// grid of a `bits`-wide quantizer, rounded, clamped to the grid limits and
/// scaled back.
///
/// # Arguments
/// - `x` – input activation in `[-1.0, 1.0]`
/// - `bits` – quantizer bit-width (e.g. 3, 5, 7, 8)
/// - `eps` – dither amplitude in LSB units (0.0 = no dither, 0.5 = half-LSB recommended)
/// - `source` – stateful dither sequence; advanced by exactly one sample
///
/// Returns the quantized value in `[-1.0, 1.0]`.
///
/// # Panics
/// Panics if `bits` is outside `[2, 31]`.
///
/// # Example
/// ```
/// use ruvector_dither::{GoldenRatioDither, quantize_dithered};
/// let mut d = GoldenRatioDither::new(0.0);
/// let q = quantize_dithered(0.314, 8, 0.5, &mut d);
/// assert!(q >= -1.0 && q <= 1.0);
/// ```
#[inline]
pub fn quantize_dithered(x: f32, bits: u32, eps: f32, source: &mut impl DitherSource) -> f32 {
    assert!((2..=31).contains(&bits), "bits must be in [2, 31]");
    // Largest positive code of a symmetric signed quantizer.
    let full_scale = ((1u32 << (bits - 1)) - 1) as f32;
    let step = 1.0 / full_scale;
    let offset = source.next(eps * step);
    let code = ((x + offset) * full_scale)
        .round()
        .clamp(-full_scale, full_scale);
    code / full_scale
}
/// Quantize every element of a slice in place with deterministic dither.
///
/// Elements are processed front to back and each one consumes its own sample
/// from `source`, so the result depends on element order.
///
/// # Panics
/// Panics if `bits` is outside `[2, 31]`, even when `xs` is empty.
///
/// # Example
/// ```
/// use ruvector_dither::{GoldenRatioDither, quantize_slice_dithered};
/// let mut vals = vec![0.1_f32, 0.5, -0.3, 0.9, -0.8];
/// let mut d = GoldenRatioDither::new(0.0);
/// quantize_slice_dithered(&mut vals, 5, 0.5, &mut d);
/// for &v in &vals {
///     assert!(v >= -1.0 && v <= 1.0);
/// }
/// ```
pub fn quantize_slice_dithered(
    xs: &mut [f32],
    bits: u32,
    eps: f32,
    source: &mut impl DitherSource,
) {
    assert!((2..=31).contains(&bits), "bits must be in [2, 31]");
    // Largest positive code of a symmetric signed quantizer.
    let full_scale = ((1u32 << (bits - 1)) - 1) as f32;
    let step = 1.0 / full_scale;
    let amplitude = eps * step;
    xs.iter_mut().for_each(|value| {
        let offset = source.next(amplitude);
        let scaled = (*value + offset) * full_scale;
        *value = scaled.round().clamp(-full_scale, full_scale) / full_scale;
    });
}
/// Quantize to a raw signed integer code.
///
/// The code is clamped symmetrically, so it lies in
/// `[-(2^(bits-1) - 1), 2^(bits-1) - 1]`; the most negative two's-complement
/// value is never produced.
///
/// Useful when you need the integer representation rather than a re-scaled float.
///
/// # Panics
/// Panics if `bits` is outside `[2, 31]`.
#[inline]
pub fn quantize_to_code(x: f32, bits: u32, eps: f32, source: &mut impl DitherSource) -> i32 {
    assert!((2..=31).contains(&bits), "bits must be in [2, 31]");
    // Largest positive code of a symmetric signed quantizer.
    let full_scale = ((1u32 << (bits - 1)) - 1) as f32;
    let step = 1.0 / full_scale;
    let offset = source.next(eps * step);
    let code = ((x + offset) * full_scale)
        .round()
        .clamp(-full_scale, full_scale);
    code as i32
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{GoldenRatioDither, PiDither};

    /// For several bit-widths, inputs in `[-1, 1]` must quantize into `[-1, 1]`.
    #[test]
    fn output_in_unit_range() {
        let inputs = [-1.0_f32, -0.5, 0.0, 0.5, 1.0];
        let mut d = GoldenRatioDither::new(0.0);
        for bits in [3u32, 5, 7, 8] {
            for &x in &inputs {
                let q = quantize_dithered(x, bits, 0.5, &mut d);
                assert!((-1.0..=1.0).contains(&q), "bits={bits}, x={x}, q={q}");
            }
        }
    }

    /// A constant input sitting exactly half an LSB above zero would map to
    /// a single code without dither; with dither more than one code must
    /// appear.
    #[test]
    fn dither_reduces_idle_tones() {
        let bits = 5u32;
        let qmax = ((1u32 << (bits - 1)) - 1) as f32;
        let lsb = 1.0 / qmax;
        let x = 0.5 * lsb; // exactly half an LSB
        let mut d = GoldenRatioDither::new(0.0);
        let unique: std::collections::HashSet<i32> = (0..256)
            .map(|_| quantize_to_code(x, bits, 0.5, &mut d))
            .collect();
        assert!(
            unique.len() > 1,
            "dithered signal must produce >1 unique code"
        );
    }

    /// Slice quantization with the π source must keep values inside `[-1, 1]`.
    #[test]
    fn slice_quantize_in_bounds() {
        let mut vals: Vec<f32> = (-50..=50).map(|i| i as f32 * 0.02).collect();
        let mut pi = PiDither::new(0);
        quantize_slice_dithered(&mut vals, 7, 0.5, &mut pi);
        for v in vals {
            assert!((-1.0..=1.0).contains(&v), "out of range: {v}");
        }
    }

    /// The same input and seed must always give the same quantized output.
    #[test]
    fn deterministic_with_same_seed() {
        fn run(input: &[f32]) -> Vec<f32> {
            let mut buf = input.to_vec();
            let mut d = GoldenRatioDither::new(0.5);
            quantize_slice_dithered(&mut buf, 8, 0.5, &mut d);
            buf
        }
        let input = vec![0.1_f32, 0.4, -0.7, 0.9];
        assert_eq!(run(&input), run(&input));
    }
}