Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions
--- a/crates/ruvector-dither/Cargo.toml
+++ b/crates/ruvector-dither/Cargo.toml
@@ -0,0 +1,28 @@
+[package]
+name = "ruvector-dither"
+version = "0.1.0"
+edition = "2021"
+license = "MIT OR Apache-2.0"
+authors = ["rUv <ruv@ruv.io>"]
+repository = "https://github.com/ruvnet/ruvector"
+homepage = "https://ruv.io"
+documentation = "https://docs.rs/ruvector-dither"
+description = "Deterministic low-discrepancy dithering for low-bit quantization: golden-ratio and π-digit sequences for blue-noise error shaping"
+keywords = ["quantization", "dither", "golden-ratio", "inference", "wasm"]
+categories = ["science", "algorithms", "no-std"]
+readme = "README.md"
+
+[features]
+default = []
+# Enable no_std mode (requires an allocator)
+no_std = []
+
+[dependencies]
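+# No runtime deps. NOTE(review): the `no_std` feature looks unfinished — the crate uses `Vec` without `extern crate alloc` and `f32::fract` / `f32::round`, which `core` does not provide; confirm a `--features no_std` build before advertising no_std support.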
+
+[dev-dependencies]
+criterion = { version = "0.5", features = ["html_reports"] }
+
+[[bench]]
+name = "dither_bench"
+harness = false
--- a/crates/ruvector-dither/README.md
+++ b/crates/ruvector-dither/README.md
@@ -0,0 +1,75 @@
+# ruvector-dither
+
+Deterministic, low-discrepancy **pre-quantization dithering** for low-bit
+neural network inference on tiny devices (WASM, Seed, STM32).
+
+## Why dither?
+
+Quantizers at 3/5/7 bits can align with power-of-two boundaries, producing
+idle tones, sticky activations, and periodic errors that degrade accuracy.
+A sub-LSB pre-quantization offset:
+
+- Decorrelates the signal from grid boundaries.
+- Pushes quantization error toward high frequencies (blue-noise-like),
+  which average out downstream.
+- Uses **no RNG** -- outputs are deterministic, reproducible across
+  platforms (WASM / x86 / ARM), and cache-friendly.
+
+## Features
+
+- **Golden-ratio sequence** -- additive recurrence with an irrational step (φ): very even 1-D coverage, and the ideal sequence never repeats.
+- **Pi-digit table** -- 256-byte cyclic lookup, exact reproducibility from a tensor/layer ID.
+- **Per-channel dither pools** -- structurally decorrelated channels without any randomness.
+- **Scalar, slice, and integer-code quantization** helpers included.
+- **`no_std`-compatible** -- zero runtime dependencies; enable with `features = ["no_std"]`.
+
+## Quick start
+
+```rust
+use ruvector_dither::{GoldenRatioDither, PiDither, quantize_dithered};
+
+// Golden-ratio dither, 8-bit, epsilon = 0.5 LSB
+let mut gr = GoldenRatioDither::new(0.0);
+let q = quantize_dithered(0.314, 8, 0.5, &mut gr);
+assert!(q >= -1.0 && q <= 1.0);
+
+// Pi-digit dither, 5-bit
+let mut pi = PiDither::new(0);
+let q2 = quantize_dithered(0.271, 5, 0.5, &mut pi);
+assert!(q2 >= -1.0 && q2 <= 1.0);
+```
+
+### Per-channel batch quantization
+
+```rust
+use ruvector_dither::ChannelDither;
+
+let mut cd = ChannelDither::new(/*layer_id=*/ 0, /*channels=*/ 8, /*bits=*/ 5, /*eps=*/ 0.5);
+let mut activations = vec![0.5_f32; 64]; // shape [batch=8, channels=8]
+cd.quantize_batch(&mut activations);
+```
+
+## Modules
+
+| Module | Description |
+|--------|-------------|
+| `golden` | `GoldenRatioDither` -- additive golden-ratio quasi-random sequence |
+| `pi` | `PiDither` -- cyclic 256-byte table derived from digits of pi |
+| `quantize` | `quantize_dithered`, `quantize_slice_dithered`, `quantize_to_code` |
+| `channel` | `ChannelDither` -- per-channel dither pool seeded from layer/channel IDs |
+
+## Trait: `DitherSource`
+
+Implement `DitherSource` to plug in your own deterministic sequence:
+
+```rust
+pub trait DitherSource {
+    /// Return the next zero-mean offset in [-0.5, +0.5].
+    fn next_unit(&mut self) -> f32;
+}
+```
+
+## License
+
+Licensed under either of [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
+or [MIT License](http://opensource.org/licenses/MIT) at your option.
--- a/crates/ruvector-dither/benches/dither_bench.rs
+++ b/crates/ruvector-dither/benches/dither_bench.rs
@@ -0,0 +1,60 @@
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use ruvector_dither::{
+    channel::ChannelDither, quantize_dithered, quantize_slice_dithered, GoldenRatioDither, PiDither,
+};
+
+/// Benchmarks a single `quantize_dithered` call at 5, 7 and 8 bits.
+///
+/// The golden-ratio source is created once per bit-width and keeps advancing
+/// across iterations, so each iteration quantizes the same input with the
+/// next dither sample.
+fn bench_single_quantize(c: &mut Criterion) {
+    const BIT_WIDTHS: [u32; 3] = [5, 7, 8];
+    let mut single = c.benchmark_group("quantize_dithered_single");
+    for width in BIT_WIDTHS {
+        let id = BenchmarkId::from_parameter(width);
+        single.bench_with_input(id, &width, |bencher, &width| {
+            let mut source = GoldenRatioDither::new(0.0);
+            bencher.iter(|| {
+                let sample = black_box(0.314_f32);
+                quantize_dithered(sample, width, 0.5, &mut source)
+            });
+        });
+    }
+    single.finish();
+}
+
+/// Benchmarks 8-bit `quantize_slice_dithered` over ramps of 64, 256 and
+/// 1024 samples spanning `[-1, 1)`.
+///
+/// Every iteration clones the ramp and starts a fresh dither sequence, so
+/// the measured time includes the buffer copy.
+fn bench_slice_quantize(c: &mut Criterion) {
+    let mut slices = c.benchmark_group("quantize_slice");
+    for len in [64usize, 256, 1024] {
+        slices.bench_with_input(BenchmarkId::from_parameter(len), &len, |bencher, &len| {
+            let mut ramp: Vec<f32> = Vec::with_capacity(len);
+            for i in 0..len {
+                ramp.push((i as f32 / len as f32) * 2.0 - 1.0);
+            }
+            bencher.iter(|| {
+                let mut scratch = ramp.clone();
+                let mut source = GoldenRatioDither::new(0.0);
+                quantize_slice_dithered(black_box(&mut scratch), 8, 0.5, &mut source);
+                black_box(scratch)
+            });
+        });
+    }
+    slices.finish();
+}
+
+/// Benchmarks 7-bit quantization of a 1024-element buffer with `PiDither`.
+///
+/// The buffer and the dither source are shared by all iterations, so after
+/// the first pass the input is already quantized.
+fn bench_pi_dither(c: &mut Criterion) {
+    c.bench_function("pi_dither_1k", |bencher| {
+        let mut source = PiDither::new(0);
+        let mut samples = vec![0.5_f32; 1024];
+        bencher.iter(|| quantize_slice_dithered(black_box(&mut samples), 7, 0.5, &mut source));
+    });
+}
+
+/// Benchmarks `ChannelDither::quantize_batch` on 256 activations spread
+/// over 32 channels at 8 bits.
+///
+/// The pool and the activation buffer are reused across iterations.
+fn bench_channel_dither(c: &mut Criterion) {
+    c.bench_function("channel_dither_256activations_32ch", |bencher| {
+        let mut pool = ChannelDither::new(0, 32, 8, 0.5);
+        let mut activations = vec![0.314_f32; 256];
+        bencher.iter(|| pool.quantize_batch(black_box(&mut activations)));
+    });
+}
+
+criterion_group!(
+    benches,
+    bench_single_quantize,
+    bench_slice_quantize,
+    bench_pi_dither,
+    bench_channel_dither
+);
+criterion_main!(benches);
--- a/crates/ruvector-dither/src/channel.rs
+++ b/crates/ruvector-dither/src/channel.rs
@@ -0,0 +1,92 @@
+//! Per-channel and per-layer dither management.
+//!
+//! `ChannelDither` bundles one `GoldenRatioDither` state per channel,
+//! seeded from `(layer_id, channel_id)` pairs so every channel is
+//! structurally decorrelated without any RNG.
+
+use crate::{DitherSource, GoldenRatioDither};
+
+/// Per-channel dither pool seeded from `(layer_id, channel_id)` pairs.
+///
+/// Owns one `GoldenRatioDither` per channel plus the quantizer settings
+/// shared by all of them.  A channel's sequence advances only when an
+/// element belonging to that channel is quantized.
+#[derive(Clone, Debug)]
+pub struct ChannelDither {
+    /// One dither state per channel, indexed by channel number.
+    channels: Vec<GoldenRatioDither>,
+    /// Quantizer bit-width; validated when a batch is quantized.
+    bits: u32,
+    /// Dither amplitude in LSB units.
+    eps: f32,
+}
+
+impl ChannelDither {
+    /// Build a pool of `n_channels` dithers for `layer_id` / `bits` / `eps`.
+    ///
+    /// Channel `ch` is seeded with `GoldenRatioDither::from_ids(layer_id, ch)`.
+    /// Nothing is validated here: an empty pool or an unsupported `bits`
+    /// value is only rejected by [`ChannelDither::quantize_batch`].
+    pub fn new(layer_id: u32, n_channels: usize, bits: u32, eps: f32) -> Self {
+        let channels = (0..n_channels)
+            .map(|ch| GoldenRatioDither::from_ids(layer_id, ch as u32))
+            .collect();
+        Self {
+            channels,
+            bits,
+            eps,
+        }
+    }
+
+    /// Quantize `activations` in-place.  Each column (channel dimension) uses
+    /// its own independent dither state.
+    ///
+    /// `activations` is a flat row-major tensor of shape `[batch, channels]`:
+    /// element `i` is quantized with the dither of channel `i % n_channels`.
+    /// If the slice length is not a multiple of `n_channels`, the trailing
+    /// partial row is simply assigned to channels `0, 1, 2, …` in order.
+    ///
+    /// Results are clamped to the symmetric grid `[-1.0, 1.0]`.
+    ///
+    /// # Panics
+    /// Panics if the pool has no channels or if `bits` is outside `[2, 31]`.
+    pub fn quantize_batch(&mut self, activations: &mut [f32]) {
+        assert!(
+            !self.channels.is_empty(),
+            "ChannelDither must have >= 1 channel"
+        );
+        assert!((2..=31).contains(&self.bits), "bits must be in [2, 31]");
+        let nc = self.channels.len();
+        // Largest positive code of a signed `bits`-wide quantizer.
+        let qmax = ((1u32 << (self.bits - 1)) - 1) as f32;
+        let lsb = 1.0 / qmax;
+        for (i, x) in activations.iter_mut().enumerate() {
+            let ch = i % nc;
+            let d = self.channels[ch].next(self.eps * lsb);
+            *x = ((*x + d) * qmax).round().clamp(-qmax, qmax) / qmax;
+        }
+    }
+
+    /// Number of channels in this pool.
+    pub fn n_channels(&self) -> usize {
+        self.channels.len()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// The pool reports exactly the number of channels it was built with.
+    #[test]
+    fn channel_dither_correct_count() {
+        let pool = ChannelDither::new(0, 16, 8, 0.5);
+        assert_eq!(pool.n_channels(), 16);
+    }
+
+    /// A ramp over `[-1, 1]` stays inside `[-1, 1]` after 5-bit quantization.
+    #[test]
+    fn channel_dither_in_bounds() {
+        let mut pool = ChannelDither::new(1, 8, 5, 0.5);
+        let mut ramp: Vec<f32> = Vec::with_capacity(64);
+        for i in 0..64 {
+            ramp.push((i as f32 / 63.0) * 2.0 - 1.0);
+        }
+        pool.quantize_batch(&mut ramp);
+        for v in ramp {
+            assert!(v >= -1.0 && v <= 1.0, "out of bounds: {v}");
+        }
+    }
+
+    /// Pools built for different layers must not dither identically.
+    #[test]
+    fn different_layers_produce_different_outputs() {
+        let quantize_for_layer = |layer_id: u32| {
+            let mut buf = vec![0.5_f32; 16];
+            ChannelDither::new(layer_id, 8, 8, 0.5).quantize_batch(&mut buf);
+            buf
+        };
+        let buf0 = quantize_for_layer(0);
+        let buf1 = quantize_for_layer(99);
+        assert_ne!(
+            buf0, buf1,
+            "different layer_ids must yield different dithered outputs"
+        );
+    }
+}
--- a/crates/ruvector-dither/src/golden.rs
+++ b/crates/ruvector-dither/src/golden.rs
@@ -0,0 +1,100 @@
+//! Golden-ratio quasi-random dither sequence.
+//!
+//! State update: `state = frac(state + φ)` where φ = (√5−1)/2 ≈ 0.618…
+//!
+//! This is a Kronecker (additive-recurrence, or Weyl) sequence with step φ.
+//! Among such sequences the golden-ratio step gives the lowest discrepancy.
+
+use crate::DitherSource;
+
+/// Additive golden-ratio dither with zero-mean output in `[-0.5, 0.5)`.
+///
+/// Each step adds φ to the state modulo 1.  φ is irrational, so the ideal
+/// sequence never repeats; the `f32` state is finite, so a long enough run
+/// will eventually revisit a value.  Instances differ only in their seed.
+#[derive(Clone, Debug)]
+pub struct GoldenRatioDither {
+    /// Current position of the sequence, kept in `[0, 1)`.
+    state: f32,
+}
+
+/// φ = (√5 − 1) / 2
+const PHI: f32 = 0.618_033_98_f32;
+
+/// φ in unsigned Q0.32 fixed point: ⌊φ · 2³²⌋.
+const PHI_Q32: u32 = 0x9E37_79B9;
+
+/// √2 − 1 in unsigned Q0.32 fixed point: ⌊(√2 − 1) · 2³²⌋.
+const SQRT2_FRAC_Q32: u32 = 0x6A09_E667;
+
+impl GoldenRatioDither {
+    /// Create a new sequence seeded at `initial_state` ∈ [0, 1).
+    ///
+    /// Values outside that range are folded into it with `abs().fract()`.
+    /// A NaN or infinite seed falls back to `0.0`; without that guard the
+    /// state would become NaN and every later sample would be NaN too.
+    ///
+    /// For per-layer / per-channel decorrelation use
+    /// [`GoldenRatioDither::from_ids`].
+    #[inline]
+    pub fn new(initial_state: f32) -> Self {
+        let state = if initial_state.is_finite() {
+            initial_state.abs().fract()
+        } else {
+            0.0
+        };
+        Self { state }
+    }
+
+    /// Construct from a `(layer_id, channel_id)` pair for structural decorrelation.
+    ///
+    /// The seed is `frac(layer_id · φ + channel_id · (√2 − 1))`, evaluated in
+    /// wrapping 32-bit fixed point.
+    ///
+    /// Why not `φ²` for the channel term: `φ² = 1 − φ`, so
+    /// `frac(l·φ + c·φ²)` collapses to `frac((l − c)·φ)`.  Every pair with
+    /// the same `l − c` would then share a seed (e.g. `(0, 0)` and `(1, 1)`),
+    /// and neighbouring channels would be one-step time shifts of each other.
+    /// √2 − 1 is rationally independent of φ, which avoids that.
+    ///
+    /// Why fixed point: an `f32` product loses all fractional bits once an
+    /// ID exceeds about 2²⁴, which would seed every large ID at 0.
+    #[inline]
+    pub fn from_ids(layer_id: u32, channel_id: u32) -> Self {
+        let mixed = layer_id
+            .wrapping_mul(PHI_Q32)
+            .wrapping_add(channel_id.wrapping_mul(SQRT2_FRAC_Q32));
+        // Keep the top 24 bits: exactly representable in f32 and strictly < 1.
+        let s = (mixed >> 8) as f32 / 16_777_216.0;
+        Self { state: s }
+    }
+
+    /// Current state (useful for serialisation / checkpointing).
+    #[inline]
+    pub fn state(&self) -> f32 {
+        self.state
+    }
+}
+
+impl DitherSource for GoldenRatioDither {
+    /// Step the state by φ (modulo 1) and return it re-centred around zero.
+    ///
+    /// The new state is stored first, so the value returned is derived from
+    /// the state *after* the step.
+    #[inline]
+    fn next_unit(&mut self) -> f32 {
+        let advanced = (self.state + PHI).fract();
+        self.state = advanced;
+        advanced - 0.5
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::DitherSource;
+
+    /// Every sample stays inside the documented interval.
+    #[test]
+    fn output_is_in_range() {
+        let mut seq = GoldenRatioDither::new(0.0);
+        for v in (0..10_000).map(|_| seq.next_unit()) {
+            assert!(v >= -0.5 && v <= 0.5, "out of range: {v}");
+        }
+    }
+
+    /// The running average over many samples is close to zero.
+    #[test]
+    fn mean_is_near_zero() {
+        let mut seq = GoldenRatioDither::new(0.0);
+        let n = 100_000;
+        let mut total = 0.0_f32;
+        for _ in 0..n {
+            total += seq.next_unit();
+        }
+        let mean = total / n as f32;
+        assert!(mean.abs() < 0.01, "mean too large: {mean}");
+    }
+
+    /// Different ID pairs start their sequences at different values.
+    #[test]
+    fn from_ids_decorrelates() {
+        let first_of = |layer: u32, channel: u32| {
+            GoldenRatioDither::from_ids(layer, channel).next_unit()
+        };
+        let v0 = first_of(0, 0);
+        let v1 = first_of(1, 7);
+        assert!(
+            (v0 - v1).abs() > 1e-4,
+            "distinct seeds should produce distinct first values"
+        );
+    }
+
+    /// Two instances with the same seed stay in lock-step.
+    #[test]
+    fn deterministic_across_calls() {
+        let mut left = GoldenRatioDither::new(0.123);
+        let mut right = GoldenRatioDither::new(0.123);
+        let mut step = 0;
+        while step < 1000 {
+            assert_eq!(left.next_unit(), right.next_unit());
+            step += 1;
+        }
+    }
+}
--- a/crates/ruvector-dither/src/lib.rs
+++ b/crates/ruvector-dither/src/lib.rs
@@ -0,0 +1,63 @@
+//! # ruvector-dither
+//!
+//! Deterministic, low-discrepancy **pre-quantization dithering** for low-bit
+//! inference on tiny devices (WASM, Seed, STM32).
+//!
+//! ## Why dither?
+//!
+//! Quantizers at 3 / 5 / 7 bits can align with power-of-two boundaries and
+//! produce idle tones / limit cycles — sticky activations and periodic errors
+//! that degrade accuracy.  A sub-LSB pre-quantization offset:
+//!
+//! - Decorrelates the signal from grid boundaries.
+//! - Pushes quantization error toward high frequencies (blue-noise-like),
+//!   which average out downstream.
+//! - Uses **no RNG** — outputs are deterministic, reproducible across
+//!   platforms (WASM / x86 / ARM), and cache-friendly.
+//!
+//! ## Sequences
+//!
+//! | Type | State update | Properties |
+//! |------|-------------|------------|
+//! | [`GoldenRatioDither`] | frac(state + φ) | Best 1-D equidistribution |
+//! | [`PiDither`] | table of π bytes | Reproducible, period = 256 |
+//!
+//! ## Quick start
+//!
+//! ```
+//! use ruvector_dither::{GoldenRatioDither, PiDither, quantize_dithered};
+//!
+//! // Quantize with golden-ratio dither, 8-bit, ε = 0.5 LSB
+//! let mut gr = GoldenRatioDither::new(0.0);
+//! let q = quantize_dithered(0.314, 8, 0.5, &mut gr);
+//! assert!(q >= -1.0 && q <= 1.0);
+//!
+//! // Quantize with π-digit dither
+//! let mut pi = PiDither::new(0);
+//! let q2 = quantize_dithered(0.271, 5, 0.5, &mut pi);
+//! assert!(q2 >= -1.0 && q2 <= 1.0);
+//! ```
+
+#![cfg_attr(feature = "no_std", no_std)]
+
+pub mod channel;
+pub mod golden;
+pub mod pi;
+pub mod quantize;
+
+pub use channel::ChannelDither;
+pub use golden::GoldenRatioDither;
+pub use pi::PiDither;
+pub use quantize::{quantize_dithered, quantize_slice_dithered};
+
+/// A deterministic, stateful source of dither offsets.
+///
+/// Implementors only provide [`DitherSource::next_unit`]; the amplitude
+/// scaling in [`DitherSource::next`] has a default implementation.
+pub trait DitherSource {
+    /// Advance the sequence and return the next zero-mean offset in `[-0.5, +0.5]`.
+    fn next_unit(&mut self) -> f32;
+
+    /// Advance the sequence and return the next offset multiplied by
+    /// `eps_lsb` (callers pass ε × LSB, i.e. the desired amplitude).
+    #[inline]
+    fn next(&mut self, eps_lsb: f32) -> f32 {
+        let unit = self.next_unit();
+        unit * eps_lsb
+    }
+}
--- a/crates/ruvector-dither/src/pi.rs
+++ b/crates/ruvector-dither/src/pi.rs
@@ -0,0 +1,110 @@
+//! π-digit dither: cyclic 256-byte table derived from the hexadecimal digits of π, each byte scaled to [-0.5, 0.5].
+//!
+//! Period = 256.  Each entry is an independent offset making the sequence
+//! suitable for small buffers where you want exact reproducibility from a
+//! named tensor / layer rather than a stateful RNG.
+
+use crate::DitherSource;
+
+/// 256-byte table derived from the hexadecimal expansion of π
+/// (3.243F6A8885A308D3…), packed two hex digits per byte with the leading
+/// integer digit `3` included, so the table starts `0x32, 0x43, 0xF6, 0xA8`.
+///
+/// NOTE(review): the table is not an exact transcription — after
+/// `0x94, 0x52` the published digits continue `82 1E 63…` while the table
+/// continues `0x21, 0xE6, 0x38…` (one hex digit is missing).  Treat it as a
+/// fixed deterministic table; changing any byte changes the dither output.
+///
+/// Each byte spans [0, 255]; we map to [-0.5, 0.5] by `(b as f32 / 255.0) - 0.5`.
+#[rustfmt::skip]
+const PI_BYTES: [u8; 256] = [
+    0x32, 0x43, 0xF6, 0xA8, 0x88, 0x5A, 0x30, 0x8D, 0x31, 0x31, 0x98, 0xA2,
+    0xE0, 0x37, 0x07, 0x34, 0x4A, 0x40, 0x93, 0x82, 0x22, 0x99, 0xF3, 0x1D,
+    0x00, 0x82, 0xEF, 0xA9, 0x8E, 0xC4, 0xE6, 0xC8, 0x94, 0x52, 0x21, 0xE6,
+    0x38, 0xD0, 0x13, 0x77, 0xBE, 0x54, 0x66, 0xCF, 0x34, 0xE9, 0x0C, 0x6C,
+    0xC0, 0xAC, 0x29, 0xB7, 0xC9, 0x7C, 0x50, 0xDD, 0x3F, 0x84, 0xD5, 0xB5,
+    0xB5, 0x47, 0x09, 0x17, 0x92, 0x16, 0xD5, 0xD9, 0x89, 0x79, 0xFB, 0x1B,
+    0xD1, 0x31, 0x0B, 0xA6, 0x98, 0xDF, 0xB5, 0xAC, 0x2F, 0xFD, 0x72, 0xDB,
+    0xD0, 0x1A, 0xDF, 0xB7, 0xB8, 0xE1, 0xAF, 0xED, 0x6A, 0x26, 0x7E, 0x96,
+    0xBA, 0x7C, 0x90, 0x45, 0xF1, 0x2C, 0x7F, 0x99, 0x24, 0xA1, 0x99, 0x47,
+    0xB3, 0x91, 0x6C, 0xF7, 0x08, 0x01, 0xF2, 0xE2, 0x85, 0x8E, 0xFC, 0x16,
+    0x63, 0x69, 0x20, 0xD8, 0x71, 0x57, 0x4E, 0x69, 0xA4, 0x58, 0xFE, 0xA3,
+    0xF4, 0x93, 0x3D, 0x7E, 0x0D, 0x95, 0x74, 0x8F, 0x72, 0x8E, 0xB6, 0x58,
+    0x71, 0x8B, 0xCD, 0x58, 0x82, 0x15, 0x4A, 0xEE, 0x7B, 0x54, 0xA4, 0x1D,
+    0xC2, 0x5A, 0x59, 0xB5, 0x9C, 0x30, 0xD5, 0x39, 0x2A, 0xF2, 0x60, 0x13,
+    0xC5, 0xD1, 0xB0, 0x23, 0x28, 0x60, 0x85, 0xF0, 0xCA, 0x41, 0x79, 0x18,
+    0xB8, 0xDB, 0x38, 0xEF, 0x8E, 0x79, 0xDC, 0xB0, 0x60, 0x3A, 0x18, 0x0E,
+    0x6C, 0x9E, 0xD0, 0xE8, 0x9D, 0x44, 0x8F, 0x39, 0xF9, 0x93, 0xDB, 0x07,
+    0x3A, 0xA3, 0x45, 0x22, 0x7E, 0xD8, 0xAC, 0x87, 0x2F, 0x85, 0x5D, 0x28,
+    0x55, 0xB0, 0x89, 0x73, 0x36, 0xF3, 0xEB, 0xCD, 0xF6, 0x00, 0x4A, 0xDB,
+    0x36, 0x47, 0xDB, 0xF7, 0x82, 0x48, 0xDB, 0xF3, 0xD3, 0x7C, 0x45, 0x10,
+    0xC6, 0x7A, 0x70, 0xAA, 0x56, 0x78, 0x5A, 0xC6, 0x37, 0x10, 0xA2, 0x44,
+    0x32, 0x34, 0xFE, 0x08,
+];
+
+/// Cyclic π-digit dither.  Period = 256: the position is a `u8`, so it
+/// wraps back to 0 after 255 through wrapping arithmetic.
+#[derive(Clone, Debug)]
+pub struct PiDither {
+    /// Position of the next table entry to emit.
+    idx: u8,
+}
+
+impl PiDither {
+    /// Create a new instance whose first sample is table entry `offset` (0–255).
+    #[inline]
+    pub fn new(offset: u8) -> Self {
+        PiDither { idx: offset }
+    }
+
+    /// Construct from a tensor/layer identifier for structural reproducibility.
+    ///
+    /// The identifier is multiplied by `0x9E37_79B9` (wrapping), its upper
+    /// 16 bits are added back in, and the low byte of the result becomes the
+    /// starting offset.
+    #[inline]
+    pub fn from_tensor_id(tensor_id: u32) -> Self {
+        let product = tensor_id.wrapping_mul(0x9E37_79B9);
+        let folded = product.wrapping_add(tensor_id >> 16);
+        // `as u8` keeps exactly the low 8 bits.
+        PiDither { idx: folded as u8 }
+    }
+}
+
+impl DitherSource for PiDither {
+    /// Emit the table entry at the current position, mapped from
+    /// `[0, 255]` to `[-0.5, 0.5]`, then move one entry forward (wrapping).
+    #[inline]
+    fn next_unit(&mut self) -> f32 {
+        let slot = usize::from(self.idx);
+        self.idx = self.idx.wrapping_add(1);
+        (PI_BYTES[slot] as f32 / 255.0) - 0.5
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::DitherSource;
+
+    /// Four full laps of the table never leave `[-0.5, 0.5]`.
+    #[test]
+    fn output_is_in_range() {
+        let mut d = PiDither::new(0);
+        for v in (0..256 * 4).map(|_| d.next_unit()) {
+            assert!(v >= -0.5 && v <= 0.5, "out of range: {v}");
+        }
+    }
+
+    /// The second lap reproduces the first one exactly.
+    #[test]
+    fn period_is_256() {
+        let mut d = PiDither::new(0);
+        let mut lap = || (0..256).map(|_| d.next_unit()).collect::<Vec<f32>>();
+        let first = lap();
+        let second = lap();
+        assert_eq!(first, second);
+    }
+
+    /// Averaged over one lap the offsets are roughly centred.
+    #[test]
+    fn mean_is_near_zero() {
+        let mut d = PiDither::new(0);
+        let mut sum = 0.0_f32;
+        for _ in 0..256 {
+            sum += d.next_unit();
+        }
+        let mean = sum / 256.0;
+        assert!(mean.abs() < 0.05, "π-digit mean too large: {mean}");
+    }
+
+    /// Tensor IDs 0 and 1 start at different table positions.
+    #[test]
+    fn from_tensor_id_gives_distinct_offsets() {
+        let start_of = |id: u32| PiDither::from_tensor_id(id).idx;
+        assert_ne!(start_of(0), start_of(1));
+    }
+}
--- a/crates/ruvector-dither/src/quantize.rs
+++ b/crates/ruvector-dither/src/quantize.rs
@@ -0,0 +1,134 @@
+//! Drop-in quantization helpers that apply dither before rounding.
+
+use crate::DitherSource;
+
+/// Quantize one value after adding a deterministic dither offset.
+///
+/// The value is offset, scaled to the signed grid with
+/// `qmax = 2^(bits-1) - 1`, rounded, clamped to `[-qmax, qmax]` and scaled
+/// back, so the result lies on that grid inside `[-1.0, 1.0]`.
+///
+/// # Arguments
+/// - `x`      – input activation in `[-1.0, 1.0]`
+/// - `bits`   – quantizer bit-width (e.g. 3, 5, 7, 8)
+/// - `eps`    – dither amplitude in LSB units (0.0 = no dither, 0.5 = half-LSB recommended)
+/// - `source` – stateful dither sequence; advanced by exactly one sample
+///
+/// # Panics
+/// Panics if `bits` is outside `[2, 31]`.
+///
+/// # Example
+/// ```
+/// use ruvector_dither::{GoldenRatioDither, quantize_dithered};
+/// let mut d = GoldenRatioDither::new(0.0);
+/// let q = quantize_dithered(0.314, 8, 0.5, &mut d);
+/// assert!(q >= -1.0 && q <= 1.0);
+/// ```
+#[inline]
+pub fn quantize_dithered(x: f32, bits: u32, eps: f32, source: &mut impl DitherSource) -> f32 {
+    assert!((2..=31).contains(&bits), "bits must be in [2, 31]");
+    let full_scale = ((1u32 << (bits - 1)) - 1) as f32;
+    let step = 1.0 / full_scale;
+    let offset = source.next(eps * step);
+    let code = ((x + offset) * full_scale).round();
+    code.clamp(-full_scale, full_scale) / full_scale
+}
+
+/// Quantize every element of `xs` in place, drawing one dither sample per
+/// element from `source` in slice order.
+///
+/// Each element gets the same treatment as in `quantize_dithered`: offset,
+/// scale by `qmax = 2^(bits-1) - 1`, round, clamp to `[-qmax, qmax]`,
+/// scale back.
+///
+/// # Panics
+/// Panics if `bits` is outside `[2, 31]`.
+///
+/// # Example
+/// ```
+/// use ruvector_dither::{GoldenRatioDither, quantize_slice_dithered};
+/// let mut vals = vec![0.1_f32, 0.5, -0.3, 0.9, -0.8];
+/// let mut d = GoldenRatioDither::new(0.0);
+/// quantize_slice_dithered(&mut vals, 5, 0.5, &mut d);
+/// for &v in &vals {
+///     assert!(v >= -1.0 && v <= 1.0);
+/// }
+/// ```
+pub fn quantize_slice_dithered(
+    xs: &mut [f32],
+    bits: u32,
+    eps: f32,
+    source: &mut impl DitherSource,
+) {
+    assert!((2..=31).contains(&bits), "bits must be in [2, 31]");
+    let full_scale = ((1u32 << (bits - 1)) - 1) as f32;
+    let amplitude = eps * (1.0 / full_scale);
+    xs.iter_mut().for_each(|slot| {
+        let offset = source.next(amplitude);
+        let code = ((*slot + offset) * full_scale).round();
+        *slot = code.clamp(-full_scale, full_scale) / full_scale;
+    });
+}
+
+/// Quantize to a raw signed integer code instead of a re-scaled float.
+///
+/// The code is clamped symmetrically, so it lies in
+/// `[-(2^(bits-1) - 1), 2^(bits-1) - 1]`; the most negative two's-complement
+/// value `-2^(bits-1)` is never produced.
+///
+/// # Panics
+/// Panics if `bits` is outside `[2, 31]`.
+#[inline]
+pub fn quantize_to_code(x: f32, bits: u32, eps: f32, source: &mut impl DitherSource) -> i32 {
+    assert!((2..=31).contains(&bits), "bits must be in [2, 31]");
+    let full_scale = ((1u32 << (bits - 1)) - 1) as f32;
+    let step = 1.0 / full_scale;
+    let offset = source.next(eps * step);
+    let code = ((x + offset) * full_scale).round();
+    code.clamp(-full_scale, full_scale) as i32
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::{GoldenRatioDither, PiDither};
+
+    /// Boundary and interior inputs stay inside `[-1, 1]` at several widths.
+    #[test]
+    fn output_in_unit_range() {
+        const INPUTS: [f32; 5] = [-1.0, -0.5, 0.0, 0.5, 1.0];
+        let mut d = GoldenRatioDither::new(0.0);
+        for bits in [3u32, 5, 7, 8] {
+            for x in INPUTS {
+                let q = quantize_dithered(x, bits, 0.5, &mut d);
+                assert!(q >= -1.0 && q <= 1.0, "bits={bits}, x={x}, q={q}");
+            }
+        }
+    }
+
+    /// A constant input sitting exactly half an LSB above zero must not be
+    /// stuck on a single code once dither is applied: over 256 samples more
+    /// than one distinct code has to appear.
+    #[test]
+    fn dither_reduces_idle_tones() {
+        let bits = 5u32;
+        let qmax = ((1u32 << (bits - 1)) - 1) as f32;
+        let x = 0.5 * (1.0 / qmax); // exactly half an LSB
+
+        let mut d = GoldenRatioDither::new(0.0);
+        let mut unique = std::collections::HashSet::new();
+        for _ in 0..256 {
+            unique.insert(quantize_to_code(x, bits, 0.5, &mut d));
+        }
+        assert!(
+            unique.len() > 1,
+            "dithered signal must produce >1 unique code"
+        );
+    }
+
+    /// A ramp over `[-1, 1]` quantized with the π table stays in range.
+    #[test]
+    fn slice_quantize_in_bounds() {
+        let mut vals: Vec<f32> = Vec::with_capacity(101);
+        for i in -50..=50 {
+            vals.push(i as f32 * 0.02);
+        }
+        let mut pi = PiDither::new(0);
+        quantize_slice_dithered(&mut vals, 7, 0.5, &mut pi);
+        for v in vals {
+            assert!(v >= -1.0 && v <= 1.0, "out of range: {v}");
+        }
+    }
+
+    /// Same input and same seed give bit-identical output.
+    #[test]
+    fn deterministic_with_same_seed() {
+        fn run(input: &[f32]) -> Vec<f32> {
+            let mut buf = input.to_vec();
+            let mut d = GoldenRatioDither::new(0.5);
+            quantize_slice_dithered(&mut buf, 8, 0.5, &mut d);
+            buf
+        }
+        let input = vec![0.1_f32, 0.4, -0.7, 0.9];
+        let first = run(&input);
+        let second = run(&input);
+        assert_eq!(first, second);
+    }
+}