Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,193 @@
//! Baseline benchmarks for dense and sparse matrix-vector operations.
//!
//! These benchmarks establish performance baselines for the core linear algebra
//! primitives used throughout the solver crate: naive dense matrix-vector
//! multiply and CSR sparse matrix-vector multiply (SpMV).
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::time::Duration;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use ruvector_solver::types::CsrMatrix;
// ---------------------------------------------------------------------------
// Helpers: deterministic random data generation
// ---------------------------------------------------------------------------
/// Row-major dense `rows x cols` matrix filled with seeded uniform values.
///
/// A fixed seed keeps benchmark inputs identical from run to run.
fn random_dense_matrix(rows: usize, cols: usize, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let total = rows * cols;
    let mut data = Vec::with_capacity(total);
    for _ in 0..total {
        data.push(rng.gen_range(-1.0..1.0));
    }
    data
}
/// Random square CSR matrix with roughly `density` fraction of non-zeros.
///
/// The strict upper triangle is sampled entry-by-entry with probability
/// `density` and mirrored into the lower triangle (symmetry). Every
/// diagonal entry is set to its row's absolute off-diagonal sum plus 1,
/// giving strict diagonal dominance for solver stability.
fn random_csr_matrix(n: usize, density: f64, seed: u64) -> CsrMatrix<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut entries: Vec<(usize, usize, f32)> = Vec::new();
    // Symmetric off-diagonal entries.
    for row in 0..n {
        for col in (row + 1)..n {
            if rng.gen::<f64>() >= density {
                continue;
            }
            let v: f32 = rng.gen_range(-0.5..0.5);
            entries.push((row, col, v));
            entries.push((col, row, v));
        }
    }
    // Per-row absolute sums drive the diagonal values.
    let mut abs_sum = vec![0.0f32; n];
    for &(row, _, v) in entries.iter() {
        abs_sum[row] += v.abs();
    }
    // Strictly dominant diagonal: |a_ii| exceeds the row sum by 1.
    for (i, s) in abs_sum.iter().enumerate() {
        entries.push((i, i, s + 1.0));
    }
    CsrMatrix::<f32>::from_coo(n, n, entries)
}
/// Deterministic pseudo-random vector of length `n` with entries in [-1, 1).
fn random_vector(n: usize, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut out = Vec::with_capacity(n);
    for _ in 0..n {
        out.push(rng.gen_range(-1.0..1.0));
    }
    out
}
// ---------------------------------------------------------------------------
// Dense matrix-vector multiply (naive baseline)
// ---------------------------------------------------------------------------
/// Naive dense matrix-vector multiply: `y = A * x`.
///
/// `a` is stored in row-major order with dimensions `rows x cols`.
///
/// # Panics
/// Panics if `a`, `x`, or `y` is shorter than the stated dimensions require
/// (same as the original indexed version).
#[inline(never)]
fn dense_matvec(a: &[f32], x: &[f32], y: &mut [f32], rows: usize, cols: usize) {
    // Degenerate width: every row dot-product is empty (chunks_exact would
    // panic on a zero chunk size, so handle it explicitly).
    if cols == 0 {
        y[..rows].fill(0.0);
        return;
    }
    // Hoist the bounds checks: take exact sub-slices once so the inner loop
    // is bounds-check free and can auto-vectorize.
    let a = &a[..rows * cols];
    let x = &x[..cols];
    let y = &mut y[..rows];
    for (yi, row) in y.iter_mut().zip(a.chunks_exact(cols)) {
        // Dot product of one matrix row with x; same left-to-right f32
        // accumulation order as the original loop.
        *yi = row.iter().zip(x).map(|(&aij, &xj)| aij * xj).sum();
    }
}
/// Measure the naive dense matvec across several square sizes.
fn dense_matvec_baseline(c: &mut Criterion) {
    let mut group = c.benchmark_group("dense_matvec");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    for &dim in [64usize, 256, 1024, 4096].iter() {
        let matrix = random_dense_matrix(dim, dim, 42);
        let vector = random_vector(dim, 43);
        let mut out = vec![0.0f32; dim];
        // Throughput counted as matrix elements touched per multiply.
        group.throughput(Throughput::Elements((dim * dim) as u64));
        group.bench_with_input(BenchmarkId::new("naive", dim), &dim, |b, &n| {
            b.iter(|| {
                dense_matvec(
                    criterion::black_box(&matrix),
                    criterion::black_box(&vector),
                    criterion::black_box(&mut out),
                    n,
                    n,
                );
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Sparse matrix-vector multiply (CSR SpMV)
// ---------------------------------------------------------------------------
/// Measure CSR SpMV throughput at several sizes and densities.
fn sparse_spmv_baseline(c: &mut Criterion) {
    let mut group = c.benchmark_group("sparse_spmv");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    for &(n, density) in [(1000usize, 0.01f64), (1000, 0.05), (10_000, 0.01)].iter() {
        let matrix = random_csr_matrix(n, density, 44);
        let xs = random_vector(n, 45);
        let mut ys = vec![0.0f32; n];
        let label = format!("{}x{}_{:.0}pct", n, n, density * 100.0);
        // Throughput counted in stored non-zeros processed per multiply.
        group.throughput(Throughput::Elements(matrix.nnz() as u64));
        group.bench_with_input(BenchmarkId::new(&label, n), &n, |b, _| {
            b.iter(|| {
                matrix.spmv(criterion::black_box(&xs), criterion::black_box(&mut ys));
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Dense vs sparse crossover
// ---------------------------------------------------------------------------
/// Compare dense and 5%-dense sparse matvec at identical dimensions to
/// locate the crossover point where sparse becomes faster.
fn dense_vs_sparse_crossover(c: &mut Criterion) {
    let mut group = c.benchmark_group("dense_vs_sparse_crossover");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    for &dim in [64usize, 128, 256, 512, 1024].iter() {
        let density = 0.05;
        // Dense side.
        let dense = random_dense_matrix(dim, dim, 42);
        let xs = random_vector(dim, 43);
        let mut dense_out = vec![0.0f32; dim];
        group.throughput(Throughput::Elements((dim * dim) as u64));
        group.bench_with_input(BenchmarkId::new("dense", dim), &dim, |b, &n| {
            b.iter(|| {
                dense_matvec(
                    criterion::black_box(&dense),
                    criterion::black_box(&xs),
                    criterion::black_box(&mut dense_out),
                    n,
                    n,
                );
            });
        });
        // Sparse side at the same dimension.
        let sparse = random_csr_matrix(dim, density, 44);
        let mut sparse_out = vec![0.0f32; dim];
        group.bench_with_input(BenchmarkId::new("sparse_5pct", dim), &dim, |b, _| {
            b.iter(|| {
                sparse.spmv(
                    criterion::black_box(&xs),
                    criterion::black_box(&mut sparse_out),
                );
            });
        });
    }
    group.finish();
}
// Register the baseline benchmark functions and generate the harness `main`.
criterion_group!(
    baselines,
    dense_matvec_baseline,
    sparse_spmv_baseline,
    dense_vs_sparse_crossover
);
criterion_main!(baselines);

View File

@@ -0,0 +1,378 @@
//! Benchmarks for the Conjugate Gradient (CG) solver.
//!
//! CG is the method of choice for symmetric positive-definite (SPD) systems.
//! These benchmarks measure scaling behaviour, the effect of diagonal
//! preconditioning, and a head-to-head comparison with the Neumann series
//! solver.
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::time::Duration;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use ruvector_solver::types::CsrMatrix;
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Build a symmetric positive-definite (SPD) CSR matrix.
///
/// Samples the strict upper triangle with probability `density`, mirrors
/// each entry for symmetry, then sets `a_ii = sum_j |a_ij| + 1` so the
/// matrix is strictly diagonally dominant (hence SPD).
fn spd_csr_matrix(n: usize, density: f64, seed: u64) -> CsrMatrix<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut entries: Vec<(usize, usize, f32)> = Vec::new();
    for row in 0..n {
        for col in (row + 1)..n {
            if rng.gen::<f64>() >= density {
                continue;
            }
            let v: f32 = rng.gen_range(-0.3..0.3);
            entries.push((row, col, v));
            entries.push((col, row, v));
        }
    }
    // Per-row absolute sums feed the dominant diagonal.
    let mut abs_sum = vec![0.0f32; n];
    for &(row, _, v) in entries.iter() {
        abs_sum[row] += v.abs();
    }
    for (i, s) in abs_sum.iter().enumerate() {
        entries.push((i, i, s + 1.0));
    }
    CsrMatrix::<f32>::from_coo(n, n, entries)
}
/// Deterministic pseudo-random vector of length `n` with entries in [-1, 1).
fn random_vector(n: usize, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut out = Vec::with_capacity(n);
    for _ in 0..n {
        out.push(rng.gen_range(-1.0..1.0));
    }
    out
}
// ---------------------------------------------------------------------------
// Inline CG solver for benchmarking
// ---------------------------------------------------------------------------
/// Conjugate gradient solver for SPD systems `Ax = b`.
///
/// Textbook CG inlined here so the benchmark does not depend on the
/// (currently stub) cg module. Starts from `x_0 = 0`, so `r_0 = b`.
///
/// Returns `(solution, iterations_performed, final_residual_l2_norm)`.
/// The reported norm now always reflects the residual at the point the loop
/// stopped; previously it was `rs_old.sqrt()`, which lags one iteration
/// behind on an early (converged or breakdown) exit.
#[inline(never)]
fn cg_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;
    let mut x = vec![0.0f32; n];
    let mut r = rhs.to_vec(); // r_0 = b - A*x_0, with x_0 = 0 => r_0 = b
    let mut p = r.clone();
    let mut ap = vec![0.0f32; n];
    // rs_old = r^T r, accumulated in f64 for numerical robustness.
    let mut rs_old: f64 = r.iter().map(|&v| (v as f64) * (v as f64)).sum();
    // Squared norm of the *current* residual, kept for reporting.
    let mut residual_sq = rs_old;
    let mut iterations = 0;
    for k in 0..max_iter {
        // ap = A * p
        matrix.spmv(&p, &mut ap);
        // alpha = (r^T r) / (p^T A p)
        let p_ap: f64 = p
            .iter()
            .zip(ap.iter())
            .map(|(&pi, &api)| (pi as f64) * (api as f64))
            .sum();
        // Breakdown guard: near-zero curvature would make alpha explode.
        if p_ap.abs() < 1e-30 {
            iterations = k + 1;
            break;
        }
        let alpha = rs_old / p_ap;
        // x = x + alpha * p
        for i in 0..n {
            x[i] += (alpha as f32) * p[i];
        }
        // r = r - alpha * ap
        for i in 0..n {
            r[i] -= (alpha as f32) * ap[i];
        }
        let rs_new: f64 = r.iter().map(|&v| (v as f64) * (v as f64)).sum();
        residual_sq = rs_new;
        iterations = k + 1;
        if rs_new.sqrt() < tolerance {
            break;
        }
        // p = r + (rs_new / rs_old) * p
        let beta = rs_new / rs_old;
        for i in 0..n {
            p[i] = r[i] + (beta as f32) * p[i];
        }
        rs_old = rs_new;
    }
    // Fix: report the up-to-date residual; `rs_old` is stale after an early
    // break because `beta` still needs the previous value.
    (x, iterations, residual_sq.sqrt())
}
/// Diagonal-preconditioned CG solver.
///
/// Uses the Jacobi (diagonal) preconditioner: `M = diag(A)`.
/// Solves `M^{-1} A x = M^{-1} b` via the preconditioned CG algorithm.
///
/// Returns `(solution, iterations_performed, final_residual_l2_norm)`;
/// the norm is recomputed from `r` at the end so it reflects the final state.
#[inline(never)]
fn pcg_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;
    // Extract diagonal for preconditioner.
    // Rows with a missing or tiny diagonal fall back to 1.0 (identity),
    // leaving those components unpreconditioned rather than dividing by ~0.
    let mut diag_inv = vec![1.0f32; n];
    for i in 0..n {
        let start = matrix.row_ptr[i];
        let end = matrix.row_ptr[i + 1];
        for idx in start..end {
            if matrix.col_indices[idx] == i {
                let d = matrix.values[idx];
                diag_inv[i] = if d.abs() > 1e-12 { 1.0 / d } else { 1.0 };
                break;
            }
        }
    }
    let mut x = vec![0.0f32; n];
    // x_0 = 0, so r_0 = b.
    let mut r = rhs.to_vec();
    // z_0 = M^{-1} r_0 (elementwise scaling by the inverted diagonal).
    let mut z: Vec<f32> = r
        .iter()
        .zip(diag_inv.iter())
        .map(|(&ri, &di)| ri * di)
        .collect();
    let mut p = z.clone();
    let mut ap = vec![0.0f32; n];
    // rz_old = r^T z, accumulated in f64 for numerical robustness.
    let mut rz_old: f64 = r
        .iter()
        .zip(z.iter())
        .map(|(&ri, &zi)| (ri as f64) * (zi as f64))
        .sum();
    let mut iterations = 0;
    for k in 0..max_iter {
        matrix.spmv(&p, &mut ap);
        // p^T A p, the curvature term in the step length.
        let p_ap: f64 = p
            .iter()
            .zip(ap.iter())
            .map(|(&pi, &api)| (pi as f64) * (api as f64))
            .sum();
        // Breakdown guard: near-zero curvature would make alpha explode.
        if p_ap.abs() < 1e-30 {
            iterations = k + 1;
            break;
        }
        // alpha = (r^T z) / (p^T A p)
        let alpha = rz_old / p_ap;
        for i in 0..n {
            x[i] += (alpha as f32) * p[i];
            r[i] -= (alpha as f32) * ap[i];
        }
        // Convergence is checked on the true (unpreconditioned) residual norm.
        let residual_norm: f64 = r
            .iter()
            .map(|&v| (v as f64) * (v as f64))
            .sum::<f64>()
            .sqrt();
        iterations = k + 1;
        if residual_norm < tolerance {
            break;
        }
        // z = M^{-1} r
        for i in 0..n {
            z[i] = r[i] * diag_inv[i];
        }
        let rz_new: f64 = r
            .iter()
            .zip(z.iter())
            .map(|(&ri, &zi)| (ri as f64) * (zi as f64))
            .sum();
        // beta = (r_new^T z_new) / (r_old^T z_old) steers the next direction.
        let beta = rz_new / rz_old;
        for i in 0..n {
            p[i] = z[i] + (beta as f32) * p[i];
        }
        rz_old = rz_new;
    }
    // Recompute the final residual norm from r so the reported value is current.
    let residual_norm = r
        .iter()
        .map(|&v| (v as f64) * (v as f64))
        .sum::<f64>()
        .sqrt();
    (x, iterations, residual_norm)
}
/// Neumann series iteration (inlined for the comparison benchmark).
#[inline(never)]
fn neumann_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;
    let mut x = vec![0.0f32; n];
    let mut buf = vec![0.0f32; n];
    let mut iters = 0;
    let mut norm = f64::MAX;
    for step in 0..max_iter {
        // buf <- b - A x (current residual).
        matrix.spmv(&x, &mut buf);
        for i in 0..n {
            buf[i] = rhs[i] - buf[i];
        }
        // L2 norm of the residual, accumulated in f64.
        norm = buf
            .iter()
            .fold(0.0f64, |acc, &v| acc + (v as f64) * (v as f64))
            .sqrt();
        iters = step + 1;
        if norm < tolerance {
            break;
        }
        // Richardson step with unit relaxation: x <- x + residual.
        for i in 0..n {
            x[i] += buf[i];
        }
    }
    (x, iters, norm)
}
// ---------------------------------------------------------------------------
// Benchmark: CG scaling with problem size
// ---------------------------------------------------------------------------
/// Benchmark CG runtime as the problem size grows.
fn cg_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("cg_scaling");
    group.warm_up_time(Duration::from_secs(3));
    for &n in [100usize, 1000, 10_000].iter() {
        // Sparser matrices at larger n keep total runtime manageable.
        let density = if n <= 1000 { 0.02 } else { 0.005 };
        let matrix = spd_csr_matrix(n, density, 42);
        let rhs = random_vector(n, 43);
        // Fewer samples for the largest system.
        group.sample_size(if n >= 10_000 { 20 } else { 100 });
        group.throughput(Throughput::Elements(matrix.nnz() as u64));
        group.bench_with_input(BenchmarkId::new("n", n), &n, |b, _| {
            b.iter(|| {
                cg_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: with vs without diagonal preconditioner
// ---------------------------------------------------------------------------
/// Benchmark plain CG against Jacobi-preconditioned CG on identical systems.
fn cg_preconditioning(c: &mut Criterion) {
    let mut group = c.benchmark_group("cg_preconditioning");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    for &n in [500usize, 1000, 2000].iter() {
        let matrix = spd_csr_matrix(n, 0.02, 42);
        let rhs = random_vector(n, 43);
        group.bench_with_input(BenchmarkId::new("cg_plain", n), &n, |b, _| {
            b.iter(|| {
                cg_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
        group.bench_with_input(BenchmarkId::new("cg_diag_precond", n), &n, |b, _| {
            b.iter(|| {
                pcg_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: CG vs Neumann for same problem
// ---------------------------------------------------------------------------
/// Head-to-head comparison of CG and Neumann on the same SPD systems.
fn cg_vs_neumann(c: &mut Criterion) {
    let mut group = c.benchmark_group("cg_vs_neumann");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    for &n in [100usize, 500, 1000].iter() {
        let matrix = spd_csr_matrix(n, 0.02, 42);
        let rhs = random_vector(n, 43);
        group.bench_with_input(BenchmarkId::new("cg", n), &n, |b, _| {
            b.iter(|| {
                cg_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
        group.bench_with_input(BenchmarkId::new("neumann", n), &n, |b, _| {
            b.iter(|| {
                neumann_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
    }
    group.finish();
}
// Register the CG benchmark functions and generate the harness `main`.
criterion_group!(cg, cg_scaling, cg_preconditioning, cg_vs_neumann);
criterion_main!(cg);

View File

@@ -0,0 +1,390 @@
//! End-to-end benchmarks for the solver orchestration layer.
//!
//! These benchmarks measure the overhead of algorithm selection (routing) and
//! the full end-to-end solve path including routing, validation, solver
//! dispatch, and result construction.
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::time::Duration;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use ruvector_solver::types::{Algorithm, CsrMatrix};
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Build a strictly diagonally dominant, symmetric CSR matrix.
///
/// Off-diagonals are sampled from the strict upper triangle with
/// probability `density` and mirrored; each diagonal entry equals its
/// row's absolute off-diagonal sum plus 1.
fn diag_dominant_csr(n: usize, density: f64, seed: u64) -> CsrMatrix<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut entries: Vec<(usize, usize, f32)> = Vec::new();
    for row in 0..n {
        for col in (row + 1)..n {
            if rng.gen::<f64>() >= density {
                continue;
            }
            let v: f32 = rng.gen_range(-0.3..0.3);
            entries.push((row, col, v));
            entries.push((col, row, v));
        }
    }
    // Per-row absolute sums feed the dominant diagonal.
    let mut abs_sum = vec![0.0f32; n];
    for &(row, _, v) in entries.iter() {
        abs_sum[row] += v.abs();
    }
    for (i, s) in abs_sum.iter().enumerate() {
        entries.push((i, i, s + 1.0));
    }
    CsrMatrix::<f32>::from_coo(n, n, entries)
}
/// Deterministic pseudo-random vector of length `n` with entries in [-1, 1).
fn random_vector(n: usize, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut out = Vec::with_capacity(n);
    for _ in 0..n {
        out.push(rng.gen_range(-1.0..1.0));
    }
    out
}
// ---------------------------------------------------------------------------
// Inline algorithm router for benchmarking
// ---------------------------------------------------------------------------
/// Properties extracted from the matrix for routing decisions.
#[allow(dead_code)]
struct MatrixProperties {
    /// Number of rows of the matrix.
    n: usize,
    /// Count of stored non-zero entries.
    nnz: usize,
    /// nnz / n^2 — fraction of stored entries.
    density: f64,
    /// Whether the matrix appears symmetric (sampled check, first 100 rows).
    is_symmetric: bool,
    /// Largest number of stored entries in any single row.
    max_row_degree: usize,
    /// Average over sampled rows of |diag| / (sum of |off-diag|);
    /// rows with no off-diagonals contribute a fixed 10.0.
    diag_dominance_ratio: f64,
}
/// Analyze a CSR matrix to extract routing-relevant properties.
///
/// Symmetry and diagonal dominance are only *sampled* over the first
/// `min(n, 100)` rows to keep routing overhead low; for larger matrices
/// the results are heuristic rather than exact.
#[inline(never)]
fn analyze_matrix(matrix: &CsrMatrix<f32>) -> MatrixProperties {
    let n = matrix.rows;
    let nnz = matrix.nnz();
    let density = nnz as f64 / (n as f64 * n as f64);
    // Check symmetry (sample-based for large matrices).
    let sample_size = n.min(100);
    let mut is_symmetric = true;
    'outer: for i in 0..sample_size {
        let start = matrix.row_ptr[i];
        let end = matrix.row_ptr[i + 1];
        for idx in start..end {
            let j = matrix.col_indices[idx];
            if j == i {
                // Diagonal entries are trivially symmetric.
                continue;
            }
            // Check if (j, i) exists with the same value.
            let j_start = matrix.row_ptr[j];
            let j_end = matrix.row_ptr[j + 1];
            let mut found = false;
            for jidx in j_start..j_end {
                if matrix.col_indices[jidx] == i {
                    // Mirrored entry exists; values must agree within 1e-6.
                    if (matrix.values[jidx] - matrix.values[idx]).abs() > 1e-6 {
                        is_symmetric = false;
                        break 'outer;
                    }
                    found = true;
                    break;
                }
            }
            // No mirrored entry at all => structurally asymmetric.
            if !found {
                is_symmetric = false;
                break 'outer;
            }
        }
    }
    // Max row degree (exact, over all rows).
    let mut max_row_degree = 0;
    for i in 0..n {
        let deg = matrix.row_ptr[i + 1] - matrix.row_ptr[i];
        max_row_degree = max_row_degree.max(deg);
    }
    // Diagonal dominance ratio (sampled).
    let mut diag_dominance_ratio = 0.0;
    let check_rows = n.min(100);
    for i in 0..check_rows {
        let start = matrix.row_ptr[i];
        let end = matrix.row_ptr[i + 1];
        let mut diag = 0.0f32;
        let mut off_diag_sum = 0.0f32;
        for idx in start..end {
            if matrix.col_indices[idx] == i {
                diag = matrix.values[idx].abs();
            } else {
                off_diag_sum += matrix.values[idx].abs();
            }
        }
        if off_diag_sum > 0.0 {
            diag_dominance_ratio += (diag / off_diag_sum) as f64;
        } else {
            diag_dominance_ratio += 10.0; // Perfect dominance.
        }
    }
    // Average the per-row ratios over the sampled rows.
    diag_dominance_ratio /= check_rows as f64;
    MatrixProperties {
        n,
        nnz,
        density,
        is_symmetric,
        max_row_degree,
        diag_dominance_ratio,
    }
}
/// Select the best algorithm based on matrix properties.
#[inline(never)]
fn select_algorithm(props: &MatrixProperties, tolerance: f64) -> Algorithm {
    if props.diag_dominance_ratio > 2.0 && tolerance > 1e-8 {
        // Strong diagonal dominance: the Neumann series converges quickly.
        Algorithm::Neumann
    } else if props.is_symmetric && props.diag_dominance_ratio > 1.0 {
        // Symmetric with a dominant diagonal: treat as SPD; CG is optimal.
        Algorithm::CG
    } else if props.density < 0.01 && props.n > 1000 {
        // Large and very sparse: forward push suits PPR-style problems.
        Algorithm::ForwardPush
    } else if props.is_symmetric {
        // Default for symmetric systems.
        Algorithm::CG
    } else {
        // Default for everything else.
        Algorithm::Neumann
    }
}
// ---------------------------------------------------------------------------
// Inline solvers for e2e benchmarking
// ---------------------------------------------------------------------------
/// Neumann series (Richardson iteration) solver.
#[inline(never)]
fn neumann_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;
    let mut x = vec![0.0f32; n];
    let mut buf = vec![0.0f32; n];
    let mut iters = 0;
    let mut norm = f64::MAX;
    for step in 0..max_iter {
        // buf <- b - A x (current residual).
        matrix.spmv(&x, &mut buf);
        for i in 0..n {
            buf[i] = rhs[i] - buf[i];
        }
        // L2 norm of the residual, accumulated in f64.
        norm = buf
            .iter()
            .fold(0.0f64, |acc, &v| acc + (v as f64) * (v as f64))
            .sqrt();
        iters = step + 1;
        if norm < tolerance {
            break;
        }
        // Richardson step with unit relaxation: x <- x + residual.
        for i in 0..n {
            x[i] += buf[i];
        }
    }
    (x, iters, norm)
}
/// Conjugate gradient solver.
///
/// Returns `(solution, iterations_performed, final_residual_l2_norm)`.
/// The reported norm now always reflects the residual at the point the loop
/// stopped; previously it was `rs_old.sqrt()`, which lags one iteration
/// behind on an early (converged or breakdown) exit.
#[inline(never)]
fn cg_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;
    let mut x = vec![0.0f32; n];
    let mut r = rhs.to_vec();
    let mut p = r.clone();
    let mut ap = vec![0.0f32; n];
    // rs_old = r^T r, accumulated in f64 for numerical robustness.
    let mut rs_old: f64 = r.iter().map(|&v| (v as f64) * (v as f64)).sum();
    // Squared norm of the *current* residual, kept for reporting.
    let mut residual_sq = rs_old;
    let mut iterations = 0;
    for k in 0..max_iter {
        matrix.spmv(&p, &mut ap);
        let p_ap: f64 = p
            .iter()
            .zip(ap.iter())
            .map(|(&pi, &api)| (pi as f64) * (api as f64))
            .sum();
        // Breakdown guard: near-zero curvature would make alpha explode.
        if p_ap.abs() < 1e-30 {
            iterations = k + 1;
            break;
        }
        let alpha = rs_old / p_ap;
        for i in 0..n {
            x[i] += (alpha as f32) * p[i];
            r[i] -= (alpha as f32) * ap[i];
        }
        let rs_new: f64 = r.iter().map(|&v| (v as f64) * (v as f64)).sum();
        residual_sq = rs_new;
        iterations = k + 1;
        if rs_new.sqrt() < tolerance {
            break;
        }
        let beta = rs_new / rs_old;
        for i in 0..n {
            p[i] = r[i] + (beta as f32) * p[i];
        }
        rs_old = rs_new;
    }
    // Fix: report the up-to-date residual; `rs_old` is stale after an early
    // break because `beta` still needs the previous value.
    (x, iterations, residual_sq.sqrt())
}
/// Full orchestrated solve: analyze the matrix, pick an algorithm, run it.
///
/// Returns the solve result plus the algorithm that was selected.
#[inline(never)]
fn orchestrator_solve_impl(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64, Algorithm) {
    let props = analyze_matrix(matrix);
    let chosen = select_algorithm(&props, tolerance);
    // Dispatch; algorithms without an inline implementation fall back to CG.
    let (x, iters, resid) = if matches!(chosen, Algorithm::Neumann) {
        neumann_solve(matrix, rhs, tolerance, max_iter)
    } else {
        cg_solve(matrix, rhs, tolerance, max_iter)
    };
    (x, iters, resid, chosen)
}
// ---------------------------------------------------------------------------
// Benchmark: router overhead (analyze + select, no solve)
// ---------------------------------------------------------------------------
/// Measure the cost of matrix analysis plus algorithm selection alone.
fn router_overhead(c: &mut Criterion) {
    let mut group = c.benchmark_group("router_overhead");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    for &n in [100usize, 1000, 10_000].iter() {
        let density = if n <= 1000 { 0.02 } else { 0.005 };
        let matrix = diag_dominant_csr(n, density, 42);
        group.throughput(Throughput::Elements(n as u64));
        group.bench_with_input(BenchmarkId::new("analyze_and_route", n), &n, |b, _| {
            b.iter(|| {
                let props = analyze_matrix(criterion::black_box(&matrix));
                select_algorithm(criterion::black_box(&props), 1e-6)
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: full orchestrated solve (end-to-end)
// ---------------------------------------------------------------------------
/// Measure the full end-to-end path: routing, dispatch, and solve.
fn orchestrator_solve(c: &mut Criterion) {
    let mut group = c.benchmark_group("orchestrator_solve");
    group.warm_up_time(Duration::from_secs(3));
    for &n in [100usize, 500, 1000, 5000].iter() {
        // Sparser matrices at larger n keep total runtime manageable.
        let density = if n <= 1000 { 0.02 } else { 0.005 };
        let matrix = diag_dominant_csr(n, density, 42);
        let rhs = random_vector(n, 43);
        group.sample_size(if n >= 5000 { 20 } else { 100 });
        group.throughput(Throughput::Elements(matrix.nnz() as u64));
        group.bench_with_input(BenchmarkId::new("e2e", n), &n, |b, _| {
            b.iter(|| {
                orchestrator_solve_impl(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: routing overhead as fraction of total solve time
// ---------------------------------------------------------------------------
/// Contrast routing-only, solve-only, and routed-solve timings so routing
/// overhead can be read off as a fraction of total solve time.
fn routing_fraction(c: &mut Criterion) {
    let mut group = c.benchmark_group("routing_fraction");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    let n = 1000;
    let density = 0.02;
    let matrix = diag_dominant_csr(n, density, 42);
    let rhs = random_vector(n, 43);
    let tol = 1e-6;
    let max_it = 5000;
    // Routing only: analysis + selection, no solve.
    group.bench_function("route_only", |b| {
        b.iter(|| {
            let props = analyze_matrix(criterion::black_box(&matrix));
            select_algorithm(criterion::black_box(&props), tol)
        });
    });
    // Solve only: CG directly, skipping the router.
    group.bench_function("solve_only_cg", |b| {
        b.iter(|| {
            cg_solve(
                criterion::black_box(&matrix),
                criterion::black_box(&rhs),
                tol,
                max_it,
            )
        });
    });
    // Full pipeline: route, then solve.
    group.bench_function("e2e_routed", |b| {
        b.iter(|| {
            orchestrator_solve_impl(
                criterion::black_box(&matrix),
                criterion::black_box(&rhs),
                tol,
                max_it,
            )
        });
    });
    group.finish();
}
// Register the end-to-end benchmark functions and generate the harness `main`.
criterion_group!(e2e, router_overhead, orchestrator_solve, routing_fraction);
criterion_main!(e2e);

View File

@@ -0,0 +1,313 @@
//! Benchmarks for the Neumann series solver.
//!
//! The Neumann series approximates `(I - M)^{-1} b = sum_{k=0}^{K} M^k b`
//! and converges when the spectral radius of `M` is less than 1. These
//! benchmarks measure convergence rate vs tolerance, scaling behaviour, and
//! crossover against dense direct solves.
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::time::Duration;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use ruvector_solver::types::CsrMatrix;
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Build a diagonally dominant CSR matrix suitable for Neumann iteration.
///
/// The iteration matrix `M = I - D^{-1} A` has spectral radius < 1 when `A`
/// is strictly diagonally dominant. Each diagonal entry is therefore set to
/// the absolute sum of its row's off-diagonals plus 1.0.
fn diag_dominant_csr(n: usize, density: f64, seed: u64) -> CsrMatrix<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut entries: Vec<(usize, usize, f32)> = Vec::new();
    // Symmetric off-diagonals sampled from the strict upper triangle.
    for row in 0..n {
        for col in (row + 1)..n {
            if rng.gen::<f64>() >= density {
                continue;
            }
            let v: f32 = rng.gen_range(-0.3..0.3);
            entries.push((row, col, v));
            entries.push((col, row, v));
        }
    }
    // Per-row absolute sums feed the dominant diagonal.
    let mut abs_sum = vec![0.0f32; n];
    for &(row, _, v) in entries.iter() {
        abs_sum[row] += v.abs();
    }
    for (i, s) in abs_sum.iter().enumerate() {
        entries.push((i, i, s + 1.0));
    }
    CsrMatrix::<f32>::from_coo(n, n, entries)
}
/// Deterministic pseudo-random vector of length `n` with entries in [-1, 1).
fn random_vector(n: usize, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut out = Vec::with_capacity(n);
    for _ in 0..n {
        out.push(rng.gen_range(-1.0..1.0));
    }
    out
}
// ---------------------------------------------------------------------------
// Inline Neumann series solver for benchmarking
// ---------------------------------------------------------------------------
/// Neumann series iteration: `x_{k+1} = x_k + (b - A * x_k)`.
///
/// Equivalent to Richardson iteration with omega = 1 for a diagonally
/// dominant system; inlined here so the benchmark does not depend on the
/// (currently stub) neumann module.
#[inline(never)]
fn neumann_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;
    let mut x = vec![0.0f32; n];
    let mut buf = vec![0.0f32; n];
    let mut iters = 0;
    let mut norm = f64::MAX;
    for step in 0..max_iter {
        // buf <- b - A x (current residual).
        matrix.spmv(&x, &mut buf);
        for i in 0..n {
            buf[i] = rhs[i] - buf[i];
        }
        // L2 norm of the residual, accumulated in f64.
        norm = buf
            .iter()
            .fold(0.0f64, |acc, &v| acc + (v as f64) * (v as f64))
            .sqrt();
        iters = step + 1;
        if norm < tolerance {
            break;
        }
        // Richardson step with unit relaxation: x <- x + residual.
        for i in 0..n {
            x[i] += buf[i];
        }
    }
    (x, iters, norm)
}
// ---------------------------------------------------------------------------
// Benchmark: convergence vs tolerance
// ---------------------------------------------------------------------------
/// Measure how solve time grows as the convergence tolerance tightens.
fn neumann_convergence(c: &mut Criterion) {
    let mut group = c.benchmark_group("neumann_convergence");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    let n = 500;
    let matrix = diag_dominant_csr(n, 0.02, 42);
    let rhs = random_vector(n, 43);
    for &tol in [1e-2f64, 1e-4, 1e-6].iter() {
        let label = format!("eps_{:.0e}", tol);
        group.bench_with_input(BenchmarkId::new(&label, n), &tol, |b, &eps| {
            b.iter(|| {
                neumann_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    eps,
                    5000,
                )
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: scaling with problem size
// ---------------------------------------------------------------------------
/// Benchmark Neumann runtime as the problem size grows.
fn neumann_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("neumann_scaling");
    group.warm_up_time(Duration::from_secs(3));
    for &n in [100usize, 1000, 10_000].iter() {
        // Sparser matrices at larger sizes keep total runtime reasonable.
        let density = if n <= 1000 { 0.02 } else { 0.005 };
        let matrix = diag_dominant_csr(n, density, 42);
        let rhs = random_vector(n, 43);
        group.sample_size(if n >= 10_000 { 20 } else { 100 });
        group.throughput(Throughput::Elements(matrix.nnz() as u64));
        group.bench_with_input(BenchmarkId::new("n", n), &n, |b, _| {
            b.iter(|| {
                neumann_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-4,
                    5000,
                )
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: Neumann vs dense direct solve crossover
// ---------------------------------------------------------------------------
/// Naive dense direct solve via Gaussian elimination with partial pivoting.
///
/// Intentionally unoptimized: it stands in for a "no-library" dense
/// baseline. Arithmetic is done in f64 on an augmented matrix; singular
/// columns are skipped and zero diagonals produce 0.0 in that component.
#[inline(never)]
fn dense_direct_solve(a: &[f32], b: &[f32], n: usize) -> Vec<f32> {
    let width = n + 1;
    // Augmented system [A | b], promoted to f64 for the elimination.
    let mut m = vec![0.0f64; n * width];
    for row in 0..n {
        for col in 0..n {
            m[row * width + col] = a[row * n + col] as f64;
        }
        m[row * width + n] = b[row] as f64;
    }
    // Forward elimination with partial pivoting.
    for col in 0..n {
        // Locate the largest |entry| in this column at or below the diagonal.
        let mut pivot_row = col;
        let mut pivot_mag = m[col * width + col].abs();
        for row in (col + 1)..n {
            let mag = m[row * width + col].abs();
            if mag > pivot_mag {
                pivot_mag = mag;
                pivot_row = row;
            }
        }
        // Bring the pivot row into place.
        if pivot_row != col {
            for j in 0..=n {
                m.swap(col * width + j, pivot_row * width + j);
            }
        }
        let pivot = m[col * width + col];
        // Numerically singular column: skip its elimination step.
        if pivot.abs() < 1e-15 {
            continue;
        }
        for row in (col + 1)..n {
            let factor = m[row * width + col] / pivot;
            for j in col..=n {
                let above = m[col * width + j];
                m[row * width + j] -= factor * above;
            }
        }
    }
    // Back substitution, guarding against zero diagonals.
    let mut solution = vec![0.0f64; n];
    for row in (0..n).rev() {
        let mut acc = m[row * width + n];
        for j in (row + 1)..n {
            acc -= m[row * width + j] * solution[j];
        }
        let d = m[row * width + row];
        solution[row] = if d.abs() > 1e-15 { acc / d } else { 0.0 };
    }
    solution.into_iter().map(|v| v as f32).collect()
}
/// Dense (row-major) counterpart of the diagonally dominant test matrix.
fn diag_dominant_dense(n: usize, density: f64, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut a = vec![0.0f32; n * n];
    // Symmetric off-diagonal entries, sampled from the strict upper triangle.
    for i in 0..n {
        for j in (i + 1)..n {
            if rng.gen::<f64>() >= density {
                continue;
            }
            let v: f32 = rng.gen_range(-0.3..0.3);
            a[i * n + j] = v;
            a[j * n + i] = v;
        }
    }
    // Diagonal = off-diagonal row sum + 1 for strict diagonal dominance.
    for i in 0..n {
        let mut off_sum = 0.0f32;
        for j in 0..n {
            if j != i {
                off_sum += a[i * n + j].abs();
            }
        }
        a[i * n + i] = off_sum + 1.0;
    }
    a
}
/// Crossover study: iterative sparse Neumann vs dense direct elimination.
///
/// Dense direct is expected to win at small n; sparse Neumann should
/// overtake as the dimension grows.
fn neumann_vs_dense(c: &mut Criterion) {
    let mut group = c.benchmark_group("neumann_vs_dense");
    group.warm_up_time(Duration::from_secs(3));
    for &n in [50usize, 100, 200, 500].iter() {
        let density = 0.05;
        let rhs = random_vector(n, 43);
        group.sample_size(if n >= 500 { 20 } else { 100 });
        // Iterative solve on the sparse representation.
        let sparse = diag_dominant_csr(n, density, 42);
        group.bench_with_input(BenchmarkId::new("neumann_sparse", n), &n, |b, _| {
            b.iter(|| {
                neumann_solve(
                    criterion::black_box(&sparse),
                    criterion::black_box(&rhs),
                    1e-4,
                    5000,
                )
            });
        });
        // Direct elimination on the dense representation.
        let dense = diag_dominant_dense(n, density, 42);
        group.bench_with_input(BenchmarkId::new("dense_direct", n), &n, |b, _| {
            b.iter(|| {
                dense_direct_solve(
                    criterion::black_box(&dense),
                    criterion::black_box(&rhs),
                    n,
                )
            });
        });
    }
    group.finish();
}
// Register the Neumann benchmark functions and generate the harness `main`.
criterion_group!(
    neumann,
    neumann_convergence,
    neumann_scaling,
    neumann_vs_dense
);
criterion_main!(neumann);

View File

@@ -0,0 +1,222 @@
//! Benchmarks for the forward push algorithm (Andersen-Chung-Lang).
//!
//! Forward push computes approximate Personalized PageRank (PPR) vectors in
//! sublinear time. These benchmarks measure scaling with graph size and the
//! effect of tolerance on the number of push operations.
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::collections::VecDeque;
use std::time::Duration;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use ruvector_solver::types::CsrMatrix;
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Build a random sparse graph as a CSR matrix suitable for PageRank.
///
/// Each entry `A[i][j]` is the transition probability from node `i` to
/// node `j`; every row sums to 1 (row-stochastic). Each node receives a
/// random out-degree of roughly `avg_degree` distinct neighbours.
///
/// Fix over the previous version: neighbours are sampled without
/// replacement via rejection, so the drawn degree is met exactly. The old
/// sample-then-`dedup` approach silently dropped duplicate draws, biasing
/// the actual average degree below `avg_degree`.
///
/// `seed` makes the graph deterministic across benchmark runs. Requires
/// `n >= 2` (as before: a single-node graph cannot avoid self-loops).
fn random_graph_csr(n: usize, avg_degree: usize, seed: u64) -> CsrMatrix<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut entries: Vec<(usize, usize, f32)> = Vec::new();
    for i in 0..n {
        // Randomize degree in [0.5, 1.5) * avg_degree, clamped to a valid range.
        let degree = (avg_degree as f64 * (0.5 + rng.gen::<f64>())) as usize;
        let degree = degree.max(1).min(n - 1);
        // Rejection-sample `degree` distinct neighbours, excluding self-loops.
        // A linear `contains` scan is fine: degree is small relative to n here.
        let mut neighbours: Vec<usize> = Vec::with_capacity(degree);
        while neighbours.len() < degree {
            let j = rng.gen_range(0..n);
            if j != i && !neighbours.contains(&j) {
                neighbours.push(j);
            }
        }
        neighbours.sort_unstable();
        // Row-stochastic weights: uniform over this node's out-edges.
        let weight = 1.0 / neighbours.len() as f32;
        for &j in &neighbours {
            entries.push((i, j, weight));
        }
    }
    CsrMatrix::<f32>::from_coo(n, n, entries)
}
// ---------------------------------------------------------------------------
// Inline forward push for benchmarking
// ---------------------------------------------------------------------------
/// Forward push (Andersen–Chung–Lang) for approximate Personalized PageRank.
///
/// Starting from unit residual mass on `source`, repeatedly pops a node
/// from a FIFO work queue, absorbs an `alpha` fraction of its residual into
/// the estimate, and spreads the remaining `(1 - alpha)` fraction across
/// its out-edges. A node is (re-)enqueued whenever its residual magnitude
/// reaches `tolerance`; dangling nodes teleport their mass back to `source`.
///
/// Returns `(estimate, residual, num_pushes)`.
#[inline(never)]
fn forward_push(
    matrix: &CsrMatrix<f32>,
    source: usize,
    alpha: f32,
    tolerance: f32,
) -> (Vec<f32>, Vec<f32>, usize) {
    let n = matrix.rows;
    let mut estimate = vec![0.0f32; n];
    let mut residual = vec![0.0f32; n];
    let mut in_queue = vec![false; n];
    let mut queue: VecDeque<usize> = VecDeque::new();
    residual[source] = 1.0;
    in_queue[source] = true;
    queue.push_back(source);
    let mut num_pushes = 0usize;
    while let Some(node) = queue.pop_front() {
        in_queue[node] = false;
        let mass = residual[node];
        // The residual may have been consumed since this node was enqueued.
        if mass.abs() < tolerance {
            continue;
        }
        num_pushes += 1;
        estimate[node] += alpha * mass;
        residual[node] = 0.0;
        let spill = (1.0 - alpha) * mass;
        let start = matrix.row_ptr[node];
        let end = matrix.row_ptr[node + 1];
        if start == end {
            // Dangling node (no out-edges): teleport the mass back to source.
            residual[source] += spill;
            if residual[source].abs() >= tolerance && !in_queue[source] {
                in_queue[source] = true;
                queue.push_back(source);
            }
        } else {
            // Spread the remaining mass along out-edges, enqueueing any
            // neighbour whose residual crosses the threshold.
            for idx in start..end {
                let neighbour = matrix.col_indices[idx];
                residual[neighbour] += spill * matrix.values[idx];
                if residual[neighbour].abs() >= tolerance && !in_queue[neighbour] {
                    in_queue[neighbour] = true;
                    queue.push_back(neighbour);
                }
            }
        }
    }
    (estimate, residual, num_pushes)
}
// ---------------------------------------------------------------------------
// Benchmark: forward push scaling with graph size
// ---------------------------------------------------------------------------
/// Measure forward push runtime as the node count grows with degree fixed.
fn forward_push_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("forward_push_scaling");
    group.warm_up_time(Duration::from_secs(3));
    let alpha = 0.15f32;
    let tolerance = 1e-4f32;
    for &n in &[100, 1000, 10_000, 100_000] {
        let graph = random_graph_csr(n, 10, 42);
        // Scale the sample count down on the larger graphs.
        let samples = match n {
            _ if n >= 100_000 => 10,
            _ if n >= 10_000 => 20,
            _ => 100,
        };
        group.sample_size(samples);
        group.throughput(Throughput::Elements(n as u64));
        group.bench_with_input(BenchmarkId::new("n", n), &n, |b, _| {
            // Source node 0 is arbitrary but fixed for reproducibility.
            b.iter(|| forward_push(criterion::black_box(&graph), 0, alpha, tolerance))
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: forward push tolerance sensitivity
// ---------------------------------------------------------------------------
/// Measure how the runtime reacts to the residual tolerance on a fixed graph.
fn forward_push_tolerance(c: &mut Criterion) {
    let mut group = c.benchmark_group("forward_push_tolerance");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    let n = 10_000;
    let alpha = 0.15f32;
    let graph = random_graph_csr(n, 10, 42);
    for &tol in &[1e-2f32, 1e-4, 1e-6] {
        let label = format!("eps_{:.0e}", tol);
        group.bench_with_input(BenchmarkId::new(&label, n), &tol, |b, &eps| {
            b.iter(|| forward_push(criterion::black_box(&graph), 0, alpha, eps))
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: forward push with varying graph density
// ---------------------------------------------------------------------------
/// Measure forward push throughput (per non-zero) as average degree varies.
fn forward_push_density(c: &mut Criterion) {
    let mut group = c.benchmark_group("forward_push_density");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(50);
    let n = 10_000;
    let alpha = 0.15f32;
    let tolerance = 1e-4f32;
    for &deg in &[5, 10, 20, 50] {
        let graph = random_graph_csr(n, deg, 42);
        // Report throughput per non-zero so denser graphs are comparable.
        group.throughput(Throughput::Elements(graph.nnz() as u64));
        let label = format!("deg_{}", deg);
        group.bench_with_input(BenchmarkId::new(&label, n), &deg, |b, _| {
            b.iter(|| forward_push(criterion::black_box(&graph), 0, alpha, tolerance))
        });
    }
    group.finish();
}
// Register the forward-push benchmark functions under one group and emit
// the Criterion `main` entry point for this benchmark binary.
criterion_group!(
    push,
    forward_push_scaling,
    forward_push_tolerance,
    forward_push_density
);
criterion_main!(push);