Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,193 @@
//! Baseline benchmarks for dense and sparse matrix-vector operations.
//!
//! These benchmarks establish performance baselines for the core linear algebra
//! primitives used throughout the solver crate: naive dense matrix-vector
//! multiply and CSR sparse matrix-vector multiply (SpMV).
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::time::Duration;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use ruvector_solver::types::CsrMatrix;
// ---------------------------------------------------------------------------
// Helpers: deterministic random data generation
// ---------------------------------------------------------------------------
/// Row-major dense `rows x cols` matrix filled with seeded uniform values.
///
/// A fixed seed keeps benchmark inputs identical from run to run.
fn random_dense_matrix(rows: usize, cols: usize, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let total = rows * cols;
    let mut data = Vec::with_capacity(total);
    for _ in 0..total {
        data.push(rng.gen_range(-1.0..1.0));
    }
    data
}
/// Random square CSR matrix with roughly `density` fraction of non-zeros.
///
/// The strict upper triangle is sampled entry-by-entry with probability
/// `density` and mirrored into the lower triangle (symmetry). Every
/// diagonal entry is set to its row's absolute off-diagonal sum plus 1,
/// giving strict diagonal dominance for solver stability.
fn random_csr_matrix(n: usize, density: f64, seed: u64) -> CsrMatrix<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut entries: Vec<(usize, usize, f32)> = Vec::new();
    // Symmetric off-diagonal entries.
    for row in 0..n {
        for col in (row + 1)..n {
            if rng.gen::<f64>() >= density {
                continue;
            }
            let v: f32 = rng.gen_range(-0.5..0.5);
            entries.push((row, col, v));
            entries.push((col, row, v));
        }
    }
    // Per-row absolute sums drive the diagonal values.
    let mut abs_sum = vec![0.0f32; n];
    for &(row, _, v) in entries.iter() {
        abs_sum[row] += v.abs();
    }
    // Strictly dominant diagonal: |a_ii| exceeds the row sum by 1.
    for (i, s) in abs_sum.iter().enumerate() {
        entries.push((i, i, s + 1.0));
    }
    CsrMatrix::<f32>::from_coo(n, n, entries)
}
/// Deterministic pseudo-random vector of length `n` with entries in [-1, 1).
fn random_vector(n: usize, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut out = Vec::with_capacity(n);
    for _ in 0..n {
        out.push(rng.gen_range(-1.0..1.0));
    }
    out
}
// ---------------------------------------------------------------------------
// Dense matrix-vector multiply (naive baseline)
// ---------------------------------------------------------------------------
/// Naive dense matrix-vector multiply: `y = A * x`.
///
/// `a` is stored in row-major order with dimensions `rows x cols`.
///
/// # Panics
/// Panics if `a`, `x`, or `y` is shorter than the stated dimensions require
/// (same as the original indexed version).
#[inline(never)]
fn dense_matvec(a: &[f32], x: &[f32], y: &mut [f32], rows: usize, cols: usize) {
    // Degenerate width: every row dot-product is empty (chunks_exact would
    // panic on a zero chunk size, so handle it explicitly).
    if cols == 0 {
        y[..rows].fill(0.0);
        return;
    }
    // Hoist the bounds checks: take exact sub-slices once so the inner loop
    // is bounds-check free and can auto-vectorize.
    let a = &a[..rows * cols];
    let x = &x[..cols];
    let y = &mut y[..rows];
    for (yi, row) in y.iter_mut().zip(a.chunks_exact(cols)) {
        // Dot product of one matrix row with x; same left-to-right f32
        // accumulation order as the original loop.
        *yi = row.iter().zip(x).map(|(&aij, &xj)| aij * xj).sum();
    }
}
/// Measure the naive dense matvec across several square sizes.
fn dense_matvec_baseline(c: &mut Criterion) {
    let mut group = c.benchmark_group("dense_matvec");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    for &dim in [64usize, 256, 1024, 4096].iter() {
        let matrix = random_dense_matrix(dim, dim, 42);
        let vector = random_vector(dim, 43);
        let mut out = vec![0.0f32; dim];
        // Throughput counted as matrix elements touched per multiply.
        group.throughput(Throughput::Elements((dim * dim) as u64));
        group.bench_with_input(BenchmarkId::new("naive", dim), &dim, |b, &n| {
            b.iter(|| {
                dense_matvec(
                    criterion::black_box(&matrix),
                    criterion::black_box(&vector),
                    criterion::black_box(&mut out),
                    n,
                    n,
                );
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Sparse matrix-vector multiply (CSR SpMV)
// ---------------------------------------------------------------------------
/// Measure CSR SpMV throughput at several sizes and densities.
fn sparse_spmv_baseline(c: &mut Criterion) {
    let mut group = c.benchmark_group("sparse_spmv");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    for &(n, density) in [(1000usize, 0.01f64), (1000, 0.05), (10_000, 0.01)].iter() {
        let matrix = random_csr_matrix(n, density, 44);
        let xs = random_vector(n, 45);
        let mut ys = vec![0.0f32; n];
        let label = format!("{}x{}_{:.0}pct", n, n, density * 100.0);
        // Throughput counted in stored non-zeros processed per multiply.
        group.throughput(Throughput::Elements(matrix.nnz() as u64));
        group.bench_with_input(BenchmarkId::new(&label, n), &n, |b, _| {
            b.iter(|| {
                matrix.spmv(criterion::black_box(&xs), criterion::black_box(&mut ys));
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Dense vs sparse crossover
// ---------------------------------------------------------------------------
/// Compare dense and 5%-dense sparse matvec at identical dimensions to
/// locate the crossover point where sparse becomes faster.
fn dense_vs_sparse_crossover(c: &mut Criterion) {
    let mut group = c.benchmark_group("dense_vs_sparse_crossover");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    for &dim in [64usize, 128, 256, 512, 1024].iter() {
        let density = 0.05;
        // Dense side.
        let dense = random_dense_matrix(dim, dim, 42);
        let xs = random_vector(dim, 43);
        let mut dense_out = vec![0.0f32; dim];
        group.throughput(Throughput::Elements((dim * dim) as u64));
        group.bench_with_input(BenchmarkId::new("dense", dim), &dim, |b, &n| {
            b.iter(|| {
                dense_matvec(
                    criterion::black_box(&dense),
                    criterion::black_box(&xs),
                    criterion::black_box(&mut dense_out),
                    n,
                    n,
                );
            });
        });
        // Sparse side at the same dimension.
        let sparse = random_csr_matrix(dim, density, 44);
        let mut sparse_out = vec![0.0f32; dim];
        group.bench_with_input(BenchmarkId::new("sparse_5pct", dim), &dim, |b, _| {
            b.iter(|| {
                sparse.spmv(
                    criterion::black_box(&xs),
                    criterion::black_box(&mut sparse_out),
                );
            });
        });
    }
    group.finish();
}
// Register the baseline benchmark functions and generate the harness `main`.
criterion_group!(
    baselines,
    dense_matvec_baseline,
    sparse_spmv_baseline,
    dense_vs_sparse_crossover
);
criterion_main!(baselines);

View File

@@ -0,0 +1,378 @@
//! Benchmarks for the Conjugate Gradient (CG) solver.
//!
//! CG is the method of choice for symmetric positive-definite (SPD) systems.
//! These benchmarks measure scaling behaviour, the effect of diagonal
//! preconditioning, and a head-to-head comparison with the Neumann series
//! solver.
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::time::Duration;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use ruvector_solver::types::CsrMatrix;
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Build a symmetric positive-definite (SPD) CSR matrix.
///
/// Samples the strict upper triangle with probability `density`, mirrors
/// each entry for symmetry, then sets `a_ii = sum_j |a_ij| + 1` so the
/// matrix is strictly diagonally dominant (hence SPD).
fn spd_csr_matrix(n: usize, density: f64, seed: u64) -> CsrMatrix<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut entries: Vec<(usize, usize, f32)> = Vec::new();
    for row in 0..n {
        for col in (row + 1)..n {
            if rng.gen::<f64>() >= density {
                continue;
            }
            let v: f32 = rng.gen_range(-0.3..0.3);
            entries.push((row, col, v));
            entries.push((col, row, v));
        }
    }
    // Per-row absolute sums feed the dominant diagonal.
    let mut abs_sum = vec![0.0f32; n];
    for &(row, _, v) in entries.iter() {
        abs_sum[row] += v.abs();
    }
    for (i, s) in abs_sum.iter().enumerate() {
        entries.push((i, i, s + 1.0));
    }
    CsrMatrix::<f32>::from_coo(n, n, entries)
}
/// Deterministic pseudo-random vector of length `n` with entries in [-1, 1).
fn random_vector(n: usize, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut out = Vec::with_capacity(n);
    for _ in 0..n {
        out.push(rng.gen_range(-1.0..1.0));
    }
    out
}
// ---------------------------------------------------------------------------
// Inline CG solver for benchmarking
// ---------------------------------------------------------------------------
/// Conjugate gradient solver for SPD systems `Ax = b`.
///
/// Textbook CG inlined here so the benchmark does not depend on the
/// (currently stub) cg module. Starts from `x_0 = 0`, so `r_0 = b`.
///
/// Returns `(solution, iterations_performed, final_residual_l2_norm)`.
/// The reported norm now always reflects the residual at the point the loop
/// stopped; previously it was `rs_old.sqrt()`, which lags one iteration
/// behind on an early (converged or breakdown) exit.
#[inline(never)]
fn cg_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;
    let mut x = vec![0.0f32; n];
    let mut r = rhs.to_vec(); // r_0 = b - A*x_0, with x_0 = 0 => r_0 = b
    let mut p = r.clone();
    let mut ap = vec![0.0f32; n];
    // rs_old = r^T r, accumulated in f64 for numerical robustness.
    let mut rs_old: f64 = r.iter().map(|&v| (v as f64) * (v as f64)).sum();
    // Squared norm of the *current* residual, kept for reporting.
    let mut residual_sq = rs_old;
    let mut iterations = 0;
    for k in 0..max_iter {
        // ap = A * p
        matrix.spmv(&p, &mut ap);
        // alpha = (r^T r) / (p^T A p)
        let p_ap: f64 = p
            .iter()
            .zip(ap.iter())
            .map(|(&pi, &api)| (pi as f64) * (api as f64))
            .sum();
        // Breakdown guard: near-zero curvature would make alpha explode.
        if p_ap.abs() < 1e-30 {
            iterations = k + 1;
            break;
        }
        let alpha = rs_old / p_ap;
        // x = x + alpha * p
        for i in 0..n {
            x[i] += (alpha as f32) * p[i];
        }
        // r = r - alpha * ap
        for i in 0..n {
            r[i] -= (alpha as f32) * ap[i];
        }
        let rs_new: f64 = r.iter().map(|&v| (v as f64) * (v as f64)).sum();
        residual_sq = rs_new;
        iterations = k + 1;
        if rs_new.sqrt() < tolerance {
            break;
        }
        // p = r + (rs_new / rs_old) * p
        let beta = rs_new / rs_old;
        for i in 0..n {
            p[i] = r[i] + (beta as f32) * p[i];
        }
        rs_old = rs_new;
    }
    // Fix: report the up-to-date residual; `rs_old` is stale after an early
    // break because `beta` still needs the previous value.
    (x, iterations, residual_sq.sqrt())
}
/// Diagonal-preconditioned CG solver.
///
/// Uses the Jacobi (diagonal) preconditioner: `M = diag(A)`.
/// Solves `M^{-1} A x = M^{-1} b` via the preconditioned CG algorithm.
///
/// Returns `(solution, iterations_performed, final_residual_l2_norm)`;
/// the norm is recomputed from `r` at the end so it reflects the final state.
#[inline(never)]
fn pcg_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;
    // Extract diagonal for preconditioner.
    // Rows with a missing or tiny diagonal fall back to 1.0 (identity),
    // leaving those components unpreconditioned rather than dividing by ~0.
    let mut diag_inv = vec![1.0f32; n];
    for i in 0..n {
        let start = matrix.row_ptr[i];
        let end = matrix.row_ptr[i + 1];
        for idx in start..end {
            if matrix.col_indices[idx] == i {
                let d = matrix.values[idx];
                diag_inv[i] = if d.abs() > 1e-12 { 1.0 / d } else { 1.0 };
                break;
            }
        }
    }
    let mut x = vec![0.0f32; n];
    // x_0 = 0, so r_0 = b.
    let mut r = rhs.to_vec();
    // z_0 = M^{-1} r_0 (elementwise scaling by the inverted diagonal).
    let mut z: Vec<f32> = r
        .iter()
        .zip(diag_inv.iter())
        .map(|(&ri, &di)| ri * di)
        .collect();
    let mut p = z.clone();
    let mut ap = vec![0.0f32; n];
    // rz_old = r^T z, accumulated in f64 for numerical robustness.
    let mut rz_old: f64 = r
        .iter()
        .zip(z.iter())
        .map(|(&ri, &zi)| (ri as f64) * (zi as f64))
        .sum();
    let mut iterations = 0;
    for k in 0..max_iter {
        matrix.spmv(&p, &mut ap);
        // p^T A p, the curvature term in the step length.
        let p_ap: f64 = p
            .iter()
            .zip(ap.iter())
            .map(|(&pi, &api)| (pi as f64) * (api as f64))
            .sum();
        // Breakdown guard: near-zero curvature would make alpha explode.
        if p_ap.abs() < 1e-30 {
            iterations = k + 1;
            break;
        }
        // alpha = (r^T z) / (p^T A p)
        let alpha = rz_old / p_ap;
        for i in 0..n {
            x[i] += (alpha as f32) * p[i];
            r[i] -= (alpha as f32) * ap[i];
        }
        // Convergence is checked on the true (unpreconditioned) residual norm.
        let residual_norm: f64 = r
            .iter()
            .map(|&v| (v as f64) * (v as f64))
            .sum::<f64>()
            .sqrt();
        iterations = k + 1;
        if residual_norm < tolerance {
            break;
        }
        // z = M^{-1} r
        for i in 0..n {
            z[i] = r[i] * diag_inv[i];
        }
        let rz_new: f64 = r
            .iter()
            .zip(z.iter())
            .map(|(&ri, &zi)| (ri as f64) * (zi as f64))
            .sum();
        // beta = (r_new^T z_new) / (r_old^T z_old) steers the next direction.
        let beta = rz_new / rz_old;
        for i in 0..n {
            p[i] = z[i] + (beta as f32) * p[i];
        }
        rz_old = rz_new;
    }
    // Recompute the final residual norm from r so the reported value is current.
    let residual_norm = r
        .iter()
        .map(|&v| (v as f64) * (v as f64))
        .sum::<f64>()
        .sqrt();
    (x, iterations, residual_norm)
}
/// Neumann series iteration (inlined for the comparison benchmark).
#[inline(never)]
fn neumann_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;
    let mut x = vec![0.0f32; n];
    let mut buf = vec![0.0f32; n];
    let mut iters = 0;
    let mut norm = f64::MAX;
    for step in 0..max_iter {
        // buf <- b - A x (current residual).
        matrix.spmv(&x, &mut buf);
        for i in 0..n {
            buf[i] = rhs[i] - buf[i];
        }
        // L2 norm of the residual, accumulated in f64.
        norm = buf
            .iter()
            .fold(0.0f64, |acc, &v| acc + (v as f64) * (v as f64))
            .sqrt();
        iters = step + 1;
        if norm < tolerance {
            break;
        }
        // Richardson step with unit relaxation: x <- x + residual.
        for i in 0..n {
            x[i] += buf[i];
        }
    }
    (x, iters, norm)
}
// ---------------------------------------------------------------------------
// Benchmark: CG scaling with problem size
// ---------------------------------------------------------------------------
/// Benchmark CG runtime as the problem size grows.
fn cg_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("cg_scaling");
    group.warm_up_time(Duration::from_secs(3));
    for &n in [100usize, 1000, 10_000].iter() {
        // Sparser matrices at larger n keep total runtime manageable.
        let density = if n <= 1000 { 0.02 } else { 0.005 };
        let matrix = spd_csr_matrix(n, density, 42);
        let rhs = random_vector(n, 43);
        // Fewer samples for the largest system.
        group.sample_size(if n >= 10_000 { 20 } else { 100 });
        group.throughput(Throughput::Elements(matrix.nnz() as u64));
        group.bench_with_input(BenchmarkId::new("n", n), &n, |b, _| {
            b.iter(|| {
                cg_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: with vs without diagonal preconditioner
// ---------------------------------------------------------------------------
/// Benchmark plain CG against Jacobi-preconditioned CG on identical systems.
fn cg_preconditioning(c: &mut Criterion) {
    let mut group = c.benchmark_group("cg_preconditioning");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    for &n in [500usize, 1000, 2000].iter() {
        let matrix = spd_csr_matrix(n, 0.02, 42);
        let rhs = random_vector(n, 43);
        group.bench_with_input(BenchmarkId::new("cg_plain", n), &n, |b, _| {
            b.iter(|| {
                cg_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
        group.bench_with_input(BenchmarkId::new("cg_diag_precond", n), &n, |b, _| {
            b.iter(|| {
                pcg_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: CG vs Neumann for same problem
// ---------------------------------------------------------------------------
/// Head-to-head comparison of CG and Neumann on the same SPD systems.
fn cg_vs_neumann(c: &mut Criterion) {
    let mut group = c.benchmark_group("cg_vs_neumann");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    for &n in [100usize, 500, 1000].iter() {
        let matrix = spd_csr_matrix(n, 0.02, 42);
        let rhs = random_vector(n, 43);
        group.bench_with_input(BenchmarkId::new("cg", n), &n, |b, _| {
            b.iter(|| {
                cg_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
        group.bench_with_input(BenchmarkId::new("neumann", n), &n, |b, _| {
            b.iter(|| {
                neumann_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
    }
    group.finish();
}
// Register the CG benchmark functions and generate the harness `main`.
criterion_group!(cg, cg_scaling, cg_preconditioning, cg_vs_neumann);
criterion_main!(cg);

View File

@@ -0,0 +1,390 @@
//! End-to-end benchmarks for the solver orchestration layer.
//!
//! These benchmarks measure the overhead of algorithm selection (routing) and
//! the full end-to-end solve path including routing, validation, solver
//! dispatch, and result construction.
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::time::Duration;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use ruvector_solver::types::{Algorithm, CsrMatrix};
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Build a strictly diagonally dominant, symmetric CSR matrix.
///
/// Off-diagonals are sampled from the strict upper triangle with
/// probability `density` and mirrored; each diagonal entry equals its
/// row's absolute off-diagonal sum plus 1.
fn diag_dominant_csr(n: usize, density: f64, seed: u64) -> CsrMatrix<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut entries: Vec<(usize, usize, f32)> = Vec::new();
    for row in 0..n {
        for col in (row + 1)..n {
            if rng.gen::<f64>() >= density {
                continue;
            }
            let v: f32 = rng.gen_range(-0.3..0.3);
            entries.push((row, col, v));
            entries.push((col, row, v));
        }
    }
    // Per-row absolute sums feed the dominant diagonal.
    let mut abs_sum = vec![0.0f32; n];
    for &(row, _, v) in entries.iter() {
        abs_sum[row] += v.abs();
    }
    for (i, s) in abs_sum.iter().enumerate() {
        entries.push((i, i, s + 1.0));
    }
    CsrMatrix::<f32>::from_coo(n, n, entries)
}
/// Deterministic pseudo-random vector of length `n` with entries in [-1, 1).
fn random_vector(n: usize, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut out = Vec::with_capacity(n);
    for _ in 0..n {
        out.push(rng.gen_range(-1.0..1.0));
    }
    out
}
// ---------------------------------------------------------------------------
// Inline algorithm router for benchmarking
// ---------------------------------------------------------------------------
/// Properties extracted from the matrix for routing decisions.
#[allow(dead_code)]
struct MatrixProperties {
    /// Number of rows of the matrix.
    n: usize,
    /// Count of stored non-zero entries.
    nnz: usize,
    /// nnz / n^2 — fraction of stored entries.
    density: f64,
    /// Whether the matrix appears symmetric (sampled check, first 100 rows).
    is_symmetric: bool,
    /// Largest number of stored entries in any single row.
    max_row_degree: usize,
    /// Average over sampled rows of |diag| / (sum of |off-diag|);
    /// rows with no off-diagonals contribute a fixed 10.0.
    diag_dominance_ratio: f64,
}
/// Analyze a CSR matrix to extract routing-relevant properties.
///
/// Symmetry and diagonal dominance are only *sampled* over the first
/// `min(n, 100)` rows to keep routing overhead low; for larger matrices
/// the results are heuristic rather than exact.
#[inline(never)]
fn analyze_matrix(matrix: &CsrMatrix<f32>) -> MatrixProperties {
    let n = matrix.rows;
    let nnz = matrix.nnz();
    let density = nnz as f64 / (n as f64 * n as f64);
    // Check symmetry (sample-based for large matrices).
    let sample_size = n.min(100);
    let mut is_symmetric = true;
    'outer: for i in 0..sample_size {
        let start = matrix.row_ptr[i];
        let end = matrix.row_ptr[i + 1];
        for idx in start..end {
            let j = matrix.col_indices[idx];
            if j == i {
                // Diagonal entries are trivially symmetric.
                continue;
            }
            // Check if (j, i) exists with the same value.
            let j_start = matrix.row_ptr[j];
            let j_end = matrix.row_ptr[j + 1];
            let mut found = false;
            for jidx in j_start..j_end {
                if matrix.col_indices[jidx] == i {
                    // Mirrored entry exists; values must agree within 1e-6.
                    if (matrix.values[jidx] - matrix.values[idx]).abs() > 1e-6 {
                        is_symmetric = false;
                        break 'outer;
                    }
                    found = true;
                    break;
                }
            }
            // No mirrored entry at all => structurally asymmetric.
            if !found {
                is_symmetric = false;
                break 'outer;
            }
        }
    }
    // Max row degree (exact, over all rows).
    let mut max_row_degree = 0;
    for i in 0..n {
        let deg = matrix.row_ptr[i + 1] - matrix.row_ptr[i];
        max_row_degree = max_row_degree.max(deg);
    }
    // Diagonal dominance ratio (sampled).
    let mut diag_dominance_ratio = 0.0;
    let check_rows = n.min(100);
    for i in 0..check_rows {
        let start = matrix.row_ptr[i];
        let end = matrix.row_ptr[i + 1];
        let mut diag = 0.0f32;
        let mut off_diag_sum = 0.0f32;
        for idx in start..end {
            if matrix.col_indices[idx] == i {
                diag = matrix.values[idx].abs();
            } else {
                off_diag_sum += matrix.values[idx].abs();
            }
        }
        if off_diag_sum > 0.0 {
            diag_dominance_ratio += (diag / off_diag_sum) as f64;
        } else {
            diag_dominance_ratio += 10.0; // Perfect dominance.
        }
    }
    // Average the per-row ratios over the sampled rows.
    diag_dominance_ratio /= check_rows as f64;
    MatrixProperties {
        n,
        nnz,
        density,
        is_symmetric,
        max_row_degree,
        diag_dominance_ratio,
    }
}
/// Select the best algorithm based on matrix properties.
#[inline(never)]
fn select_algorithm(props: &MatrixProperties, tolerance: f64) -> Algorithm {
    if props.diag_dominance_ratio > 2.0 && tolerance > 1e-8 {
        // Strong diagonal dominance: the Neumann series converges quickly.
        Algorithm::Neumann
    } else if props.is_symmetric && props.diag_dominance_ratio > 1.0 {
        // Symmetric with a dominant diagonal: treat as SPD; CG is optimal.
        Algorithm::CG
    } else if props.density < 0.01 && props.n > 1000 {
        // Large and very sparse: forward push suits PPR-style problems.
        Algorithm::ForwardPush
    } else if props.is_symmetric {
        // Default for symmetric systems.
        Algorithm::CG
    } else {
        // Default for everything else.
        Algorithm::Neumann
    }
}
// ---------------------------------------------------------------------------
// Inline solvers for e2e benchmarking
// ---------------------------------------------------------------------------
/// Neumann series (Richardson iteration) solver.
#[inline(never)]
fn neumann_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;
    let mut x = vec![0.0f32; n];
    let mut buf = vec![0.0f32; n];
    let mut iters = 0;
    let mut norm = f64::MAX;
    for step in 0..max_iter {
        // buf <- b - A x (current residual).
        matrix.spmv(&x, &mut buf);
        for i in 0..n {
            buf[i] = rhs[i] - buf[i];
        }
        // L2 norm of the residual, accumulated in f64.
        norm = buf
            .iter()
            .fold(0.0f64, |acc, &v| acc + (v as f64) * (v as f64))
            .sqrt();
        iters = step + 1;
        if norm < tolerance {
            break;
        }
        // Richardson step with unit relaxation: x <- x + residual.
        for i in 0..n {
            x[i] += buf[i];
        }
    }
    (x, iters, norm)
}
/// Conjugate gradient solver.
///
/// Returns `(solution, iterations_performed, final_residual_l2_norm)`.
/// The reported norm now always reflects the residual at the point the loop
/// stopped; previously it was `rs_old.sqrt()`, which lags one iteration
/// behind on an early (converged or breakdown) exit.
#[inline(never)]
fn cg_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;
    let mut x = vec![0.0f32; n];
    let mut r = rhs.to_vec();
    let mut p = r.clone();
    let mut ap = vec![0.0f32; n];
    // rs_old = r^T r, accumulated in f64 for numerical robustness.
    let mut rs_old: f64 = r.iter().map(|&v| (v as f64) * (v as f64)).sum();
    // Squared norm of the *current* residual, kept for reporting.
    let mut residual_sq = rs_old;
    let mut iterations = 0;
    for k in 0..max_iter {
        matrix.spmv(&p, &mut ap);
        let p_ap: f64 = p
            .iter()
            .zip(ap.iter())
            .map(|(&pi, &api)| (pi as f64) * (api as f64))
            .sum();
        // Breakdown guard: near-zero curvature would make alpha explode.
        if p_ap.abs() < 1e-30 {
            iterations = k + 1;
            break;
        }
        let alpha = rs_old / p_ap;
        for i in 0..n {
            x[i] += (alpha as f32) * p[i];
            r[i] -= (alpha as f32) * ap[i];
        }
        let rs_new: f64 = r.iter().map(|&v| (v as f64) * (v as f64)).sum();
        residual_sq = rs_new;
        iterations = k + 1;
        if rs_new.sqrt() < tolerance {
            break;
        }
        let beta = rs_new / rs_old;
        for i in 0..n {
            p[i] = r[i] + (beta as f32) * p[i];
        }
        rs_old = rs_new;
    }
    // Fix: report the up-to-date residual; `rs_old` is stale after an early
    // break because `beta` still needs the previous value.
    (x, iterations, residual_sq.sqrt())
}
/// Full orchestrated solve: analyze the matrix, pick an algorithm, run it.
///
/// Returns the solve result plus the algorithm that was selected.
#[inline(never)]
fn orchestrator_solve_impl(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64, Algorithm) {
    let props = analyze_matrix(matrix);
    let chosen = select_algorithm(&props, tolerance);
    // Dispatch; algorithms without an inline implementation fall back to CG.
    let (x, iters, resid) = if matches!(chosen, Algorithm::Neumann) {
        neumann_solve(matrix, rhs, tolerance, max_iter)
    } else {
        cg_solve(matrix, rhs, tolerance, max_iter)
    };
    (x, iters, resid, chosen)
}
// ---------------------------------------------------------------------------
// Benchmark: router overhead (analyze + select, no solve)
// ---------------------------------------------------------------------------
/// Measure the cost of matrix analysis plus algorithm selection alone.
fn router_overhead(c: &mut Criterion) {
    let mut group = c.benchmark_group("router_overhead");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    for &n in [100usize, 1000, 10_000].iter() {
        let density = if n <= 1000 { 0.02 } else { 0.005 };
        let matrix = diag_dominant_csr(n, density, 42);
        group.throughput(Throughput::Elements(n as u64));
        group.bench_with_input(BenchmarkId::new("analyze_and_route", n), &n, |b, _| {
            b.iter(|| {
                let props = analyze_matrix(criterion::black_box(&matrix));
                select_algorithm(criterion::black_box(&props), 1e-6)
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: full orchestrated solve (end-to-end)
// ---------------------------------------------------------------------------
/// Measure the full end-to-end path: routing, dispatch, and solve.
fn orchestrator_solve(c: &mut Criterion) {
    let mut group = c.benchmark_group("orchestrator_solve");
    group.warm_up_time(Duration::from_secs(3));
    for &n in [100usize, 500, 1000, 5000].iter() {
        // Sparser matrices at larger n keep total runtime manageable.
        let density = if n <= 1000 { 0.02 } else { 0.005 };
        let matrix = diag_dominant_csr(n, density, 42);
        let rhs = random_vector(n, 43);
        group.sample_size(if n >= 5000 { 20 } else { 100 });
        group.throughput(Throughput::Elements(matrix.nnz() as u64));
        group.bench_with_input(BenchmarkId::new("e2e", n), &n, |b, _| {
            b.iter(|| {
                orchestrator_solve_impl(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: routing overhead as fraction of total solve time
// ---------------------------------------------------------------------------
/// Contrast routing-only, solve-only, and routed-solve timings so routing
/// overhead can be read off as a fraction of total solve time.
fn routing_fraction(c: &mut Criterion) {
    let mut group = c.benchmark_group("routing_fraction");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    let n = 1000;
    let density = 0.02;
    let matrix = diag_dominant_csr(n, density, 42);
    let rhs = random_vector(n, 43);
    let tol = 1e-6;
    let max_it = 5000;
    // Routing only: analysis + selection, no solve.
    group.bench_function("route_only", |b| {
        b.iter(|| {
            let props = analyze_matrix(criterion::black_box(&matrix));
            select_algorithm(criterion::black_box(&props), tol)
        });
    });
    // Solve only: CG directly, skipping the router.
    group.bench_function("solve_only_cg", |b| {
        b.iter(|| {
            cg_solve(
                criterion::black_box(&matrix),
                criterion::black_box(&rhs),
                tol,
                max_it,
            )
        });
    });
    // Full pipeline: route, then solve.
    group.bench_function("e2e_routed", |b| {
        b.iter(|| {
            orchestrator_solve_impl(
                criterion::black_box(&matrix),
                criterion::black_box(&rhs),
                tol,
                max_it,
            )
        });
    });
    group.finish();
}
// Register the end-to-end benchmark functions and generate the harness `main`.
criterion_group!(e2e, router_overhead, orchestrator_solve, routing_fraction);
criterion_main!(e2e);

View File

@@ -0,0 +1,313 @@
//! Benchmarks for the Neumann series solver.
//!
//! The Neumann series approximates `(I - M)^{-1} b = sum_{k=0}^{K} M^k b`
//! and converges when the spectral radius of `M` is less than 1. These
//! benchmarks measure convergence rate vs tolerance, scaling behaviour, and
//! crossover against dense direct solves.
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::time::Duration;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use ruvector_solver::types::CsrMatrix;
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Build a diagonally dominant CSR matrix suitable for Neumann iteration.
///
/// The iteration matrix `M = I - D^{-1} A` has spectral radius < 1 when `A`
/// is strictly diagonally dominant. Each diagonal entry is therefore set to
/// the absolute sum of its row's off-diagonals plus 1.0.
fn diag_dominant_csr(n: usize, density: f64, seed: u64) -> CsrMatrix<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut entries: Vec<(usize, usize, f32)> = Vec::new();
    // Symmetric off-diagonals sampled from the strict upper triangle.
    for row in 0..n {
        for col in (row + 1)..n {
            if rng.gen::<f64>() >= density {
                continue;
            }
            let v: f32 = rng.gen_range(-0.3..0.3);
            entries.push((row, col, v));
            entries.push((col, row, v));
        }
    }
    // Per-row absolute sums feed the dominant diagonal.
    let mut abs_sum = vec![0.0f32; n];
    for &(row, _, v) in entries.iter() {
        abs_sum[row] += v.abs();
    }
    for (i, s) in abs_sum.iter().enumerate() {
        entries.push((i, i, s + 1.0));
    }
    CsrMatrix::<f32>::from_coo(n, n, entries)
}
/// Deterministic pseudo-random vector of length `n` with entries in [-1, 1).
fn random_vector(n: usize, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut out = Vec::with_capacity(n);
    for _ in 0..n {
        out.push(rng.gen_range(-1.0..1.0));
    }
    out
}
// ---------------------------------------------------------------------------
// Inline Neumann series solver for benchmarking
// ---------------------------------------------------------------------------
/// Neumann series iteration: `x_{k+1} = x_k + (b - A * x_k)`.
///
/// Equivalent to Richardson iteration with omega = 1 for a diagonally
/// dominant system; inlined here so the benchmark does not depend on the
/// (currently stub) neumann module.
#[inline(never)]
fn neumann_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;
    let mut x = vec![0.0f32; n];
    let mut buf = vec![0.0f32; n];
    let mut iters = 0;
    let mut norm = f64::MAX;
    for step in 0..max_iter {
        // buf <- b - A x (current residual).
        matrix.spmv(&x, &mut buf);
        for i in 0..n {
            buf[i] = rhs[i] - buf[i];
        }
        // L2 norm of the residual, accumulated in f64.
        norm = buf
            .iter()
            .fold(0.0f64, |acc, &v| acc + (v as f64) * (v as f64))
            .sqrt();
        iters = step + 1;
        if norm < tolerance {
            break;
        }
        // Richardson step with unit relaxation: x <- x + residual.
        for i in 0..n {
            x[i] += buf[i];
        }
    }
    (x, iters, norm)
}
// ---------------------------------------------------------------------------
// Benchmark: convergence vs tolerance
// ---------------------------------------------------------------------------
/// Measure how solve time grows as the convergence tolerance tightens.
fn neumann_convergence(c: &mut Criterion) {
    let mut group = c.benchmark_group("neumann_convergence");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    let n = 500;
    let matrix = diag_dominant_csr(n, 0.02, 42);
    let rhs = random_vector(n, 43);
    for &tol in [1e-2f64, 1e-4, 1e-6].iter() {
        let label = format!("eps_{:.0e}", tol);
        group.bench_with_input(BenchmarkId::new(&label, n), &tol, |b, &eps| {
            b.iter(|| {
                neumann_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    eps,
                    5000,
                )
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: scaling with problem size
// ---------------------------------------------------------------------------
/// Benchmark Neumann runtime as the problem size grows.
fn neumann_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("neumann_scaling");
    group.warm_up_time(Duration::from_secs(3));
    for &n in [100usize, 1000, 10_000].iter() {
        // Sparser matrices at larger sizes keep total runtime reasonable.
        let density = if n <= 1000 { 0.02 } else { 0.005 };
        let matrix = diag_dominant_csr(n, density, 42);
        let rhs = random_vector(n, 43);
        group.sample_size(if n >= 10_000 { 20 } else { 100 });
        group.throughput(Throughput::Elements(matrix.nnz() as u64));
        group.bench_with_input(BenchmarkId::new("n", n), &n, |b, _| {
            b.iter(|| {
                neumann_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-4,
                    5000,
                )
            });
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: Neumann vs dense direct solve crossover
// ---------------------------------------------------------------------------
/// Naive dense direct solve via Gaussian elimination with partial pivoting.
///
/// Intentionally unoptimized: it stands in for a "no-library" dense
/// baseline. Arithmetic is done in f64 on an augmented matrix; singular
/// columns are skipped and zero diagonals produce 0.0 in that component.
#[inline(never)]
fn dense_direct_solve(a: &[f32], b: &[f32], n: usize) -> Vec<f32> {
    let width = n + 1;
    // Augmented system [A | b], promoted to f64 for the elimination.
    let mut m = vec![0.0f64; n * width];
    for row in 0..n {
        for col in 0..n {
            m[row * width + col] = a[row * n + col] as f64;
        }
        m[row * width + n] = b[row] as f64;
    }
    // Forward elimination with partial pivoting.
    for col in 0..n {
        // Locate the largest |entry| in this column at or below the diagonal.
        let mut pivot_row = col;
        let mut pivot_mag = m[col * width + col].abs();
        for row in (col + 1)..n {
            let mag = m[row * width + col].abs();
            if mag > pivot_mag {
                pivot_mag = mag;
                pivot_row = row;
            }
        }
        // Bring the pivot row into place.
        if pivot_row != col {
            for j in 0..=n {
                m.swap(col * width + j, pivot_row * width + j);
            }
        }
        let pivot = m[col * width + col];
        // Numerically singular column: skip its elimination step.
        if pivot.abs() < 1e-15 {
            continue;
        }
        for row in (col + 1)..n {
            let factor = m[row * width + col] / pivot;
            for j in col..=n {
                let above = m[col * width + j];
                m[row * width + j] -= factor * above;
            }
        }
    }
    // Back substitution, guarding against zero diagonals.
    let mut solution = vec![0.0f64; n];
    for row in (0..n).rev() {
        let mut acc = m[row * width + n];
        for j in (row + 1)..n {
            acc -= m[row * width + j] * solution[j];
        }
        let d = m[row * width + row];
        solution[row] = if d.abs() > 1e-15 { acc / d } else { 0.0 };
    }
    solution.into_iter().map(|v| v as f32).collect()
}
/// Dense (row-major) counterpart of the diagonally dominant test matrix.
fn diag_dominant_dense(n: usize, density: f64, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut a = vec![0.0f32; n * n];
    // Symmetric off-diagonal entries, sampled from the strict upper triangle.
    for i in 0..n {
        for j in (i + 1)..n {
            if rng.gen::<f64>() >= density {
                continue;
            }
            let v: f32 = rng.gen_range(-0.3..0.3);
            a[i * n + j] = v;
            a[j * n + i] = v;
        }
    }
    // Diagonal = off-diagonal row sum + 1 for strict diagonal dominance.
    for i in 0..n {
        let mut off_sum = 0.0f32;
        for j in 0..n {
            if j != i {
                off_sum += a[i * n + j].abs();
            }
        }
        a[i * n + i] = off_sum + 1.0;
    }
    a
}
/// Crossover study: iterative sparse Neumann vs dense direct elimination.
///
/// Dense direct is expected to win at small n; sparse Neumann should
/// overtake as the dimension grows.
fn neumann_vs_dense(c: &mut Criterion) {
    let mut group = c.benchmark_group("neumann_vs_dense");
    group.warm_up_time(Duration::from_secs(3));
    for &n in [50usize, 100, 200, 500].iter() {
        let density = 0.05;
        let rhs = random_vector(n, 43);
        group.sample_size(if n >= 500 { 20 } else { 100 });
        // Iterative solve on the sparse representation.
        let sparse = diag_dominant_csr(n, density, 42);
        group.bench_with_input(BenchmarkId::new("neumann_sparse", n), &n, |b, _| {
            b.iter(|| {
                neumann_solve(
                    criterion::black_box(&sparse),
                    criterion::black_box(&rhs),
                    1e-4,
                    5000,
                )
            });
        });
        // Direct elimination on the dense representation.
        let dense = diag_dominant_dense(n, density, 42);
        group.bench_with_input(BenchmarkId::new("dense_direct", n), &n, |b, _| {
            b.iter(|| {
                dense_direct_solve(
                    criterion::black_box(&dense),
                    criterion::black_box(&rhs),
                    n,
                )
            });
        });
    }
    group.finish();
}
// Register the Neumann benchmark functions and generate the harness `main`.
criterion_group!(
    neumann,
    neumann_convergence,
    neumann_scaling,
    neumann_vs_dense
);
criterion_main!(neumann);

View File

@@ -0,0 +1,222 @@
//! Benchmarks for the forward push algorithm (Andersen-Chung-Lang).
//!
//! Forward push computes approximate Personalized PageRank (PPR) vectors in
//! sublinear time. These benchmarks measure scaling with graph size and the
//! effect of tolerance on the number of push operations.
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::collections::VecDeque;
use std::time::Duration;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use ruvector_solver::types::CsrMatrix;
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Build a random sparse graph as a CSR matrix suitable for PageRank.
///
/// Each entry `A[i][j]` is the transition probability from node `i` to
/// node `j`; every row sums to 1 (row-stochastic). Each node receives a
/// random out-degree of roughly `avg_degree` distinct neighbours.
///
/// Fix over the previous version: neighbours are sampled without
/// replacement via rejection, so the drawn degree is met exactly. The old
/// sample-then-`dedup` approach silently dropped duplicate draws, biasing
/// the actual average degree below `avg_degree`.
///
/// `seed` makes the graph deterministic across benchmark runs. Requires
/// `n >= 2` (as before: a single-node graph cannot avoid self-loops).
fn random_graph_csr(n: usize, avg_degree: usize, seed: u64) -> CsrMatrix<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut entries: Vec<(usize, usize, f32)> = Vec::new();
    for i in 0..n {
        // Randomize degree in [0.5, 1.5) * avg_degree, clamped to a valid range.
        let degree = (avg_degree as f64 * (0.5 + rng.gen::<f64>())) as usize;
        let degree = degree.max(1).min(n - 1);
        // Rejection-sample `degree` distinct neighbours, excluding self-loops.
        // A linear `contains` scan is fine: degree is small relative to n here.
        let mut neighbours: Vec<usize> = Vec::with_capacity(degree);
        while neighbours.len() < degree {
            let j = rng.gen_range(0..n);
            if j != i && !neighbours.contains(&j) {
                neighbours.push(j);
            }
        }
        neighbours.sort_unstable();
        // Row-stochastic weights: uniform over this node's out-edges.
        let weight = 1.0 / neighbours.len() as f32;
        for &j in &neighbours {
            entries.push((i, j, weight));
        }
    }
    CsrMatrix::<f32>::from_coo(n, n, entries)
}
// ---------------------------------------------------------------------------
// Inline forward push for benchmarking
// ---------------------------------------------------------------------------
/// Forward push (Andersen–Chung–Lang) for approximate Personalized PageRank.
///
/// Starting from unit residual mass on `source`, repeatedly pops a node
/// from a FIFO work queue, absorbs an `alpha` fraction of its residual into
/// the estimate, and spreads the remaining `(1 - alpha)` fraction across
/// its out-edges. A node is (re-)enqueued whenever its residual magnitude
/// reaches `tolerance`; dangling nodes teleport their mass back to `source`.
///
/// Returns `(estimate, residual, num_pushes)`.
#[inline(never)]
fn forward_push(
    matrix: &CsrMatrix<f32>,
    source: usize,
    alpha: f32,
    tolerance: f32,
) -> (Vec<f32>, Vec<f32>, usize) {
    let n = matrix.rows;
    let mut estimate = vec![0.0f32; n];
    let mut residual = vec![0.0f32; n];
    let mut in_queue = vec![false; n];
    let mut queue: VecDeque<usize> = VecDeque::new();
    residual[source] = 1.0;
    in_queue[source] = true;
    queue.push_back(source);
    let mut num_pushes = 0usize;
    while let Some(node) = queue.pop_front() {
        in_queue[node] = false;
        let mass = residual[node];
        // The residual may have been consumed since this node was enqueued.
        if mass.abs() < tolerance {
            continue;
        }
        num_pushes += 1;
        estimate[node] += alpha * mass;
        residual[node] = 0.0;
        let spill = (1.0 - alpha) * mass;
        let start = matrix.row_ptr[node];
        let end = matrix.row_ptr[node + 1];
        if start == end {
            // Dangling node (no out-edges): teleport the mass back to source.
            residual[source] += spill;
            if residual[source].abs() >= tolerance && !in_queue[source] {
                in_queue[source] = true;
                queue.push_back(source);
            }
        } else {
            // Spread the remaining mass along out-edges, enqueueing any
            // neighbour whose residual crosses the threshold.
            for idx in start..end {
                let neighbour = matrix.col_indices[idx];
                residual[neighbour] += spill * matrix.values[idx];
                if residual[neighbour].abs() >= tolerance && !in_queue[neighbour] {
                    in_queue[neighbour] = true;
                    queue.push_back(neighbour);
                }
            }
        }
    }
    (estimate, residual, num_pushes)
}
// ---------------------------------------------------------------------------
// Benchmark: forward push scaling with graph size
// ---------------------------------------------------------------------------
/// Measure forward push runtime as the node count grows with degree fixed.
fn forward_push_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("forward_push_scaling");
    group.warm_up_time(Duration::from_secs(3));
    let alpha = 0.15f32;
    let tolerance = 1e-4f32;
    for &n in &[100, 1000, 10_000, 100_000] {
        let graph = random_graph_csr(n, 10, 42);
        // Scale the sample count down on the larger graphs.
        let samples = match n {
            _ if n >= 100_000 => 10,
            _ if n >= 10_000 => 20,
            _ => 100,
        };
        group.sample_size(samples);
        group.throughput(Throughput::Elements(n as u64));
        group.bench_with_input(BenchmarkId::new("n", n), &n, |b, _| {
            // Source node 0 is arbitrary but fixed for reproducibility.
            b.iter(|| forward_push(criterion::black_box(&graph), 0, alpha, tolerance))
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: forward push tolerance sensitivity
// ---------------------------------------------------------------------------
/// Measure how the runtime reacts to the residual tolerance on a fixed graph.
fn forward_push_tolerance(c: &mut Criterion) {
    let mut group = c.benchmark_group("forward_push_tolerance");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);
    let n = 10_000;
    let alpha = 0.15f32;
    let graph = random_graph_csr(n, 10, 42);
    for &tol in &[1e-2f32, 1e-4, 1e-6] {
        let label = format!("eps_{:.0e}", tol);
        group.bench_with_input(BenchmarkId::new(&label, n), &tol, |b, &eps| {
            b.iter(|| forward_push(criterion::black_box(&graph), 0, alpha, eps))
        });
    }
    group.finish();
}
// ---------------------------------------------------------------------------
// Benchmark: forward push with varying graph density
// ---------------------------------------------------------------------------
/// Measure forward push throughput (per non-zero) as average degree varies.
fn forward_push_density(c: &mut Criterion) {
    let mut group = c.benchmark_group("forward_push_density");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(50);
    let n = 10_000;
    let alpha = 0.15f32;
    let tolerance = 1e-4f32;
    for &deg in &[5, 10, 20, 50] {
        let graph = random_graph_csr(n, deg, 42);
        // Report throughput per non-zero so denser graphs are comparable.
        group.throughput(Throughput::Elements(graph.nnz() as u64));
        let label = format!("deg_{}", deg);
        group.bench_with_input(BenchmarkId::new(&label, n), &deg, |b, _| {
            b.iter(|| forward_push(criterion::black_box(&graph), 0, alpha, tolerance))
        });
    }
    group.finish();
}
// Register the forward-push benchmark functions under one group and emit
// the Criterion `main` entry point for this benchmark binary.
criterion_group!(
    push,
    forward_push_scaling,
    forward_push_tolerance,
    forward_push_density
);
criterion_main!(push);