Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
193
vendor/ruvector/crates/ruvector-solver/benches/solver_baseline.rs
vendored
Normal file
193
vendor/ruvector/crates/ruvector-solver/benches/solver_baseline.rs
vendored
Normal file
@@ -0,0 +1,193 @@
|
||||
//! Baseline benchmarks for dense and sparse matrix-vector operations.
|
||||
//!
|
||||
//! These benchmarks establish performance baselines for the core linear algebra
|
||||
//! primitives used throughout the solver crate: naive dense matrix-vector
|
||||
//! multiply and CSR sparse matrix-vector multiply (SpMV).
|
||||
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||
use std::time::Duration;
|
||||
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
|
||||
use ruvector_solver::types::CsrMatrix;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers: deterministic random data generation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Generate a dense matrix stored as a flat row-major `Vec<f32>`.
|
||||
///
|
||||
/// Uses a deterministic seed so benchmark results are reproducible across runs.
|
||||
fn random_dense_matrix(rows: usize, cols: usize, seed: u64) -> Vec<f32> {
|
||||
let mut rng = StdRng::seed_from_u64(seed);
|
||||
(0..rows * cols).map(|_| rng.gen_range(-1.0..1.0)).collect()
|
||||
}
|
||||
|
||||
/// Generate a random CSR matrix with approximately `density` fraction of
|
||||
/// non-zero entries.
|
||||
///
|
||||
/// The matrix is square (`n x n`). Each entry in the upper triangle is
|
||||
/// included independently with probability `density`, then mirrored to the
|
||||
/// lower triangle for symmetry. Diagonal entries are always present and set
|
||||
/// to a value ensuring strict diagonal dominance.
|
||||
fn random_csr_matrix(n: usize, density: f64, seed: u64) -> CsrMatrix<f32> {
|
||||
let mut rng = StdRng::seed_from_u64(seed);
|
||||
let mut entries: Vec<(usize, usize, f32)> = Vec::new();
|
||||
|
||||
// Off-diagonal entries (symmetric).
|
||||
for i in 0..n {
|
||||
for j in (i + 1)..n {
|
||||
if rng.gen::<f64>() < density {
|
||||
let val: f32 = rng.gen_range(-0.5..0.5);
|
||||
entries.push((i, j, val));
|
||||
entries.push((j, i, val));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build row-wise absolute sums for diagonal dominance.
|
||||
let mut row_abs_sums = vec![0.0f32; n];
|
||||
for &(r, _c, v) in &entries {
|
||||
row_abs_sums[r] += v.abs();
|
||||
}
|
||||
|
||||
// Diagonal entries: ensure diagonal dominance for solver stability.
|
||||
for i in 0..n {
|
||||
entries.push((i, i, row_abs_sums[i] + 1.0));
|
||||
}
|
||||
|
||||
CsrMatrix::<f32>::from_coo(n, n, entries)
|
||||
}
|
||||
|
||||
/// Generate a random vector of length `n` with values uniform in [-1, 1).
///
/// The deterministic seed keeps inputs identical across benchmark runs.
fn random_vector(n: usize, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    (0..n).map(|_| rng.gen_range(-1.0..1.0)).collect()
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Dense matrix-vector multiply (naive baseline)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Naive dense matrix-vector multiply: `y = A * x`.
///
/// `a` is stored in row-major order with dimensions `rows x cols`.
/// `#[inline(never)]` keeps this a distinct symbol so the benchmark measures
/// the call as written rather than an inlined/folded variant.
#[inline(never)]
fn dense_matvec(a: &[f32], x: &[f32], y: &mut [f32], rows: usize, cols: usize) {
    for i in 0..rows {
        let row = &a[i * cols..i * cols + cols];
        // Accumulate left-to-right, matching the reference scalar loop order
        // so the floating-point result is bit-identical.
        y[i] = row
            .iter()
            .zip(&x[..cols])
            .map(|(&aij, &xj)| aij * xj)
            .sum();
    }
}
|
||||
|
||||
/// Benchmark naive dense matvec across a range of square matrix sizes.
///
/// Throughput is reported in matrix elements (size^2) processed per iteration.
fn dense_matvec_baseline(c: &mut Criterion) {
    let mut group = c.benchmark_group("dense_matvec");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);

    for size in [64, 256, 1024, 4096] {
        // Fixed seeds keep the inputs identical across runs.
        let a = random_dense_matrix(size, size, 42);
        let x = random_vector(size, 43);
        let mut y = vec![0.0f32; size];

        group.throughput(Throughput::Elements((size * size) as u64));
        group.bench_with_input(BenchmarkId::new("naive", size), &size, |b, &n| {
            b.iter(|| {
                // black_box prevents the optimizer from const-folding the work.
                dense_matvec(
                    criterion::black_box(&a),
                    criterion::black_box(&x),
                    criterion::black_box(&mut y),
                    n,
                    n,
                );
            });
        });
    }
    group.finish();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Sparse matrix-vector multiply (CSR SpMV)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Benchmark CSR SpMV at several sizes and densities.
///
/// Throughput is reported per stored non-zero element, so results are
/// comparable across different densities.
fn sparse_spmv_baseline(c: &mut Criterion) {
    let mut group = c.benchmark_group("sparse_spmv");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);

    for (n, density) in [(1000, 0.01), (1000, 0.05), (10_000, 0.01)] {
        let csr = random_csr_matrix(n, density, 44);
        let x = random_vector(n, 45);
        let mut y = vec![0.0f32; n];

        // e.g. "1000x1000_1pct"
        let label = format!("{}x{}_{:.0}pct", n, n, density * 100.0);
        group.throughput(Throughput::Elements(csr.nnz() as u64));
        group.bench_with_input(BenchmarkId::new(&label, n), &n, |b, _| {
            b.iter(|| {
                csr.spmv(criterion::black_box(&x), criterion::black_box(&mut y));
            });
        });
    }
    group.finish();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Dense vs sparse crossover
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Benchmark that compares dense and sparse matvec at the same dimension
/// to help identify the crossover point where sparse becomes faster.
fn dense_vs_sparse_crossover(c: &mut Criterion) {
    let mut group = c.benchmark_group("dense_vs_sparse_crossover");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);

    for size in [64, 128, 256, 512, 1024] {
        // Fixed 5% non-zero density for the sparse competitor at every size.
        let density = 0.05;

        // Dense setup.
        let a_dense = random_dense_matrix(size, size, 42);
        let x = random_vector(size, 43);
        let mut y_dense = vec![0.0f32; size];

        group.throughput(Throughput::Elements((size * size) as u64));
        group.bench_with_input(BenchmarkId::new("dense", size), &size, |b, &n| {
            b.iter(|| {
                dense_matvec(
                    criterion::black_box(&a_dense),
                    criterion::black_box(&x),
                    criterion::black_box(&mut y_dense),
                    n,
                    n,
                );
            });
        });

        // Sparse setup: reuses the same input vector `x` so both paths
        // multiply against identical data.
        let csr = random_csr_matrix(size, density, 44);
        let mut y_sparse = vec![0.0f32; size];

        group.bench_with_input(BenchmarkId::new("sparse_5pct", size), &size, |b, _| {
            b.iter(|| {
                csr.spmv(
                    criterion::black_box(&x),
                    criterion::black_box(&mut y_sparse),
                );
            });
        });
    }
    group.finish();
}
|
||||
|
||||
// Register the baseline benchmarks and emit the Criterion harness `main`.
criterion_group!(
    baselines,
    dense_matvec_baseline,
    sparse_spmv_baseline,
    dense_vs_sparse_crossover
);
criterion_main!(baselines);
|
||||
378
vendor/ruvector/crates/ruvector-solver/benches/solver_cg.rs
vendored
Normal file
378
vendor/ruvector/crates/ruvector-solver/benches/solver_cg.rs
vendored
Normal file
@@ -0,0 +1,378 @@
|
||||
//! Benchmarks for the Conjugate Gradient (CG) solver.
|
||||
//!
|
||||
//! CG is the method of choice for symmetric positive-definite (SPD) systems.
|
||||
//! These benchmarks measure scaling behaviour, the effect of diagonal
|
||||
//! preconditioning, and a head-to-head comparison with the Neumann series
|
||||
//! solver.
|
||||
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||
use std::time::Duration;
|
||||
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
|
||||
use ruvector_solver::types::CsrMatrix;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Build a symmetric positive-definite (SPD) CSR matrix.
///
/// Constructs a sparse SPD matrix by generating random off-diagonal entries
/// and ensuring strict diagonal dominance: `a_{ii} = sum_j |a_{ij}| + 1`.
/// (A symmetric, strictly diagonally dominant matrix with a positive
/// diagonal is positive definite.)
fn spd_csr_matrix(n: usize, density: f64, seed: u64) -> CsrMatrix<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut entries: Vec<(usize, usize, f32)> = Vec::new();

    // Sample each strict upper-triangle entry with probability `density`
    // and mirror it to keep the matrix symmetric.
    for i in 0..n {
        for j in (i + 1)..n {
            if rng.gen::<f64>() < density {
                let val: f32 = rng.gen_range(-0.3..0.3);
                entries.push((i, j, val));
                entries.push((j, i, val));
            }
        }
    }

    // Per-row absolute off-diagonal sums, used to size the diagonal.
    let mut row_abs_sums = vec![0.0f32; n];
    for &(r, _c, v) in &entries {
        row_abs_sums[r] += v.abs();
    }
    for i in 0..n {
        entries.push((i, i, row_abs_sums[i] + 1.0));
    }

    CsrMatrix::<f32>::from_coo(n, n, entries)
}
|
||||
|
||||
/// Random vector with deterministic seed; values uniform in [-1, 1).
fn random_vector(n: usize, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    (0..n).map(|_| rng.gen_range(-1.0..1.0)).collect()
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Inline CG solver for benchmarking
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Conjugate gradient solver for SPD systems `Ax = b`.
|
||||
///
|
||||
/// This is a textbook CG implementation inlined here so the benchmark does
|
||||
/// not depend on the (currently stub) cg module.
|
||||
#[inline(never)]
|
||||
fn cg_solve(
|
||||
matrix: &CsrMatrix<f32>,
|
||||
rhs: &[f32],
|
||||
tolerance: f64,
|
||||
max_iter: usize,
|
||||
) -> (Vec<f32>, usize, f64) {
|
||||
let n = matrix.rows;
|
||||
let mut x = vec![0.0f32; n];
|
||||
let mut r = rhs.to_vec(); // r_0 = b - A*x_0, with x_0 = 0 => r_0 = b
|
||||
let mut p = r.clone();
|
||||
let mut ap = vec![0.0f32; n];
|
||||
|
||||
let mut rs_old: f64 = r.iter().map(|&v| (v as f64) * (v as f64)).sum();
|
||||
let mut iterations = 0;
|
||||
|
||||
for k in 0..max_iter {
|
||||
// ap = A * p
|
||||
matrix.spmv(&p, &mut ap);
|
||||
|
||||
// alpha = (r^T r) / (p^T A p)
|
||||
let p_ap: f64 = p
|
||||
.iter()
|
||||
.zip(ap.iter())
|
||||
.map(|(&pi, &api)| (pi as f64) * (api as f64))
|
||||
.sum();
|
||||
|
||||
if p_ap.abs() < 1e-30 {
|
||||
iterations = k + 1;
|
||||
break;
|
||||
}
|
||||
|
||||
let alpha = rs_old / p_ap;
|
||||
|
||||
// x = x + alpha * p
|
||||
for i in 0..n {
|
||||
x[i] += (alpha as f32) * p[i];
|
||||
}
|
||||
|
||||
// r = r - alpha * ap
|
||||
for i in 0..n {
|
||||
r[i] -= (alpha as f32) * ap[i];
|
||||
}
|
||||
|
||||
let rs_new: f64 = r.iter().map(|&v| (v as f64) * (v as f64)).sum();
|
||||
iterations = k + 1;
|
||||
|
||||
if rs_new.sqrt() < tolerance {
|
||||
break;
|
||||
}
|
||||
|
||||
// p = r + (rs_new / rs_old) * p
|
||||
let beta = rs_new / rs_old;
|
||||
for i in 0..n {
|
||||
p[i] = r[i] + (beta as f32) * p[i];
|
||||
}
|
||||
|
||||
rs_old = rs_new;
|
||||
}
|
||||
|
||||
let residual_norm = rs_old.sqrt();
|
||||
(x, iterations, residual_norm)
|
||||
}
|
||||
|
||||
/// Diagonal-preconditioned CG solver.
///
/// Uses the Jacobi (diagonal) preconditioner: `M = diag(A)`.
/// Solves `M^{-1} A x = M^{-1} b` via the preconditioned CG algorithm.
///
/// Returns `(solution, iterations_performed, final_residual_norm)`, where
/// the residual norm is recomputed from the residual vector at exit.
#[inline(never)]
fn pcg_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;

    // Extract diagonal for preconditioner; store reciprocals so applying
    // M^{-1} is a multiply rather than a divide.
    let mut diag_inv = vec![1.0f32; n];
    for i in 0..n {
        let start = matrix.row_ptr[i];
        let end = matrix.row_ptr[i + 1];
        for idx in start..end {
            if matrix.col_indices[idx] == i {
                // Guard against (near-)zero diagonals; fall back to identity.
                let d = matrix.values[idx];
                diag_inv[i] = if d.abs() > 1e-12 { 1.0 / d } else { 1.0 };
                break;
            }
        }
    }

    let mut x = vec![0.0f32; n];
    let mut r = rhs.to_vec(); // r_0 = b since x_0 = 0
    // z_0 = M^{-1} r_0
    let mut z: Vec<f32> = r
        .iter()
        .zip(diag_inv.iter())
        .map(|(&ri, &di)| ri * di)
        .collect();
    let mut p = z.clone();
    let mut ap = vec![0.0f32; n];

    // rz = r^T z, accumulated in f64 for numerical robustness.
    let mut rz_old: f64 = r
        .iter()
        .zip(z.iter())
        .map(|(&ri, &zi)| (ri as f64) * (zi as f64))
        .sum();

    let mut iterations = 0;

    for k in 0..max_iter {
        matrix.spmv(&p, &mut ap);

        // alpha = (r^T z) / (p^T A p)
        let p_ap: f64 = p
            .iter()
            .zip(ap.iter())
            .map(|(&pi, &api)| (pi as f64) * (api as f64))
            .sum();

        // Breakdown guard: p^T A p ~ 0 would make alpha blow up.
        if p_ap.abs() < 1e-30 {
            iterations = k + 1;
            break;
        }

        let alpha = rz_old / p_ap;

        for i in 0..n {
            x[i] += (alpha as f32) * p[i];
            r[i] -= (alpha as f32) * ap[i];
        }

        // Convergence test uses the true (unpreconditioned) residual norm.
        let residual_norm: f64 = r
            .iter()
            .map(|&v| (v as f64) * (v as f64))
            .sum::<f64>()
            .sqrt();
        iterations = k + 1;

        if residual_norm < tolerance {
            break;
        }

        // z = M^{-1} r
        for i in 0..n {
            z[i] = r[i] * diag_inv[i];
        }

        let rz_new: f64 = r
            .iter()
            .zip(z.iter())
            .map(|(&ri, &zi)| (ri as f64) * (zi as f64))
            .sum();

        // beta = (r_new^T z_new) / (r_old^T z_old); p = z + beta * p
        let beta = rz_new / rz_old;
        for i in 0..n {
            p[i] = z[i] + (beta as f32) * p[i];
        }

        rz_old = rz_new;
    }

    // Recompute the final residual norm directly from the residual vector.
    let residual_norm = r
        .iter()
        .map(|&v| (v as f64) * (v as f64))
        .sum::<f64>()
        .sqrt();
    (x, iterations, residual_norm)
}
|
||||
|
||||
/// Neumann series iteration (inlined for comparison benchmark).
///
/// Richardson iteration with unit relaxation: `x_{k+1} = x_k + (b - A x_k)`.
/// Intended for the diagonally dominant test matrices built above.
///
/// Returns `(solution, iterations_performed, final_residual_norm)`.
#[inline(never)]
fn neumann_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;
    let mut x = vec![0.0f32; n];
    let mut residual_buf = vec![0.0f32; n];
    let mut iterations = 0;
    let mut residual_norm = f64::MAX;

    for k in 0..max_iter {
        // residual_buf = b - A*x
        matrix.spmv(&x, &mut residual_buf);
        for i in 0..n {
            residual_buf[i] = rhs[i] - residual_buf[i];
        }

        // 2-norm of the residual, accumulated in f64.
        residual_norm = residual_buf
            .iter()
            .map(|&v| (v as f64) * (v as f64))
            .sum::<f64>()
            .sqrt();

        iterations = k + 1;
        if residual_norm < tolerance {
            break;
        }

        // Richardson update step (omega = 1): x += residual.
        for i in 0..n {
            x[i] += residual_buf[i];
        }
    }

    (x, iterations, residual_norm)
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmark: CG scaling with problem size
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Measure CG solve time as the problem size grows (100 -> 10k unknowns).
///
/// Throughput is reported per stored non-zero so results are comparable
/// across the different densities used at each size.
fn cg_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("cg_scaling");
    group.warm_up_time(Duration::from_secs(3));

    for &n in &[100, 1000, 10_000] {
        // Keep nnz growth manageable: use a sparser matrix at larger n.
        let density = if n <= 1000 { 0.02 } else { 0.005 };
        let matrix = spd_csr_matrix(n, density, 42);
        let rhs = random_vector(n, 43);

        // Fewer samples for the expensive 10k case to bound total wall time.
        let sample_count = if n >= 10_000 { 20 } else { 100 };
        group.sample_size(sample_count);
        group.throughput(Throughput::Elements(matrix.nnz() as u64));

        group.bench_with_input(BenchmarkId::new("n", n), &n, |b, _| {
            b.iter(|| {
                cg_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
    }
    group.finish();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmark: with vs without diagonal preconditioner
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Compare plain CG against Jacobi-preconditioned CG on the same systems.
///
/// Both variants solve identical (matrix, rhs) pairs so any timing delta is
/// attributable to the preconditioner.
fn cg_preconditioning(c: &mut Criterion) {
    let mut group = c.benchmark_group("cg_preconditioning");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);

    for &n in &[500, 1000, 2000] {
        let matrix = spd_csr_matrix(n, 0.02, 42);
        let rhs = random_vector(n, 43);

        group.bench_with_input(BenchmarkId::new("cg_plain", n), &n, |b, _| {
            b.iter(|| {
                cg_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });

        group.bench_with_input(BenchmarkId::new("cg_diag_precond", n), &n, |b, _| {
            b.iter(|| {
                pcg_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
    }
    group.finish();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmark: CG vs Neumann for same problem
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Head-to-head: CG vs Neumann series on the same SPD systems.
///
/// Same tolerance and iteration cap for both solvers, so the comparison
/// isolates algorithmic convergence behaviour.
fn cg_vs_neumann(c: &mut Criterion) {
    let mut group = c.benchmark_group("cg_vs_neumann");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);

    for &n in &[100, 500, 1000] {
        let matrix = spd_csr_matrix(n, 0.02, 42);
        let rhs = random_vector(n, 43);

        group.bench_with_input(BenchmarkId::new("cg", n), &n, |b, _| {
            b.iter(|| {
                cg_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });

        group.bench_with_input(BenchmarkId::new("neumann", n), &n, |b, _| {
            b.iter(|| {
                neumann_solve(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
    }
    group.finish();
}
|
||||
|
||||
// Register the CG benchmarks and emit the Criterion harness `main`.
criterion_group!(cg, cg_scaling, cg_preconditioning, cg_vs_neumann);
criterion_main!(cg);
|
||||
390
vendor/ruvector/crates/ruvector-solver/benches/solver_e2e.rs
vendored
Normal file
390
vendor/ruvector/crates/ruvector-solver/benches/solver_e2e.rs
vendored
Normal file
@@ -0,0 +1,390 @@
|
||||
//! End-to-end benchmarks for the solver orchestration layer.
|
||||
//!
|
||||
//! These benchmarks measure the overhead of algorithm selection (routing) and
|
||||
//! the full end-to-end solve path including routing, validation, solver
|
||||
//! dispatch, and result construction.
|
||||
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||
use std::time::Duration;
|
||||
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
|
||||
use ruvector_solver::types::{Algorithm, CsrMatrix};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Build a diagonally dominant CSR matrix.
///
/// Random symmetric off-diagonal entries plus a diagonal of
/// `sum_j |a_{ij}| + 1` per row, giving strict diagonal dominance.
fn diag_dominant_csr(n: usize, density: f64, seed: u64) -> CsrMatrix<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut entries: Vec<(usize, usize, f32)> = Vec::new();

    // Sample the strict upper triangle and mirror for symmetry.
    for i in 0..n {
        for j in (i + 1)..n {
            if rng.gen::<f64>() < density {
                let val: f32 = rng.gen_range(-0.3..0.3);
                entries.push((i, j, val));
                entries.push((j, i, val));
            }
        }
    }

    // Per-row absolute off-diagonal sums, used to size the diagonal.
    let mut row_abs_sums = vec![0.0f32; n];
    for &(r, _c, v) in &entries {
        row_abs_sums[r] += v.abs();
    }
    for i in 0..n {
        entries.push((i, i, row_abs_sums[i] + 1.0));
    }

    CsrMatrix::<f32>::from_coo(n, n, entries)
}
|
||||
|
||||
/// Random vector with deterministic seed.
|
||||
fn random_vector(n: usize, seed: u64) -> Vec<f32> {
|
||||
let mut rng = StdRng::seed_from_u64(seed);
|
||||
(0..n).map(|_| rng.gen_range(-1.0..1.0)).collect()
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Inline algorithm router for benchmarking
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Properties extracted from the matrix for routing decisions.
#[allow(dead_code)]
struct MatrixProperties {
    n: usize,                  // matrix dimension (square: n x n)
    nnz: usize,                // number of stored non-zeros
    density: f64,              // nnz / n^2
    is_symmetric: bool,        // sampled structural + value symmetry check
    max_row_degree: usize,     // largest non-zero count in any row
    diag_dominance_ratio: f64, // mean |a_ii| / sum_{j!=i} |a_ij| over sampled rows
}
|
||||
|
||||
/// Analyze a CSR matrix to extract routing-relevant properties.
///
/// Symmetry and diagonal dominance are only *sampled* (first `min(n, 100)`
/// rows) so the analysis stays cheap relative to a solve; the results may
/// therefore be approximate for larger matrices.
#[inline(never)]
fn analyze_matrix(matrix: &CsrMatrix<f32>) -> MatrixProperties {
    let n = matrix.rows;
    let nnz = matrix.nnz();
    let density = nnz as f64 / (n as f64 * n as f64);

    // Check symmetry (sample-based for large matrices).
    let sample_size = n.min(100);
    let mut is_symmetric = true;
    'outer: for i in 0..sample_size {
        let start = matrix.row_ptr[i];
        let end = matrix.row_ptr[i + 1];
        for idx in start..end {
            let j = matrix.col_indices[idx];
            if j == i {
                continue; // diagonal entries are trivially symmetric
            }
            // Check if (j, i) exists with the same value (linear scan of row j).
            let j_start = matrix.row_ptr[j];
            let j_end = matrix.row_ptr[j + 1];
            let mut found = false;
            for jidx in j_start..j_end {
                if matrix.col_indices[jidx] == i {
                    // Value mismatch beyond tolerance => not symmetric.
                    if (matrix.values[jidx] - matrix.values[idx]).abs() > 1e-6 {
                        is_symmetric = false;
                        break 'outer;
                    }
                    found = true;
                    break;
                }
            }
            if !found {
                // Mirror entry missing entirely => structurally asymmetric.
                is_symmetric = false;
                break 'outer;
            }
        }
    }

    // Max row degree (non-zeros per row), scanned over all rows.
    let mut max_row_degree = 0;
    for i in 0..n {
        let deg = matrix.row_ptr[i + 1] - matrix.row_ptr[i];
        max_row_degree = max_row_degree.max(deg);
    }

    // Diagonal dominance ratio (sampled): mean of |a_ii| / sum_{j!=i} |a_ij|.
    let mut diag_dominance_ratio = 0.0;
    let check_rows = n.min(100);
    for i in 0..check_rows {
        let start = matrix.row_ptr[i];
        let end = matrix.row_ptr[i + 1];
        let mut diag = 0.0f32;
        let mut off_diag_sum = 0.0f32;
        for idx in start..end {
            if matrix.col_indices[idx] == i {
                diag = matrix.values[idx].abs();
            } else {
                off_diag_sum += matrix.values[idx].abs();
            }
        }
        if off_diag_sum > 0.0 {
            diag_dominance_ratio += (diag / off_diag_sum) as f64;
        } else {
            // Row has no off-diagonal entries at all.
            diag_dominance_ratio += 10.0; // Perfect dominance.
        }
    }
    diag_dominance_ratio /= check_rows as f64;

    MatrixProperties {
        n,
        nnz,
        density,
        is_symmetric,
        max_row_degree,
        diag_dominance_ratio,
    }
}
|
||||
|
||||
/// Select the best algorithm based on matrix properties.
|
||||
#[inline(never)]
|
||||
fn select_algorithm(props: &MatrixProperties, tolerance: f64) -> Algorithm {
|
||||
// High diagonal dominance => Neumann series converges fast.
|
||||
if props.diag_dominance_ratio > 2.0 && tolerance > 1e-8 {
|
||||
return Algorithm::Neumann;
|
||||
}
|
||||
|
||||
// SPD matrix => CG is optimal.
|
||||
if props.is_symmetric && props.diag_dominance_ratio > 1.0 {
|
||||
return Algorithm::CG;
|
||||
}
|
||||
|
||||
// Very sparse, large graph => forward push for PPR-like problems.
|
||||
if props.density < 0.01 && props.n > 1000 {
|
||||
return Algorithm::ForwardPush;
|
||||
}
|
||||
|
||||
// Default fallback.
|
||||
if props.is_symmetric {
|
||||
Algorithm::CG
|
||||
} else {
|
||||
Algorithm::Neumann
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Inline solvers for e2e benchmarking
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Neumann series (Richardson iteration).
///
/// `x_{k+1} = x_k + (b - A x_k)`; returns
/// `(solution, iterations_performed, final_residual_norm)`.
#[inline(never)]
fn neumann_solve(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64) {
    let n = matrix.rows;
    let mut x = vec![0.0f32; n];
    let mut r = vec![0.0f32; n];
    let mut iterations = 0;
    let mut residual_norm = f64::MAX;

    for k in 0..max_iter {
        // r = b - A*x
        matrix.spmv(&x, &mut r);
        for i in 0..n {
            r[i] = rhs[i] - r[i];
        }
        // 2-norm of the residual, accumulated in f64.
        residual_norm = r
            .iter()
            .map(|&v| (v as f64) * (v as f64))
            .sum::<f64>()
            .sqrt();
        iterations = k + 1;
        if residual_norm < tolerance {
            break;
        }
        // Richardson update step (omega = 1).
        for i in 0..n {
            x[i] += r[i];
        }
    }
    (x, iterations, residual_norm)
}
|
||||
|
||||
/// Conjugate gradient.
|
||||
#[inline(never)]
|
||||
fn cg_solve(
|
||||
matrix: &CsrMatrix<f32>,
|
||||
rhs: &[f32],
|
||||
tolerance: f64,
|
||||
max_iter: usize,
|
||||
) -> (Vec<f32>, usize, f64) {
|
||||
let n = matrix.rows;
|
||||
let mut x = vec![0.0f32; n];
|
||||
let mut r = rhs.to_vec();
|
||||
let mut p = r.clone();
|
||||
let mut ap = vec![0.0f32; n];
|
||||
|
||||
let mut rs_old: f64 = r.iter().map(|&v| (v as f64) * (v as f64)).sum();
|
||||
let mut iterations = 0;
|
||||
|
||||
for k in 0..max_iter {
|
||||
matrix.spmv(&p, &mut ap);
|
||||
|
||||
let p_ap: f64 = p
|
||||
.iter()
|
||||
.zip(ap.iter())
|
||||
.map(|(&pi, &api)| (pi as f64) * (api as f64))
|
||||
.sum();
|
||||
|
||||
if p_ap.abs() < 1e-30 {
|
||||
iterations = k + 1;
|
||||
break;
|
||||
}
|
||||
let alpha = rs_old / p_ap;
|
||||
|
||||
for i in 0..n {
|
||||
x[i] += (alpha as f32) * p[i];
|
||||
r[i] -= (alpha as f32) * ap[i];
|
||||
}
|
||||
|
||||
let rs_new: f64 = r.iter().map(|&v| (v as f64) * (v as f64)).sum();
|
||||
iterations = k + 1;
|
||||
if rs_new.sqrt() < tolerance {
|
||||
break;
|
||||
}
|
||||
|
||||
let beta = rs_new / rs_old;
|
||||
for i in 0..n {
|
||||
p[i] = r[i] + (beta as f32) * p[i];
|
||||
}
|
||||
rs_old = rs_new;
|
||||
}
|
||||
|
||||
let residual_norm = rs_old.sqrt();
|
||||
(x, iterations, residual_norm)
|
||||
}
|
||||
|
||||
/// Full orchestrated solve: analyze -> route -> solve.
///
/// Returns the solution together with the iteration count, the final
/// residual norm, and the algorithm the router selected.
#[inline(never)]
fn orchestrator_solve_impl(
    matrix: &CsrMatrix<f32>,
    rhs: &[f32],
    tolerance: f64,
    max_iter: usize,
) -> (Vec<f32>, usize, f64, Algorithm) {
    let props = analyze_matrix(matrix);
    let algorithm = select_algorithm(&props, tolerance);

    let (solution, iterations, residual) = match algorithm {
        Algorithm::Neumann => neumann_solve(matrix, rhs, tolerance, max_iter),
        Algorithm::CG => cg_solve(matrix, rhs, tolerance, max_iter),
        // Fall back to CG for unimplemented algorithms.
        _ => cg_solve(matrix, rhs, tolerance, max_iter),
    };

    (solution, iterations, residual, algorithm)
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmark: router overhead (analyze + select, no solve)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Measure the cost of routing alone (matrix analysis + algorithm selection),
/// without performing any solve.
fn router_overhead(c: &mut Criterion) {
    let mut group = c.benchmark_group("router_overhead");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);

    for &n in &[100, 1000, 10_000] {
        // Sparser matrices at larger n keep setup time bounded.
        let density = if n <= 1000 { 0.02 } else { 0.005 };
        let matrix = diag_dominant_csr(n, density, 42);

        group.throughput(Throughput::Elements(n as u64));
        group.bench_with_input(BenchmarkId::new("analyze_and_route", n), &n, |b, _| {
            b.iter(|| {
                let props = analyze_matrix(criterion::black_box(&matrix));
                select_algorithm(criterion::black_box(&props), 1e-6)
            });
        });
    }
    group.finish();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmark: full orchestrated solve (end-to-end)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Benchmark the full orchestrated path: routing, dispatch, and solve.
fn orchestrator_solve(c: &mut Criterion) {
    let mut group = c.benchmark_group("orchestrator_solve");
    group.warm_up_time(Duration::from_secs(3));

    for &n in &[100, 500, 1000, 5000] {
        // Sparser matrices at larger n keep nnz growth manageable.
        let density = if n <= 1000 { 0.02 } else { 0.005 };
        let matrix = diag_dominant_csr(n, density, 42);
        let rhs = random_vector(n, 43);

        // Fewer samples for the expensive 5k case to bound wall time.
        let sample_count = if n >= 5000 { 20 } else { 100 };
        group.sample_size(sample_count);
        group.throughput(Throughput::Elements(matrix.nnz() as u64));

        group.bench_with_input(BenchmarkId::new("e2e", n), &n, |b, _| {
            b.iter(|| {
                orchestrator_solve_impl(
                    criterion::black_box(&matrix),
                    criterion::black_box(&rhs),
                    1e-6,
                    5000,
                )
            });
        });
    }
    group.finish();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmark: routing overhead as fraction of total solve time
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Measure how much of the end-to-end time is routing versus the solve.
///
/// Benchmarks three configurations at a fixed n = 1000: routing alone, CG
/// alone, and the full routed pipeline, so the router's relative overhead can
/// be read off directly from the three timings.
fn routing_fraction(c: &mut Criterion) {
    let mut group = c.benchmark_group("routing_fraction");
    group.warm_up_time(Duration::from_secs(3));
    group.sample_size(100);

    let n = 1000;
    let density = 0.02;
    let matrix = diag_dominant_csr(n, density, 42);
    let rhs = random_vector(n, 43);

    // Route only.
    group.bench_function("route_only", |b| {
        b.iter(|| {
            let props = analyze_matrix(criterion::black_box(&matrix));
            select_algorithm(criterion::black_box(&props), 1e-6)
        });
    });

    // Solve only (skip routing).
    group.bench_function("solve_only_cg", |b| {
        b.iter(|| {
            cg_solve(
                criterion::black_box(&matrix),
                criterion::black_box(&rhs),
                1e-6,
                5000,
            )
        });
    });

    // Full e2e (route + solve).
    group.bench_function("e2e_routed", |b| {
        b.iter(|| {
            orchestrator_solve_impl(
                criterion::black_box(&matrix),
                criterion::black_box(&rhs),
                1e-6,
                5000,
            )
        });
    });

    group.finish();
}
|
||||
|
||||
// Register the end-to-end benchmarks and emit the Criterion harness `main`.
criterion_group!(e2e, router_overhead, orchestrator_solve, routing_fraction);
criterion_main!(e2e);
|
||||
313
vendor/ruvector/crates/ruvector-solver/benches/solver_neumann.rs
vendored
Normal file
313
vendor/ruvector/crates/ruvector-solver/benches/solver_neumann.rs
vendored
Normal file
@@ -0,0 +1,313 @@
|
||||
//! Benchmarks for the Neumann series solver.
|
||||
//!
|
||||
//! The Neumann series approximates `(I - M)^{-1} b = sum_{k=0}^{K} M^k b`
|
||||
//! and converges when the spectral radius of `M` is less than 1. These
|
||||
//! benchmarks measure convergence rate vs tolerance, scaling behaviour, and
|
||||
//! crossover against dense direct solves.
|
||||
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||
use std::time::Duration;
|
||||
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
|
||||
use ruvector_solver::types::CsrMatrix;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Build a diagonally dominant CSR matrix suitable for Neumann iteration.
|
||||
///
|
||||
/// The iteration matrix `M = I - D^{-1} A` has spectral radius < 1 when `A`
|
||||
/// is strictly diagonally dominant. We construct `A` so that each diagonal
|
||||
/// entry equals the sum of absolute off-diagonal values in its row plus 1.0.
|
||||
fn diag_dominant_csr(n: usize, density: f64, seed: u64) -> CsrMatrix<f32> {
|
||||
let mut rng = StdRng::seed_from_u64(seed);
|
||||
let mut entries: Vec<(usize, usize, f32)> = Vec::new();
|
||||
|
||||
for i in 0..n {
|
||||
for j in (i + 1)..n {
|
||||
if rng.gen::<f64>() < density {
|
||||
let val: f32 = rng.gen_range(-0.3..0.3);
|
||||
entries.push((i, j, val));
|
||||
entries.push((j, i, val));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut row_abs_sums = vec![0.0f32; n];
|
||||
for &(r, _c, v) in &entries {
|
||||
row_abs_sums[r] += v.abs();
|
||||
}
|
||||
for i in 0..n {
|
||||
entries.push((i, i, row_abs_sums[i] + 1.0));
|
||||
}
|
||||
|
||||
CsrMatrix::<f32>::from_coo(n, n, entries)
|
||||
}
|
||||
|
||||
/// Random vector with deterministic seed.
|
||||
fn random_vector(n: usize, seed: u64) -> Vec<f32> {
|
||||
let mut rng = StdRng::seed_from_u64(seed);
|
||||
(0..n).map(|_| rng.gen_range(-1.0..1.0)).collect()
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Inline Neumann series solver for benchmarking
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Neumann series iteration: x_{k+1} = x_k + (b - A * x_k).
|
||||
///
|
||||
/// This is equivalent to the Richardson iteration with omega = 1 for a
|
||||
/// diagonally-dominant system. We inline it here so the benchmark does
|
||||
/// not depend on the (currently stub) neumann module.
|
||||
#[inline(never)]
|
||||
fn neumann_solve(
|
||||
matrix: &CsrMatrix<f32>,
|
||||
rhs: &[f32],
|
||||
tolerance: f64,
|
||||
max_iter: usize,
|
||||
) -> (Vec<f32>, usize, f64) {
|
||||
let n = matrix.rows;
|
||||
let mut x = vec![0.0f32; n];
|
||||
let mut residual_buf = vec![0.0f32; n];
|
||||
let mut iterations = 0;
|
||||
let mut residual_norm = f64::MAX;
|
||||
|
||||
for k in 0..max_iter {
|
||||
// Compute residual: r = b - A*x.
|
||||
matrix.spmv(&x, &mut residual_buf);
|
||||
for i in 0..n {
|
||||
residual_buf[i] = rhs[i] - residual_buf[i];
|
||||
}
|
||||
|
||||
// Residual L2 norm.
|
||||
residual_norm = residual_buf
|
||||
.iter()
|
||||
.map(|&v| (v as f64) * (v as f64))
|
||||
.sum::<f64>()
|
||||
.sqrt();
|
||||
|
||||
iterations = k + 1;
|
||||
if residual_norm < tolerance {
|
||||
break;
|
||||
}
|
||||
|
||||
// Update: x = x + r (Richardson step).
|
||||
for i in 0..n {
|
||||
x[i] += residual_buf[i];
|
||||
}
|
||||
}
|
||||
|
||||
(x, iterations, residual_norm)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmark: convergence vs tolerance
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn neumann_convergence(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("neumann_convergence");
|
||||
group.warm_up_time(Duration::from_secs(3));
|
||||
group.sample_size(100);
|
||||
|
||||
let n = 500;
|
||||
let matrix = diag_dominant_csr(n, 0.02, 42);
|
||||
let rhs = random_vector(n, 43);
|
||||
|
||||
for &tol in &[1e-2, 1e-4, 1e-6] {
|
||||
let label = format!("eps_{:.0e}", tol);
|
||||
group.bench_with_input(BenchmarkId::new(&label, n), &tol, |b, &eps| {
|
||||
b.iter(|| {
|
||||
neumann_solve(
|
||||
criterion::black_box(&matrix),
|
||||
criterion::black_box(&rhs),
|
||||
eps,
|
||||
5000,
|
||||
)
|
||||
});
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmark: scaling with problem size
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn neumann_scaling(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("neumann_scaling");
|
||||
group.warm_up_time(Duration::from_secs(3));
|
||||
|
||||
for &n in &[100, 1000, 10_000] {
|
||||
// Use sparser matrices for larger sizes to keep runtime reasonable.
|
||||
let density = if n <= 1000 { 0.02 } else { 0.005 };
|
||||
let matrix = diag_dominant_csr(n, density, 42);
|
||||
let rhs = random_vector(n, 43);
|
||||
|
||||
let sample_count = if n >= 10_000 { 20 } else { 100 };
|
||||
group.sample_size(sample_count);
|
||||
group.throughput(Throughput::Elements(matrix.nnz() as u64));
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("n", n), &n, |b, _| {
|
||||
b.iter(|| {
|
||||
neumann_solve(
|
||||
criterion::black_box(&matrix),
|
||||
criterion::black_box(&rhs),
|
||||
1e-4,
|
||||
5000,
|
||||
)
|
||||
});
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmark: Neumann vs dense direct solve crossover
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Naive dense direct solve via Gaussian elimination with partial pivoting.
|
||||
///
|
||||
/// This is intentionally unoptimized to represent a "no-library" baseline.
|
||||
/// Gaussian elimination with partial pivoting on an augmented `[A | b]`
/// system, computed in f64 and cast back to f32.
///
/// Deliberately unoptimized: this is the "no-library" dense baseline the
/// iterative solvers are compared against. Near-singular pivots are
/// skipped during elimination, and unknowns with a (near-)zero diagonal
/// are set to 0 in back substitution.
#[inline(never)]
fn dense_direct_solve(a: &[f32], b: &[f32], n: usize) -> Vec<f32> {
    let width = n + 1; // row stride of the augmented matrix
    let mut aug = vec![0.0f64; n * width];

    // Assemble [A | b] row-major in f64 for numerical headroom.
    for row in 0..n {
        for col in 0..n {
            aug[row * width + col] = a[row * n + col] as f64;
        }
        aug[row * width + n] = b[row] as f64;
    }

    // Forward elimination.
    for col in 0..n {
        // Partial pivoting: find the row with the largest |entry| in `col`.
        let mut pivot_row = col;
        let mut best = aug[col * width + col].abs();
        for row in (col + 1)..n {
            let candidate = aug[row * width + col].abs();
            if candidate > best {
                best = candidate;
                pivot_row = row;
            }
        }

        if pivot_row != col {
            for j in 0..=n {
                aug.swap(col * width + j, pivot_row * width + j);
            }
        }

        let pivot = aug[col * width + col];
        if pivot.abs() < 1e-15 {
            // (Near-)singular column: nothing useful to eliminate with.
            continue;
        }

        for row in (col + 1)..n {
            let factor = aug[row * width + col] / pivot;
            for j in col..=n {
                let above = aug[col * width + j];
                aug[row * width + j] -= factor * above;
            }
        }
    }

    // Back substitution.
    let mut x = vec![0.0f64; n];
    for i in (0..n).rev() {
        let mut acc = aug[i * width + n];
        for j in (i + 1)..n {
            acc -= aug[i * width + j] * x[j];
        }
        let diag = aug[i * width + i];
        x[i] = if diag.abs() > 1e-15 { acc / diag } else { 0.0 };
    }

    x.into_iter().map(|v| v as f32).collect()
}
|
||||
|
||||
/// Generate the dense representation of a diag-dominant matrix.
|
||||
fn diag_dominant_dense(n: usize, density: f64, seed: u64) -> Vec<f32> {
|
||||
let mut rng = StdRng::seed_from_u64(seed);
|
||||
let mut a = vec![0.0f32; n * n];
|
||||
|
||||
// Off-diagonal.
|
||||
for i in 0..n {
|
||||
for j in (i + 1)..n {
|
||||
if rng.gen::<f64>() < density {
|
||||
let val: f32 = rng.gen_range(-0.3..0.3);
|
||||
a[i * n + j] = val;
|
||||
a[j * n + i] = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Diagonal dominance.
|
||||
for i in 0..n {
|
||||
let mut row_sum = 0.0f32;
|
||||
for j in 0..n {
|
||||
if j != i {
|
||||
row_sum += a[i * n + j].abs();
|
||||
}
|
||||
}
|
||||
a[i * n + i] = row_sum + 1.0;
|
||||
}
|
||||
|
||||
a
|
||||
}
|
||||
|
||||
fn neumann_vs_dense(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("neumann_vs_dense");
|
||||
group.warm_up_time(Duration::from_secs(3));
|
||||
|
||||
// Crossover analysis: compare iterative Neumann vs dense direct solve.
|
||||
// For small n, dense wins; for large sparse n, Neumann should win.
|
||||
for &n in &[50, 100, 200, 500] {
|
||||
let density = 0.05;
|
||||
let rhs = random_vector(n, 43);
|
||||
|
||||
let sample_count = if n >= 500 { 20 } else { 100 };
|
||||
group.sample_size(sample_count);
|
||||
|
||||
// Neumann (sparse).
|
||||
let csr = diag_dominant_csr(n, density, 42);
|
||||
group.bench_with_input(BenchmarkId::new("neumann_sparse", n), &n, |b, _| {
|
||||
b.iter(|| {
|
||||
neumann_solve(
|
||||
criterion::black_box(&csr),
|
||||
criterion::black_box(&rhs),
|
||||
1e-4,
|
||||
5000,
|
||||
)
|
||||
});
|
||||
});
|
||||
|
||||
// Dense direct solve.
|
||||
let a_dense = diag_dominant_dense(n, density, 42);
|
||||
group.bench_with_input(BenchmarkId::new("dense_direct", n), &n, |b, _| {
|
||||
b.iter(|| {
|
||||
dense_direct_solve(
|
||||
criterion::black_box(&a_dense),
|
||||
criterion::black_box(&rhs),
|
||||
n,
|
||||
)
|
||||
});
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Register the three Neumann benchmarks under one Criterion group and emit
// the entry point for this bench binary.
criterion_group!(
    neumann,
    neumann_convergence,
    neumann_scaling,
    neumann_vs_dense
);
criterion_main!(neumann);
|
||||
222
vendor/ruvector/crates/ruvector-solver/benches/solver_push.rs
vendored
Normal file
222
vendor/ruvector/crates/ruvector-solver/benches/solver_push.rs
vendored
Normal file
@@ -0,0 +1,222 @@
|
||||
//! Benchmarks for the forward push algorithm (Andersen-Chung-Lang).
|
||||
//!
|
||||
//! Forward push computes approximate Personalized PageRank (PPR) vectors in
|
||||
//! sublinear time. These benchmarks measure scaling with graph size and the
|
||||
//! effect of tolerance on the number of push operations.
|
||||
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||
use std::collections::VecDeque;
|
||||
use std::time::Duration;
|
||||
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
|
||||
use ruvector_solver::types::CsrMatrix;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Build a random sparse graph as a CSR matrix suitable for PageRank.
|
||||
///
|
||||
/// Each entry `A[i][j]` represents the transition probability from node `i`
|
||||
/// to node `j`. The matrix is row-stochastic: each row sums to 1. The
|
||||
/// graph is constructed by giving each node `avg_degree` random outgoing
|
||||
/// edges.
|
||||
fn random_graph_csr(n: usize, avg_degree: usize, seed: u64) -> CsrMatrix<f32> {
|
||||
let mut rng = StdRng::seed_from_u64(seed);
|
||||
let mut entries: Vec<(usize, usize, f32)> = Vec::new();
|
||||
|
||||
for i in 0..n {
|
||||
let degree = (avg_degree as f64 * (0.5 + rng.gen::<f64>())) as usize;
|
||||
let degree = degree.max(1).min(n - 1);
|
||||
|
||||
// Select random neighbours (without replacement for small degree).
|
||||
let mut neighbours = Vec::with_capacity(degree);
|
||||
for _ in 0..degree {
|
||||
let mut j = rng.gen_range(0..n);
|
||||
while j == i {
|
||||
j = rng.gen_range(0..n);
|
||||
}
|
||||
neighbours.push(j);
|
||||
}
|
||||
neighbours.sort_unstable();
|
||||
neighbours.dedup();
|
||||
|
||||
let weight = 1.0 / neighbours.len() as f32;
|
||||
for &j in &neighbours {
|
||||
entries.push((i, j, weight));
|
||||
}
|
||||
}
|
||||
|
||||
CsrMatrix::<f32>::from_coo(n, n, entries)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Inline forward push for benchmarking
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Forward push algorithm for approximate Personalized PageRank.
|
||||
///
|
||||
/// Computes an approximate PPR vector `pi` for a source node `source` with
|
||||
/// teleport probability `alpha`. The algorithm maintains a residual vector
|
||||
/// and pushes mass from nodes whose residual exceeds `tolerance`.
|
||||
///
|
||||
/// Returns `(estimate, residual, num_pushes)`.
|
||||
#[inline(never)]
|
||||
fn forward_push(
|
||||
matrix: &CsrMatrix<f32>,
|
||||
source: usize,
|
||||
alpha: f32,
|
||||
tolerance: f32,
|
||||
) -> (Vec<f32>, Vec<f32>, usize) {
|
||||
let n = matrix.rows;
|
||||
let mut estimate = vec![0.0f32; n];
|
||||
let mut residual = vec![0.0f32; n];
|
||||
residual[source] = 1.0;
|
||||
|
||||
let mut queue: VecDeque<usize> = VecDeque::new();
|
||||
queue.push_back(source);
|
||||
let mut in_queue = vec![false; n];
|
||||
in_queue[source] = true;
|
||||
|
||||
let mut num_pushes = 0usize;
|
||||
|
||||
while let Some(u) = queue.pop_front() {
|
||||
in_queue[u] = false;
|
||||
let r_u = residual[u];
|
||||
|
||||
if r_u.abs() < tolerance {
|
||||
continue;
|
||||
}
|
||||
|
||||
num_pushes += 1;
|
||||
|
||||
// Absorb alpha fraction.
|
||||
estimate[u] += alpha * r_u;
|
||||
let push_mass = (1.0 - alpha) * r_u;
|
||||
residual[u] = 0.0;
|
||||
|
||||
// Distribute remaining mass to neighbours.
|
||||
let start = matrix.row_ptr[u];
|
||||
let end = matrix.row_ptr[u + 1];
|
||||
let degree = end - start;
|
||||
|
||||
if degree > 0 {
|
||||
for idx in start..end {
|
||||
let v = matrix.col_indices[idx];
|
||||
let w = matrix.values[idx];
|
||||
residual[v] += push_mass * w;
|
||||
|
||||
if !in_queue[v] && residual[v].abs() >= tolerance {
|
||||
queue.push_back(v);
|
||||
in_queue[v] = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Dangling node: teleport back to source.
|
||||
residual[source] += push_mass;
|
||||
if !in_queue[source] && residual[source].abs() >= tolerance {
|
||||
queue.push_back(source);
|
||||
in_queue[source] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(estimate, residual, num_pushes)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmark: forward push scaling with graph size
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn forward_push_scaling(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("forward_push_scaling");
|
||||
group.warm_up_time(Duration::from_secs(3));
|
||||
|
||||
let alpha = 0.15f32;
|
||||
let tolerance = 1e-4f32;
|
||||
|
||||
for &n in &[100, 1000, 10_000, 100_000] {
|
||||
let avg_degree = 10;
|
||||
let graph = random_graph_csr(n, avg_degree, 42);
|
||||
|
||||
let sample_count = if n >= 100_000 {
|
||||
10
|
||||
} else if n >= 10_000 {
|
||||
20
|
||||
} else {
|
||||
100
|
||||
};
|
||||
group.sample_size(sample_count);
|
||||
group.throughput(Throughput::Elements(n as u64));
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("n", n), &n, |b, _| {
|
||||
b.iter(|| {
|
||||
forward_push(
|
||||
criterion::black_box(&graph),
|
||||
0, // source node
|
||||
alpha,
|
||||
tolerance,
|
||||
)
|
||||
});
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmark: forward push tolerance sensitivity
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn forward_push_tolerance(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("forward_push_tolerance");
|
||||
group.warm_up_time(Duration::from_secs(3));
|
||||
group.sample_size(100);
|
||||
|
||||
let n = 10_000;
|
||||
let avg_degree = 10;
|
||||
let alpha = 0.15f32;
|
||||
let graph = random_graph_csr(n, avg_degree, 42);
|
||||
|
||||
for &tol in &[1e-2f32, 1e-4, 1e-6] {
|
||||
let label = format!("eps_{:.0e}", tol);
|
||||
group.bench_with_input(BenchmarkId::new(&label, n), &tol, |b, &eps| {
|
||||
b.iter(|| forward_push(criterion::black_box(&graph), 0, alpha, eps));
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmark: forward push with varying graph density
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn forward_push_density(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("forward_push_density");
|
||||
group.warm_up_time(Duration::from_secs(3));
|
||||
group.sample_size(50);
|
||||
|
||||
let n = 10_000;
|
||||
let alpha = 0.15f32;
|
||||
let tolerance = 1e-4f32;
|
||||
|
||||
for &avg_degree in &[5, 10, 20, 50] {
|
||||
let graph = random_graph_csr(n, avg_degree, 42);
|
||||
|
||||
let label = format!("deg_{}", avg_degree);
|
||||
group.throughput(Throughput::Elements(graph.nnz() as u64));
|
||||
group.bench_with_input(BenchmarkId::new(&label, n), &avg_degree, |b, _| {
|
||||
b.iter(|| forward_push(criterion::black_box(&graph), 0, alpha, tolerance));
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Register the three forward-push benchmarks under one Criterion group and
// emit the entry point for this bench binary.
criterion_group!(
    push,
    forward_push_scaling,
    forward_push_tolerance,
    forward_push_density
);
criterion_main!(push);
|
||||
Reference in New Issue
Block a user