Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
@@ -0,0 +1,503 @@
|
||||
# ADR-STS-006: Benchmark Framework and Performance Validation
|
||||
|
||||
**Status**: Accepted
|
||||
**Date**: 2026-02-20
|
||||
**Authors**: RuVector Performance Team
|
||||
**Deciders**: Architecture Review Board
|
||||
|
||||
## Version History
|
||||
|
||||
| Version | Date | Author | Changes |
|
||||
|---------|------|--------|---------|
|
||||
| 0.1 | 2026-02-20 | RuVector Team | Initial proposal |
|
||||
| 1.0 | 2026-02-20 | RuVector Team | Accepted: full implementation complete |
|
||||
|
||||
---
|
||||
|
||||
## Context
|
||||
|
||||
### Existing Benchmark Infrastructure
|
||||
|
||||
RuVector maintains 90+ benchmark files using Criterion.rs 0.5 with HTML reports. The release profile enables aggressive optimization (`lto = "fat"`, `codegen-units = 1`, `opt-level = 3`), and the bench profile inherits release with debug symbols for profiling.
|
||||
|
||||
### Published Performance Baselines
|
||||
|
||||
| Metric | Value | Platform | Source |
|
||||
|--------|-------|----------|--------|
|
||||
| Euclidean 128D | 14.9 ns | M4 Pro NEON | BENCHMARK_RESULTS.md |
|
||||
| Dot Product 128D | 12.0 ns | M4 Pro NEON | BENCHMARK_RESULTS.md |
|
||||
| HNSW k=10, 10K vectors | 25.2 μs | M4 Pro | BENCHMARK_RESULTS.md |
|
||||
| Batch 1K×384D | 278 μs | Linux AVX2 | BENCHMARK_RESULTS.md |
|
||||
| Binary hamming 384D | 0.9 ns | M4 Pro | BENCHMARK_RESULTS.md |
|
||||
|
||||
### Validation Requirements
|
||||
|
||||
The sublinear-time solver claims 10-600x speedups. These must be validated with:
|
||||
- Statistical significance (Criterion p < 0.05)
|
||||
- Crossover point identification (where sublinear beats traditional)
|
||||
- Accuracy-performance tradeoff quantification
|
||||
- Multi-platform consistency verification
|
||||
- Regression detection in CI
|
||||
|
||||
---
|
||||
|
||||
## Decision
|
||||
|
||||
### 1. Six New Benchmark Suites
|
||||
|
||||
#### Suite 1: `benches/solver_baseline.rs`
|
||||
|
||||
Establishes baselines for operations the solver replaces:
|
||||
|
||||
```rust
|
||||
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId, Throughput};
|
||||
|
||||
fn dense_matmul_baseline(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("dense_matmul_baseline");
|
||||
|
||||
for size in [64, 256, 1024, 4096] {
|
||||
let a = random_dense_matrix(size, size, 42);
|
||||
let x = random_vector(size, 43);
|
||||
let mut y = vec![0.0f32; size];
|
||||
|
||||
group.throughput(Throughput::Elements((size * size) as u64));
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("naive", size),
|
||||
&size,
|
||||
|b, _| b.iter(|| dense_matvec_naive(&a, &x, &mut y)),
|
||||
);
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("simd_unrolled", size),
|
||||
&size,
|
||||
|b, _| b.iter(|| dense_matvec_simd(&a, &x, &mut y)),
|
||||
);
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn sparse_matmul_baseline(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("sparse_matmul_baseline");
|
||||
|
||||
for (n, density) in [(1000, 0.01), (1000, 0.05), (10000, 0.01), (10000, 0.05)] {
|
||||
let csr = random_csr_matrix(n, n, density, 44);
|
||||
let x = random_vector(n, 45);
|
||||
let mut y = vec![0.0f32; n];
|
||||
|
||||
group.throughput(Throughput::Elements(csr.nnz() as u64));
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new(format!("csr_{}x{}_{:.0}pct", n, n, density * 100.0), n),
|
||||
&n,
|
||||
|b, _| b.iter(|| csr.spmv(&x, &mut y)),
|
||||
);
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(baselines, dense_matmul_baseline, sparse_matmul_baseline);
|
||||
criterion_main!(baselines);
|
||||
```
|
||||
|
||||
#### Suite 2: `benches/solver_neumann.rs`
|
||||
|
||||
```rust
|
||||
fn neumann_convergence(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("neumann_convergence");
|
||||
group.warm_up_time(Duration::from_secs(5));
|
||||
group.sample_size(200);
|
||||
|
||||
let csr = random_diag_dominant_csr(10000, 0.01, 46);
|
||||
let b = random_vector(10000, 47);
|
||||
|
||||
for eps in [1e-2, 1e-4, 1e-6, 1e-8] {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("eps", format!("{:.0e}", eps)),
|
||||
&eps,
|
||||
|bench, &eps| {
|
||||
bench.iter(|| {
|
||||
let solver = NeumannSolver::new(eps, 1000);
|
||||
solver.solve(&csr, &b)
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn neumann_sparsity_impact(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("neumann_sparsity_impact");
|
||||
let n = 10000;
|
||||
|
||||
for density in [0.001, 0.01, 0.05, 0.10, 0.50] {
|
||||
let csr = random_diag_dominant_csr(n, density, 48);
|
||||
let b = random_vector(n, 49);
|
||||
|
||||
group.throughput(Throughput::Elements(csr.nnz() as u64));
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("density", format!("{:.1}pct", density * 100.0)),
|
||||
&density,
|
||||
|bench, _| {
|
||||
bench.iter(|| {
|
||||
NeumannSolver::new(1e-4, 1000).solve(&csr, &b)
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn neumann_vs_direct(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("neumann_vs_direct");
|
||||
|
||||
for n in [100, 500, 1000, 5000, 10000] {
|
||||
let csr = random_diag_dominant_csr(n, 0.01, 50);
|
||||
let b = random_vector(n, 51);
|
||||
let dense = csr.to_dense();
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("neumann", n), &n,
|
||||
|bench, _| bench.iter(|| NeumannSolver::new(1e-6, 1000).solve(&csr, &b)),
|
||||
);
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("dense_direct", n), &n,
|
||||
|bench, _| bench.iter(|| dense_solve(&dense, &b)),
|
||||
);
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(neumann, neumann_convergence, neumann_sparsity_impact, neumann_vs_direct);
|
||||
```
|
||||
|
||||
#### Suite 3: `benches/solver_push.rs`
|
||||
|
||||
```rust
|
||||
fn forward_push_scaling(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("forward_push_scaling");
|
||||
|
||||
for n in [100, 1000, 10000, 100000] {
|
||||
let graph = random_sparse_graph(n, 0.005, 52);
|
||||
|
||||
for eps in [1e-2, 1e-4, 1e-6] {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new(format!("n{}_eps{:.0e}", n, eps), n),
|
||||
&(n, eps),
|
||||
|bench, &(_, eps)| {
|
||||
bench.iter(|| {
|
||||
let solver = ForwardPushSolver::new(0.85, eps);
|
||||
solver.ppr_from_source(&graph, 0)
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn backward_push_vs_forward(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("push_direction_comparison");
|
||||
let n = 10000;
|
||||
let graph = random_sparse_graph(n, 0.005, 53);
|
||||
|
||||
for eps in [1e-2, 1e-4] {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("forward", format!("{:.0e}", eps)), &eps,
|
||||
|bench, &eps| bench.iter(|| ForwardPushSolver::new(0.85, eps).ppr_from_source(&graph, 0)),
|
||||
);
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("backward", format!("{:.0e}", eps)), &eps,
|
||||
|bench, &eps| bench.iter(|| BackwardPushSolver::new(0.85, eps).ppr_to_target(&graph, 0)),
|
||||
);
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
```
|
||||
|
||||
#### Suite 4: `benches/solver_random_walk.rs`
|
||||
|
||||
```rust
|
||||
fn random_walk_entry_estimation(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("random_walk_estimation");
|
||||
|
||||
for n in [1000, 10000, 100000] {
|
||||
let csr = random_laplacian_csr(n, 0.005, 54);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("single_entry", n), &n,
|
||||
|bench, _| bench.iter(|| {
|
||||
HybridRandomWalkSolver::new(1e-4, 1000).estimate_entry(&csr, 0, n/2)
|
||||
}),
|
||||
);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("batch_100_entries", n), &n,
|
||||
|bench, _| bench.iter(|| {
|
||||
let pairs: Vec<(usize, usize)> = (0..100).map(|i| (i, n - 1 - i)).collect();
|
||||
HybridRandomWalkSolver::new(1e-4, 1000).estimate_batch(&csr, &pairs)
|
||||
}),
|
||||
);
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
```
|
||||
|
||||
#### Suite 5: `benches/solver_scheduler.rs`
|
||||
|
||||
```rust
|
||||
fn scheduler_latency(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("scheduler_latency");
|
||||
|
||||
group.bench_function("noop_task", |b| {
|
||||
let scheduler = SolverScheduler::new(4);
|
||||
b.iter(|| scheduler.submit(|| {}))
|
||||
});
|
||||
|
||||
group.bench_function("100ns_task", |b| {
|
||||
let scheduler = SolverScheduler::new(4);
|
||||
b.iter(|| scheduler.submit(|| {
|
||||
std::hint::spin_loop(); // ~100ns
|
||||
}))
|
||||
});
|
||||
|
||||
group.bench_function("1us_task", |b| {
|
||||
let scheduler = SolverScheduler::new(4);
|
||||
b.iter(|| scheduler.submit(|| {
|
||||
for _ in 0..100 { std::hint::spin_loop(); }
|
||||
}))
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn scheduler_throughput(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("scheduler_throughput");
|
||||
|
||||
for task_count in [1000, 10_000, 100_000, 1_000_000] {
|
||||
group.throughput(Throughput::Elements(task_count));
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("tasks", task_count), &task_count,
|
||||
|bench, &count| {
|
||||
let scheduler = SolverScheduler::new(4);
|
||||
let counter = Arc::new(AtomicU64::new(0));
|
||||
bench.iter(|| {
|
||||
counter.store(0, Ordering::Relaxed);
|
||||
for _ in 0..count {
|
||||
let c = counter.clone();
|
||||
scheduler.submit(move || { c.fetch_add(1, Ordering::Relaxed); });
|
||||
}
|
||||
scheduler.flush();
|
||||
assert_eq!(counter.load(Ordering::Relaxed), count);
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
```
|
||||
|
||||
#### Suite 6: `benches/solver_e2e.rs`
|
||||
|
||||
```rust
|
||||
fn accelerated_search(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("accelerated_search");
|
||||
group.sample_size(50);
|
||||
group.warm_up_time(Duration::from_secs(5));
|
||||
|
||||
for n in [10_000, 100_000] {
|
||||
let db = build_test_db(n, 384, 56);
|
||||
let query = random_vector(384, 57);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("hnsw_only", n), &n,
|
||||
|bench, _| bench.iter(|| db.search(&query, 10)),
|
||||
);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("hnsw_plus_solver_rerank", n), &n,
|
||||
|bench, _| bench.iter(|| {
|
||||
let candidates = db.search(&query, 100); // Broad HNSW
|
||||
solver_rerank(&db, &query, &candidates, 10) // Solver-accelerated reranking
|
||||
}),
|
||||
);
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn accelerated_batch_analytics(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("batch_analytics");
|
||||
group.sample_size(10);
|
||||
|
||||
let n = 10_000;
|
||||
let vectors = random_matrix(n, 384, 58);
|
||||
|
||||
group.bench_function("pairwise_brute_force", |b| {
|
||||
b.iter(|| pairwise_distances_brute(&vectors))
|
||||
});
|
||||
|
||||
group.bench_function("pairwise_solver_estimated", |b| {
|
||||
b.iter(|| pairwise_distances_solver(&vectors, 1e-4))
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Regression Prevention
|
||||
|
||||
Hard thresholds enforced in CI:
|
||||
|
||||
```rust
|
||||
// In each benchmark suite, add regression markers
|
||||
fn solver_regression_tests(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("solver_regression");
|
||||
|
||||
// These thresholds trigger CI failure if exceeded
|
||||
group.bench_function("neumann_10k_1pct", |b| {
|
||||
let csr = random_diag_dominant_csr(10000, 0.01, 60);
|
||||
let rhs = random_vector(10000, 61);
|
||||
b.iter(|| NeumannSolver::new(1e-4, 1000).solve(&csr, &rhs))
|
||||
// Target: < 500μs
|
||||
});
|
||||
|
||||
group.bench_function("forward_push_10k", |b| {
|
||||
let graph = random_sparse_graph(10000, 0.005, 62);
|
||||
b.iter(|| ForwardPushSolver::new(0.85, 1e-4).ppr_from_source(&graph, 0))
|
||||
// Target: < 100μs
|
||||
});
|
||||
|
||||
group.bench_function("cg_10k_1pct", |b| {
|
||||
let csr = random_laplacian_csr(10000, 0.01, 63);
|
||||
let rhs = random_vector(10000, 64);
|
||||
b.iter(|| ConjugateGradientSolver::new(1e-6, 1000).solve(&csr, &rhs))
|
||||
// Target: < 1ms
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Accuracy Validation Suite
|
||||
|
||||
Alongside latency benchmarks, accuracy must be tracked:
|
||||
|
||||
```rust
|
||||
fn accuracy_validation() {
|
||||
// Neumann vs exact solve
|
||||
let csr = random_diag_dominant_csr(1000, 0.01, 70);
|
||||
let b = random_vector(1000, 71);
|
||||
let exact = dense_solve(&csr.to_dense(), &b);
|
||||
|
||||
for eps in [1e-2, 1e-4, 1e-6] {
|
||||
let approx = NeumannSolver::new(eps, 1000).solve(&csr, &b).unwrap();
|
||||
let relative_error = l2_distance(&exact, &approx.solution) / l2_norm(&exact);
|
||||
assert!(relative_error < eps * 10.0, // 10x margin
|
||||
"Neumann eps={}: relative error {} exceeds bound {}",
|
||||
eps, relative_error, eps * 10.0);
|
||||
}
|
||||
|
||||
// Forward Push recall@k
|
||||
let graph = random_sparse_graph(10000, 0.005, 72);
|
||||
let exact_ppr = exact_pagerank(&graph, 0, 0.85);
|
||||
let top_k_exact: Vec<usize> = exact_ppr.top_k(100);
|
||||
|
||||
for eps in [1e-2, 1e-4] {
|
||||
let approx_ppr = ForwardPushSolver::new(0.85, eps).ppr_from_source(&graph, 0);
|
||||
let top_k_approx: Vec<usize> = approx_ppr.top_k(100);
|
||||
let recall = set_overlap(&top_k_exact, &top_k_approx) as f64 / 100.0;
|
||||
assert!(recall > 0.9, "Forward Push eps={}: recall@100 = {} < 0.9", eps, recall);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. CI Integration
|
||||
|
||||
```yaml
|
||||
# .github/workflows/bench.yml
|
||||
name: Benchmark Suite
|
||||
on:
|
||||
pull_request:
|
||||
paths: ['crates/ruvector-solver/**']
|
||||
schedule:
|
||||
- cron: '0 2 * * *' # Nightly at 2 AM
|
||||
|
||||
jobs:
|
||||
bench-pr:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'pull_request'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- run: cargo bench -p ruvector-solver -- solver_regression
|
||||
- uses: benchmark-action/github-action-benchmark@v1
|
||||
with:
|
||||
tool: 'cargo'
|
||||
output-file-path: target/criterion/report/index.html
|
||||
|
||||
bench-nightly:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'schedule'
|
||||
strategy:
|
||||
matrix:
|
||||
target: [x86_64-unknown-linux-gnu, aarch64-unknown-linux-gnu]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- run: cargo bench -p ruvector-solver --target ${{ matrix.target }}
|
||||
- run: cargo bench -p ruvector-solver -- solver_accuracy
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: bench-results-${{ matrix.target }}
|
||||
path: target/criterion/
|
||||
```
|
||||
|
||||
### 5. Reporting Format
|
||||
|
||||
Following existing BENCHMARK_RESULTS.md conventions:
|
||||
|
||||
```markdown
|
||||
## Solver Integration Benchmarks
|
||||
|
||||
### Environment
|
||||
- **Date**: 2026-02-20
|
||||
- **Platform**: Linux x86_64, AMD EPYC 7763 (AVX-512)
|
||||
- **Rust**: 1.77, release profile (lto=fat, codegen-units=1)
|
||||
- **Criterion**: 0.5, 200 samples, 5s warmup
|
||||
|
||||
### Results
|
||||
|
||||
| Operation | Baseline | Solver | Speedup | Accuracy |
|
||||
|-----------|----------|--------|---------|----------|
|
||||
| MatVec 10K×10K (1%) | 400 μs | 15 μs | 26.7x | ε < 1e-4 |
|
||||
| PageRank 10K nodes | 50 ms | 80 μs | 625x | recall@100 > 0.95 |
|
||||
| Spectral gap est. | N/A | 50 μs | New | within 5% of exact |
|
||||
| Batch pairwise 10K | 480 s | 15 s | 32x | ε < 1e-3 |
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
|
||||
1. **Reproducible validation**: All speedup claims backed by Criterion benchmarks
|
||||
2. **Regression prevention**: CI catches performance degradations before merge
|
||||
3. **Multi-platform**: Benchmarks run on x86_64 and aarch64
|
||||
4. **Accuracy tracking**: Approximate algorithms validated against exact baselines
|
||||
5. **Aligned infrastructure**: Uses existing Criterion.rs setup, no new tools
|
||||
|
||||
### Negative
|
||||
|
||||
1. **Benchmark maintenance**: 6 new benchmark files to maintain
|
||||
2. **CI time**: Nightly full suite adds ~30 minutes to CI
|
||||
3. **Flaky thresholds**: Regression thresholds may need periodic recalibration
|
||||
|
||||
---
|
||||
|
||||
## Implementation Status
|
||||
|
||||
Complete Criterion benchmark suite delivered with 5 benchmark groups: solver_baseline (dense reference), solver_neumann (Neumann series profiling), solver_cg (conjugate gradient scaling), solver_push (push algorithm comparison), solver_e2e (end-to-end pipeline). Min-cut gating benchmark script (scripts/run_mincut_bench.sh) with 1k-sample grid search over lambda/tau parameters. Profiler crate (ruvector-profiler) provides memory, latency, power measurement with CSV output.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- [08-performance-analysis.md](../08-performance-analysis.md) — Existing benchmarks and methodology
|
||||
- [10-algorithm-analysis.md](../10-algorithm-analysis.md) — Algorithm complexity for threshold derivation
|
||||
- [12-testing-strategy.md](../12-testing-strategy.md) — Testing strategy integration
|
||||
Reference in New Issue
Block a user