Files
wifi-densepose/examples/scipix/benches/optimization_bench.rs
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

225 lines
6.7 KiB
Rust

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use scipix_ocr::optimize::*;
/// Benchmarks RGBA-to-grayscale conversion: `simd::simd_grayscale` against an
/// inline scalar loop.
///
/// Each case uses a square image with side length 256, 512, 1024 or 2048.
/// Throughput is reported in pixels (`size * size` elements).
fn bench_grayscale(c: &mut Criterion) {
    let mut group = c.benchmark_group("grayscale");
    for size in [256usize, 512, 1024, 2048].iter() {
        let pixels = size * size;
        // Four bytes per pixel, filled with a repeating 0..=255 ramp.
        let rgba: Vec<u8> = (0..pixels * 4).map(|i| (i % 256) as u8).collect();
        let mut gray = vec![0u8; pixels];
        group.throughput(Throughput::Elements(pixels as u64));

        // Benchmark SIMD version
        group.bench_with_input(BenchmarkId::new("simd", size), size, |b, _| {
            b.iter(|| {
                simd::simd_grayscale(black_box(&rgba), black_box(&mut gray));
            });
        });

        // Benchmark scalar version
        group.bench_with_input(BenchmarkId::new("scalar", size), size, |b, _| {
            b.iter(|| {
                // Guard both buffers exactly like the SIMD path so the optimizer
                // cannot treat the baseline differently (hoisting or eliding it).
                let src = black_box(rgba.as_slice());
                let dst = black_box(gray.as_mut_slice());
                // Zipping avoids a bounds check per pixel; the alpha byte
                // (index 3) of every chunk is ignored.
                for (out, px) in dst.iter_mut().zip(src.chunks_exact(4)) {
                    let red = px[0] as u32;
                    let green = px[1] as u32;
                    let blue = px[2] as u32;
                    // The weights sum to 256, so `>> 8` scales back into u8 range.
                    *out = ((red * 77 + green * 150 + blue * 29) >> 8) as u8;
                }
            });
        });
    }
    group.finish();
}
/// Benchmarks binary thresholding: `simd::simd_threshold` against an inline
/// scalar loop.
///
/// Buffers of 1024, 4096, 16384 and 65536 bytes are filled with a repeating
/// 0..=255 ramp and thresholded at 128. Throughput is reported in bytes.
fn bench_threshold(c: &mut Criterion) {
    let mut group = c.benchmark_group("threshold");
    for size in [1024usize, 4096, 16384, 65536].iter() {
        let gray: Vec<u8> = (0..*size).map(|i| (i % 256) as u8).collect();
        let mut out = vec![0u8; *size];
        group.throughput(Throughput::Elements(*size as u64));

        // SIMD version
        group.bench_with_input(BenchmarkId::new("simd", size), size, |b, _| {
            b.iter(|| {
                simd::simd_threshold(black_box(&gray), black_box(128), black_box(&mut out));
            });
        });

        // Scalar version
        group.bench_with_input(BenchmarkId::new("scalar", size), size, |b, _| {
            b.iter(|| {
                // Mirror the SIMD call: the threshold and both buffers are opaque
                // to the optimizer, so neither variant benefits from a
                // compile-time-constant threshold or elided stores.
                let threshold: u8 = black_box(128);
                let src = black_box(gray.as_slice());
                let dst = black_box(out.as_mut_slice());
                for (g, o) in src.iter().zip(dst.iter_mut()) {
                    *o = if *g >= threshold { 255 } else { 0 };
                }
            });
        });
    }
    group.finish();
}
/// Benchmarks mean/std-dev normalisation of an `f32` buffer:
/// `simd::simd_normalize` against an inline scalar implementation.
///
/// Inputs are ramps `0.0, 1.0, 2.0, ...` of length 128, 512, 2048 and 8192.
/// Each variant works on its own clone of the input. The scalar path rewrites
/// its clone in place, so every iteration after the first re-normalises
/// already-normalised values; the SIMD path receives `&mut` and presumably
/// does the same (confirm against `simd::simd_normalize`).
fn bench_normalize(c: &mut Criterion) {
    let mut group = c.benchmark_group("normalize");
    for size in [128, 512, 2048, 8192].iter() {
        // Only ever cloned below, so the binding does not need to be mutable.
        let data: Vec<f32> = (0..*size).map(|i| i as f32).collect();
        group.throughput(Throughput::Elements(*size as u64));

        // SIMD version
        group.bench_with_input(BenchmarkId::new("simd", size), size, |b, _| {
            let mut data_copy = data.clone();
            b.iter(|| {
                simd::simd_normalize(black_box(&mut data_copy));
            });
        });

        // Scalar version
        group.bench_with_input(BenchmarkId::new("scalar", size), size, |b, _| {
            let mut data_copy = data.clone();
            b.iter(|| {
                // Same optimizer barrier as the SIMD path.
                let values = black_box(data_copy.as_mut_slice());
                let sum: f32 = values.iter().sum();
                let mean = sum / values.len() as f32;
                let variance: f32 = values.iter().map(|x| (x - mean).powi(2)).sum::<f32>()
                    / values.len() as f32;
                // The epsilon keeps the division finite for constant input.
                let std_dev = variance.sqrt() + 1e-8;
                for x in values.iter_mut() {
                    *x = (*x - mean) / std_dev;
                }
            });
        });
    }
    group.finish();
}
fn bench_parallel_map(c: &mut Criterion) {
let mut group = c.benchmark_group("parallel_map");
for size in [100, 1000, 10000].iter() {
let data: Vec<i32> = (0..*size).collect();
group.throughput(Throughput::Elements(*size as u64));
// Parallel version
group.bench_with_input(BenchmarkId::new("parallel", size), size, |b, _| {
b.iter(|| {
parallel::parallel_map_chunked(black_box(data.clone()), 100, |x| x * x + x * 2 + 1)
});
});
// Sequential version
group.bench_with_input(BenchmarkId::new("sequential", size), size, |b, _| {
b.iter(|| data.iter().map(|&x| x * x + x * 2 + 1).collect::<Vec<_>>());
});
}
group.finish();
}
/// Compares taking a buffer from a `memory::BufferPool` with allocating a fresh
/// `Vec` of capacity 1024 on every iteration.
///
/// Both variants append the same 512 zero bytes and pass the buffer through
/// `black_box`. The meaning of the `10` and `100` arguments to
/// `BufferPool::new`, and what happens to an acquired buffer when it goes out
/// of scope, are defined by `memory::BufferPool` — confirm there.
fn bench_buffer_pool(c: &mut Criterion) {
    // Bytes appended to the buffer in both variants.
    const PAYLOAD: [u8; 512] = [0u8; 512];

    let mut group = c.benchmark_group("buffer_pool");
    let pool = memory::BufferPool::new(|| Vec::with_capacity(1024), 10, 100);

    // Benchmark pooled allocation
    group.bench_function("pooled", |bencher| {
        bencher.iter(|| {
            let mut pooled = pool.acquire();
            pooled.extend_from_slice(&PAYLOAD);
            black_box(&pooled);
        });
    });

    // Benchmark direct allocation
    group.bench_function("direct", |bencher| {
        bencher.iter(|| {
            let mut fresh: Vec<u8> = Vec::with_capacity(1024);
            fresh.extend_from_slice(&PAYLOAD);
            black_box(&fresh);
        });
    });

    group.finish();
}
/// Benchmarks the `quantize` module on weight vectors of 1024, 4096 and 16384
/// elements, with values forming a linear ramp starting at -1.0 and
/// approaching (but not reaching) 1.0.
///
/// Three operations are measured per size:
/// * `quantize` — `quantize::quantize_weights`
/// * `dequantize` — `quantize::dequantize` on a result quantized once, outside
///   the timed closure
/// * `per_channel` — `quantize::PerChannelQuant::from_f32` with shape
///   `[size / 64, 64]`; the two-element shape vector is cloned inside the
///   timed closure
fn bench_quantization(c: &mut Criterion) {
    let mut group = c.benchmark_group("quantization");

    for size in [1024, 4096, 16384].iter() {
        let count = *size;
        let span = count as f32;
        // i / count lies in [0, 1); stretch and shift it into [-1, 1).
        let weights: Vec<f32> = (0..count)
            .map(|i| (i as f32 / span) * 2.0 - 1.0)
            .collect();
        group.throughput(Throughput::Elements(count as u64));

        // Quantize
        let quantize_id = BenchmarkId::new("quantize", size);
        group.bench_with_input(quantize_id, size, |bencher, _| {
            bencher.iter(|| quantize::quantize_weights(black_box(&weights)));
        });

        // Dequantize
        let (quantized, params) = quantize::quantize_weights(&weights);
        let dequantize_id = BenchmarkId::new("dequantize", size);
        group.bench_with_input(dequantize_id, size, |bencher, _| {
            bencher.iter(|| quantize::dequantize(black_box(&quantized), black_box(params)));
        });

        // Per-channel quantization
        let shape = vec![count / 64, 64];
        let per_channel_id = BenchmarkId::new("per_channel", size);
        group.bench_with_input(per_channel_id, size, |bencher, _| {
            bencher.iter(|| {
                let dims = shape.clone();
                quantize::PerChannelQuant::from_f32(black_box(&weights), black_box(dims))
            });
        });
    }

    group.finish();
}
/// Compares 100 allocations of 1024 bytes from a `memory::Arena` with 100
/// freshly allocated, zero-filled 1024-byte `Vec`s.
///
/// The arena is created once with a capacity of 1 MiB and reset at the start
/// of every measured iteration.
fn bench_memory_operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_ops");

    // Arena allocation
    let mut arena = memory::Arena::with_capacity(1024 * 1024);
    group.bench_function("arena_alloc", |bencher| {
        bencher.iter(|| {
            arena.reset();
            for _round in 0..100 {
                // Second argument is presumably the alignment — confirm against
                // `memory::Arena::alloc`.
                black_box(arena.alloc(1024, 8));
            }
        });
    });

    // Vector allocation
    group.bench_function("vec_alloc", |bencher| {
        bencher.iter(|| {
            for _round in 0..100 {
                let mut bytes: Vec<u8> = Vec::with_capacity(1024);
                bytes.resize(1024, 0);
                black_box(&bytes);
            }
        });
    });

    group.finish();
}
// Collect every benchmark function defined in this file into a single
// Criterion group named `benches`. They are listed in the same order as they
// are defined above.
criterion_group!(
benches,
bench_grayscale,
bench_threshold,
bench_normalize,
bench_parallel_map,
bench_buffer_pool,
bench_quantization,
bench_memory_operations
);
// Hand the `benches` group to Criterion's entry-point macro.
criterion_main!(benches);