use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; use scipix_ocr::optimize::*; fn bench_grayscale(c: &mut Criterion) { let mut group = c.benchmark_group("grayscale"); for size in [256, 512, 1024, 2048].iter() { let pixels = size * size; let rgba: Vec = (0..pixels * 4).map(|i| (i % 256) as u8).collect(); let mut gray = vec![0u8; pixels]; group.throughput(Throughput::Elements(pixels as u64)); // Benchmark SIMD version group.bench_with_input(BenchmarkId::new("simd", size), size, |b, _| { b.iter(|| { simd::simd_grayscale(black_box(&rgba), black_box(&mut gray)); }); }); // Benchmark scalar version group.bench_with_input(BenchmarkId::new("scalar", size), size, |b, _| { b.iter(|| { for (i, chunk) in rgba.chunks_exact(4).enumerate() { let r = chunk[0] as u32; let g = chunk[1] as u32; let b = chunk[2] as u32; gray[i] = ((r * 77 + g * 150 + b * 29) >> 8) as u8; } }); }); } group.finish(); } fn bench_threshold(c: &mut Criterion) { let mut group = c.benchmark_group("threshold"); for size in [1024, 4096, 16384, 65536].iter() { let gray: Vec = (0..*size).map(|i| (i % 256) as u8).collect(); let mut out = vec![0u8; *size]; group.throughput(Throughput::Elements(*size as u64)); // SIMD version group.bench_with_input(BenchmarkId::new("simd", size), size, |b, _| { b.iter(|| { simd::simd_threshold(black_box(&gray), black_box(128), black_box(&mut out)); }); }); // Scalar version group.bench_with_input(BenchmarkId::new("scalar", size), size, |b, _| { b.iter(|| { for (g, o) in gray.iter().zip(out.iter_mut()) { *o = if *g >= 128 { 255 } else { 0 }; } }); }); } group.finish(); } fn bench_normalize(c: &mut Criterion) { let mut group = c.benchmark_group("normalize"); for size in [128, 512, 2048, 8192].iter() { let mut data: Vec = (0..*size).map(|i| i as f32).collect(); group.throughput(Throughput::Elements(*size as u64)); // SIMD version group.bench_with_input(BenchmarkId::new("simd", size), size, |b, _| { let mut data_copy = data.clone(); b.iter(|| { simd::simd_normalize(black_box(&mut data_copy)); }); }); // Scalar version group.bench_with_input(BenchmarkId::new("scalar", size), size, |b, _| { let mut data_copy = data.clone(); b.iter(|| { let sum: f32 = data_copy.iter().sum(); let mean = sum / data_copy.len() as f32; let variance: f32 = data_copy.iter().map(|x| (x - mean).powi(2)).sum::() / data_copy.len() as f32; let std_dev = variance.sqrt() + 1e-8; for x in data_copy.iter_mut() { *x = (*x - mean) / std_dev; } }); }); } group.finish(); } fn bench_parallel_map(c: &mut Criterion) { let mut group = c.benchmark_group("parallel_map"); for size in [100, 1000, 10000].iter() { let data: Vec = (0..*size).collect(); group.throughput(Throughput::Elements(*size as u64)); // Parallel version group.bench_with_input(BenchmarkId::new("parallel", size), size, |b, _| { b.iter(|| { parallel::parallel_map_chunked(black_box(data.clone()), 100, |x| x * x + x * 2 + 1) }); }); // Sequential version group.bench_with_input(BenchmarkId::new("sequential", size), size, |b, _| { b.iter(|| data.iter().map(|&x| x * x + x * 2 + 1).collect::>()); }); } group.finish(); } fn bench_buffer_pool(c: &mut Criterion) { let mut group = c.benchmark_group("buffer_pool"); let pool = memory::BufferPool::new(|| Vec::with_capacity(1024), 10, 100); // Benchmark pooled allocation group.bench_function("pooled", |b| { b.iter(|| { let mut buf = pool.acquire(); buf.extend_from_slice(&[0u8; 512]); black_box(&buf); }); }); // Benchmark direct allocation group.bench_function("direct", |b| { b.iter(|| { let mut buf = Vec::with_capacity(1024); buf.extend_from_slice(&[0u8; 512]); black_box(&buf); }); }); group.finish(); } fn bench_quantization(c: &mut Criterion) { let mut group = c.benchmark_group("quantization"); for size in [1024, 4096, 16384].iter() { let weights: Vec = (0..*size) .map(|i| (i as f32 / *size as f32) * 2.0 - 1.0) .collect(); group.throughput(Throughput::Elements(*size as u64)); // Quantize group.bench_with_input(BenchmarkId::new("quantize", size), size, |b, _| { b.iter(|| quantize::quantize_weights(black_box(&weights))); }); // Dequantize let (quantized, params) = quantize::quantize_weights(&weights); group.bench_with_input(BenchmarkId::new("dequantize", size), size, |b, _| { b.iter(|| quantize::dequantize(black_box(&quantized), black_box(params))); }); // Per-channel quantization let shape = vec![*size / 64, 64]; group.bench_with_input(BenchmarkId::new("per_channel", size), size, |b, _| { b.iter(|| { quantize::PerChannelQuant::from_f32(black_box(&weights), black_box(shape.clone())) }); }); } group.finish(); } fn bench_memory_operations(c: &mut Criterion) { let mut group = c.benchmark_group("memory_ops"); // Arena allocation let mut arena = memory::Arena::with_capacity(1024 * 1024); group.bench_function("arena_alloc", |b| { b.iter(|| { arena.reset(); for _ in 0..100 { let slice = arena.alloc(1024, 8); black_box(slice); } }); }); // Vector allocation group.bench_function("vec_alloc", |b| { b.iter(|| { for _ in 0..100 { let mut vec = Vec::with_capacity(1024); vec.resize(1024, 0u8); black_box(&vec); } }); }); group.finish(); } criterion_group!( benches, bench_grayscale, bench_threshold, bench_normalize, bench_parallel_map, bench_buffer_pool, bench_quantization, bench_memory_operations ); criterion_main!(benches);