Files

438 lines
12 KiB
Rust

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::time::Duration;
/// Benchmark peak memory during inference.
///
/// For each input resolution, one iteration runs the whole mock pipeline
/// (model load, image creation, preprocessing, inference, postprocessing) and
/// then samples the tracker's peak usage.
fn bench_peak_memory_inference(c: &mut Criterion) {
    let mut group = c.benchmark_group("peak_memory_inference");
    group.measurement_time(Duration::from_secs(10));

    let sizes = [(224, 224), (384, 384), (512, 512)];
    for (w, h) in sizes {
        group.bench_with_input(
            BenchmarkId::new("single_inference", format!("{}x{}", w, h)),
            &(w, h),
            |b, &(width, height)| {
                b.iter_with_large_drop(|| {
                    // The binding must be mutable: `peak_usage` takes `&mut self`.
                    let mut memory_tracker = MemoryTracker::new();

                    // Simulate model loading
                    let model = load_model();
                    // Create input
                    let image = create_image(width, height);
                    // Preprocessing
                    let preprocessed = preprocess(image);
                    // Inference
                    let output = run_inference(&model, preprocessed);
                    // Postprocessing
                    let result = postprocess(output);

                    let peak_memory = memory_tracker.peak_usage();
                    black_box((result, peak_memory))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark memory per image in batch.
///
/// For each batch size, one iteration loads the mock model, builds a batch of
/// 384x384 images, runs batch inference and divides the tracker's peak usage
/// by the number of images in the batch.
fn bench_memory_per_batch_image(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_per_batch_image");
    group.measurement_time(Duration::from_secs(15));

    // Every batch size is non-zero, so the per-image division below is safe.
    let batch_sizes = [1, 4, 8, 16, 32];
    let (width, height) = (384, 384);

    for batch_size in batch_sizes {
        group.bench_with_input(
            BenchmarkId::new("batch_inference", batch_size),
            &batch_size,
            |b, &batch_size| {
                b.iter_with_large_drop(|| {
                    // The binding must be mutable: `peak_usage` takes `&mut self`.
                    let mut memory_tracker = MemoryTracker::new();
                    let model = load_model();
                    let batch = create_batch(batch_size, width, height);
                    let output = run_batch_inference(&model, batch);
                    let total_memory = memory_tracker.peak_usage();
                    let per_image = total_memory / batch_size;
                    black_box((output, per_image))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark model loading memory.
///
/// Loads each mock model on its own and then all three together, sampling the
/// tracker's peak usage right after the load.
fn bench_model_loading_memory(c: &mut Criterion) {
    let mut group = c.benchmark_group("model_loading_memory");
    group.measurement_time(Duration::from_secs(10));

    // In every closure the tracker binding is `mut` because `peak_usage`
    // takes `&mut self`.
    group.bench_function("detection_model", |b| {
        b.iter_with_large_drop(|| {
            let mut tracker = MemoryTracker::new();
            let model = load_detection_model();
            let memory = tracker.peak_usage();
            black_box((model, memory))
        });
    });

    group.bench_function("recognition_model", |b| {
        b.iter_with_large_drop(|| {
            let mut tracker = MemoryTracker::new();
            let model = load_recognition_model();
            let memory = tracker.peak_usage();
            black_box((model, memory))
        });
    });

    group.bench_function("math_model", |b| {
        b.iter_with_large_drop(|| {
            let mut tracker = MemoryTracker::new();
            let model = load_math_model();
            let memory = tracker.peak_usage();
            black_box((model, memory))
        });
    });

    group.bench_function("all_models", |b| {
        b.iter_with_large_drop(|| {
            let mut tracker = MemoryTracker::new();
            let detection = load_detection_model();
            let recognition = load_recognition_model();
            let math = load_math_model();
            let total_memory = tracker.peak_usage();
            black_box((detection, recognition, math, total_memory))
        });
    });

    group.finish();
}
/// Benchmark memory growth over time.
///
/// One iteration loads the mock model, runs 100 sequential inferences on
/// 384x384 images, records the tracker's current usage on every tenth
/// inference and derives a growth figure from those samples.
fn bench_memory_growth(c: &mut Criterion) {
    const INFERENCES: usize = 100;
    const SAMPLE_EVERY: usize = 10;

    let mut group = c.benchmark_group("memory_growth");
    group.measurement_time(Duration::from_secs(20));

    group.bench_function("sequential_inferences", |bencher| {
        bencher.iter_with_large_drop(|| {
            let tracker = MemoryTracker::new();
            let model = load_model();
            let mut samples = Vec::new();

            for step in 0..INFERENCES {
                let tensor = preprocess(create_image(384, 384));
                // Bound to a name so the output stays alive until the sample
                // below has been taken.
                let _result = run_inference(&model, tensor);

                if step % SAMPLE_EVERY == 0 {
                    samples.push(tracker.current_usage());
                }
            }

            let growth = calculate_memory_growth(&samples);
            black_box((samples, growth))
        });
    });

    group.finish();
}
/// Benchmark memory fragmentation.
///
/// Each iteration allocates 100 buffers of 1..=10 KiB, frees every other one,
/// then allocates 50 more buffers of 2..=10 KiB (in 2 KiB steps).
fn bench_memory_fragmentation(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_fragmentation");
    group.measurement_time(Duration::from_secs(10));

    group.bench_function("allocate_deallocate_pattern", |bencher| {
        bencher.iter(|| {
            let mut buffers = Vec::new();

            // Phase 1: buffer sizes cycle through 1 KiB, 2 KiB, ..., 10 KiB.
            for kib in (1..=10).cycle().take(100) {
                buffers.push(vec![0u8; kib * 1024]);
            }

            // Phase 2: keep the even-indexed buffers, dropping the odd ones.
            buffers = buffers
                .into_iter()
                .enumerate()
                .filter(|(index, _)| index % 2 == 0)
                .map(|(_, buffer)| buffer)
                .collect();

            // Phase 3: buffer sizes cycle through 2 KiB, 4 KiB, ..., 10 KiB.
            for units in (1..=5).cycle().take(50) {
                buffers.push(vec![0u8; units * 2048]);
            }

            black_box(buffers)
        });
    });

    group.finish();
}
/// Benchmark cache memory overhead.
///
/// For each cache size, one iteration builds an embedding cache with that many
/// entries and samples the tracker's peak usage afterwards.
fn bench_cache_memory(c: &mut Criterion) {
    let mut group = c.benchmark_group("cache_memory");
    group.measurement_time(Duration::from_secs(10));

    let cache_sizes = [100, 1000, 10000];
    for cache_size in cache_sizes {
        group.bench_with_input(
            BenchmarkId::new("embedding_cache", cache_size),
            &cache_size,
            |b, &size| {
                b.iter_with_large_drop(|| {
                    // The binding must be mutable: `peak_usage` takes `&mut self`.
                    let mut tracker = MemoryTracker::new();
                    let cache = create_embedding_cache(size);
                    let memory = tracker.peak_usage();
                    black_box((cache, memory))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark memory pool efficiency
fn bench_memory_pools(c: &mut Criterion) {
let mut group = c.benchmark_group("memory_pools");
group.measurement_time(Duration::from_secs(8));
group.bench_function("without_pool", |b| {
b.iter(|| {
let mut allocations = Vec::new();
for _ in 0..100 {
let buffer = vec![0u8; 1024 * 1024];
allocations.push(buffer);
}
black_box(allocations)
});
});
group.bench_function("with_pool", |b| {
let mut pool = MemoryPool::new(1024 * 1024, 100);
b.iter(|| {
let mut handles = Vec::new();
for _ in 0..100 {
let handle = pool.allocate();
handles.push(handle);
}
black_box(handles)
});
});
group.finish();
}
/// Benchmark tensor memory layouts.
///
/// Measures creating a 384x384x3 tensor via the HWC and CHW constructors, and
/// converting an existing HWC tensor to CHW, sampling the tracker's peak usage
/// after each operation.
fn bench_tensor_layouts(c: &mut Criterion) {
    let mut group = c.benchmark_group("tensor_layouts");
    group.measurement_time(Duration::from_secs(8));

    let (height, width, channels) = (384, 384, 3);

    // In every closure the tracker binding is `mut` because `peak_usage`
    // takes `&mut self`.
    group.bench_function("hwc_layout", |b| {
        b.iter(|| {
            let mut tracker = MemoryTracker::new();
            let tensor = create_hwc_tensor(height, width, channels);
            let memory = tracker.peak_usage();
            black_box((tensor, memory))
        });
    });

    group.bench_function("chw_layout", |b| {
        b.iter(|| {
            let mut tracker = MemoryTracker::new();
            let tensor = create_chw_tensor(height, width, channels);
            let memory = tracker.peak_usage();
            black_box((tensor, memory))
        });
    });

    group.bench_function("layout_conversion", |b| {
        // The source tensor is built once, outside the measured closure.
        let hwc = create_hwc_tensor(height, width, channels);
        b.iter(|| {
            let mut tracker = MemoryTracker::new();
            let chw = convert_hwc_to_chw(&hwc, height, width, channels);
            let memory = tracker.peak_usage();
            black_box((chw, memory))
        });
    });

    group.finish();
}
// Mock implementations
/// Mock memory tracker that reports usage relative to the moment it was created.
struct MemoryTracker {
    /// Value of `get_current_memory_usage()` when the tracker was constructed.
    initial_usage: usize,
    /// Largest relative usage seen by `peak_usage` so far.
    peak: usize,
}

impl MemoryTracker {
    /// Creates a tracker whose baseline is the current memory usage.
    fn new() -> Self {
        Self {
            initial_usage: get_current_memory_usage(),
            peak: 0,
        }
    }

    /// Returns the memory used since the tracker was created.
    ///
    /// Saturates at zero when current usage has dropped below the baseline,
    /// instead of underflowing the unsigned subtraction.
    fn current_usage(&self) -> usize {
        get_current_memory_usage().saturating_sub(self.initial_usage)
    }

    /// Samples the current usage, folds it into the running peak and returns the peak.
    ///
    /// The peak is only updated when this method is called, so a spike between
    /// two calls goes unnoticed.
    fn peak_usage(&mut self) -> usize {
        self.peak = self.peak.max(self.current_usage());
        self.peak
    }
}

/// Returns the process's current memory usage in the tracker's units.
///
/// This mock always returns 0, so every `MemoryTracker` reading is 0 as well.
fn get_current_memory_usage() -> usize {
    // In production, this would query actual memory usage
    // For benchmarking, we'll estimate based on allocations
    0
}
// Plain buffers stand in for the real pipeline types in these mock benchmarks.
/// Mock model: an opaque byte buffer.
type Model = Vec<u8>;
/// Mock image: a flat byte buffer.
type Image = Vec<u8>;
/// Mock preprocessed input: a flat `f32` buffer.
type Tensor = Vec<f32>;
/// Mock inference output: a flat `f32` buffer.
type Output = Vec<f32>;
/// Simulates loading the generic model by allocating a zero-filled 100 MiB buffer.
fn load_model() -> Model {
    const MODEL_BYTES: usize = 100 << 20; // 100 MiB
    vec![0; MODEL_BYTES]
}
/// Simulates loading the detection model by allocating a zero-filled 150 MiB buffer.
fn load_detection_model() -> Model {
    const MODEL_BYTES: usize = 150 << 20; // 150 MiB
    vec![0; MODEL_BYTES]
}
/// Simulates loading the recognition model by allocating a zero-filled 80 MiB buffer.
fn load_recognition_model() -> Model {
    const MODEL_BYTES: usize = 80 << 20; // 80 MiB
    vec![0; MODEL_BYTES]
}
/// Simulates loading the math model by allocating a zero-filled 120 MiB buffer.
fn load_math_model() -> Model {
    const MODEL_BYTES: usize = 120 << 20; // 120 MiB
    vec![0; MODEL_BYTES]
}
/// Creates a mock image of `width * height * 3` bytes, every byte set to 128.
///
/// The length is computed in `usize`, so large dimensions do not overflow the
/// `u32` arithmetic before the buffer is sized.
fn create_image(width: u32, height: u32) -> Image {
    // Widen each factor first; multiplying in `u32` overflows above ~4 GiB.
    let byte_len = width as usize * height as usize * 3;
    vec![128u8; byte_len]
}
/// Builds a batch of `batch_size` mock images, each created with the given
/// `width` and `height`.
fn create_batch(batch_size: usize, width: u32, height: u32) -> Vec<Image> {
    std::iter::repeat_with(|| create_image(width, height))
        .take(batch_size)
        .collect()
}
/// Converts an image to a tensor by mapping every byte to `byte / 255.0`.
fn preprocess(image: Image) -> Tensor {
    image
        .into_iter()
        .map(|byte| f32::from(byte) / 255.0)
        .collect()
}
/// Mock inference: doubles every element of `input`. The model is not consulted.
fn run_inference(_model: &Model, input: Tensor) -> Output {
    input.into_iter().map(|value| value * 2.0).collect()
}
fn run_batch_inference(_model: &Model, batch: Vec<Image>) -> Vec<Output> {
batch
.into_iter()
.map(|img| {
let tensor = preprocess(img);
tensor.iter().map(|&x| x * 2.0).collect()
})
.collect()
}
/// Formats the first output value as `result_<value>` with two decimals.
///
/// # Panics
///
/// Panics if `output` is empty.
fn postprocess(output: Output) -> String {
    let leading = output[0];
    format!("result_{:.2}", leading)
}
/// Returns the relative growth between the first and last sample,
/// `(last - first) / first`.
///
/// Returns `0.0` when there are fewer than two samples or when the first and
/// last samples are equal. The equal case includes both being zero, which
/// would otherwise evaluate `0.0 / 0.0` and yield NaN. A zero first sample
/// followed by a non-zero last sample still yields an infinite growth.
fn calculate_memory_growth(samples: &[usize]) -> f64 {
    let (first, last) = match samples {
        [first, .., last] => (*first as f64, *last as f64),
        // Zero or one sample: there is no interval to measure growth over.
        _ => return 0.0,
    };

    if first == last {
        return 0.0;
    }
    (last - first) / first
}
/// Builds a mock embedding cache: `size` vectors of 512 `f32` values, all `0.5`.
fn create_embedding_cache(size: usize) -> Vec<Vec<f32>> {
    let embedding = vec![0.5f32; 512];
    vec![embedding; size]
}
/// Mock fixed-size block pool: all blocks are allocated up front and handed
/// out by index.
struct MemoryPool {
    /// Size in bytes of every block.
    block_size: usize,
    /// Backing storage, one zero-filled buffer per block.
    blocks: Vec<Vec<u8>>,
    /// Indices of blocks that have not been handed out; used as a stack.
    available: Vec<usize>,
}

impl MemoryPool {
    /// Creates a pool of `count` zero-filled blocks of `block_size` bytes,
    /// all initially available.
    fn new(block_size: usize, count: usize) -> Self {
        let blocks = std::iter::repeat_with(|| vec![0u8; block_size])
            .take(count)
            .collect();
        let available: Vec<usize> = (0..count).collect();

        Self {
            block_size,
            blocks,
            available,
        }
    }

    /// Hands out the index of an available block (highest index first), or
    /// `None` once the pool is exhausted.
    fn allocate(&mut self) -> Option<usize> {
        self.available.pop()
    }
}
/// Creates a mock tensor of `height * width * channels` elements, all `0.5`.
///
/// The element count is computed in `usize`, so large dimensions do not
/// overflow the `u32` arithmetic before the buffer is sized.
fn create_hwc_tensor(height: u32, width: u32, channels: u32) -> Vec<f32> {
    // Widen each factor first; multiplying in `u32` can overflow.
    let element_count = height as usize * width as usize * channels as usize;
    vec![0.5f32; element_count]
}
/// Creates a mock tensor of `channels * height * width` elements, all `0.5`.
///
/// The element count is computed in `usize`, so large dimensions do not
/// overflow the `u32` arithmetic before the buffer is sized.
fn create_chw_tensor(height: u32, width: u32, channels: u32) -> Vec<f32> {
    // Widen each factor first; multiplying in `u32` can overflow.
    let element_count = channels as usize * height as usize * width as usize;
    vec![0.5f32; element_count]
}
/// Reorders an interleaved height-width-channel buffer into planar
/// channel-height-width order.
///
/// The element at `(h, w, c)` is read from `hwc[(h * width + w) * channels + c]`
/// and written to position `(c * height + h) * width + w` of the result. Only
/// the first `height * width * channels` elements of `hwc` are used.
///
/// # Panics
///
/// Panics if `hwc` holds fewer than `height * width * channels` elements.
fn convert_hwc_to_chw(hwc: &[f32], height: u32, width: u32, channels: u32) -> Vec<f32> {
    // Do all index arithmetic in `usize` so it cannot overflow `u32`.
    let channels = channels as usize;
    let total = height as usize * width as usize * channels;

    // Fail up front with a clear message rather than on an index deep in the loop.
    assert!(
        hwc.len() >= total,
        "hwc tensor has {} elements, expected at least {}",
        hwc.len(),
        total
    );

    let interleaved = &hwc[..total];
    let mut chw = Vec::with_capacity(total);
    // For channel `c`, the wanted values sit at offsets c, c + channels, ...
    // in pixel order. With zero channels the loop body (and `step_by(0)`) never runs.
    for c in 0..channels {
        chw.extend(interleaved.iter().skip(c).step_by(channels).copied());
    }
    chw
}
// Register every benchmark function in this file under the `benches` group
// and generate the benchmark binary's entry point from that group.
criterion_group!(
benches,
bench_peak_memory_inference,
bench_memory_per_batch_image,
bench_model_loading_memory,
bench_memory_growth,
bench_memory_fragmentation,
bench_cache_memory,
bench_memory_pools,
bench_tensor_layouts
);
criterion_main!(benches);