Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,455 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::time::Duration;
/// Benchmark API request parsing
fn bench_request_parsing(c: &mut Criterion) {
    let mut group = c.benchmark_group("request_parsing");
    group.measurement_time(Duration::from_secs(5));
    // Payloads of increasing size and nesting depth, labelled for the report.
    let json_payloads = vec![
        ("small", r#"{"image_url": "http://example.com/img.jpg"}"#),
        (
            "medium",
            r#"{
"image_url": "http://example.com/img.jpg",
"options": {
"languages": ["en", "es"],
"format": "latex",
"inline_mode": true
}
}"#,
        ),
        (
            "large",
            r#"{
"image_url": "http://example.com/img.jpg",
"options": {
"languages": ["en", "es", "fr", "de"],
"format": "latex",
"inline_mode": true,
"detect_orientation": true,
"skip_preprocessing": false,
"models": ["text", "math", "table"],
"confidence_threshold": 0.8
},
"metadata": {
"user_id": "12345",
"session_id": "abcde",
"timestamp": 1234567890
}
}"#,
        ),
    ];
    for (name, payload) in json_payloads {
        // black_box prevents the optimizer from const-folding the parse away.
        group.bench_with_input(BenchmarkId::new("parse_json", name), &payload, |b, json| {
            b.iter(|| black_box(parse_ocr_request(black_box(json))));
        });
    }
    group.finish();
}
/// Benchmark response serialization
fn bench_response_serialization(c: &mut Criterion) {
    let mut group = c.benchmark_group("response_serialization");
    group.measurement_time(Duration::from_secs(5));
    // Three response shapes: empty regions, a few regions, and 10 regions.
    let responses = vec![
        ("simple", create_simple_response()),
        ("detailed", create_detailed_response()),
        ("batch", create_batch_response(10)),
    ];
    for (name, response) in responses {
        group.bench_with_input(
            BenchmarkId::new("serialize_json", name),
            &response,
            |b, resp| {
                b.iter(|| black_box(serialize_response(black_box(resp))));
            },
        );
    }
    group.finish();
}
/// Benchmark concurrent request handling
fn bench_concurrent_requests(c: &mut Criterion) {
    let mut group = c.benchmark_group("concurrent_requests");
    group.measurement_time(Duration::from_secs(10));
    let concurrent_levels = [1, 5, 10, 20, 50];
    for concurrency in concurrent_levels {
        group.bench_with_input(
            BenchmarkId::new("handle_requests", concurrency),
            &concurrency,
            |b, &level| {
                b.iter(|| {
                    // NOTE(review): handle_single_request is synchronous, so this
                    // measures `level` sequential calls, not true concurrency.
                    let handles: Vec<_> = (0..level).map(|_| handle_single_request()).collect();
                    black_box(handles)
                });
            },
        );
    }
    group.finish();
}
/// Benchmark middleware overhead
fn bench_middleware_overhead(c: &mut Criterion) {
    let mut group = c.benchmark_group("middleware_overhead");
    group.measurement_time(Duration::from_secs(5));
    let request = create_mock_request();
    // Baseline: handler invoked with no middleware at all.
    group.bench_function("no_middleware", |b| {
        b.iter(|| black_box(handle_request_direct(black_box(&request))));
    });
    group.bench_function("with_auth", |b| {
        b.iter(|| {
            let authed = auth_middleware(black_box(&request));
            black_box(handle_request_direct(black_box(&authed)))
        });
    });
    group.bench_function("with_logging", |b| {
        b.iter(|| {
            let logged = logging_middleware(black_box(&request));
            black_box(handle_request_direct(black_box(&logged)))
        });
    });
    // Full chain: auth -> logging -> validation -> rate limiting -> handler.
    group.bench_function("full_stack", |b| {
        b.iter(|| {
            let req = black_box(&request);
            let authed = auth_middleware(req);
            let logged = logging_middleware(&authed);
            let validated = validation_middleware(&logged);
            let rate_limited = rate_limit_middleware(&validated);
            black_box(handle_request_direct(black_box(&rate_limited)))
        });
    });
    group.finish();
}
/// Benchmark request validation
fn bench_request_validation(c: &mut Criterion) {
    let mut group = c.benchmark_group("request_validation");
    group.measurement_time(Duration::from_secs(5));
    // Measure both the accept path and the early-reject path.
    let valid_request = create_valid_request();
    let invalid_request = create_invalid_request();
    group.bench_function("validate_valid", |b| {
        b.iter(|| black_box(validate_request(black_box(&valid_request))));
    });
    group.bench_function("validate_invalid", |b| {
        b.iter(|| black_box(validate_request(black_box(&invalid_request))));
    });
    group.finish();
}
/// Benchmark rate limiting
fn bench_rate_limiting(c: &mut Criterion) {
    let mut group = c.benchmark_group("rate_limiting");
    group.measurement_time(Duration::from_secs(5));
    let mut limiter = RateLimiter::new(100, Duration::from_secs(60));
    // check_limit prunes expired timestamps, so it mutates limiter state
    // on every iteration.
    group.bench_function("check_limit", |b| {
        b.iter(|| black_box(limiter.check_limit("user_123")));
    });
    // NOTE(review): record_request appends a timestamp per iteration with no
    // pruning here, so the per-user Vec grows for the whole measurement window.
    group.bench_function("update_limit", |b| {
        b.iter(|| {
            limiter.record_request("user_123");
            black_box(&limiter)
        });
    });
    group.finish();
}
/// Benchmark error handling
fn bench_error_handling(c: &mut Criterion) {
    let mut group = c.benchmark_group("error_handling");
    group.measurement_time(Duration::from_secs(5));
    // Cost of formatting an error body alone.
    group.bench_function("create_error_response", |b| {
        b.iter(|| black_box(create_error_response("Invalid request", 400)));
    });
    // Cost of the log-then-respond path (log_error is a no-op stub).
    group.bench_function("log_and_respond", |b| {
        b.iter(|| {
            let error = "Processing failed";
            log_error(error);
            black_box(create_error_response(error, 500))
        });
    });
    group.finish();
}
/// Benchmark end-to-end API request
fn bench_e2e_api_request(c: &mut Criterion) {
    let mut group = c.benchmark_group("e2e_api_request");
    group.measurement_time(Duration::from_secs(15));
    let request_json = r#"{
"image_url": "http://example.com/img.jpg",
"options": {
"format": "latex"
}
}"#;
    // Exercises the whole pipeline: parse -> validate -> auth -> process -> serialize.
    group.bench_function("full_request_cycle", |b| {
        b.iter(|| {
            // Parse
            let request = parse_ocr_request(black_box(request_json));
            // Validate
            let _validated = validate_request(&request);
            // Auth
            let _authed = auth_middleware(&request);
            // Process (simulated)
            let response = process_ocr_request(&request);
            // Serialize
            let json = serialize_response(&response);
            black_box(json)
        });
    });
    group.finish();
}
// Mock types and implementations
/// Parsed OCR request (mock; fields mirror the JSON payloads above).
#[derive(Clone)]
struct OcrRequest {
    image_url: String,
    options: RequestOptions,
}
/// Per-request OCR options.
#[derive(Clone)]
struct RequestOptions {
    format: String,
    languages: Vec<String>,
    // Expected to lie in [0.0, 1.0]; validate_request enforces this.
    confidence_threshold: f32,
}
/// OCR result returned to clients (mock).
#[derive(Clone)]
struct OcrResponse {
    text: String,
    latex: String,
    confidence: f32,
    regions: Vec<Region>,
}
/// One detected text region.
#[derive(Clone)]
struct Region {
    // Presumably [x1, y1, x2, y2] pixel coordinates — TODO confirm with producer.
    bbox: [f32; 4],
    text: String,
    confidence: f32,
}
/// Sliding-window rate limiter used by the rate-limiting benchmarks.
struct RateLimiter {
    /// Maximum requests allowed per user inside one window.
    max_requests: usize,
    /// Length of the sliding window.
    window: Duration,
    /// Per-user timestamps of recorded requests.
    requests: std::collections::HashMap<String, Vec<std::time::Instant>>,
}
impl RateLimiter {
    /// Creates a limiter allowing `max_requests` per `window`.
    fn new(max_requests: usize, window: Duration) -> Self {
        Self {
            max_requests,
            window,
            requests: std::collections::HashMap::new(),
        }
    }
    /// Returns `true` if `user_id` may issue another request.
    /// Prunes timestamps that have fallen outside the window as a side effect.
    fn check_limit(&mut self, user_id: &str) -> bool {
        let now = std::time::Instant::now();
        // `or_default()` is the idiomatic form of `or_insert_with(Vec::new)`.
        let requests = self.requests.entry(user_id.to_string()).or_default();
        requests.retain(|&req_time| now.duration_since(req_time) < self.window);
        requests.len() < self.max_requests
    }
    /// Records a request for `user_id` at the current instant.
    /// Does not prune; pruning happens lazily in `check_limit`.
    fn record_request(&mut self, user_id: &str) {
        self.requests
            .entry(user_id.to_string())
            .or_default()
            .push(std::time::Instant::now());
    }
}
/// Mock parser: returns a fixed request regardless of `json`.
/// NOTE(review): the input is ignored, so the parsing benchmarks above measure
/// only construction cost, not real JSON parsing.
fn parse_ocr_request(json: &str) -> OcrRequest {
    // Simulate JSON parsing
    OcrRequest {
        image_url: "http://example.com/img.jpg".to_string(),
        options: RequestOptions {
            format: "latex".to_string(),
            languages: vec!["en".to_string()],
            confidence_threshold: 0.8,
        },
    }
}
/// Mock serializer: hand-formats the three scalar fields (regions are dropped;
/// no JSON string escaping is performed).
fn serialize_response(response: &OcrResponse) -> String {
    // Simulate JSON serialization
    format!(
        r#"{{"text":"{}","latex":"{}","confidence":{}}}"#,
        response.text, response.latex, response.confidence
    )
}
/// Fixture: minimal response with no regions.
fn create_simple_response() -> OcrResponse {
    OcrResponse {
        text: "E = mc^2".to_string(),
        latex: "E = mc^2".to_string(),
        confidence: 0.95,
        regions: vec![],
    }
}
/// Fixture: response carrying two regions, for the "detailed" serialization case.
fn create_detailed_response() -> OcrResponse {
    OcrResponse {
        text: "Complex equation with multiple terms".to_string(),
        latex: "\\int_0^1 x^2 dx = \\frac{1}{3}".to_string(),
        confidence: 0.92,
        regions: vec![
            Region {
                bbox: [0.0, 0.0, 100.0, 50.0],
                text: "integral".to_string(),
                confidence: 0.95,
            },
            Region {
                bbox: [100.0, 0.0, 200.0, 50.0],
                text: "equals".to_string(),
                confidence: 0.98,
            },
        ],
    }
}
/// Fixture: response with `count` synthetic regions laid out left to right.
fn create_batch_response(count: usize) -> OcrResponse {
    let regions: Vec<_> = (0..count)
        .map(|i| Region {
            bbox: [i as f32 * 10.0, 0.0, (i + 1) as f32 * 10.0, 50.0],
            text: format!("region_{}", i),
            confidence: 0.9,
        })
        .collect();
    OcrResponse {
        text: "Batch text".to_string(),
        latex: "batch latex".to_string(),
        confidence: 0.9,
        regions,
    }
}
/// Mock request handler used by the concurrency benchmark.
fn handle_single_request() -> OcrResponse {
    create_simple_response()
}
/// Fixture: a well-formed request used across the middleware benchmarks.
fn create_mock_request() -> OcrRequest {
    OcrRequest {
        image_url: "http://example.com/img.jpg".to_string(),
        options: RequestOptions {
            format: "latex".to_string(),
            languages: vec!["en".to_string()],
            confidence_threshold: 0.8,
        },
    }
}
/// Handler without any middleware; baseline for the overhead benchmarks.
fn handle_request_direct(request: &OcrRequest) -> OcrResponse {
    process_ocr_request(request)
}
/// Mock middleware: each stage models its cost as one full request clone.
fn auth_middleware(request: &OcrRequest) -> OcrRequest {
    // Simulate auth check
    request.clone()
}
fn logging_middleware(request: &OcrRequest) -> OcrRequest {
    // Simulate logging
    request.clone()
}
fn validation_middleware(request: &OcrRequest) -> OcrRequest {
    // Simulate validation
    request.clone()
}
fn rate_limit_middleware(request: &OcrRequest) -> OcrRequest {
    // Simulate rate limiting
    request.clone()
}
/// Fixture: request that passes validate_request.
fn create_valid_request() -> OcrRequest {
    create_mock_request()
}
/// Fixture: violates both validation rules (empty URL, out-of-range threshold).
fn create_invalid_request() -> OcrRequest {
    OcrRequest {
        image_url: "".to_string(),
        options: RequestOptions {
            format: "invalid".to_string(),
            languages: vec![],
            confidence_threshold: -1.0,
        },
    }
}
/// Validates an OCR request, returning the first failing rule as an error message.
///
/// Rules: `image_url` must be non-empty and `confidence_threshold` must lie
/// in `[0.0, 1.0]`.
fn validate_request(request: &OcrRequest) -> Result<(), String> {
    if request.image_url.is_empty() {
        return Err("Image URL is required".to_string());
    }
    // Inclusive-range `contains` reads clearer than two chained comparisons
    // and is the clippy-recommended form (manual_range_contains).
    if !(0.0..=1.0).contains(&request.options.confidence_threshold) {
        return Err("Invalid confidence threshold".to_string());
    }
    Ok(())
}
/// Builds the minimal JSON error body `{"error":"<message>"}`.
/// The status code is accepted for API symmetry but not embedded in the payload.
fn create_error_response(message: &str, _code: u16) -> String {
    let mut body = String::with_capacity(message.len() + 12);
    body.push_str(r#"{"error":""#);
    body.push_str(message);
    body.push_str(r#""}"#);
    body
}
/// No-op logging stub; exists so benchmarks can include a call in the path.
fn log_error(_message: &str) {
    // Simulate logging
}
/// Mock OCR processor: ignores the request and returns the fixed simple response.
fn process_ocr_request(_request: &OcrRequest) -> OcrResponse {
    // Simulate OCR processing
    create_simple_response()
}
// Register every benchmark group; criterion_main! generates the binary's main().
criterion_group!(
    benches,
    bench_request_parsing,
    bench_response_serialization,
    bench_concurrent_requests,
    bench_middleware_overhead,
    bench_request_validation,
    bench_rate_limiting,
    bench_error_handling,
    bench_e2e_api_request
);
criterion_main!(benches);

View File

@@ -0,0 +1,450 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::collections::HashMap;
use std::time::Duration;
/// Benchmark embedding generation
fn bench_embedding_generation(c: &mut Criterion) {
    let mut group = c.benchmark_group("embedding_generation");
    group.measurement_time(Duration::from_secs(8));
    // Common model input resolutions.
    let image_sizes = [(224, 224), (384, 384), (512, 512)];
    for (w, h) in image_sizes {
        let image_data = generate_test_image(w, h);
        group.bench_with_input(
            BenchmarkId::new("generate", format!("{}x{}", w, h)),
            &image_data,
            |b, img| {
                b.iter(|| black_box(generate_embedding(black_box(img))));
            },
        );
    }
    group.finish();
}
/// Benchmark similarity search (vector search)
fn bench_similarity_search(c: &mut Criterion) {
    let mut group = c.benchmark_group("similarity_search");
    group.measurement_time(Duration::from_secs(10));
    // Create cache with varying sizes
    let cache_sizes = [100, 1000, 10000];
    for cache_size in cache_sizes {
        let cache = create_embedding_cache(cache_size);
        let query_embedding = generate_random_embedding(512);
        // Exhaustive scan: baseline for the ANN variant below.
        group.bench_with_input(
            BenchmarkId::new("linear_search", cache_size),
            &(&cache, &query_embedding),
            |b, (cache, query)| {
                b.iter(|| {
                    black_box(linear_similarity_search(
                        black_box(cache),
                        black_box(query),
                        10,
                    ))
                });
            },
        );
        // Approximate nearest neighbor search
        group.bench_with_input(
            BenchmarkId::new("ann_search", cache_size),
            &(&cache, &query_embedding),
            |b, (cache, query)| {
                b.iter(|| {
                    black_box(ann_similarity_search(
                        black_box(cache),
                        black_box(query),
                        10,
                    ))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark cache hit latency
fn bench_cache_hit_latency(c: &mut Criterion) {
    let mut group = c.benchmark_group("cache_hit_latency");
    group.measurement_time(Duration::from_secs(5));
    let cache = create_embedding_cache(1000);
    let query = generate_random_embedding(512);
    // Guaranteed hit: the probe embedding is taken from the cache itself.
    group.bench_function("exact_match", |b| {
        let cached_embedding = cache.values().next().unwrap();
        b.iter(|| {
            black_box(find_exact_match(
                black_box(&cache),
                black_box(cached_embedding),
            ))
        });
    });
    group.bench_function("similarity_threshold", |b| {
        b.iter(|| {
            black_box(find_by_similarity_threshold(
                black_box(&cache),
                black_box(&query),
                0.95,
            ))
        });
    });
    group.finish();
}
/// Benchmark cache miss latency
fn bench_cache_miss_latency(c: &mut Criterion) {
    let mut group = c.benchmark_group("cache_miss_latency");
    group.measurement_time(Duration::from_secs(8));
    let cache = create_embedding_cache(1000);
    let new_image = generate_test_image(384, 384);
    // Models the full miss path: embed, search, then fall back to processing.
    group.bench_function("miss_with_generation", |b| {
        b.iter(|| {
            let query_embedding = generate_embedding(black_box(&new_image));
            let result = linear_similarity_search(black_box(&cache), &query_embedding, 1);
            if result.is_empty() || result[0].1 < 0.95 {
                // Cache miss - would need to process
                black_box(process_new_image(black_box(&new_image)))
            } else {
                black_box(result[0].2.clone())
            }
        });
    });
    group.finish();
}
/// Benchmark cache insertion
fn bench_cache_insertion(c: &mut Criterion) {
    let mut group = c.benchmark_group("cache_insertion");
    group.measurement_time(Duration::from_secs(8));
    // Plain HashMap: grows without bound during the measurement.
    group.bench_function("insert_new_entry", |b| {
        let mut cache = create_embedding_cache(1000);
        let mut counter = 0;
        b.iter(|| {
            let embedding = generate_random_embedding(512);
            let key = format!("key_{}", counter);
            cache.insert(key.clone(), embedding);
            counter += 1;
            black_box(&cache)
        });
    });
    // LRU cache: same workload but capped at 1000 entries, so eviction cost
    // is included once the cache is warm.
    group.bench_function("insert_with_eviction", |b| {
        let mut cache = LRUCache::new(1000);
        let mut counter = 0;
        b.iter(|| {
            let embedding = generate_random_embedding(512);
            let key = format!("key_{}", counter);
            cache.insert(key, embedding);
            counter += 1;
            black_box(&cache)
        });
    });
    group.finish();
}
/// Benchmark cache update operations
fn bench_cache_updates(c: &mut Criterion) {
    let mut group = c.benchmark_group("cache_updates");
    group.measurement_time(Duration::from_secs(5));
    let mut cache = create_embedding_cache(1000);
    // Snapshot the keys so each iteration overwrites an existing entry.
    let keys: Vec<_> = cache.keys().cloned().collect();
    group.bench_function("update_existing", |b| {
        let mut idx = 0;
        b.iter(|| {
            let key = &keys[idx % keys.len()];
            let new_embedding = generate_random_embedding(512);
            cache.insert(key.clone(), new_embedding);
            idx += 1;
            black_box(&cache)
        });
    });
    group.finish();
}
/// Benchmark batch cache operations
fn bench_batch_cache_ops(c: &mut Criterion) {
    let mut group = c.benchmark_group("batch_cache_operations");
    group.measurement_time(Duration::from_secs(10));
    let batch_sizes = [10, 50, 100];
    for batch_size in batch_sizes {
        let cache = create_embedding_cache(1000);
        let queries: Vec<_> = (0..batch_size)
            .map(|_| generate_random_embedding(512))
            .collect();
        group.bench_with_input(
            BenchmarkId::new("batch_search", batch_size),
            &(&cache, &queries),
            |b, (cache, queries)| {
                b.iter(|| {
                    let results: Vec<_> = queries
                        .iter()
                        .map(|q| linear_similarity_search(black_box(cache), q, 10))
                        .collect();
                    black_box(results)
                });
            },
        );
        group.bench_with_input(
            BenchmarkId::new("batch_insert", batch_size),
            &queries,
            |b, queries| {
                // Fresh cache per iteration so insert cost isn't skewed by growth;
                // setup time is excluded from the measurement.
                b.iter_with_setup(
                    || create_embedding_cache(1000),
                    |mut cache| {
                        for (i, embedding) in queries.iter().enumerate() {
                            cache.insert(format!("batch_{}", i), embedding.clone());
                        }
                        black_box(cache)
                    },
                );
            },
        );
    }
    group.finish();
}
/// Benchmark cache statistics and monitoring
fn bench_cache_statistics(c: &mut Criterion) {
    let mut group = c.benchmark_group("cache_statistics");
    group.measurement_time(Duration::from_secs(5));
    // Large cache so the O(n) stats passes dominate measurement noise.
    let cache = create_embedding_cache(10000);
    group.bench_function("compute_stats", |b| {
        b.iter(|| black_box(compute_cache_statistics(black_box(&cache))));
    });
    group.bench_function("memory_usage", |b| {
        b.iter(|| black_box(estimate_cache_memory(black_box(&cache))));
    });
    group.finish();
}
// Mock implementations
type Embedding = Vec<f32>;
/// Minimal LRU cache used only by the eviction benchmark.
/// `access_order` holds keys from least- to most-recently inserted/updated.
struct LRUCache {
    capacity: usize,
    cache: HashMap<String, Embedding>,
    access_order: Vec<String>,
}
impl LRUCache {
    fn new(capacity: usize) -> Self {
        Self {
            capacity,
            cache: HashMap::new(),
            access_order: Vec::new(),
        }
    }
    /// Inserts `key`; evicts the least-recently-used entry only when adding a
    /// brand-new key to a full cache (updates reuse the existing slot).
    fn insert(&mut self, key: String, value: Embedding) {
        if self.cache.len() >= self.capacity && !self.cache.contains_key(&key) {
            if let Some(lru_key) = self.access_order.first().cloned() {
                self.cache.remove(&lru_key);
                // O(n) front removal — acceptable for a benchmark mock;
                // a VecDeque would make this O(1).
                self.access_order.remove(0);
            }
        }
        self.cache.insert(key.clone(), value);
        // Move the key to the most-recently-used position.
        self.access_order.retain(|k| k != &key);
        self.access_order.push(key);
    }
}
/// Fixture: a mid-gray RGB image (3 bytes per pixel, all 128).
fn generate_test_image(width: u32, height: u32) -> Vec<u8> {
    let byte_count = (width * height * 3) as usize;
    std::iter::repeat(128u8).take(byte_count).collect()
}
/// Deterministic pseudo-random embedding: element `i` is `(i * 0.001) mod 1.0`.
fn generate_random_embedding(dim: usize) -> Embedding {
    let mut values = Vec::with_capacity(dim);
    for i in 0..dim {
        values.push((i as f32 * 0.001) % 1.0);
    }
    values
}
/// Derives a deterministic 512-d unit-norm embedding from raw image bytes by
/// sampling the image evenly and L2-normalizing.
/// (Return type spelled `Vec<f32>`, identical to the `Embedding` alias.)
///
/// Fixes over the original: empty input no longer panics (`% 0`), and an
/// all-zero image no longer produces NaNs from dividing by a zero norm.
fn generate_embedding(image_data: &[u8]) -> Vec<f32> {
    let dim = 512;
    // Guard: the sampling index below computes `% image_data.len()`.
    if image_data.is_empty() {
        return vec![0.0; dim];
    }
    let mut embedding = Vec::with_capacity(dim);
    for i in 0..dim {
        // Sample bytes evenly across the image, mapped into [0, 1].
        let idx = (i * image_data.len() / dim) % image_data.len();
        embedding.push(image_data[idx] as f32 / 255.0);
    }
    // L2-normalize; skip when the norm is zero to avoid NaNs.
    let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm > 0.0 {
        embedding.iter_mut().for_each(|x| *x /= norm);
    }
    embedding
}
/// Fixture: a cache of `size` deterministic 512-d embeddings keyed `image_<i>`.
fn create_embedding_cache(size: usize) -> HashMap<String, Embedding> {
    // Pre-size the map to avoid repeated rehash-and-grow while filling.
    let mut cache = HashMap::with_capacity(size);
    for i in 0..size {
        cache.insert(format!("image_{}", i), generate_random_embedding(512));
    }
    cache
}
/// Cosine similarity of two vectors; returns 0.0 when either norm is zero.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let mut dot = 0.0f32;
    for (x, y) in a.iter().zip(b.iter()) {
        dot += x * y;
    }
    let l2 = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();
    let (norm_a, norm_b) = (l2(a), l2(b));
    if norm_a > 0.0 && norm_b > 0.0 {
        dot / (norm_a * norm_b)
    } else {
        // Degenerate input: treat a zero vector as dissimilar to everything.
        0.0
    }
}
/// Exhaustive top-k search: scores every cached embedding against `query` and
/// returns up to `top_k` `(key, similarity, embedding)` triples, best first.
fn linear_similarity_search(
    cache: &HashMap<String, Embedding>,
    query: &Embedding,
    top_k: usize,
) -> Vec<(String, f32, Embedding)> {
    let mut results: Vec<_> = cache
        .iter()
        .map(|(key, embedding)| {
            let similarity = cosine_similarity(query, embedding);
            (key.clone(), similarity, embedding.clone())
        })
        .collect();
    // f32::total_cmp is a total order, so a NaN score cannot panic the sort
    // the way partial_cmp().unwrap() could; unstable sort skips an allocation
    // (HashMap iteration order is arbitrary anyway, so stability buys nothing).
    results.sort_unstable_by(|a, b| b.1.total_cmp(&a.1));
    results.truncate(top_k);
    results
}
/// Approximate top-k search: scores only an evenly-strided sample of the cache
/// (~10%, floored at 100 entries) instead of every embedding.
fn ann_similarity_search(
    cache: &HashMap<String, Embedding>,
    query: &Embedding,
    top_k: usize,
) -> Vec<(String, f32, Embedding)> {
    // Simplified ANN using deterministic strided sampling.
    let sample_size = (cache.len() / 10).max(100).min(cache.len());
    // Hoisted out of the filter closure: the original recomputed this stride
    // once per cache entry.
    let stride = cache.len() / sample_size.max(1);
    let mut results: Vec<_> = cache
        .iter()
        .enumerate()
        .filter(|(i, _)| i % stride == 0)
        .map(|(_, (key, embedding))| {
            let similarity = cosine_similarity(query, embedding);
            (key.clone(), similarity, embedding.clone())
        })
        .collect();
    // total_cmp avoids the NaN panic of partial_cmp().unwrap().
    results.sort_unstable_by(|a, b| b.1.total_cmp(&a.1));
    results.truncate(top_k);
    results
}
/// Returns the key of the first cached embedding that matches `query`
/// element-wise within 1e-6 (iteration order is the map's arbitrary order).
fn find_exact_match(cache: &HashMap<String, Embedding>, query: &Embedding) -> Option<String> {
    for (key, embedding) in cache {
        if embedding.len() != query.len() {
            continue;
        }
        let identical = embedding
            .iter()
            .zip(query.iter())
            .all(|(a, b)| (a - b).abs() < 1e-6);
        if identical {
            return Some(key.clone());
        }
    }
    None
}
/// Returns the best-scoring cache entry whose cosine similarity to `query`
/// is at least `threshold`, or `None` if nothing clears the bar.
fn find_by_similarity_threshold(
    cache: &HashMap<String, Embedding>,
    query: &Embedding,
    threshold: f32,
) -> Option<(String, f32)> {
    cache
        .iter()
        .filter_map(|(key, embedding)| {
            let similarity = cosine_similarity(query, embedding);
            if similarity >= threshold {
                Some((key.clone(), similarity))
            } else {
                None
            }
        })
        // total_cmp cannot panic, unlike partial_cmp().unwrap() on a NaN score.
        .max_by(|a, b| a.1.total_cmp(&b.1))
}
/// Stand-in for full OCR of an uncached image; the sleep models the ~50 ms
/// processing latency charged on a cache miss.
fn process_new_image(_image_data: &[u8]) -> String {
    std::thread::sleep(Duration::from_millis(50));
    String::from("processed_result")
}
/// Snapshot of cache health computed by `compute_cache_statistics`.
struct CacheStatistics {
    // Number of cached embeddings.
    size: usize,
    // Mean L2 norm across all cached embeddings (0.0 for an empty cache).
    avg_embedding_norm: f32,
    // Estimated footprint from `estimate_cache_memory`.
    memory_bytes: usize,
}
/// Walks the whole cache to produce size, mean embedding norm, and an
/// estimated memory footprint (two O(n) passes).
fn compute_cache_statistics(cache: &HashMap<String, Embedding>) -> CacheStatistics {
    let size = cache.len();
    // Guard against dividing by zero on an empty cache.
    let avg_norm = if size > 0 {
        let total_norm: f32 = cache
            .values()
            .map(|emb| emb.iter().map(|x| x * x).sum::<f32>().sqrt())
            .sum();
        total_norm / size as f32
    } else {
        0.0
    };
    let memory_bytes = estimate_cache_memory(cache);
    CacheStatistics {
        size,
        avg_embedding_norm: avg_norm,
        memory_bytes,
    }
}
/// Rough memory footprint: key bytes + 4 bytes per f32 element + ~64 bytes of
/// assumed per-entry HashMap overhead. An estimate, not an exact accounting.
fn estimate_cache_memory(cache: &HashMap<String, Embedding>) -> usize {
    let mut total = cache.len() * 64; // HashMap overhead
    for (key, embedding) in cache {
        total += key.len() + embedding.len() * 4;
    }
    total
}
// Register every cache benchmark group; criterion_main! generates main().
criterion_group!(
    benches,
    bench_embedding_generation,
    bench_similarity_search,
    bench_cache_hit_latency,
    bench_cache_miss_latency,
    bench_cache_insertion,
    bench_cache_updates,
    bench_batch_cache_ops,
    bench_cache_statistics
);
criterion_main!(benches);

View File

@@ -0,0 +1,413 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::time::Duration;
/// Benchmark text detection model inference
fn bench_text_detection(c: &mut Criterion) {
    let mut group = c.benchmark_group("text_detection_model");
    group.measurement_time(Duration::from_secs(10));
    // Full-page input resolutions (3-channel).
    let sizes = [(224, 224), (384, 384), (512, 512)];
    for (w, h) in sizes {
        let input_tensor = create_input_tensor(w, h, 3);
        group.bench_with_input(
            BenchmarkId::new("inference", format!("{}x{}", w, h)),
            &input_tensor,
            |b, tensor| {
                b.iter(|| black_box(run_detection_model(black_box(tensor))));
            },
        );
    }
    group.finish();
}
/// Benchmark text recognition model inference
fn bench_text_recognition(c: &mut Criterion) {
    let mut group = c.benchmark_group("text_recognition_model");
    group.measurement_time(Duration::from_secs(10));
    // Recognition typically works on smaller cropped regions
    // (single-channel height x width crops).
    let sizes = [(32, 128), (48, 192), (64, 256)];
    for (h, w) in sizes {
        let input_tensor = create_input_tensor(w, h, 1);
        group.bench_with_input(
            BenchmarkId::new("inference", format!("{}x{}", w, h)),
            &input_tensor,
            |b, tensor| {
                b.iter(|| black_box(run_recognition_model(black_box(tensor))));
            },
        );
    }
    group.finish();
}
/// Benchmark math equation model inference
fn bench_math_model(c: &mut Criterion) {
    let mut group = c.benchmark_group("math_model");
    group.measurement_time(Duration::from_secs(10));
    let sizes = [(224, 224), (320, 320), (384, 384)];
    for (w, h) in sizes {
        let input_tensor = create_input_tensor(w, h, 3);
        group.bench_with_input(
            BenchmarkId::new("inference", format!("{}x{}", w, h)),
            &input_tensor,
            |b, tensor| {
                b.iter(|| black_box(run_math_model(black_box(tensor))));
            },
        );
    }
    group.finish();
}
/// Benchmark tensor preprocessing operations
fn bench_tensor_preprocessing(c: &mut Criterion) {
    let mut group = c.benchmark_group("tensor_preprocessing");
    group.measurement_time(Duration::from_secs(8));
    // 384x384 RGB image, the mid-size detection input.
    let image_data = vec![128u8; 384 * 384 * 3];
    group.bench_function("normalization", |b| {
        b.iter(|| black_box(normalize_tensor(black_box(&image_data))));
    });
    group.bench_function("standardization", |b| {
        b.iter(|| black_box(standardize_tensor(black_box(&image_data))));
    });
    group.bench_function("to_chw_layout", |b| {
        b.iter(|| black_box(convert_to_chw(black_box(&image_data), 384, 384)));
    });
    group.bench_function("add_batch_dimension", |b| {
        let tensor = normalize_tensor(&image_data);
        b.iter(|| black_box(add_batch_dim(black_box(&tensor))));
    });
    group.finish();
}
/// Benchmark output postprocessing
fn bench_output_postprocessing(c: &mut Criterion) {
    let mut group = c.benchmark_group("output_postprocessing");
    group.measurement_time(Duration::from_secs(8));
    let detection_output = create_detection_output(1000);
    let recognition_output = create_recognition_output(100);
    group.bench_function("nms_filtering", |b| {
        b.iter(|| black_box(apply_nms(black_box(&detection_output), 0.5)));
    });
    group.bench_function("confidence_filtering", |b| {
        b.iter(|| black_box(filter_by_confidence(black_box(&detection_output), 0.7)));
    });
    // CTC decoding: greedy vs beam search over the same logits.
    group.bench_function("decode_sequence", |b| {
        b.iter(|| black_box(decode_ctc_output(black_box(&recognition_output))));
    });
    group.bench_function("beam_search", |b| {
        b.iter(|| black_box(beam_search_decode(black_box(&recognition_output), 5)));
    });
    group.finish();
}
/// Benchmark batch inference
fn bench_batch_inference(c: &mut Criterion) {
    let mut group = c.benchmark_group("batch_inference");
    group.measurement_time(Duration::from_secs(15));
    let batch_sizes = [1, 4, 8, 16];
    let size = (384, 384);
    for batch_size in batch_sizes {
        let batch_tensor = create_batch_tensor(batch_size, size.0, size.1, 3);
        group.bench_with_input(
            BenchmarkId::new("detection_batch", batch_size),
            &batch_tensor,
            |b, tensor| {
                b.iter(|| black_box(run_detection_model(black_box(tensor))));
            },
        );
    }
    group.finish();
}
/// Benchmark model warm-up time
fn bench_model_warmup(c: &mut Criterion) {
    let mut group = c.benchmark_group("model_warmup");
    group.measurement_time(Duration::from_secs(10));
    // iter_with_large_drop keeps the multi-MB weight buffers' deallocation
    // out of the measured time.
    group.bench_function("detection_model_init", |b| {
        b.iter_with_large_drop(|| black_box(initialize_detection_model()));
    });
    group.bench_function("recognition_model_init", |b| {
        b.iter_with_large_drop(|| black_box(initialize_recognition_model()));
    });
    group.bench_function("math_model_init", |b| {
        b.iter_with_large_drop(|| black_box(initialize_math_model()));
    });
    group.finish();
}
/// Benchmark end-to-end inference pipeline
fn bench_e2e_pipeline(c: &mut Criterion) {
    let mut group = c.benchmark_group("e2e_inference_pipeline");
    group.measurement_time(Duration::from_secs(15));
    let image_data = vec![128u8; 384 * 384 * 3];
    group.bench_function("full_pipeline", |b| {
        b.iter(|| {
            // Preprocessing
            let normalized = normalize_tensor(black_box(&image_data));
            let chw = convert_to_chw(&normalized, 384, 384);
            let batched = add_batch_dim(&chw);
            // Detection
            let detection_output = run_detection_model(&batched);
            let boxes = apply_nms(&detection_output, 0.5);
            // Recognition (simulated for each box); capped at 5 boxes so the
            // measurement stays bounded regardless of detection count.
            let mut results = Vec::new();
            for _box in boxes.iter().take(5) {
                let rec_output = run_recognition_model(&batched);
                let text = decode_ctc_output(&rec_output);
                results.push(text);
            }
            black_box(results)
        });
    });
    group.finish();
}
// Mock implementations
/// Fixture: a flat HWC tensor filled with 0.5.
fn create_input_tensor(width: u32, height: u32, channels: u32) -> Vec<f32> {
    let element_count = (width * height * channels) as usize;
    vec![0.5f32; element_count]
}
/// Fixture: `batch` stacked input tensors, all filled with 0.5.
fn create_batch_tensor(batch: usize, width: u32, height: u32, channels: u32) -> Vec<f32> {
    let per_item = (width * height * channels) as usize;
    vec![0.5f32; batch * per_item]
}
/// Simulates detector inference: emits one synthetic box per 100 input
/// elements, with confidence cycling through 0.80..0.99.
fn run_detection_model(input: &[f32]) -> Vec<Detection> {
    let output_size = input.len() / 100;
    let mut detections = Vec::with_capacity(output_size);
    for i in 0..output_size {
        detections.push(Detection {
            bbox: [i as f32, i as f32, (i + 10) as f32, (i + 10) as f32],
            confidence: 0.8 + (i % 20) as f32 / 100.0,
            class_id: i % 10,
        });
    }
    detections
}
/// Simulates a CTC recognition head: a flat [32 time steps x 64 vocab]
/// logit grid, independent of the input crop (mock).
fn run_recognition_model(input: &[f32]) -> Vec<f32> {
    let _ = input; // mock: output shape is fixed regardless of the crop
    const TIME_STEPS: usize = 32;
    const VOCAB_SIZE: usize = 64;
    vec![0.1f32; TIME_STEPS * VOCAB_SIZE]
}
/// Simulates math-model inference: output is one tenth the input length.
fn run_math_model(input: &[f32]) -> Vec<f32> {
    let output_len = input.len() / 10;
    vec![0.5f32; output_len]
}
/// Warm-up stubs: each sleep models weight-loading latency and the returned
/// buffer models the model's resident size.
fn initialize_detection_model() -> Vec<u8> {
    std::thread::sleep(Duration::from_millis(100));
    vec![0u8; 1024 * 1024]
}
fn initialize_recognition_model() -> Vec<u8> {
    std::thread::sleep(Duration::from_millis(80));
    vec![0u8; 512 * 1024]
}
fn initialize_math_model() -> Vec<u8> {
    std::thread::sleep(Duration::from_millis(120));
    vec![0u8; 2048 * 1024]
}
/// Scales raw u8 pixels into [0.0, 1.0].
fn normalize_tensor(data: &[u8]) -> Vec<f32> {
    let mut out = Vec::with_capacity(data.len());
    for &byte in data {
        out.push(f32::from(byte) / 255.0);
    }
    out
}
/// Standardizes u8 pixels with fixed mean 128 and std 64.
fn standardize_tensor(data: &[u8]) -> Vec<f32> {
    const MEAN: f32 = 128.0;
    const STD: f32 = 64.0;
    data.iter().map(|&x| (f32::from(x) - MEAN) / STD).collect()
}
/// Converts a flat HWC tensor to CHW layout. The channel count is inferred
/// from `data.len() / (width * height)`.
///
/// Fix over the original: a zero-area image (`width * height == 0`) returned
/// via an early guard instead of panicking with a divide-by-zero.
fn convert_to_chw(data: &[f32], width: u32, height: u32) -> Vec<f32> {
    let (w, h) = (width as usize, height as usize);
    let pixels = w * h;
    // Guard: the channel inference below divides by the pixel count.
    if pixels == 0 {
        return Vec::new();
    }
    let channels = data.len() / pixels;
    let mut chw = Vec::with_capacity(data.len());
    for c in 0..channels {
        for y in 0..h {
            for x in 0..w {
                // HWC offset of pixel (y, x), channel c.
                chw.push(data[(y * w + x) * channels + c]);
            }
        }
    }
    chw
}
/// Placeholder for prepending a batch axis: with a flat layout the bytes are
/// unchanged, so this is just an owned copy.
fn add_batch_dim(tensor: &[f32]) -> Vec<f32> {
    Vec::from(tensor)
}
/// One detector output box.
#[derive(Clone)]
struct Detection {
    // [x1, y1, x2, y2], as consumed by calculate_iou below.
    bbox: [f32; 4],
    confidence: f32,
    class_id: usize,
}
/// Fixture: `count` overlapping synthetic boxes with confidence cycling
/// through 0.50..0.99.
fn create_detection_output(count: usize) -> Vec<Detection> {
    let mut detections = Vec::with_capacity(count);
    for i in 0..count {
        detections.push(Detection {
            bbox: [i as f32, i as f32, (i + 10) as f32, (i + 10) as f32],
            confidence: 0.5 + (i % 50) as f32 / 100.0,
            class_id: i % 10,
        });
    }
    detections
}
/// Fixture: flat CTC logits for `time_steps` steps over a 64-entry vocabulary.
fn create_recognition_output(time_steps: usize) -> Vec<f32> {
    std::iter::repeat(0.1f32).take(time_steps * 64).collect()
}
/// Greedy non-maximum suppression: visit boxes in descending confidence and
/// drop any box whose IoU with an already-kept box exceeds `iou_threshold`.
fn apply_nms(detections: &[Detection], iou_threshold: f32) -> Vec<Detection> {
    let mut sorted = detections.to_vec();
    // Stable sort preserves input order among equal confidences;
    // total_cmp avoids the NaN panic of partial_cmp().unwrap().
    sorted.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
    let mut kept: Vec<Detection> = Vec::new();
    for det in sorted {
        let overlaps = kept
            .iter()
            .any(|k| calculate_iou(&det.bbox, &k.bbox) > iou_threshold);
        if !overlaps {
            kept.push(det);
        }
    }
    kept
}
/// Intersection-over-union of two `[x1, y1, x2, y2]` boxes.
/// Returns 0.0 when the union area is zero (degenerate boxes).
fn calculate_iou(box1: &[f32; 4], box2: &[f32; 4]) -> f32 {
    // Clamp negative extents to zero: disjoint boxes have no intersection.
    let inter_w = (box1[2].min(box2[2]) - box1[0].max(box2[0])).max(0.0);
    let inter_h = (box1[3].min(box2[3]) - box1[1].max(box2[1])).max(0.0);
    let intersection = inter_w * inter_h;
    let area1 = (box1[2] - box1[0]) * (box1[3] - box1[1]);
    let area2 = (box2[2] - box2[0]) * (box2[3] - box2[1]);
    let union = area1 + area2 - intersection;
    if union > 0.0 {
        intersection / union
    } else {
        0.0
    }
}
/// Keeps only detections whose confidence meets `threshold`.
fn filter_by_confidence(detections: &[Detection], threshold: f32) -> Vec<Detection> {
    let mut kept = Vec::new();
    for det in detections {
        if det.confidence >= threshold {
            kept.push(det.clone());
        }
    }
    kept
}
/// Greedy CTC decoding over a flat `[time_steps x 64]` logit matrix:
/// argmax per step, collapse consecutive repeats, drop blanks (index 0).
/// Class indices map onto `'a'..='z'` cyclically.
fn decode_ctc_output(logits: &[f32]) -> String {
    const VOCAB_SIZE: usize = 64;
    let mut result = String::new();
    let mut prev_char = None;
    // chunks_exact replaces manual start/end index slicing and lets the
    // optimizer drop per-access bounds checks; a trailing partial chunk is
    // ignored, exactly like the original integer-division loop.
    for step_logits in logits.chunks_exact(VOCAB_SIZE) {
        let (max_idx, _) = step_logits
            .iter()
            .enumerate()
            // total_cmp cannot panic on NaN, unlike partial_cmp().unwrap().
            .max_by(|(_, a), (_, b)| a.total_cmp(b))
            .expect("chunks_exact yields non-empty chunks");
        // CTC collapse: emit only non-blank classes that differ from the
        // previous step's argmax.
        if max_idx > 0 && Some(max_idx) != prev_char {
            result.push((b'a' + max_idx as u8 % 26) as char);
        }
        prev_char = Some(max_idx);
    }
    result
}
/// Simplified beam-search decode over flat `[time_steps x 64]` logits.
/// NOTE(review): only the first `beam_width` vocabulary entries are expanded
/// per step — a simplification retained from the original mock.
///
/// Fix over the original: `beam_width == 0` now returns an empty string
/// instead of panicking on `beams[0]` after the beam set empties.
fn beam_search_decode(logits: &[f32], beam_width: usize) -> String {
    const VOCAB_SIZE: usize = 64;
    if beam_width == 0 {
        return String::new();
    }
    let mut beams: Vec<(String, f32)> = vec![(String::new(), 0.0)];
    for step_logits in logits.chunks_exact(VOCAB_SIZE) {
        let mut new_beams = Vec::new();
        for (text, score) in &beams {
            for (char_idx, &logit) in step_logits.iter().enumerate().take(beam_width) {
                let mut new_text = text.clone();
                // Index 0 is the CTC blank: extend the score but not the text.
                if char_idx > 0 {
                    new_text.push((b'a' + char_idx as u8 % 26) as char);
                }
                new_beams.push((new_text, score + logit));
            }
        }
        // Stable sort keeps tied beams in expansion order;
        // total_cmp avoids the NaN panic of partial_cmp().unwrap().
        new_beams.sort_by(|a, b| b.1.total_cmp(&a.1));
        new_beams.truncate(beam_width);
        beams = new_beams;
    }
    beams[0].0.clone()
}
// Register every inference benchmark group; criterion_main! generates main().
criterion_group!(
    benches,
    bench_text_detection,
    bench_text_recognition,
    bench_math_model,
    bench_tensor_preprocessing,
    bench_output_postprocessing,
    bench_batch_inference,
    bench_model_warmup,
    bench_e2e_pipeline
);
criterion_main!(benches);

View File

@@ -0,0 +1,395 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::time::Duration;
/// Benchmark simple LaTeX expression generation
fn bench_simple_expressions(c: &mut Criterion) {
    let mut group = c.benchmark_group("simple_expressions");
    group.measurement_time(Duration::from_secs(5));
    // One case per basic Expression variant (Expression is defined elsewhere
    // in this file).
    let test_cases = vec![
        (
            "fraction",
            Expression::Fraction(
                Box::new(Expression::Number(1)),
                Box::new(Expression::Number(2)),
            ),
        ),
        (
            "power",
            Expression::Power(
                Box::new(Expression::Variable("x".to_string())),
                Box::new(Expression::Number(2)),
            ),
        ),
        (
            "sum",
            Expression::Sum(
                Box::new(Expression::Number(1)),
                Box::new(Expression::Number(2)),
            ),
        ),
        (
            "product",
            Expression::Product(
                Box::new(Expression::Variable("a".to_string())),
                Box::new(Expression::Variable("b".to_string())),
            ),
        ),
    ];
    for (name, expr) in test_cases {
        group.bench_with_input(BenchmarkId::new("to_latex", name), &expr, |b, expr| {
            b.iter(|| black_box(expr.to_latex()));
        });
    }
    group.finish();
}
/// Benchmark complex LaTeX expression generation
fn bench_complex_expressions(c: &mut Criterion) {
    let mut group = c.benchmark_group("complex_expressions");
    group.measurement_time(Duration::from_secs(8));
    // Create complex nested expressions
    // (factory helpers are defined elsewhere in this file).
    let test_cases = vec![
        ("matrix_2x2", create_matrix(2, 2)),
        ("matrix_3x3", create_matrix(3, 3)),
        ("matrix_4x4", create_matrix(4, 4)),
        ("integral", create_integral()),
        ("summation", create_summation()),
        ("nested_fraction", create_nested_fraction(3)),
        ("polynomial", create_polynomial(5)),
    ];
    for (name, expr) in test_cases {
        group.bench_with_input(BenchmarkId::new("to_latex", name), &expr, |b, expr| {
            b.iter(|| black_box(expr.to_latex()));
        });
    }
    group.finish();
}
/// Benchmark full-tree traversal (node counting) at growing depths.
fn bench_ast_traversal(c: &mut Criterion) {
    let mut group = c.benchmark_group("ast_traversal");
    group.measurement_time(Duration::from_secs(5));
    for &depth in &[3usize, 5, 7, 10] {
        let tree = create_nested_expression(depth);
        group.bench_with_input(BenchmarkId::new("depth", depth), &tree, |b, t| {
            b.iter(|| black_box(count_nodes(black_box(t))));
        });
    }
    group.finish();
}
/// Compare the naive per-node `format!` renderer against the
/// pre-sized-buffer variant on a long polynomial.
fn bench_string_building(c: &mut Criterion) {
    let mut group = c.benchmark_group("string_building");
    group.measurement_time(Duration::from_secs(5));
    // Degree 20: long enough that allocation strategy matters.
    let polynomial = create_polynomial(20);
    group.bench_function("to_latex_default", |b| {
        b.iter(|| black_box(polynomial.to_latex()));
    });
    group.bench_function("to_latex_with_capacity", |b| {
        b.iter(|| black_box(polynomial.to_latex_with_capacity()));
    });
    group.finish();
}
/// Benchmark escaping of LaTeX special characters on representative
/// inputs, from plain ASCII to mixed Unicode/math text.
fn bench_latex_escaping(c: &mut Criterion) {
    let mut group = c.benchmark_group("latex_escaping");
    group.measurement_time(Duration::from_secs(5));
    let samples = [
        ("no_special", "simple text"),
        ("underscores", "var_1 + var_2"),
        ("braces", "{x} + {y}"),
        ("mixed", "α + β_1^2 ∫ dx"),
    ];
    for (label, raw) in samples {
        group.bench_with_input(BenchmarkId::new("escape", label), &raw, |b, s| {
            b.iter(|| black_box(escape_latex(black_box(s))));
        });
    }
    group.finish();
}
/// Latency-target check: a typical OCR-produced expression should
/// render to LaTeX in under 5 ms.
fn bench_latency_target(c: &mut Criterion) {
    let mut group = c.benchmark_group("latency_target_5ms");
    group.measurement_time(Duration::from_secs(10));
    // More samples for a tighter estimate against the target.
    group.sample_size(100);
    let ocr_expr = create_typical_ocr_expression();
    group.bench_function("typical_ocr_expression", |b| {
        b.iter(|| black_box(ocr_expr.to_latex()));
    });
    group.finish();
}
/// Benchmark batch LaTeX generation
fn bench_batch_generation(c: &mut Criterion) {
let mut group = c.benchmark_group("batch_generation");
group.measurement_time(Duration::from_secs(10));
let batch_sizes = [10, 50, 100];
for size in batch_sizes {
let expressions: Vec<_> = (0..size).map(|i| create_polynomial(i % 10 + 1)).collect();
group.bench_with_input(
BenchmarkId::new("batch_size", size),
&expressions,
|b, exprs| {
b.iter(|| {
let results: Vec<_> = exprs.iter().map(|expr| expr.to_latex()).collect();
black_box(results)
});
},
);
}
group.finish();
}
// Mock AST and Expression types
// Mirrors the production math-expression tree closely enough to make the
// LaTeX-rendering benchmarks representative.
#[derive(Clone)]
enum Expression {
    /// Integer literal, rendered as its decimal digits.
    Number(i32),
    /// Named symbol, rendered verbatim.
    Variable(String),
    /// \frac{numerator}{denominator}.
    Fraction(Box<Expression>, Box<Expression>),
    /// {base}^{exponent}.
    Power(Box<Expression>, Box<Expression>),
    /// Binary addition: `a + b`.
    Sum(Box<Expression>, Box<Expression>),
    /// Binary multiplication: `a \cdot b`.
    Product(Box<Expression>, Box<Expression>),
    /// Row-major grid, rendered as a bmatrix environment.
    Matrix(Vec<Vec<Expression>>),
    /// Definite integral: (integrand, variable, lower bound, upper bound).
    Integral(Box<Expression>, String, String, String),
    /// Finite sum: (summand, index variable, lower bound, upper bound).
    Summation(Box<Expression>, String, String, String),
}
impl Expression {
    /// Render to LaTeX with the naive strategy: every node formats its
    /// children into fresh `String`s via recursive `format!` calls.
    fn to_latex(&self) -> String {
        match self {
            Expression::Number(n) => n.to_string(),
            Expression::Variable(v) => v.clone(),
            Expression::Fraction(num, den) => {
                format!("\\frac{{{}}}{{{}}}", num.to_latex(), den.to_latex())
            }
            Expression::Power(base, exp) => {
                format!("{{{}}}^{{{}}}", base.to_latex(), exp.to_latex())
            }
            Expression::Sum(a, b) => {
                format!("{} + {}", a.to_latex(), b.to_latex())
            }
            Expression::Product(a, b) => {
                format!("{} \\cdot {}", a.to_latex(), b.to_latex())
            }
            Expression::Matrix(rows) => {
                let mut result = String::from("\\begin{bmatrix}");
                for (i, row) in rows.iter().enumerate() {
                    for (j, cell) in row.iter().enumerate() {
                        result.push_str(&cell.to_latex());
                        if j < row.len() - 1 {
                            result.push_str(" & ");
                        }
                    }
                    if i < rows.len() - 1 {
                        result.push_str(" \\\\ ");
                    }
                }
                result.push_str("\\end{bmatrix}");
                result
            }
            Expression::Integral(expr, var, lower, upper) => {
                format!(
                    "\\int_{{{}}}^{{{}}} {} \\, d{}",
                    lower,
                    upper,
                    expr.to_latex(),
                    var
                )
            }
            Expression::Summation(expr, var, lower, upper) => {
                format!(
                    "\\sum_{{{}={}}}^{{{}}} {}",
                    var,
                    lower,
                    upper,
                    expr.to_latex()
                )
            }
        }
    }
    /// Render into a single pre-sized buffer. Produces the same string as
    /// `to_latex`, but with one allocation instead of one per node.
    fn to_latex_with_capacity(&self) -> String {
        let mut result = String::with_capacity(256);
        self.append_latex(&mut result);
        result
    }
    /// Append this node's LaTeX directly onto `buffer`.
    ///
    /// The original delegated to `to_latex()`, which allocated the full
    /// intermediate string anyway — making `to_latex_with_capacity`
    /// measure the exact same allocation pattern as `to_latex` and
    /// defeating the `string_building` benchmark comparison.
    fn append_latex(&self, buffer: &mut String) {
        use std::fmt::Write as _;
        match self {
            Expression::Number(n) => {
                // Writing to a String cannot fail.
                let _ = write!(buffer, "{}", n);
            }
            Expression::Variable(v) => buffer.push_str(v),
            Expression::Fraction(num, den) => {
                buffer.push_str("\\frac{");
                num.append_latex(buffer);
                buffer.push_str("}{");
                den.append_latex(buffer);
                buffer.push('}');
            }
            Expression::Power(base, exp) => {
                buffer.push('{');
                base.append_latex(buffer);
                buffer.push_str("}^{");
                exp.append_latex(buffer);
                buffer.push('}');
            }
            Expression::Sum(a, b) => {
                a.append_latex(buffer);
                buffer.push_str(" + ");
                b.append_latex(buffer);
            }
            Expression::Product(a, b) => {
                a.append_latex(buffer);
                buffer.push_str(" \\cdot ");
                b.append_latex(buffer);
            }
            Expression::Matrix(rows) => {
                buffer.push_str("\\begin{bmatrix}");
                for (i, row) in rows.iter().enumerate() {
                    for (j, cell) in row.iter().enumerate() {
                        cell.append_latex(buffer);
                        if j < row.len() - 1 {
                            buffer.push_str(" & ");
                        }
                    }
                    if i < rows.len() - 1 {
                        buffer.push_str(" \\\\ ");
                    }
                }
                buffer.push_str("\\end{bmatrix}");
            }
            Expression::Integral(expr, var, lower, upper) => {
                let _ = write!(buffer, "\\int_{{{}}}^{{{}}} ", lower, upper);
                expr.append_latex(buffer);
                buffer.push_str(" \\, d");
                buffer.push_str(var);
            }
            Expression::Summation(expr, var, lower, upper) => {
                let _ = write!(buffer, "\\sum_{{{}={}}}^{{{}}} ", var, lower, upper);
                expr.append_latex(buffer);
            }
        }
    }
}
/// Build a rows×cols matrix filled row-major with sequential integers.
fn create_matrix(rows: usize, cols: usize) -> Expression {
    let mut grid = Vec::with_capacity(rows);
    for r in 0..rows {
        let mut row = Vec::with_capacity(cols);
        for c in 0..cols {
            row.push(Expression::Number((r * cols + c) as i32));
        }
        grid.push(row);
    }
    Expression::Matrix(grid)
}
/// Build the fixture integral ∫₀¹ x² dx.
fn create_integral() -> Expression {
    let x_squared = Expression::Power(
        Box::new(Expression::Variable("x".to_string())),
        Box::new(Expression::Number(2)),
    );
    Expression::Integral(
        Box::new(x_squared),
        "x".to_string(),
        "0".to_string(),
        "1".to_string(),
    )
}
/// Build the fixture summation Σ_{i=1}^{n} i².
fn create_summation() -> Expression {
    let i_squared = Expression::Power(
        Box::new(Expression::Variable("i".to_string())),
        Box::new(Expression::Number(2)),
    );
    Expression::Summation(
        Box::new(i_squared),
        "i".to_string(),
        "1".to_string(),
        "n".to_string(),
    )
}
/// Build a continued fraction 1/(1/(1/…)) of the given nesting depth;
/// depth 0 is the bare literal 1.
fn create_nested_fraction(depth: usize) -> Expression {
    // Iterative equivalent of the recursive build: wrap from the
    // innermost literal outward.
    let mut expr = Expression::Number(1);
    for _ in 0..depth {
        expr = Expression::Fraction(Box::new(Expression::Number(1)), Box::new(expr));
    }
    expr
}
/// Build Σ_{i=0..=degree} (i+1)·x^i as a left-leaning Sum chain seeded
/// with a literal 0.
fn create_polynomial(degree: usize) -> Expression {
    (0..=degree).fold(Expression::Number(0), |acc, i| {
        let coeff = Expression::Number(i as i32 + 1);
        let power = Expression::Power(
            Box::new(Expression::Variable("x".to_string())),
            Box::new(Expression::Number(i as i32)),
        );
        Expression::Sum(
            Box::new(acc),
            Box::new(Expression::Product(Box::new(coeff), Box::new(power))),
        )
    })
}
/// Build a left-leaning Sum chain of the given depth over the leaf `x`:
/// Sum(Sum(…Sum(x, 1)…, depth-1), depth).
fn create_nested_expression(depth: usize) -> Expression {
    // Iterative equivalent of the recursive build.
    let mut expr = Expression::Variable("x".to_string());
    for level in 1..=depth {
        expr = Expression::Sum(
            Box::new(expr),
            Box::new(Expression::Number(level as i32)),
        );
    }
    expr
}
/// Build the right-hand side of (a + b)² = a² + 2ab + b², a typical
/// expression size produced by the OCR front-end.
fn create_typical_ocr_expression() -> Expression {
    let var = |s: &str| Box::new(Expression::Variable(s.to_string()));
    let a_squared = Expression::Power(var("a"), Box::new(Expression::Number(2)));
    let two_a = Expression::Product(Box::new(Expression::Number(2)), var("a"));
    let two_ab = Expression::Product(Box::new(two_a), var("b"));
    let b_squared = Expression::Power(var("b"), Box::new(Expression::Number(2)));
    Expression::Sum(
        Box::new(Expression::Sum(Box::new(a_squared), Box::new(two_ab))),
        Box::new(b_squared),
    )
}
/// Count every node in the expression tree, leaves included.
fn count_nodes(expr: &Expression) -> usize {
    // Count descendants per shape, then add one for `expr` itself.
    let children: usize = match expr {
        Expression::Number(_) | Expression::Variable(_) => 0,
        Expression::Fraction(a, b)
        | Expression::Power(a, b)
        | Expression::Sum(a, b)
        | Expression::Product(a, b) => count_nodes(a) + count_nodes(b),
        Expression::Matrix(rows) => rows
            .iter()
            .flat_map(|row| row.iter())
            .map(count_nodes)
            .sum(),
        Expression::Integral(inner, _, _, _) | Expression::Summation(inner, _, _, _) => {
            count_nodes(inner)
        }
    };
    children + 1
}
/// Escape LaTeX-reserved characters in `text` for text-mode output.
///
/// Replaces `_ { } & % $ # ^ ~ \` with their LaTeX escape sequences;
/// every other character passes through unchanged.
fn escape_latex(text: &str) -> String {
    // Push into one pre-sized buffer instead of allocating a String per
    // character (the original `.map(..to_string()).collect()` did).
    let mut out = String::with_capacity(text.len());
    for c in text.chars() {
        match c {
            '_' => out.push_str("\\_"),
            '{' => out.push_str("\\{"),
            '}' => out.push_str("\\}"),
            '&' => out.push_str("\\&"),
            '%' => out.push_str("\\%"),
            '$' => out.push_str("\\$"),
            '#' => out.push_str("\\#"),
            '^' => out.push_str("\\^{}"),
            '~' => out.push_str("\\~{}"),
            '\\' => out.push_str("\\textbackslash{}"),
            _ => out.push(c),
        }
    }
    out
}
// Register every LaTeX-generation benchmark with the Criterion harness.
criterion_group!(
    benches,
    bench_simple_expressions,
    bench_complex_expressions,
    bench_ast_traversal,
    bench_string_building,
    bench_latex_escaping,
    bench_latency_target,
    bench_batch_generation
);
// Expands to the `main` entry point that runs the `benches` group.
criterion_main!(benches);

View File

@@ -0,0 +1,437 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::time::Duration;
/// Benchmark peak memory during inference
///
/// Runs the full single-image mock pipeline (model load, preprocessing,
/// inference, postprocessing) while sampling the memory tracker.
fn bench_peak_memory_inference(c: &mut Criterion) {
    let mut group = c.benchmark_group("peak_memory_inference");
    group.measurement_time(Duration::from_secs(10));
    let sizes = [(224, 224), (384, 384), (512, 512)];
    for (w, h) in sizes {
        group.bench_with_input(
            BenchmarkId::new("single_inference", format!("{}x{}", w, h)),
            &(w, h),
            |b, &(width, height)| {
                // iter_with_large_drop keeps deallocation of the big model
                // buffer outside the measured section.
                b.iter_with_large_drop(|| {
                    // Must be `mut`: `peak_usage` takes `&mut self` (the
                    // original immutable binding failed to compile, E0596).
                    let mut memory_tracker = MemoryTracker::new();
                    // Simulate model loading
                    let model = load_model();
                    // Create input
                    let image = create_image(width, height);
                    // Preprocessing
                    let preprocessed = preprocess(image);
                    // Inference
                    let output = run_inference(&model, preprocessed);
                    // Postprocessing
                    let result = postprocess(output);
                    let peak_memory = memory_tracker.peak_usage();
                    black_box((result, peak_memory))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark memory per image in batch
///
/// Measures peak usage for a whole 384x384 batch and derives a per-image
/// figure so batch sizes can be compared directly.
fn bench_memory_per_batch_image(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_per_batch_image");
    group.measurement_time(Duration::from_secs(15));
    let batch_sizes = [1, 4, 8, 16, 32];
    for batch_size in batch_sizes {
        group.bench_with_input(
            BenchmarkId::new("batch_inference", batch_size),
            &batch_size,
            |b, &size| {
                b.iter_with_large_drop(|| {
                    // Must be `mut`: `peak_usage` takes `&mut self` (the
                    // original immutable binding failed to compile, E0596).
                    let mut memory_tracker = MemoryTracker::new();
                    let model = load_model();
                    let batch = create_batch(size, 384, 384);
                    let output = run_batch_inference(&model, batch);
                    let total_memory = memory_tracker.peak_usage();
                    // Normalize to a per-image cost.
                    let per_image = total_memory / size;
                    black_box((output, per_image))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark model loading memory
///
/// Tracks peak allocation while loading each mock model individually,
/// then all three together.
fn bench_model_loading_memory(c: &mut Criterion) {
    let mut group = c.benchmark_group("model_loading_memory");
    group.measurement_time(Duration::from_secs(10));
    group.bench_function("detection_model", |b| {
        b.iter_with_large_drop(|| {
            // `mut` required: `peak_usage` takes `&mut self` (the original
            // immutable bindings failed to compile, E0596).
            let mut tracker = MemoryTracker::new();
            let model = load_detection_model();
            let memory = tracker.peak_usage();
            black_box((model, memory))
        });
    });
    group.bench_function("recognition_model", |b| {
        b.iter_with_large_drop(|| {
            let mut tracker = MemoryTracker::new();
            let model = load_recognition_model();
            let memory = tracker.peak_usage();
            black_box((model, memory))
        });
    });
    group.bench_function("math_model", |b| {
        b.iter_with_large_drop(|| {
            let mut tracker = MemoryTracker::new();
            let model = load_math_model();
            let memory = tracker.peak_usage();
            black_box((model, memory))
        });
    });
    group.bench_function("all_models", |b| {
        b.iter_with_large_drop(|| {
            let mut tracker = MemoryTracker::new();
            let detection = load_detection_model();
            let recognition = load_recognition_model();
            let math = load_math_model();
            let total_memory = tracker.peak_usage();
            black_box((detection, recognition, math, total_memory))
        });
    });
    group.finish();
}
/// Benchmark memory growth across 100 sequential inferences, sampling
/// resident usage every 10th run to estimate leak-like drift.
fn bench_memory_growth(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_growth");
    group.measurement_time(Duration::from_secs(20));
    group.bench_function("sequential_inferences", |b| {
        b.iter_with_large_drop(|| {
            let tracker = MemoryTracker::new();
            let model = load_model();
            let mut memory_samples = Vec::with_capacity(10);
            for run in 0..100 {
                let tensor = preprocess(create_image(384, 384));
                let _output = run_inference(&model, tensor);
                // Sample usage on every 10th inference.
                if run % 10 == 0 {
                    memory_samples.push(tracker.current_usage());
                }
            }
            let growth = calculate_memory_growth(&memory_samples);
            black_box((memory_samples, growth))
        });
    });
    group.finish();
}
/// Benchmark an allocate / partially-free / allocate pattern that tends
/// to fragment the heap.
fn bench_memory_fragmentation(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_fragmentation");
    group.measurement_time(Duration::from_secs(10));
    group.bench_function("allocate_deallocate_pattern", |b| {
        b.iter(|| {
            // Phase 1: 100 buffers of 1..=10 KiB.
            let mut allocations: Vec<Vec<u8>> =
                (0..100).map(|i| vec![0u8; (i % 10 + 1) * 1024]).collect();
            // Phase 2: drop every odd-indexed buffer to punch holes.
            let mut index = 0;
            allocations.retain(|_| {
                let keep = index % 2 == 0;
                index += 1;
                keep
            });
            // Phase 3: allocate 50 more buffers into the fragmented space.
            allocations.extend((0..50).map(|i| vec![0u8; (i % 5 + 1) * 2048]));
            black_box(allocations)
        });
    });
    group.finish();
}
/// Benchmark cache memory overhead
///
/// Measures peak allocation for embedding caches of increasing entry
/// counts (512 floats per entry).
fn bench_cache_memory(c: &mut Criterion) {
    let mut group = c.benchmark_group("cache_memory");
    group.measurement_time(Duration::from_secs(10));
    let cache_sizes = [100, 1000, 10000];
    for cache_size in cache_sizes {
        group.bench_with_input(
            BenchmarkId::new("embedding_cache", cache_size),
            &cache_size,
            |b, &size| {
                b.iter_with_large_drop(|| {
                    // `mut` required: `peak_usage` takes `&mut self` (the
                    // original immutable binding failed to compile, E0596).
                    let mut tracker = MemoryTracker::new();
                    let cache = create_embedding_cache(size);
                    let memory = tracker.peak_usage();
                    black_box((cache, memory))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark memory pool efficiency
///
/// Compares 100 fresh 1 MiB heap allocations per iteration against
/// handing out pre-allocated blocks from a fixed pool.
fn bench_memory_pools(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_pools");
    group.measurement_time(Duration::from_secs(8));
    group.bench_function("without_pool", |b| {
        b.iter(|| {
            let mut allocations = Vec::new();
            for _ in 0..100 {
                let buffer = vec![0u8; 1024 * 1024];
                allocations.push(buffer);
            }
            black_box(allocations)
        });
    });
    group.bench_function("with_pool", |b| {
        let mut pool = MemoryPool::new(1024 * 1024, 100);
        b.iter(|| {
            let mut handles = Vec::new();
            for _ in 0..100 {
                handles.push(pool.allocate());
            }
            black_box(&handles);
            // Return the block indices to the free list: without this the
            // pool is exhausted after the first iteration and every later
            // iteration just measures popping `None` from an empty Vec.
            pool.available.extend(handles.into_iter().flatten());
        });
    });
    group.finish();
}
/// Benchmark tensor memory layouts
///
/// Compares allocation of interleaved (HWC) vs planar (CHW) tensors and
/// the cost of converting between the two layouts.
fn bench_tensor_layouts(c: &mut Criterion) {
    let mut group = c.benchmark_group("tensor_layouts");
    group.measurement_time(Duration::from_secs(8));
    let size = (384, 384, 3);
    group.bench_function("hwc_layout", |b| {
        b.iter(|| {
            // `mut` required: `peak_usage` takes `&mut self` (the original
            // immutable bindings failed to compile, E0596).
            let mut tracker = MemoryTracker::new();
            let tensor = create_hwc_tensor(size.0, size.1, size.2);
            let memory = tracker.peak_usage();
            black_box((tensor, memory))
        });
    });
    group.bench_function("chw_layout", |b| {
        b.iter(|| {
            let mut tracker = MemoryTracker::new();
            let tensor = create_chw_tensor(size.0, size.1, size.2);
            let memory = tracker.peak_usage();
            black_box((tensor, memory))
        });
    });
    group.bench_function("layout_conversion", |b| {
        let hwc = create_hwc_tensor(size.0, size.1, size.2);
        b.iter(|| {
            let mut tracker = MemoryTracker::new();
            let chw = convert_hwc_to_chw(&hwc, size.0, size.1, size.2);
            let memory = tracker.peak_usage();
            black_box((chw, memory))
        });
    });
    group.finish();
}
// Mock implementations
/// Tracks process memory deltas relative to a baseline captured at
/// construction time.
struct MemoryTracker {
    // Process usage observed when the tracker was created.
    initial_usage: usize,
    // High-water mark maintained by `peak_usage`.
    peak: usize,
}
impl MemoryTracker {
    /// Snapshot current process usage as the baseline for later deltas.
    fn new() -> Self {
        Self {
            initial_usage: get_current_memory_usage(),
            peak: 0,
        }
    }
    /// Bytes allocated since construction.
    // NOTE(review): this subtraction underflows if usage ever drops below
    // the baseline; harmless with the mock reader (always 0), but worth a
    // `saturating_sub` if a real memory query is wired in.
    fn current_usage(&self) -> usize {
        get_current_memory_usage() - self.initial_usage
    }
    /// Update and return the high-water mark of `current_usage`.
    fn peak_usage(&mut self) -> usize {
        let current = self.current_usage();
        self.peak = self.peak.max(current);
        self.peak
    }
}
/// Current process memory usage in bytes; the benchmark mock always
/// returns 0, so tracker deltas are 0 as well.
fn get_current_memory_usage() -> usize {
    // In production, this would query actual memory usage
    // For benchmarking, we'll estimate based on allocations
    0
}
// Mock stand-ins for the real pipeline types; the loader functions below
// size them to approximate realistic model/image footprints.
type Model = Vec<u8>;
type Image = Vec<u8>;
type Tensor = Vec<f32>;
type Output = Vec<f32>;
/// Mock generic model load: a 100 MiB zeroed weight blob.
fn load_model() -> Model {
    let bytes = 100 * 1024 * 1024;
    vec![0u8; bytes]
}
/// Mock detection model load: 150 MiB of zeroed weights.
fn load_detection_model() -> Model {
    let bytes = 150 * 1024 * 1024;
    vec![0u8; bytes]
}
/// Mock recognition model load: 80 MiB of zeroed weights.
fn load_recognition_model() -> Model {
    let bytes = 80 * 1024 * 1024;
    vec![0u8; bytes]
}
/// Mock math model load: 120 MiB of zeroed weights.
fn load_math_model() -> Model {
    let bytes = 120 * 1024 * 1024;
    vec![0u8; bytes]
}
/// Mid-gray RGB image buffer of the given dimensions (3 bytes/pixel).
fn create_image(width: u32, height: u32) -> Image {
    let len = (width * height * 3) as usize;
    vec![128u8; len]
}
/// Batch of `batch_size` identical mock images.
fn create_batch(batch_size: usize, width: u32, height: u32) -> Vec<Image> {
    std::iter::repeat_with(|| create_image(width, height))
        .take(batch_size)
        .collect()
}
/// Scale raw image bytes into [0, 1] floats.
fn preprocess(image: Image) -> Tensor {
    image.into_iter().map(|px| px as f32 / 255.0).collect()
}
/// Mock forward pass: doubles every input activation.
fn run_inference(_model: &Model, input: Tensor) -> Output {
    input.into_iter().map(|x| x * 2.0).collect()
}
/// Mock batched inference: preprocess then double each value, per image.
fn run_batch_inference(_model: &Model, batch: Vec<Image>) -> Vec<Output> {
    let mut outputs = Vec::with_capacity(batch.len());
    for img in batch {
        let tensor = preprocess(img);
        outputs.push(tensor.iter().map(|&x| x * 2.0).collect());
    }
    outputs
}
/// Format the first logit as a `"result_X.XX"` label.
///
/// An empty output falls back to 0.0 instead of panicking on the
/// original's unchecked `output[0]` index.
fn postprocess(output: Output) -> String {
    let first = output.first().copied().unwrap_or(0.0);
    format!("result_{:.2}", first)
}
/// Fractional growth between the first and last memory sample.
///
/// Returns 0.0 when fewer than two samples exist, and also when the
/// baseline sample is zero — the original divided by `first` regardless,
/// which yields NaN/inf with the mock tracker (all samples are 0).
fn calculate_memory_growth(samples: &[usize]) -> f64 {
    if samples.len() < 2 {
        return 0.0;
    }
    let first = samples[0] as f64;
    let last = samples[samples.len() - 1] as f64;
    if first == 0.0 {
        // Avoid 0/0 -> NaN (or x/0 -> inf) on a zero baseline.
        return 0.0;
    }
    (last - first) / first
}
/// Mock cache of `size` embeddings, each 512 constant floats.
fn create_embedding_cache(size: usize) -> Vec<Vec<f32>> {
    std::iter::repeat_with(|| vec![0.5f32; 512])
        .take(size)
        .collect()
}
/// Fixed-size pool of pre-allocated byte blocks handed out by index.
struct MemoryPool {
    // Size in bytes of every block in `blocks`.
    block_size: usize,
    // The backing storage; never grows or shrinks after construction.
    blocks: Vec<Vec<u8>>,
    // Free list of indices into `blocks`.
    available: Vec<usize>,
}
impl MemoryPool {
    /// Pre-allocate `count` zeroed blocks of `block_size` bytes each;
    /// every block starts on the free list.
    fn new(block_size: usize, count: usize) -> Self {
        let blocks = (0..count).map(|_| vec![0u8; block_size]).collect();
        let available = (0..count).collect();
        Self {
            block_size,
            blocks,
            available,
        }
    }
    /// Pop a free block index, or `None` when the pool is exhausted.
    // NOTE(review): there is no matching `free`; callers must push indices
    // back onto `available` themselves for blocks to be reused.
    fn allocate(&mut self) -> Option<usize> {
        self.available.pop()
    }
}
/// Allocate an interleaved (HWC) tensor filled with the constant 0.5.
fn create_hwc_tensor(height: u32, width: u32, channels: u32) -> Vec<f32> {
    let element_count = (height * width * channels) as usize;
    std::iter::repeat(0.5f32).take(element_count).collect()
}
/// Allocate a planar (CHW) tensor filled with the constant 0.5.
fn create_chw_tensor(height: u32, width: u32, channels: u32) -> Vec<f32> {
    let element_count = (channels * height * width) as usize;
    std::iter::repeat(0.5f32).take(element_count).collect()
}
/// Transpose an interleaved HWC tensor into planar CHW order.
fn convert_hwc_to_chw(hwc: &[f32], height: u32, width: u32, channels: u32) -> Vec<f32> {
    let (h, w, c) = (height as usize, width as usize, channels as usize);
    let mut chw = Vec::with_capacity(hwc.len());
    // Emit one full H x W plane per channel.
    for channel in 0..c {
        for row in 0..h {
            for col in 0..w {
                chw.push(hwc[(row * w + col) * c + channel]);
            }
        }
    }
    chw
}
// Register every memory-profiling benchmark with the Criterion harness.
criterion_group!(
    benches,
    bench_peak_memory_inference,
    bench_memory_per_batch_image,
    bench_model_loading_memory,
    bench_memory_growth,
    bench_memory_fragmentation,
    bench_cache_memory,
    bench_memory_pools,
    bench_tensor_layouts
);
// Expands to the `main` entry point that runs the `benches` group.
criterion_main!(benches);

View File

@@ -0,0 +1,194 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::time::Duration;
/// Benchmark end-to-end single-image OCR latency across resolutions
/// from small (224²) to very large (1024²).
fn bench_single_image(c: &mut Criterion) {
    let mut group = c.benchmark_group("single_image_ocr");
    group.measurement_time(Duration::from_secs(10));
    group.sample_size(50);
    let resolutions = [
        (224, 224),
        (384, 384),
        (512, 512),
        (768, 768),
        (1024, 1024),
    ];
    for (w, h) in resolutions {
        group.bench_with_input(
            BenchmarkId::new("resolution", format!("{}x{}", w, h)),
            &(w, h),
            |b, &(width, height)| {
                // Synthetic mid-gray RGB frame.
                let image_data = vec![128u8; (width * height * 3) as usize];
                b.iter(|| {
                    // Mock pipeline: preprocess -> features -> recognition.
                    let gray = preprocess_image(black_box(&image_data), width, height);
                    let feats = extract_features(black_box(&gray));
                    black_box(recognize_text(black_box(&feats)))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark sequential OCR over batches of 1..32 images at 384x384.
fn bench_batch_processing(c: &mut Criterion) {
    let mut group = c.benchmark_group("batch_processing");
    group.measurement_time(Duration::from_secs(15));
    group.sample_size(30);
    let (img_w, img_h) = (384u32, 384u32);
    for batch in [1usize, 4, 8, 16, 32] {
        group.bench_with_input(BenchmarkId::new("batch_size", batch), &batch, |b, &n| {
            // One synthetic mid-gray frame per batch slot.
            let frames: Vec<Vec<u8>> = (0..n)
                .map(|_| vec![128u8; (img_w * img_h * 3) as usize])
                .collect();
            b.iter(|| {
                // Run the whole batch through the mock pipeline.
                let texts: Vec<String> = frames
                    .iter()
                    .map(|frame| {
                        let gray = preprocess_image(black_box(frame), img_w, img_h);
                        let feats = extract_features(black_box(&gray));
                        recognize_text(black_box(&feats))
                    })
                    .collect();
                black_box(texts)
            });
        });
    }
    group.finish();
}
/// Compare cold-start latency (model load included in the measurement)
/// against warm inference (model loaded once up front).
fn bench_cold_vs_warm(c: &mut Criterion) {
    let mut group = c.benchmark_group("cold_vs_warm");
    group.measurement_time(Duration::from_secs(10));
    let frame = vec![128u8; (384 * 384 * 3) as usize];
    // Cold: pay the model-initialization cost inside the measured region.
    group.bench_function("cold_start", |b| {
        b.iter_with_large_drop(|| {
            let _model = initialize_model();
            let gray = preprocess_image(black_box(&frame), 384, 384);
            let feats = extract_features(black_box(&gray));
            black_box(recognize_text(black_box(&feats)))
        });
    });
    // Warm: initialize once outside the loop, measure inference only.
    group.bench_function("warm_inference", |b| {
        let _model = initialize_model();
        b.iter(|| {
            let gray = preprocess_image(black_box(&frame), 384, 384);
            let feats = extract_features(black_box(&gray));
            black_box(recognize_text(black_box(&feats)))
        });
    });
    group.finish();
}
/// Latency-distribution run for P95/P99 analysis against the 100 ms
/// target; the larger sample count tightens the percentile estimates.
fn bench_latency_percentiles(c: &mut Criterion) {
    let mut group = c.benchmark_group("latency_percentiles");
    group.measurement_time(Duration::from_secs(20));
    group.sample_size(100);
    let frame = vec![128u8; (384 * 384 * 3) as usize];
    group.bench_function("p95_target_100ms", |b| {
        b.iter(|| {
            let gray = preprocess_image(black_box(&frame), 384, 384);
            let feats = extract_features(black_box(&gray));
            black_box(recognize_text(black_box(&feats)))
        });
    });
    group.finish();
}
/// Throughput run: Criterion reports elements/second, i.e. OCR'd
/// images per second for a single 384x384 frame.
fn bench_throughput(c: &mut Criterion) {
    let mut group = c.benchmark_group("throughput");
    group.measurement_time(Duration::from_secs(15));
    group.throughput(criterion::Throughput::Elements(1));
    let frame = vec![128u8; (384 * 384 * 3) as usize];
    group.bench_function("images_per_second", |b| {
        b.iter(|| {
            let gray = preprocess_image(black_box(&frame), 384, 384);
            let feats = extract_features(black_box(&gray));
            black_box(recognize_text(black_box(&feats)))
        });
    });
    group.finish();
}
// Mock implementations for benchmarking
// In production, these would be actual OCR pipeline functions
/// Mock model load: pays a fixed 50 ms startup cost, returns dummy
/// weights.
fn initialize_model() -> Vec<u8> {
    // Simulated deserialization latency.
    std::thread::sleep(Duration::from_millis(50));
    vec![0u8; 1024]
}
/// Mock preprocessing: average-RGB grayscale conversion.
/// Panics if `data` is not a multiple of 3 bytes, like the original.
fn preprocess_image(data: &[u8], width: u32, height: u32) -> Vec<u8> {
    let mut processed = Vec::with_capacity((width * height) as usize);
    processed.extend(data.chunks(3).map(|px| {
        // Unweighted mean of the three channels.
        let total = px[0] as u32 + px[1] as u32 + px[2] as u32;
        (total / 3) as u8
    }));
    processed
}
/// Mock feature extraction: scale grayscale bytes into [0, 1] floats.
fn extract_features(data: &[u8]) -> Vec<f32> {
    data.iter().map(|&px| f32::from(px) / 255.0).collect()
}
fn recognize_text(features: &[f32]) -> String {
// Simulate text recognition
let sum: f32 = features.iter().take(100).sum();
format!("recognized_text_{:.2}", sum)
}
// Register every end-to-end OCR benchmark with the Criterion harness.
criterion_group!(
    benches,
    bench_single_image,
    bench_batch_processing,
    bench_cold_vs_warm,
    bench_latency_percentiles,
    bench_throughput
);
// Expands to the `main` entry point that runs the `benches` group.
criterion_main!(benches);

View File

@@ -0,0 +1,224 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use scipix_ocr::optimize::*;
/// Compare the SIMD RGBA->gray kernel against a scalar baseline over
/// square images from 256² to 2048² pixels.
fn bench_grayscale(c: &mut Criterion) {
    let mut group = c.benchmark_group("grayscale");
    for &dim in [256usize, 512, 1024, 2048].iter() {
        let pixel_count = dim * dim;
        // Synthetic RGBA input with a repeating byte pattern.
        let rgba: Vec<u8> = (0..pixel_count * 4).map(|i| (i % 256) as u8).collect();
        let mut gray = vec![0u8; pixel_count];
        group.throughput(Throughput::Elements(pixel_count as u64));
        // SIMD kernel.
        group.bench_with_input(BenchmarkId::new("simd", dim), &dim, |b, _| {
            b.iter(|| {
                simd::simd_grayscale(black_box(&rgba), black_box(&mut gray));
            });
        });
        // Scalar baseline with the same BT.601-style integer weights
        // (77/150/29 out of 256).
        group.bench_with_input(BenchmarkId::new("scalar", dim), &dim, |b, _| {
            b.iter(|| {
                for (dst, px) in gray.iter_mut().zip(rgba.chunks_exact(4)) {
                    let (r, g, bl) = (px[0] as u32, px[1] as u32, px[2] as u32);
                    *dst = ((r * 77 + g * 150 + bl * 29) >> 8) as u8;
                }
            });
        });
    }
    group.finish();
}
/// Compare SIMD binarization at threshold 128 against a scalar loop.
fn bench_threshold(c: &mut Criterion) {
    let mut group = c.benchmark_group("threshold");
    for len in [1024usize, 4096, 16384, 65536].iter() {
        let gray: Vec<u8> = (0..*len).map(|i| (i % 256) as u8).collect();
        let mut out = vec![0u8; *len];
        group.throughput(Throughput::Elements(*len as u64));
        // SIMD kernel.
        group.bench_with_input(BenchmarkId::new("simd", len), len, |b, _| {
            b.iter(|| {
                simd::simd_threshold(black_box(&gray), black_box(128), black_box(&mut out));
            });
        });
        // Scalar baseline: >=128 -> 255, else 0.
        group.bench_with_input(BenchmarkId::new("scalar", len), len, |b, _| {
            b.iter(|| {
                for (dst, &src) in out.iter_mut().zip(gray.iter()) {
                    *dst = if src >= 128 { 255 } else { 0 };
                }
            });
        });
    }
    group.finish();
}
/// Compare SIMD mean/std-dev normalization against a scalar baseline.
/// Each variant works on its own clone of the ramp input.
fn bench_normalize(c: &mut Criterion) {
    let mut group = c.benchmark_group("normalize");
    for len in [128usize, 512, 2048, 8192].iter() {
        let data: Vec<f32> = (0..*len).map(|i| i as f32).collect();
        group.throughput(Throughput::Elements(*len as u64));
        // SIMD kernel.
        group.bench_with_input(BenchmarkId::new("simd", len), len, |b, _| {
            let mut work = data.clone();
            b.iter(|| {
                simd::simd_normalize(black_box(&mut work));
            });
        });
        // Scalar baseline: subtract mean, divide by std-dev + epsilon.
        group.bench_with_input(BenchmarkId::new("scalar", len), len, |b, _| {
            let mut work = data.clone();
            b.iter(|| {
                let mean = work.iter().sum::<f32>() / work.len() as f32;
                let variance =
                    work.iter().map(|x| (x - mean).powi(2)).sum::<f32>() / work.len() as f32;
                let std_dev = variance.sqrt() + 1e-8;
                work.iter_mut().for_each(|x| *x = (*x - mean) / std_dev);
            });
        });
    }
    group.finish();
}
/// Compare chunked parallel map against a sequential iterator over the
/// same quadratic kernel.
fn bench_parallel_map(c: &mut Criterion) {
    let mut group = c.benchmark_group("parallel_map");
    for n in [100i32, 1000, 10000].iter() {
        let input: Vec<i32> = (0..*n).collect();
        group.throughput(Throughput::Elements(*n as u64));
        // Parallel path: chunks of 100 elements.
        group.bench_with_input(BenchmarkId::new("parallel", n), n, |b, _| {
            b.iter(|| {
                parallel::parallel_map_chunked(black_box(input.clone()), 100, |x| {
                    x * x + x * 2 + 1
                })
            });
        });
        // Sequential baseline.
        group.bench_with_input(BenchmarkId::new("sequential", n), n, |b, _| {
            b.iter(|| input.iter().map(|&x| x * x + x * 2 + 1).collect::<Vec<_>>());
        });
    }
    group.finish();
}
/// Compare acquiring a recycled buffer from the pool against a fresh
/// heap allocation, each followed by a 512-byte write.
fn bench_buffer_pool(c: &mut Criterion) {
    let mut group = c.benchmark_group("buffer_pool");
    let pool = memory::BufferPool::new(|| Vec::with_capacity(1024), 10, 100);
    // Pooled path.
    group.bench_function("pooled", |b| {
        b.iter(|| {
            let mut buffer = pool.acquire();
            buffer.extend_from_slice(&[0u8; 512]);
            black_box(&buffer);
        });
    });
    // Direct-allocation path.
    group.bench_function("direct", |b| {
        b.iter(|| {
            let mut buffer = Vec::with_capacity(1024);
            buffer.extend_from_slice(&[0u8; 512]);
            black_box(&buffer);
        });
    });
    group.finish();
}
/// Benchmark weight quantization, dequantization, and per-channel
/// quantization over weights spread evenly across [-1, 1).
fn bench_quantization(c: &mut Criterion) {
    let mut group = c.benchmark_group("quantization");
    for n in [1024usize, 4096, 16384].iter() {
        let weights: Vec<f32> = (0..*n)
            .map(|i| (i as f32 / *n as f32) * 2.0 - 1.0)
            .collect();
        group.throughput(Throughput::Elements(*n as u64));
        // Forward quantization.
        group.bench_with_input(BenchmarkId::new("quantize", n), n, |b, _| {
            b.iter(|| quantize::quantize_weights(black_box(&weights)));
        });
        // Inverse transform on pre-quantized data.
        let (quantized, params) = quantize::quantize_weights(&weights);
        group.bench_with_input(BenchmarkId::new("dequantize", n), n, |b, _| {
            b.iter(|| quantize::dequantize(black_box(&quantized), black_box(params)));
        });
        // Per-channel quantization over an (n/64, 64) shape.
        let shape = vec![*n / 64, 64];
        group.bench_with_input(BenchmarkId::new("per_channel", n), n, |b, _| {
            b.iter(|| {
                quantize::PerChannelQuant::from_f32(black_box(&weights), black_box(shape.clone()))
            });
        });
    }
    group.finish();
}
/// Compare bump-arena allocation (reset + 100 aligned 1 KiB slices)
/// against 100 individual heap Vec allocations.
fn bench_memory_operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_ops");
    let mut arena = memory::Arena::with_capacity(1024 * 1024);
    // Arena path: rewind, then carve slices.
    group.bench_function("arena_alloc", |b| {
        b.iter(|| {
            arena.reset();
            for _ in 0..100 {
                black_box(arena.alloc(1024, 8));
            }
        });
    });
    // Heap path: a fresh Vec per slice.
    group.bench_function("vec_alloc", |b| {
        b.iter(|| {
            for _ in 0..100 {
                let mut scratch = Vec::with_capacity(1024);
                scratch.resize(1024, 0u8);
                black_box(&scratch);
            }
        });
    });
    group.finish();
}
// Register every SIMD/memory-optimization benchmark with Criterion.
criterion_group!(
    benches,
    bench_grayscale,
    bench_threshold,
    bench_normalize,
    bench_parallel_map,
    bench_buffer_pool,
    bench_quantization,
    bench_memory_operations
);
// Expands to the `main` entry point that runs the `benches` group.
criterion_main!(benches);

View File

@@ -0,0 +1,356 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::time::Duration;
/// Benchmark individual preprocessing transforms
fn bench_individual_transforms(c: &mut Criterion) {
let mut group = c.benchmark_group("individual_transforms");
group.measurement_time(Duration::from_secs(8));
let sizes = [(224, 224), (384, 384), (512, 512)];
for (w, h) in sizes {
let image_data = generate_test_image(w, h);
// Grayscale conversion
group.bench_with_input(
BenchmarkId::new("grayscale", format!("{}x{}", w, h)),
&image_data,
|b, img| {
b.iter(|| black_box(convert_to_grayscale(black_box(img), w, h)));
},
);
// Gaussian blur
group.bench_with_input(
BenchmarkId::new("gaussian_blur", format!("{}x{}", w, h)),
&image_data,
|b, img| {
b.iter(|| black_box(apply_gaussian_blur(black_box(img), w, h, 5)));
},
);
// Adaptive threshold
group.bench_with_input(
BenchmarkId::new("threshold", format!("{}x{}", w, h)),
&image_data,
|b, img| {
b.iter(|| black_box(apply_adaptive_threshold(black_box(img), w, h)));
},
);
// Edge detection
group.bench_with_input(
BenchmarkId::new("edge_detection", format!("{}x{}", w, h)),
&image_data,
|b, img| {
b.iter(|| black_box(detect_edges(black_box(img), w, h)));
},
);
// Normalization
group.bench_with_input(
BenchmarkId::new("normalize", format!("{}x{}", w, h)),
&image_data,
|b, img| {
b.iter(|| black_box(normalize_image(black_box(img))));
},
);
}
group.finish();
}
/// Benchmark full preprocessing pipeline
fn bench_full_pipeline(c: &mut Criterion) {
let mut group = c.benchmark_group("full_pipeline");
group.measurement_time(Duration::from_secs(10));
let sizes = [(224, 224), (384, 384), (512, 512)];
for (w, h) in sizes {
let image_data = generate_test_image(w, h);
group.bench_with_input(
BenchmarkId::new("sequential", format!("{}x{}", w, h)),
&(image_data.clone(), w, h),
|b, (img, width, height)| {
b.iter(|| {
let gray = convert_to_grayscale(black_box(img), *width, *height);
let blurred = apply_gaussian_blur(&gray, *width, *height, 5);
let threshold = apply_adaptive_threshold(&blurred, *width, *height);
let edges = detect_edges(&threshold, *width, *height);
let normalized = normalize_image(&edges);
black_box(normalized)
});
},
);
}
group.finish();
}
/// Benchmark parallel vs sequential preprocessing
fn bench_parallel_vs_sequential(c: &mut Criterion) {
let mut group = c.benchmark_group("parallel_vs_sequential");
group.measurement_time(Duration::from_secs(10));
// Create batch of images
let batch_size = 8;
let size = (384, 384);
let images: Vec<Vec<u8>> = (0..batch_size)
.map(|_| generate_test_image(size.0, size.1))
.collect();
// Sequential processing
group.bench_function("sequential_batch", |b| {
b.iter(|| {
let results: Vec<_> = images
.iter()
.map(|img| {
let gray = convert_to_grayscale(black_box(img), size.0, size.1);
let blurred = apply_gaussian_blur(&gray, size.0, size.1, 5);
apply_adaptive_threshold(&blurred, size.0, size.1)
})
.collect();
black_box(results)
});
});
// Parallel processing (simulated with rayon-like chunking)
group.bench_function("parallel_batch", |b| {
b.iter(|| {
// In production, this would use rayon::par_iter()
let results: Vec<_> = images
.chunks(2)
.flat_map(|chunk| {
chunk.iter().map(|img| {
let gray = convert_to_grayscale(black_box(img), size.0, size.1);
let blurred = apply_gaussian_blur(&gray, size.0, size.1, 5);
apply_adaptive_threshold(&blurred, size.0, size.1)
})
})
.collect();
black_box(results)
});
});
group.finish();
}
/// Benchmark nearest-neighbour and bilinear downscaling of a 1024x1024
/// source image to each of the common model input sizes.
fn bench_resize_operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("resize_operations");
    group.measurement_time(Duration::from_secs(8));

    let src = generate_test_image(1024, 1024);
    for &(tw, th) in &[(224u32, 224u32), (384, 384), (512, 512)] {
        let label = format!("{}x{}", tw, th);
        group.bench_with_input(
            BenchmarkId::new("nearest_neighbor", label.clone()),
            &(tw, th),
            |b, &(w, h)| {
                b.iter(|| black_box(resize_nearest(&src, 1024, 1024, w, h)));
            },
        );
        group.bench_with_input(
            BenchmarkId::new("bilinear", label),
            &(tw, th),
            |b, &(w, h)| {
                b.iter(|| black_box(resize_bilinear(&src, 1024, 1024, w, h)));
            },
        );
    }
    group.finish();
}
/// Gate benchmark: the full 384x384 preprocessing pipeline is expected to
/// finish in under 20 ms (group name records the target for reports).
fn bench_latency_target(c: &mut Criterion) {
    let mut group = c.benchmark_group("latency_target_20ms");
    group.measurement_time(Duration::from_secs(10));
    group.sample_size(100);

    let input = generate_test_image(384, 384);
    group.bench_function("full_pipeline_384x384", |b| {
        b.iter(|| {
            // grayscale -> blur -> threshold -> f32 normalisation
            let gray = convert_to_grayscale(black_box(&input), 384, 384);
            let blurred = apply_gaussian_blur(&gray, 384, 384, 5);
            let binarized = apply_adaptive_threshold(&blurred, 384, 384);
            black_box(normalize_image(&binarized))
        });
    });
    group.finish();
}
// Mock implementations
/// Build a deterministic pseudo-random RGB buffer (3 bytes per pixel) so
/// benchmark runs are reproducible without any image files on disk.
fn generate_test_image(width: u32, height: u32) -> Vec<u8> {
    let len = (width * height * 3) as usize;
    let mut buf = Vec::with_capacity(len);
    for i in 0..len {
        // Cheap linear congruential-style pattern; wraps into byte range.
        buf.push(((i * 123 + 456) % 256) as u8);
    }
    buf
}
/// Convert interleaved RGB bytes to 8-bit luma using integer BT.601-style
/// weights (0.299 R + 0.587 G + 0.114 B, scaled by 1000).
///
/// `width`/`height` are used only to pre-size the output buffer; the pixel
/// count actually processed is driven by `rgb_data`. Trailing bytes that do
/// not form a complete RGB triple are ignored — the previous `chunks(3)` +
/// direct indexing panicked on such ragged input.
fn convert_to_grayscale(rgb_data: &[u8], width: u32, height: u32) -> Vec<u8> {
    let mut gray = Vec::with_capacity((width * height) as usize);
    // chunks_exact guarantees 3-byte chunks, so indexing below cannot panic
    // and the optimizer can drop the per-access bounds checks.
    for px in rgb_data.chunks_exact(3) {
        let (r, g, b) = (px[0] as u32, px[1] as u32, px[2] as u32);
        gray.push(((r * 299 + g * 587 + b * 114) / 1000) as u8);
    }
    gray
}
/// Smooth a single-channel image with a `kernel_size` x `kernel_size`
/// uniform mean filter (a box blur — despite the name there is no gaussian
/// weighting). Out-of-bounds neighbours are skipped, so edge pixels average
/// over a smaller window rather than clamping or wrapping.
fn apply_gaussian_blur(data: &[u8], width: u32, height: u32, kernel_size: usize) -> Vec<u8> {
    let (w, h) = (width as i32, height as i32);
    let k = kernel_size as i32;
    let r = k / 2;
    let mut out = Vec::with_capacity(data.len());
    for cy in 0..h {
        for cx in 0..w {
            let mut acc = 0u32;
            let mut n = 0u32;
            // Window offsets run -r ..= k-1-r, matching a 0..k kernel scan
            // shifted left by the radius.
            for dy in -r..(k - r) {
                for dx in -r..(k - r) {
                    let px = cx + dx;
                    let py = cy + dy;
                    if (0..w).contains(&px) && (0..h).contains(&py) {
                        acc += data[(py * w + px) as usize] as u32;
                        n += 1;
                    }
                }
            }
            out.push((acc / n) as u8);
        }
    }
    out
}
/// Binarise a grayscale image with an adaptive mean threshold: each pixel is
/// compared against the mean of its (up to) 11x11 neighbourhood minus a
/// small bias, yielding 255 where the pixel is brighter and 0 otherwise.
/// The window is clamped at the image borders, so corner pixels use a
/// smaller neighbourhood. O(pixels * window^2) — fine for benchmarking.
fn apply_adaptive_threshold(data: &[u8], width: u32, height: u32) -> Vec<u8> {
    const BLOCK: u32 = 11; // edge length of the local-mean window
    const BIAS: i32 = 2; // subtracted from the local mean
    let half = BLOCK / 2;
    let mut out = Vec::with_capacity(data.len());
    for row in 0..height {
        let lo_y = row.saturating_sub(half);
        let hi_y = (row + half).min(height - 1);
        for col in 0..width {
            let lo_x = col.saturating_sub(half);
            let hi_x = (col + half).min(width - 1);
            let mut total = 0u32;
            let mut samples = 0u32;
            for wy in lo_y..=hi_y {
                for wx in lo_x..=hi_x {
                    total += data[(wy * width + wx) as usize] as u32;
                    samples += 1;
                }
            }
            let cutoff = (total / samples) as i32 - BIAS;
            let pixel = data[(row * width + col) as usize] as i32;
            out.push(if pixel > cutoff { 255 } else { 0 });
        }
    }
    out
}
/// Gradient-magnitude edge map using central differences of the immediate
/// horizontal and vertical neighbours (a simplified stand-in for Sobel —
/// there is no 3x3 weighted kernel here). Border pixels are set to 0
/// because they lack a full neighbourhood; magnitudes are clamped to 255.
fn detect_edges(data: &[u8], width: u32, height: u32) -> Vec<u8> {
    let stride = width as usize;
    let mut out = Vec::with_capacity(data.len());
    for row in 0..height {
        for col in 0..width {
            let on_border = col == 0 || col + 1 == width || row == 0 || row + 1 == height;
            if on_border {
                out.push(0);
                continue;
            }
            let i = (row * width + col) as usize;
            let dx = (data[i + 1] as i32 - data[i - 1] as i32).abs();
            let dy = (data[i + stride] as i32 - data[i - stride] as i32).abs();
            let mag = ((dx * dx + dy * dy) as f32).sqrt().min(255.0);
            out.push(mag as u8);
        }
    }
    out
}
/// Map u8 pixels into roughly [-1, 1) via (x - 128) / 128, the zero-centred
/// float form model inputs expect.
fn normalize_image(data: &[u8]) -> Vec<f32> {
    let mut out = Vec::with_capacity(data.len());
    for &px in data {
        out.push((f32::from(px) - 128.0) / 128.0);
    }
    out
}
/// Nearest-neighbour resize of a single-channel image: each destination
/// pixel copies the source pixel found by truncating the scaled coordinate.
fn resize_nearest(src: &[u8], src_w: u32, src_h: u32, dst_w: u32, dst_h: u32) -> Vec<u8> {
    let step_x = src_w as f32 / dst_w as f32;
    let step_y = src_h as f32 / dst_h as f32;
    let mut out = Vec::with_capacity((dst_w * dst_h) as usize);
    for dy in 0..dst_h {
        // Row coordinate is invariant across the inner loop, so hoist it.
        let sy = (dy as f32 * step_y) as u32;
        for dx in 0..dst_w {
            let sx = (dx as f32 * step_x) as u32;
            out.push(src[(sy * src_w + sx) as usize]);
        }
    }
    out
}
/// Bilinear resize of a single-channel image: each destination pixel blends
/// the four surrounding source pixels, weighted by the fractional distance
/// to each. Neighbour indices are clamped at the right/bottom edges.
fn resize_bilinear(src: &[u8], src_w: u32, src_h: u32, dst_w: u32, dst_h: u32) -> Vec<u8> {
    let scale_x = (src_w - 1) as f32 / dst_w as f32;
    let scale_y = (src_h - 1) as f32 / dst_h as f32;
    let mut out = Vec::with_capacity((dst_w * dst_h) as usize);
    for dy in 0..dst_h {
        let fy = dy as f32 * scale_y;
        let y0 = fy.floor() as u32;
        let y1 = (y0 + 1).min(src_h - 1);
        let ty = fy - y0 as f32;
        for dx in 0..dst_w {
            let fx = dx as f32 * scale_x;
            let x0 = fx.floor() as u32;
            let x1 = (x0 + 1).min(src_w - 1);
            let tx = fx - x0 as f32;
            let p00 = src[(y0 * src_w + x0) as usize] as f32;
            let p10 = src[(y0 * src_w + x1) as usize] as f32;
            let p01 = src[(y1 * src_w + x0) as usize] as f32;
            let p11 = src[(y1 * src_w + x1) as usize] as f32;
            // Same weighted-sum form as the classic bilinear formula; kept
            // in this exact shape so float rounding matches expectations.
            let blended = p00 * (1.0 - tx) * (1.0 - ty)
                + p10 * tx * (1.0 - ty)
                + p01 * (1.0 - tx) * ty
                + p11 * tx * ty;
            out.push(blended as u8);
        }
    }
    out
}
// Register the preprocessing benchmark suites with Criterion's harness.
// NOTE(review): bench_request_parsing, bench_response_serialization and
// bench_concurrent_requests are defined earlier in this file but are not
// listed here — confirm they are registered in another criterion_group!
// elsewhere, or add them if the omission is unintentional.
criterion_group!(
    benches,
    bench_individual_transforms,
    bench_full_pipeline,
    bench_parallel_vs_sequential,
    bench_resize_operations,
    bench_latency_target
);
criterion_main!(benches);