Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
455
vendor/ruvector/examples/scipix/benches/api.rs
vendored
Normal file
455
vendor/ruvector/examples/scipix/benches/api.rs
vendored
Normal file
@@ -0,0 +1,455 @@
|
||||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||
use std::time::Duration;
|
||||
|
||||
/// Benchmark API request parsing
|
||||
fn bench_request_parsing(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("request_parsing");
|
||||
group.measurement_time(Duration::from_secs(5));
|
||||
|
||||
let json_payloads = vec![
|
||||
("small", r#"{"image_url": "http://example.com/img.jpg"}"#),
|
||||
(
|
||||
"medium",
|
||||
r#"{
|
||||
"image_url": "http://example.com/img.jpg",
|
||||
"options": {
|
||||
"languages": ["en", "es"],
|
||||
"format": "latex",
|
||||
"inline_mode": true
|
||||
}
|
||||
}"#,
|
||||
),
|
||||
(
|
||||
"large",
|
||||
r#"{
|
||||
"image_url": "http://example.com/img.jpg",
|
||||
"options": {
|
||||
"languages": ["en", "es", "fr", "de"],
|
||||
"format": "latex",
|
||||
"inline_mode": true,
|
||||
"detect_orientation": true,
|
||||
"skip_preprocessing": false,
|
||||
"models": ["text", "math", "table"],
|
||||
"confidence_threshold": 0.8
|
||||
},
|
||||
"metadata": {
|
||||
"user_id": "12345",
|
||||
"session_id": "abcde",
|
||||
"timestamp": 1234567890
|
||||
}
|
||||
}"#,
|
||||
),
|
||||
];
|
||||
|
||||
for (name, payload) in json_payloads {
|
||||
group.bench_with_input(BenchmarkId::new("parse_json", name), &payload, |b, json| {
|
||||
b.iter(|| black_box(parse_ocr_request(black_box(json))));
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark response serialization
|
||||
fn bench_response_serialization(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("response_serialization");
|
||||
group.measurement_time(Duration::from_secs(5));
|
||||
|
||||
let responses = vec![
|
||||
("simple", create_simple_response()),
|
||||
("detailed", create_detailed_response()),
|
||||
("batch", create_batch_response(10)),
|
||||
];
|
||||
|
||||
for (name, response) in responses {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("serialize_json", name),
|
||||
&response,
|
||||
|b, resp| {
|
||||
b.iter(|| black_box(serialize_response(black_box(resp))));
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark concurrent request handling
|
||||
fn bench_concurrent_requests(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("concurrent_requests");
|
||||
group.measurement_time(Duration::from_secs(10));
|
||||
|
||||
let concurrent_levels = [1, 5, 10, 20, 50];
|
||||
|
||||
for concurrency in concurrent_levels {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("handle_requests", concurrency),
|
||||
&concurrency,
|
||||
|b, &level| {
|
||||
b.iter(|| {
|
||||
let handles: Vec<_> = (0..level).map(|_| handle_single_request()).collect();
|
||||
black_box(handles)
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark middleware overhead
|
||||
fn bench_middleware_overhead(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("middleware_overhead");
|
||||
group.measurement_time(Duration::from_secs(5));
|
||||
|
||||
let request = create_mock_request();
|
||||
|
||||
group.bench_function("no_middleware", |b| {
|
||||
b.iter(|| black_box(handle_request_direct(black_box(&request))));
|
||||
});
|
||||
|
||||
group.bench_function("with_auth", |b| {
|
||||
b.iter(|| {
|
||||
let authed = auth_middleware(black_box(&request));
|
||||
black_box(handle_request_direct(black_box(&authed)))
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("with_logging", |b| {
|
||||
b.iter(|| {
|
||||
let logged = logging_middleware(black_box(&request));
|
||||
black_box(handle_request_direct(black_box(&logged)))
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("full_stack", |b| {
|
||||
b.iter(|| {
|
||||
let req = black_box(&request);
|
||||
let authed = auth_middleware(req);
|
||||
let logged = logging_middleware(&authed);
|
||||
let validated = validation_middleware(&logged);
|
||||
let rate_limited = rate_limit_middleware(&validated);
|
||||
black_box(handle_request_direct(black_box(&rate_limited)))
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark request validation
|
||||
fn bench_request_validation(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("request_validation");
|
||||
group.measurement_time(Duration::from_secs(5));
|
||||
|
||||
let valid_request = create_valid_request();
|
||||
let invalid_request = create_invalid_request();
|
||||
|
||||
group.bench_function("validate_valid", |b| {
|
||||
b.iter(|| black_box(validate_request(black_box(&valid_request))));
|
||||
});
|
||||
|
||||
group.bench_function("validate_invalid", |b| {
|
||||
b.iter(|| black_box(validate_request(black_box(&invalid_request))));
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark rate limiting
|
||||
fn bench_rate_limiting(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("rate_limiting");
|
||||
group.measurement_time(Duration::from_secs(5));
|
||||
|
||||
let mut limiter = RateLimiter::new(100, Duration::from_secs(60));
|
||||
|
||||
group.bench_function("check_limit", |b| {
|
||||
b.iter(|| black_box(limiter.check_limit("user_123")));
|
||||
});
|
||||
|
||||
group.bench_function("update_limit", |b| {
|
||||
b.iter(|| {
|
||||
limiter.record_request("user_123");
|
||||
black_box(&limiter)
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark error handling
|
||||
fn bench_error_handling(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("error_handling");
|
||||
group.measurement_time(Duration::from_secs(5));
|
||||
|
||||
group.bench_function("create_error_response", |b| {
|
||||
b.iter(|| black_box(create_error_response("Invalid request", 400)));
|
||||
});
|
||||
|
||||
group.bench_function("log_and_respond", |b| {
|
||||
b.iter(|| {
|
||||
let error = "Processing failed";
|
||||
log_error(error);
|
||||
black_box(create_error_response(error, 500))
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark end-to-end API request
|
||||
fn bench_e2e_api_request(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("e2e_api_request");
|
||||
group.measurement_time(Duration::from_secs(15));
|
||||
|
||||
let request_json = r#"{
|
||||
"image_url": "http://example.com/img.jpg",
|
||||
"options": {
|
||||
"format": "latex"
|
||||
}
|
||||
}"#;
|
||||
|
||||
group.bench_function("full_request_cycle", |b| {
|
||||
b.iter(|| {
|
||||
// Parse
|
||||
let request = parse_ocr_request(black_box(request_json));
|
||||
|
||||
// Validate
|
||||
let _validated = validate_request(&request);
|
||||
|
||||
// Auth
|
||||
let _authed = auth_middleware(&request);
|
||||
|
||||
// Process (simulated)
|
||||
let response = process_ocr_request(&request);
|
||||
|
||||
// Serialize
|
||||
let json = serialize_response(&response);
|
||||
|
||||
black_box(json)
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Mock types and implementations
|
||||
|
||||
#[derive(Clone)]
|
||||
struct OcrRequest {
|
||||
image_url: String,
|
||||
options: RequestOptions,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct RequestOptions {
|
||||
format: String,
|
||||
languages: Vec<String>,
|
||||
confidence_threshold: f32,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct OcrResponse {
|
||||
text: String,
|
||||
latex: String,
|
||||
confidence: f32,
|
||||
regions: Vec<Region>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct Region {
|
||||
bbox: [f32; 4],
|
||||
text: String,
|
||||
confidence: f32,
|
||||
}
|
||||
|
||||
struct RateLimiter {
|
||||
max_requests: usize,
|
||||
window: Duration,
|
||||
requests: std::collections::HashMap<String, Vec<std::time::Instant>>,
|
||||
}
|
||||
|
||||
impl RateLimiter {
|
||||
fn new(max_requests: usize, window: Duration) -> Self {
|
||||
Self {
|
||||
max_requests,
|
||||
window,
|
||||
requests: std::collections::HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn check_limit(&mut self, user_id: &str) -> bool {
|
||||
let now = std::time::Instant::now();
|
||||
let requests = self
|
||||
.requests
|
||||
.entry(user_id.to_string())
|
||||
.or_insert_with(Vec::new);
|
||||
|
||||
requests.retain(|&req_time| now.duration_since(req_time) < self.window);
|
||||
|
||||
requests.len() < self.max_requests
|
||||
}
|
||||
|
||||
fn record_request(&mut self, user_id: &str) {
|
||||
let now = std::time::Instant::now();
|
||||
self.requests
|
||||
.entry(user_id.to_string())
|
||||
.or_insert_with(Vec::new)
|
||||
.push(now);
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_ocr_request(json: &str) -> OcrRequest {
|
||||
// Simulate JSON parsing
|
||||
OcrRequest {
|
||||
image_url: "http://example.com/img.jpg".to_string(),
|
||||
options: RequestOptions {
|
||||
format: "latex".to_string(),
|
||||
languages: vec!["en".to_string()],
|
||||
confidence_threshold: 0.8,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_response(response: &OcrResponse) -> String {
|
||||
// Simulate JSON serialization
|
||||
format!(
|
||||
r#"{{"text":"{}","latex":"{}","confidence":{}}}"#,
|
||||
response.text, response.latex, response.confidence
|
||||
)
|
||||
}
|
||||
|
||||
fn create_simple_response() -> OcrResponse {
|
||||
OcrResponse {
|
||||
text: "E = mc^2".to_string(),
|
||||
latex: "E = mc^2".to_string(),
|
||||
confidence: 0.95,
|
||||
regions: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
fn create_detailed_response() -> OcrResponse {
|
||||
OcrResponse {
|
||||
text: "Complex equation with multiple terms".to_string(),
|
||||
latex: "\\int_0^1 x^2 dx = \\frac{1}{3}".to_string(),
|
||||
confidence: 0.92,
|
||||
regions: vec![
|
||||
Region {
|
||||
bbox: [0.0, 0.0, 100.0, 50.0],
|
||||
text: "integral".to_string(),
|
||||
confidence: 0.95,
|
||||
},
|
||||
Region {
|
||||
bbox: [100.0, 0.0, 200.0, 50.0],
|
||||
text: "equals".to_string(),
|
||||
confidence: 0.98,
|
||||
},
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
fn create_batch_response(count: usize) -> OcrResponse {
|
||||
let regions: Vec<_> = (0..count)
|
||||
.map(|i| Region {
|
||||
bbox: [i as f32 * 10.0, 0.0, (i + 1) as f32 * 10.0, 50.0],
|
||||
text: format!("region_{}", i),
|
||||
confidence: 0.9,
|
||||
})
|
||||
.collect();
|
||||
|
||||
OcrResponse {
|
||||
text: "Batch text".to_string(),
|
||||
latex: "batch latex".to_string(),
|
||||
confidence: 0.9,
|
||||
regions,
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_single_request() -> OcrResponse {
|
||||
create_simple_response()
|
||||
}
|
||||
|
||||
fn create_mock_request() -> OcrRequest {
|
||||
OcrRequest {
|
||||
image_url: "http://example.com/img.jpg".to_string(),
|
||||
options: RequestOptions {
|
||||
format: "latex".to_string(),
|
||||
languages: vec!["en".to_string()],
|
||||
confidence_threshold: 0.8,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_request_direct(request: &OcrRequest) -> OcrResponse {
|
||||
process_ocr_request(request)
|
||||
}
|
||||
|
||||
fn auth_middleware(request: &OcrRequest) -> OcrRequest {
|
||||
// Simulate auth check
|
||||
request.clone()
|
||||
}
|
||||
|
||||
fn logging_middleware(request: &OcrRequest) -> OcrRequest {
|
||||
// Simulate logging
|
||||
request.clone()
|
||||
}
|
||||
|
||||
fn validation_middleware(request: &OcrRequest) -> OcrRequest {
|
||||
// Simulate validation
|
||||
request.clone()
|
||||
}
|
||||
|
||||
fn rate_limit_middleware(request: &OcrRequest) -> OcrRequest {
|
||||
// Simulate rate limiting
|
||||
request.clone()
|
||||
}
|
||||
|
||||
fn create_valid_request() -> OcrRequest {
|
||||
create_mock_request()
|
||||
}
|
||||
|
||||
fn create_invalid_request() -> OcrRequest {
|
||||
OcrRequest {
|
||||
image_url: "".to_string(),
|
||||
options: RequestOptions {
|
||||
format: "invalid".to_string(),
|
||||
languages: vec![],
|
||||
confidence_threshold: -1.0,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_request(request: &OcrRequest) -> Result<(), String> {
|
||||
if request.image_url.is_empty() {
|
||||
return Err("Image URL is required".to_string());
|
||||
}
|
||||
if request.options.confidence_threshold < 0.0 || request.options.confidence_threshold > 1.0 {
|
||||
return Err("Invalid confidence threshold".to_string());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_error_response(message: &str, _code: u16) -> String {
|
||||
format!(r#"{{"error":"{}"}}"#, message)
|
||||
}
|
||||
|
||||
fn log_error(_message: &str) {
|
||||
// Simulate logging
|
||||
}
|
||||
|
||||
fn process_ocr_request(_request: &OcrRequest) -> OcrResponse {
|
||||
// Simulate OCR processing
|
||||
create_simple_response()
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
bench_request_parsing,
|
||||
bench_response_serialization,
|
||||
bench_concurrent_requests,
|
||||
bench_middleware_overhead,
|
||||
bench_request_validation,
|
||||
bench_rate_limiting,
|
||||
bench_error_handling,
|
||||
bench_e2e_api_request
|
||||
);
|
||||
criterion_main!(benches);
|
||||
450
vendor/ruvector/examples/scipix/benches/cache.rs
vendored
Normal file
450
vendor/ruvector/examples/scipix/benches/cache.rs
vendored
Normal file
@@ -0,0 +1,450 @@
|
||||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||
use std::collections::HashMap;
|
||||
use std::time::Duration;
|
||||
|
||||
/// Benchmark embedding generation
|
||||
fn bench_embedding_generation(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("embedding_generation");
|
||||
group.measurement_time(Duration::from_secs(8));
|
||||
|
||||
let image_sizes = [(224, 224), (384, 384), (512, 512)];
|
||||
|
||||
for (w, h) in image_sizes {
|
||||
let image_data = generate_test_image(w, h);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("generate", format!("{}x{}", w, h)),
|
||||
&image_data,
|
||||
|b, img| {
|
||||
b.iter(|| black_box(generate_embedding(black_box(img))));
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark similarity search (vector search)
|
||||
fn bench_similarity_search(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("similarity_search");
|
||||
group.measurement_time(Duration::from_secs(10));
|
||||
|
||||
// Create cache with varying sizes
|
||||
let cache_sizes = [100, 1000, 10000];
|
||||
|
||||
for cache_size in cache_sizes {
|
||||
let cache = create_embedding_cache(cache_size);
|
||||
let query_embedding = generate_random_embedding(512);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("linear_search", cache_size),
|
||||
&(&cache, &query_embedding),
|
||||
|b, (cache, query)| {
|
||||
b.iter(|| {
|
||||
black_box(linear_similarity_search(
|
||||
black_box(cache),
|
||||
black_box(query),
|
||||
10,
|
||||
))
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
// Approximate nearest neighbor search
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("ann_search", cache_size),
|
||||
&(&cache, &query_embedding),
|
||||
|b, (cache, query)| {
|
||||
b.iter(|| {
|
||||
black_box(ann_similarity_search(
|
||||
black_box(cache),
|
||||
black_box(query),
|
||||
10,
|
||||
))
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark cache hit latency
|
||||
fn bench_cache_hit_latency(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("cache_hit_latency");
|
||||
group.measurement_time(Duration::from_secs(5));
|
||||
|
||||
let cache = create_embedding_cache(1000);
|
||||
let query = generate_random_embedding(512);
|
||||
|
||||
group.bench_function("exact_match", |b| {
|
||||
let cached_embedding = cache.values().next().unwrap();
|
||||
b.iter(|| {
|
||||
black_box(find_exact_match(
|
||||
black_box(&cache),
|
||||
black_box(cached_embedding),
|
||||
))
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("similarity_threshold", |b| {
|
||||
b.iter(|| {
|
||||
black_box(find_by_similarity_threshold(
|
||||
black_box(&cache),
|
||||
black_box(&query),
|
||||
0.95,
|
||||
))
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark cache miss latency
|
||||
fn bench_cache_miss_latency(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("cache_miss_latency");
|
||||
group.measurement_time(Duration::from_secs(8));
|
||||
|
||||
let cache = create_embedding_cache(1000);
|
||||
let new_image = generate_test_image(384, 384);
|
||||
|
||||
group.bench_function("miss_with_generation", |b| {
|
||||
b.iter(|| {
|
||||
let query_embedding = generate_embedding(black_box(&new_image));
|
||||
let result = linear_similarity_search(black_box(&cache), &query_embedding, 1);
|
||||
if result.is_empty() || result[0].1 < 0.95 {
|
||||
// Cache miss - would need to process
|
||||
black_box(process_new_image(black_box(&new_image)))
|
||||
} else {
|
||||
black_box(result[0].2.clone())
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark cache insertion
|
||||
fn bench_cache_insertion(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("cache_insertion");
|
||||
group.measurement_time(Duration::from_secs(8));
|
||||
|
||||
group.bench_function("insert_new_entry", |b| {
|
||||
let mut cache = create_embedding_cache(1000);
|
||||
let mut counter = 0;
|
||||
|
||||
b.iter(|| {
|
||||
let embedding = generate_random_embedding(512);
|
||||
let key = format!("key_{}", counter);
|
||||
cache.insert(key.clone(), embedding);
|
||||
counter += 1;
|
||||
black_box(&cache)
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("insert_with_eviction", |b| {
|
||||
let mut cache = LRUCache::new(1000);
|
||||
let mut counter = 0;
|
||||
|
||||
b.iter(|| {
|
||||
let embedding = generate_random_embedding(512);
|
||||
let key = format!("key_{}", counter);
|
||||
cache.insert(key, embedding);
|
||||
counter += 1;
|
||||
black_box(&cache)
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark cache update operations
|
||||
fn bench_cache_updates(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("cache_updates");
|
||||
group.measurement_time(Duration::from_secs(5));
|
||||
|
||||
let mut cache = create_embedding_cache(1000);
|
||||
let keys: Vec<_> = cache.keys().cloned().collect();
|
||||
|
||||
group.bench_function("update_existing", |b| {
|
||||
let mut idx = 0;
|
||||
b.iter(|| {
|
||||
let key = &keys[idx % keys.len()];
|
||||
let new_embedding = generate_random_embedding(512);
|
||||
cache.insert(key.clone(), new_embedding);
|
||||
idx += 1;
|
||||
black_box(&cache)
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark batch cache operations
|
||||
fn bench_batch_cache_ops(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("batch_cache_operations");
|
||||
group.measurement_time(Duration::from_secs(10));
|
||||
|
||||
let batch_sizes = [10, 50, 100];
|
||||
|
||||
for batch_size in batch_sizes {
|
||||
let cache = create_embedding_cache(1000);
|
||||
let queries: Vec<_> = (0..batch_size)
|
||||
.map(|_| generate_random_embedding(512))
|
||||
.collect();
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("batch_search", batch_size),
|
||||
&(&cache, &queries),
|
||||
|b, (cache, queries)| {
|
||||
b.iter(|| {
|
||||
let results: Vec<_> = queries
|
||||
.iter()
|
||||
.map(|q| linear_similarity_search(black_box(cache), q, 10))
|
||||
.collect();
|
||||
black_box(results)
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("batch_insert", batch_size),
|
||||
&queries,
|
||||
|b, queries| {
|
||||
b.iter_with_setup(
|
||||
|| create_embedding_cache(1000),
|
||||
|mut cache| {
|
||||
for (i, embedding) in queries.iter().enumerate() {
|
||||
cache.insert(format!("batch_{}", i), embedding.clone());
|
||||
}
|
||||
black_box(cache)
|
||||
},
|
||||
);
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark cache statistics and monitoring
|
||||
fn bench_cache_statistics(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("cache_statistics");
|
||||
group.measurement_time(Duration::from_secs(5));
|
||||
|
||||
let cache = create_embedding_cache(10000);
|
||||
|
||||
group.bench_function("compute_stats", |b| {
|
||||
b.iter(|| black_box(compute_cache_statistics(black_box(&cache))));
|
||||
});
|
||||
|
||||
group.bench_function("memory_usage", |b| {
|
||||
b.iter(|| black_box(estimate_cache_memory(black_box(&cache))));
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Mock implementations
|
||||
|
||||
type Embedding = Vec<f32>;
|
||||
|
||||
struct LRUCache {
|
||||
capacity: usize,
|
||||
cache: HashMap<String, Embedding>,
|
||||
access_order: Vec<String>,
|
||||
}
|
||||
|
||||
impl LRUCache {
|
||||
fn new(capacity: usize) -> Self {
|
||||
Self {
|
||||
capacity,
|
||||
cache: HashMap::new(),
|
||||
access_order: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn insert(&mut self, key: String, value: Embedding) {
|
||||
if self.cache.len() >= self.capacity && !self.cache.contains_key(&key) {
|
||||
if let Some(lru_key) = self.access_order.first().cloned() {
|
||||
self.cache.remove(&lru_key);
|
||||
self.access_order.remove(0);
|
||||
}
|
||||
}
|
||||
|
||||
self.cache.insert(key.clone(), value);
|
||||
self.access_order.retain(|k| k != &key);
|
||||
self.access_order.push(key);
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_test_image(width: u32, height: u32) -> Vec<u8> {
|
||||
vec![128u8; (width * height * 3) as usize]
|
||||
}
|
||||
|
||||
fn generate_random_embedding(dim: usize) -> Embedding {
|
||||
(0..dim).map(|i| (i as f32 * 0.001) % 1.0).collect()
|
||||
}
|
||||
|
||||
fn generate_embedding(image_data: &[u8]) -> Embedding {
|
||||
// Simulate embedding generation from image
|
||||
let dim = 512;
|
||||
let mut embedding = Vec::with_capacity(dim);
|
||||
|
||||
for i in 0..dim {
|
||||
let idx = (i * image_data.len() / dim) % image_data.len();
|
||||
embedding.push(image_data[idx] as f32 / 255.0);
|
||||
}
|
||||
|
||||
// Normalize
|
||||
let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
embedding.iter_mut().for_each(|x| *x /= norm);
|
||||
|
||||
embedding
|
||||
}
|
||||
|
||||
fn create_embedding_cache(size: usize) -> HashMap<String, Embedding> {
|
||||
let mut cache = HashMap::new();
|
||||
for i in 0..size {
|
||||
let embedding = generate_random_embedding(512);
|
||||
cache.insert(format!("image_{}", i), embedding);
|
||||
}
|
||||
cache
|
||||
}
|
||||
|
||||
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
|
||||
let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
|
||||
let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
|
||||
if norm_a > 0.0 && norm_b > 0.0 {
|
||||
dot / (norm_a * norm_b)
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
|
||||
fn linear_similarity_search(
|
||||
cache: &HashMap<String, Embedding>,
|
||||
query: &Embedding,
|
||||
top_k: usize,
|
||||
) -> Vec<(String, f32, Embedding)> {
|
||||
let mut results: Vec<_> = cache
|
||||
.iter()
|
||||
.map(|(key, embedding)| {
|
||||
let similarity = cosine_similarity(query, embedding);
|
||||
(key.clone(), similarity, embedding.clone())
|
||||
})
|
||||
.collect();
|
||||
|
||||
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
|
||||
results.truncate(top_k);
|
||||
results
|
||||
}
|
||||
|
||||
fn ann_similarity_search(
|
||||
cache: &HashMap<String, Embedding>,
|
||||
query: &Embedding,
|
||||
top_k: usize,
|
||||
) -> Vec<(String, f32, Embedding)> {
|
||||
// Simplified ANN using random sampling
|
||||
let sample_size = (cache.len() / 10).max(100).min(cache.len());
|
||||
let mut results: Vec<_> = cache
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(i, _)| i % (cache.len() / sample_size.max(1)) == 0)
|
||||
.map(|(_, (key, embedding))| {
|
||||
let similarity = cosine_similarity(query, embedding);
|
||||
(key.clone(), similarity, embedding.clone())
|
||||
})
|
||||
.collect();
|
||||
|
||||
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
|
||||
results.truncate(top_k);
|
||||
results
|
||||
}
|
||||
|
||||
fn find_exact_match(cache: &HashMap<String, Embedding>, query: &Embedding) -> Option<String> {
|
||||
cache.iter().find_map(|(key, embedding)| {
|
||||
if embedding.len() == query.len()
|
||||
&& embedding
|
||||
.iter()
|
||||
.zip(query.iter())
|
||||
.all(|(a, b)| (a - b).abs() < 1e-6)
|
||||
{
|
||||
Some(key.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn find_by_similarity_threshold(
|
||||
cache: &HashMap<String, Embedding>,
|
||||
query: &Embedding,
|
||||
threshold: f32,
|
||||
) -> Option<(String, f32)> {
|
||||
cache
|
||||
.iter()
|
||||
.filter_map(|(key, embedding)| {
|
||||
let similarity = cosine_similarity(query, embedding);
|
||||
if similarity >= threshold {
|
||||
Some((key.clone(), similarity))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.max_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
|
||||
}
|
||||
|
||||
fn process_new_image(_image_data: &[u8]) -> String {
|
||||
// Simulate OCR processing
|
||||
std::thread::sleep(Duration::from_millis(50));
|
||||
"processed_result".to_string()
|
||||
}
|
||||
|
||||
struct CacheStatistics {
|
||||
size: usize,
|
||||
avg_embedding_norm: f32,
|
||||
memory_bytes: usize,
|
||||
}
|
||||
|
||||
fn compute_cache_statistics(cache: &HashMap<String, Embedding>) -> CacheStatistics {
|
||||
let size = cache.len();
|
||||
let avg_norm = if size > 0 {
|
||||
let total_norm: f32 = cache
|
||||
.values()
|
||||
.map(|emb| emb.iter().map(|x| x * x).sum::<f32>().sqrt())
|
||||
.sum();
|
||||
total_norm / size as f32
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
let memory_bytes = estimate_cache_memory(cache);
|
||||
|
||||
CacheStatistics {
|
||||
size,
|
||||
avg_embedding_norm: avg_norm,
|
||||
memory_bytes,
|
||||
}
|
||||
}
|
||||
|
||||
fn estimate_cache_memory(cache: &HashMap<String, Embedding>) -> usize {
|
||||
let key_bytes: usize = cache.keys().map(|k| k.len()).sum();
|
||||
let embedding_bytes: usize = cache.values().map(|e| e.len() * 4).sum();
|
||||
key_bytes + embedding_bytes + cache.len() * 64 // HashMap overhead
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
bench_embedding_generation,
|
||||
bench_similarity_search,
|
||||
bench_cache_hit_latency,
|
||||
bench_cache_miss_latency,
|
||||
bench_cache_insertion,
|
||||
bench_cache_updates,
|
||||
bench_batch_cache_ops,
|
||||
bench_cache_statistics
|
||||
);
|
||||
criterion_main!(benches);
|
||||
413
vendor/ruvector/examples/scipix/benches/inference.rs
vendored
Normal file
413
vendor/ruvector/examples/scipix/benches/inference.rs
vendored
Normal file
@@ -0,0 +1,413 @@
|
||||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||
use std::time::Duration;
|
||||
|
||||
/// Benchmark text detection model inference
|
||||
fn bench_text_detection(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("text_detection_model");
|
||||
group.measurement_time(Duration::from_secs(10));
|
||||
|
||||
let sizes = [(224, 224), (384, 384), (512, 512)];
|
||||
|
||||
for (w, h) in sizes {
|
||||
let input_tensor = create_input_tensor(w, h, 3);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("inference", format!("{}x{}", w, h)),
|
||||
&input_tensor,
|
||||
|b, tensor| {
|
||||
b.iter(|| black_box(run_detection_model(black_box(tensor))));
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark text recognition model inference
|
||||
fn bench_text_recognition(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("text_recognition_model");
|
||||
group.measurement_time(Duration::from_secs(10));
|
||||
|
||||
// Recognition typically works on smaller cropped regions
|
||||
let sizes = [(32, 128), (48, 192), (64, 256)];
|
||||
|
||||
for (h, w) in sizes {
|
||||
let input_tensor = create_input_tensor(w, h, 1);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("inference", format!("{}x{}", w, h)),
|
||||
&input_tensor,
|
||||
|b, tensor| {
|
||||
b.iter(|| black_box(run_recognition_model(black_box(tensor))));
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark math equation model inference
|
||||
fn bench_math_model(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("math_model");
|
||||
group.measurement_time(Duration::from_secs(10));
|
||||
|
||||
let sizes = [(224, 224), (320, 320), (384, 384)];
|
||||
|
||||
for (w, h) in sizes {
|
||||
let input_tensor = create_input_tensor(w, h, 3);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("inference", format!("{}x{}", w, h)),
|
||||
&input_tensor,
|
||||
|b, tensor| {
|
||||
b.iter(|| black_box(run_math_model(black_box(tensor))));
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark tensor preprocessing operations
|
||||
fn bench_tensor_preprocessing(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("tensor_preprocessing");
|
||||
group.measurement_time(Duration::from_secs(8));
|
||||
|
||||
let image_data = vec![128u8; 384 * 384 * 3];
|
||||
|
||||
group.bench_function("normalization", |b| {
|
||||
b.iter(|| black_box(normalize_tensor(black_box(&image_data))));
|
||||
});
|
||||
|
||||
group.bench_function("standardization", |b| {
|
||||
b.iter(|| black_box(standardize_tensor(black_box(&image_data))));
|
||||
});
|
||||
|
||||
group.bench_function("to_chw_layout", |b| {
|
||||
b.iter(|| black_box(convert_to_chw(black_box(&image_data), 384, 384)));
|
||||
});
|
||||
|
||||
group.bench_function("add_batch_dimension", |b| {
|
||||
let tensor = normalize_tensor(&image_data);
|
||||
b.iter(|| black_box(add_batch_dim(black_box(&tensor))));
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark output postprocessing
|
||||
fn bench_output_postprocessing(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("output_postprocessing");
|
||||
group.measurement_time(Duration::from_secs(8));
|
||||
|
||||
let detection_output = create_detection_output(1000);
|
||||
let recognition_output = create_recognition_output(100);
|
||||
|
||||
group.bench_function("nms_filtering", |b| {
|
||||
b.iter(|| black_box(apply_nms(black_box(&detection_output), 0.5)));
|
||||
});
|
||||
|
||||
group.bench_function("confidence_filtering", |b| {
|
||||
b.iter(|| black_box(filter_by_confidence(black_box(&detection_output), 0.7)));
|
||||
});
|
||||
|
||||
group.bench_function("decode_sequence", |b| {
|
||||
b.iter(|| black_box(decode_ctc_output(black_box(&recognition_output))));
|
||||
});
|
||||
|
||||
group.bench_function("beam_search", |b| {
|
||||
b.iter(|| black_box(beam_search_decode(black_box(&recognition_output), 5)));
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark batch inference
|
||||
fn bench_batch_inference(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("batch_inference");
|
||||
group.measurement_time(Duration::from_secs(15));
|
||||
|
||||
let batch_sizes = [1, 4, 8, 16];
|
||||
let size = (384, 384);
|
||||
|
||||
for batch_size in batch_sizes {
|
||||
let batch_tensor = create_batch_tensor(batch_size, size.0, size.1, 3);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("detection_batch", batch_size),
|
||||
&batch_tensor,
|
||||
|b, tensor| {
|
||||
b.iter(|| black_box(run_detection_model(black_box(tensor))));
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark model warm-up time
|
||||
fn bench_model_warmup(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("model_warmup");
|
||||
group.measurement_time(Duration::from_secs(10));
|
||||
|
||||
group.bench_function("detection_model_init", |b| {
|
||||
b.iter_with_large_drop(|| black_box(initialize_detection_model()));
|
||||
});
|
||||
|
||||
group.bench_function("recognition_model_init", |b| {
|
||||
b.iter_with_large_drop(|| black_box(initialize_recognition_model()));
|
||||
});
|
||||
|
||||
group.bench_function("math_model_init", |b| {
|
||||
b.iter_with_large_drop(|| black_box(initialize_math_model()));
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark end-to-end inference pipeline
|
||||
fn bench_e2e_pipeline(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("e2e_inference_pipeline");
|
||||
group.measurement_time(Duration::from_secs(15));
|
||||
|
||||
let image_data = vec![128u8; 384 * 384 * 3];
|
||||
|
||||
group.bench_function("full_pipeline", |b| {
|
||||
b.iter(|| {
|
||||
// Preprocessing
|
||||
let normalized = normalize_tensor(black_box(&image_data));
|
||||
let chw = convert_to_chw(&normalized, 384, 384);
|
||||
let batched = add_batch_dim(&chw);
|
||||
|
||||
// Detection
|
||||
let detection_output = run_detection_model(&batched);
|
||||
let boxes = apply_nms(&detection_output, 0.5);
|
||||
|
||||
// Recognition (simulated for each box)
|
||||
let mut results = Vec::new();
|
||||
for _box in boxes.iter().take(5) {
|
||||
let rec_output = run_recognition_model(&batched);
|
||||
let text = decode_ctc_output(&rec_output);
|
||||
results.push(text);
|
||||
}
|
||||
|
||||
black_box(results)
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Mock implementations
|
||||
|
||||
/// Allocate a flat mock input tensor of `width * height * channels`
/// elements, every element set to 0.5.
fn create_input_tensor(width: u32, height: u32, channels: u32) -> Vec<f32> {
    let len = (width * height * channels) as usize;
    vec![0.5f32; len]
}
|
||||
|
||||
/// Allocate a flat batched tensor: `batch` images of
/// `width * height * channels` elements each, filled with 0.5.
fn create_batch_tensor(batch: usize, width: u32, height: u32, channels: u32) -> Vec<f32> {
    let per_image = (width * height * channels) as usize;
    vec![0.5f32; batch * per_image]
}
|
||||
|
||||
/// Simulate detection-model inference for benchmarking.
///
/// Produces one mock `Detection` per 100 input elements. Boxes are 10x10
/// squares marching along the diagonal, confidences cycle through
/// 0.80..0.99, and class ids cycle through 0..10. The input *values* are
/// never read — only its length matters.
fn run_detection_model(input: &[f32]) -> Vec<Detection> {
    // Simulate model inference
    let output_size = input.len() / 100;
    (0..output_size)
        .map(|i| Detection {
            bbox: [i as f32, i as f32, (i + 10) as f32, (i + 10) as f32],
            confidence: 0.8 + (i % 20) as f32 / 100.0,
            class_id: i % 10,
        })
        .collect()
}
|
||||
|
||||
/// Simulate recognition-model inference for benchmarking.
///
/// The input tensor is ignored by this mock (hence `_input`, which also
/// silences the unused-parameter warning the original produced). Returns
/// a flattened CTC matrix of shape [TIME_STEPS, VOCAB_SIZE] filled with
/// the constant 0.1.
fn run_recognition_model(_input: &[f32]) -> Vec<f32> {
    // Simulate CTC output: [time_steps, vocab_size]
    const TIME_STEPS: usize = 32;
    const VOCAB_SIZE: usize = 64; // must match decode_ctc_output's vocab width
    vec![0.1f32; TIME_STEPS * VOCAB_SIZE]
}
|
||||
|
||||
/// Simulate math-model inference: emits a constant tensor one tenth the
/// length of the input (integer division).
fn run_math_model(input: &[f32]) -> Vec<f32> {
    let out_len = input.len() / 10;
    vec![0.5f32; out_len]
}
|
||||
|
||||
fn initialize_detection_model() -> Vec<u8> {
|
||||
std::thread::sleep(Duration::from_millis(100));
|
||||
vec![0u8; 1024 * 1024]
|
||||
}
|
||||
|
||||
fn initialize_recognition_model() -> Vec<u8> {
|
||||
std::thread::sleep(Duration::from_millis(80));
|
||||
vec![0u8; 512 * 1024]
|
||||
}
|
||||
|
||||
fn initialize_math_model() -> Vec<u8> {
|
||||
std::thread::sleep(Duration::from_millis(120));
|
||||
vec![0u8; 2048 * 1024]
|
||||
}
|
||||
|
||||
/// Scale raw u8 pixel values into [0, 1] floats by dividing by 255.
fn normalize_tensor(data: &[u8]) -> Vec<f32> {
    let mut normalized = Vec::with_capacity(data.len());
    for &byte in data {
        normalized.push(f32::from(byte) / 255.0);
    }
    normalized
}
|
||||
|
||||
/// Standardize raw u8 pixel values with a fixed mean of 128 and standard
/// deviation of 64 (hard-coded mock statistics).
fn standardize_tensor(data: &[u8]) -> Vec<f32> {
    const MEAN: f32 = 128.0;
    const STD: f32 = 64.0;
    data.iter().map(|&px| (f32::from(px) - MEAN) / STD).collect()
}
|
||||
|
||||
/// Convert an interleaved HWC tensor to planar CHW layout.
///
/// The channel count is derived from the data length and the given
/// spatial extent; pixels are visited row-major per channel plane.
/// Panics on `width * height == 0` (division by zero), same as the
/// original.
fn convert_to_chw(data: &[f32], width: u32, height: u32) -> Vec<f32> {
    let pixels = (width * height) as usize;
    let channels = data.len() / pixels;
    let mut chw = Vec::with_capacity(data.len());

    for c in 0..channels {
        // One full channel plane at a time, scanning pixels row-major.
        for pixel in 0..pixels {
            chw.push(data[pixel * channels + c]);
        }
    }

    chw
}
|
||||
|
||||
/// Prepend a batch dimension of size 1.
///
/// The layout is flat, so a batch of one is byte-identical to the input;
/// this is simply a copy of the tensor.
fn add_batch_dim(tensor: &[f32]) -> Vec<f32> {
    Vec::from(tensor)
}
|
||||
|
||||
/// A single mock detection result used by the postprocessing benchmarks.
#[derive(Clone)]
struct Detection {
    // Axis-aligned box as [x1, y1, x2, y2].
    bbox: [f32; 4],
    // Detection score; the generators in this file produce values in
    // roughly 0.5..1.0.
    confidence: f32,
    // Class index; mock generators cycle through 0..10.
    class_id: usize,
}
|
||||
|
||||
/// Build `count` mock detections for the postprocessing benchmarks:
/// 10x10 boxes marching along the diagonal, confidences cycling through
/// 0.50..0.99, class ids cycling through 0..10.
fn create_detection_output(count: usize) -> Vec<Detection> {
    (0..count)
        .map(|i| Detection {
            bbox: [i as f32, i as f32, (i + 10) as f32, (i + 10) as f32],
            confidence: 0.5 + (i % 50) as f32 / 100.0,
            class_id: i % 10,
        })
        .collect()
}
|
||||
|
||||
/// Build a mock flattened CTC output of shape [time_steps, 64], every
/// logit set to 0.1.
fn create_recognition_output(time_steps: usize) -> Vec<f32> {
    std::iter::repeat(0.1f32).take(time_steps * 64).collect()
}
|
||||
|
||||
fn apply_nms(detections: &[Detection], iou_threshold: f32) -> Vec<Detection> {
|
||||
let mut filtered = Vec::new();
|
||||
let mut sorted = detections.to_vec();
|
||||
sorted.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
|
||||
|
||||
for det in sorted {
|
||||
let overlap = filtered
|
||||
.iter()
|
||||
.any(|kept: &Detection| calculate_iou(&det.bbox, &kept.bbox) > iou_threshold);
|
||||
|
||||
if !overlap {
|
||||
filtered.push(det);
|
||||
}
|
||||
}
|
||||
|
||||
filtered
|
||||
}
|
||||
|
||||
/// Intersection-over-union of two axis-aligned boxes in [x1, y1, x2, y2]
/// form. Returns 0.0 when the union area is not positive.
fn calculate_iou(box1: &[f32; 4], box2: &[f32; 4]) -> f32 {
    // Overlap rectangle extents, clamped to zero for disjoint boxes.
    let overlap_w = (box1[2].min(box2[2]) - box1[0].max(box2[0])).max(0.0);
    let overlap_h = (box1[3].min(box2[3]) - box1[1].max(box2[1])).max(0.0);
    let intersection = overlap_w * overlap_h;

    let area1 = (box1[2] - box1[0]) * (box1[3] - box1[1]);
    let area2 = (box2[2] - box2[0]) * (box2[3] - box2[1]);
    let union = area1 + area2 - intersection;

    if union > 0.0 {
        intersection / union
    } else {
        0.0
    }
}
|
||||
|
||||
/// Keep only the detections whose confidence meets `threshold`.
fn filter_by_confidence(detections: &[Detection], threshold: f32) -> Vec<Detection> {
    let mut kept = Vec::new();
    for det in detections {
        if det.confidence >= threshold {
            kept.push(det.clone());
        }
    }
    kept
}
|
||||
|
||||
/// Greedy CTC decoding over logits laid out row-major as
/// [time_steps, VOCAB]. Index 0 is the CTC blank; consecutive repeated
/// argmaxes are collapsed. Characters are mapped into 'a'..='z' by
/// `index % 26`.
///
/// The argmax comparator uses `f32::total_cmp`, so a NaN logit cannot
/// panic (the original `partial_cmp(..).unwrap()` would).
fn decode_ctc_output(logits: &[f32]) -> String {
    // Must match the vocab width produced by run_recognition_model /
    // create_recognition_output.
    const VOCAB: usize = 64;
    let mut result = String::with_capacity(logits.len() / VOCAB);
    let mut prev_char = None;

    // chunks_exact yields exactly len / VOCAB complete time steps,
    // matching the original's manual slicing.
    for step_logits in logits.chunks_exact(VOCAB) {
        let (max_idx, _) = step_logits
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.total_cmp(b))
            .expect("VOCAB > 0, so every chunk is non-empty");

        // Skip blanks (index 0) and collapse consecutive repeats.
        if max_idx > 0 && Some(max_idx) != prev_char {
            result.push((b'a' + max_idx as u8 % 26) as char);
        }

        prev_char = Some(max_idx);
    }

    result
}
|
||||
|
||||
/// Simplified beam-search decoding over logits laid out row-major as
/// [time_steps, VOCAB].
///
/// Mock behavior kept from the original: only the first `beam_width`
/// vocabulary entries are expanded per step (not the top-k by score).
/// Index 0 is treated as the blank symbol and extends a hypothesis with
/// no character.
///
/// Fixes over the original: returns an empty string for
/// `beam_width == 0` instead of panicking on `beams[0]`, and sorts with
/// `f32::total_cmp` so NaN scores cannot panic.
fn beam_search_decode(logits: &[f32], beam_width: usize) -> String {
    const VOCAB: usize = 64;
    if beam_width == 0 {
        return String::new();
    }

    let mut beams: Vec<(String, f32)> = vec![(String::new(), 0.0)];

    for step_logits in logits.chunks_exact(VOCAB) {
        let mut candidates = Vec::with_capacity(beams.len() * beam_width);

        for (text, score) in &beams {
            for (char_idx, &logit) in step_logits.iter().enumerate().take(beam_width) {
                let mut extended = text.clone();
                if char_idx > 0 {
                    extended.push((b'a' + char_idx as u8 % 26) as char);
                }
                candidates.push((extended, score + logit));
            }
        }

        // Keep the top beam_width hypotheses by score, best first.
        candidates.sort_by(|a, b| b.1.total_cmp(&a.1));
        candidates.truncate(beam_width);
        beams = candidates;
    }

    beams.into_iter().next().map(|(text, _)| text).unwrap_or_default()
}
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
bench_text_detection,
|
||||
bench_text_recognition,
|
||||
bench_math_model,
|
||||
bench_tensor_preprocessing,
|
||||
bench_output_postprocessing,
|
||||
bench_batch_inference,
|
||||
bench_model_warmup,
|
||||
bench_e2e_pipeline
|
||||
);
|
||||
criterion_main!(benches);
|
||||
395
vendor/ruvector/examples/scipix/benches/latex_generation.rs
vendored
Normal file
395
vendor/ruvector/examples/scipix/benches/latex_generation.rs
vendored
Normal file
@@ -0,0 +1,395 @@
|
||||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||
use std::time::Duration;
|
||||
|
||||
/// Benchmark simple LaTeX expression generation
|
||||
fn bench_simple_expressions(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("simple_expressions");
|
||||
group.measurement_time(Duration::from_secs(5));
|
||||
|
||||
let test_cases = vec![
|
||||
(
|
||||
"fraction",
|
||||
Expression::Fraction(
|
||||
Box::new(Expression::Number(1)),
|
||||
Box::new(Expression::Number(2)),
|
||||
),
|
||||
),
|
||||
(
|
||||
"power",
|
||||
Expression::Power(
|
||||
Box::new(Expression::Variable("x".to_string())),
|
||||
Box::new(Expression::Number(2)),
|
||||
),
|
||||
),
|
||||
(
|
||||
"sum",
|
||||
Expression::Sum(
|
||||
Box::new(Expression::Number(1)),
|
||||
Box::new(Expression::Number(2)),
|
||||
),
|
||||
),
|
||||
(
|
||||
"product",
|
||||
Expression::Product(
|
||||
Box::new(Expression::Variable("a".to_string())),
|
||||
Box::new(Expression::Variable("b".to_string())),
|
||||
),
|
||||
),
|
||||
];
|
||||
|
||||
for (name, expr) in test_cases {
|
||||
group.bench_with_input(BenchmarkId::new("to_latex", name), &expr, |b, expr| {
|
||||
b.iter(|| black_box(expr.to_latex()));
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark complex LaTeX expression generation
|
||||
fn bench_complex_expressions(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("complex_expressions");
|
||||
group.measurement_time(Duration::from_secs(8));
|
||||
|
||||
// Create complex nested expressions
|
||||
let test_cases = vec![
|
||||
("matrix_2x2", create_matrix(2, 2)),
|
||||
("matrix_3x3", create_matrix(3, 3)),
|
||||
("matrix_4x4", create_matrix(4, 4)),
|
||||
("integral", create_integral()),
|
||||
("summation", create_summation()),
|
||||
("nested_fraction", create_nested_fraction(3)),
|
||||
("polynomial", create_polynomial(5)),
|
||||
];
|
||||
|
||||
for (name, expr) in test_cases {
|
||||
group.bench_with_input(BenchmarkId::new("to_latex", name), &expr, |b, expr| {
|
||||
b.iter(|| black_box(expr.to_latex()));
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark AST traversal performance
|
||||
fn bench_ast_traversal(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("ast_traversal");
|
||||
group.measurement_time(Duration::from_secs(5));
|
||||
|
||||
let depths = [3, 5, 7, 10];
|
||||
|
||||
for depth in depths {
|
||||
let expr = create_nested_expression(depth);
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("depth", depth), &expr, |b, expr| {
|
||||
b.iter(|| black_box(count_nodes(black_box(expr))));
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark string building and concatenation
|
||||
fn bench_string_building(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("string_building");
|
||||
group.measurement_time(Duration::from_secs(5));
|
||||
|
||||
let expr = create_polynomial(20);
|
||||
|
||||
// Compare different string building strategies
|
||||
group.bench_function("to_latex_default", |b| {
|
||||
b.iter(|| black_box(expr.to_latex()));
|
||||
});
|
||||
|
||||
group.bench_function("to_latex_with_capacity", |b| {
|
||||
b.iter(|| black_box(expr.to_latex_with_capacity()));
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark LaTeX escaping and special characters
|
||||
fn bench_latex_escaping(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("latex_escaping");
|
||||
group.measurement_time(Duration::from_secs(5));
|
||||
|
||||
let test_strings = vec![
|
||||
("no_special", "simple text"),
|
||||
("underscores", "var_1 + var_2"),
|
||||
("braces", "{x} + {y}"),
|
||||
("mixed", "α + β_1^2 ∫ dx"),
|
||||
];
|
||||
|
||||
for (name, text) in test_strings {
|
||||
group.bench_with_input(BenchmarkId::new("escape", name), &text, |b, text| {
|
||||
b.iter(|| black_box(escape_latex(black_box(text))));
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark target: LaTeX generation should complete in <5ms
|
||||
fn bench_latency_target(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("latency_target_5ms");
|
||||
group.measurement_time(Duration::from_secs(10));
|
||||
group.sample_size(100);
|
||||
|
||||
// Typical complex expression from OCR
|
||||
let expr = create_typical_ocr_expression();
|
||||
|
||||
group.bench_function("typical_ocr_expression", |b| {
|
||||
b.iter(|| black_box(expr.to_latex()));
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark batch LaTeX generation
|
||||
fn bench_batch_generation(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("batch_generation");
|
||||
group.measurement_time(Duration::from_secs(10));
|
||||
|
||||
let batch_sizes = [10, 50, 100];
|
||||
|
||||
for size in batch_sizes {
|
||||
let expressions: Vec<_> = (0..size).map(|i| create_polynomial(i % 10 + 1)).collect();
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("batch_size", size),
|
||||
&expressions,
|
||||
|b, exprs| {
|
||||
b.iter(|| {
|
||||
let results: Vec<_> = exprs.iter().map(|expr| expr.to_latex()).collect();
|
||||
black_box(results)
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Mock AST and Expression types
|
||||
|
||||
#[derive(Clone)]
|
||||
enum Expression {
|
||||
Number(i32),
|
||||
Variable(String),
|
||||
Fraction(Box<Expression>, Box<Expression>),
|
||||
Power(Box<Expression>, Box<Expression>),
|
||||
Sum(Box<Expression>, Box<Expression>),
|
||||
Product(Box<Expression>, Box<Expression>),
|
||||
Matrix(Vec<Vec<Expression>>),
|
||||
Integral(Box<Expression>, String, String, String),
|
||||
Summation(Box<Expression>, String, String, String),
|
||||
}
|
||||
|
||||
impl Expression {
|
||||
fn to_latex(&self) -> String {
|
||||
match self {
|
||||
Expression::Number(n) => n.to_string(),
|
||||
Expression::Variable(v) => v.clone(),
|
||||
Expression::Fraction(num, den) => {
|
||||
format!("\\frac{{{}}}{{{}}}", num.to_latex(), den.to_latex())
|
||||
}
|
||||
Expression::Power(base, exp) => {
|
||||
format!("{{{}}}^{{{}}}", base.to_latex(), exp.to_latex())
|
||||
}
|
||||
Expression::Sum(a, b) => {
|
||||
format!("{} + {}", a.to_latex(), b.to_latex())
|
||||
}
|
||||
Expression::Product(a, b) => {
|
||||
format!("{} \\cdot {}", a.to_latex(), b.to_latex())
|
||||
}
|
||||
Expression::Matrix(rows) => {
|
||||
let mut result = String::from("\\begin{bmatrix}");
|
||||
for (i, row) in rows.iter().enumerate() {
|
||||
for (j, cell) in row.iter().enumerate() {
|
||||
result.push_str(&cell.to_latex());
|
||||
if j < row.len() - 1 {
|
||||
result.push_str(" & ");
|
||||
}
|
||||
}
|
||||
if i < rows.len() - 1 {
|
||||
result.push_str(" \\\\ ");
|
||||
}
|
||||
}
|
||||
result.push_str("\\end{bmatrix}");
|
||||
result
|
||||
}
|
||||
Expression::Integral(expr, var, lower, upper) => {
|
||||
format!(
|
||||
"\\int_{{{}}}^{{{}}} {} \\, d{}",
|
||||
lower,
|
||||
upper,
|
||||
expr.to_latex(),
|
||||
var
|
||||
)
|
||||
}
|
||||
Expression::Summation(expr, var, lower, upper) => {
|
||||
format!(
|
||||
"\\sum_{{{}={}}}^{{{}}} {}",
|
||||
var,
|
||||
lower,
|
||||
upper,
|
||||
expr.to_latex()
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn to_latex_with_capacity(&self) -> String {
|
||||
let mut result = String::with_capacity(256);
|
||||
self.append_latex(&mut result);
|
||||
result
|
||||
}
|
||||
|
||||
fn append_latex(&self, buffer: &mut String) {
|
||||
buffer.push_str(&self.to_latex());
|
||||
}
|
||||
}
|
||||
|
||||
fn create_matrix(rows: usize, cols: usize) -> Expression {
|
||||
let matrix = (0..rows)
|
||||
.map(|i| {
|
||||
(0..cols)
|
||||
.map(|j| Expression::Number((i * cols + j) as i32))
|
||||
.collect()
|
||||
})
|
||||
.collect();
|
||||
Expression::Matrix(matrix)
|
||||
}
|
||||
|
||||
fn create_integral() -> Expression {
|
||||
Expression::Integral(
|
||||
Box::new(Expression::Power(
|
||||
Box::new(Expression::Variable("x".to_string())),
|
||||
Box::new(Expression::Number(2)),
|
||||
)),
|
||||
"x".to_string(),
|
||||
"0".to_string(),
|
||||
"1".to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
fn create_summation() -> Expression {
|
||||
Expression::Summation(
|
||||
Box::new(Expression::Power(
|
||||
Box::new(Expression::Variable("i".to_string())),
|
||||
Box::new(Expression::Number(2)),
|
||||
)),
|
||||
"i".to_string(),
|
||||
"1".to_string(),
|
||||
"n".to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
fn create_nested_fraction(depth: usize) -> Expression {
|
||||
if depth == 0 {
|
||||
Expression::Number(1)
|
||||
} else {
|
||||
Expression::Fraction(
|
||||
Box::new(Expression::Number(1)),
|
||||
Box::new(create_nested_fraction(depth - 1)),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn create_polynomial(degree: usize) -> Expression {
|
||||
let mut expr = Expression::Number(0);
|
||||
for i in 0..=degree {
|
||||
let term = Expression::Product(
|
||||
Box::new(Expression::Number(i as i32 + 1)),
|
||||
Box::new(Expression::Power(
|
||||
Box::new(Expression::Variable("x".to_string())),
|
||||
Box::new(Expression::Number(i as i32)),
|
||||
)),
|
||||
);
|
||||
expr = Expression::Sum(Box::new(expr), Box::new(term));
|
||||
}
|
||||
expr
|
||||
}
|
||||
|
||||
fn create_nested_expression(depth: usize) -> Expression {
|
||||
if depth == 0 {
|
||||
Expression::Variable("x".to_string())
|
||||
} else {
|
||||
Expression::Sum(
|
||||
Box::new(create_nested_expression(depth - 1)),
|
||||
Box::new(Expression::Number(depth as i32)),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn create_typical_ocr_expression() -> Expression {
|
||||
// Typical expression: (a + b)^2 = a^2 + 2ab + b^2
|
||||
Expression::Sum(
|
||||
Box::new(Expression::Sum(
|
||||
Box::new(Expression::Power(
|
||||
Box::new(Expression::Variable("a".to_string())),
|
||||
Box::new(Expression::Number(2)),
|
||||
)),
|
||||
Box::new(Expression::Product(
|
||||
Box::new(Expression::Product(
|
||||
Box::new(Expression::Number(2)),
|
||||
Box::new(Expression::Variable("a".to_string())),
|
||||
)),
|
||||
Box::new(Expression::Variable("b".to_string())),
|
||||
)),
|
||||
)),
|
||||
Box::new(Expression::Power(
|
||||
Box::new(Expression::Variable("b".to_string())),
|
||||
Box::new(Expression::Number(2)),
|
||||
)),
|
||||
)
|
||||
}
|
||||
|
||||
/// Count every node in the expression tree (leaves and interior nodes alike).
fn count_nodes(expr: &Expression) -> usize {
    match expr {
        // Leaves contribute exactly one node.
        Expression::Number(_) | Expression::Variable(_) => 1,
        // Binary operators: the node itself plus both subtrees.
        Expression::Fraction(a, b)
        | Expression::Power(a, b)
        | Expression::Sum(a, b)
        | Expression::Product(a, b) => 1 + count_nodes(a) + count_nodes(b),
        Expression::Matrix(rows) => {
            // The matrix node itself plus every cell's subtree.
            1 + rows
                .iter()
                .map(|row| row.iter().map(|e| count_nodes(e)).sum::<usize>())
                .sum::<usize>()
        }
        // Bound/variable strings are not Expression nodes, so only the
        // body subtree is counted.
        Expression::Integral(expr, _, _, _) | Expression::Summation(expr, _, _, _) => {
            1 + count_nodes(expr)
        }
    }
}
|
||||
|
||||
/// Escape LaTeX special characters in `text`.
///
/// Builds the result in one pre-sized buffer with `push_str`/`push`
/// instead of allocating a `String` per character as the original
/// `map(..).collect()` did; the output is identical. A literal `\` is
/// escaped as `\textbackslash{}`.
fn escape_latex(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for c in text.chars() {
        match c {
            '_' => escaped.push_str("\\_"),
            '{' => escaped.push_str("\\{"),
            '}' => escaped.push_str("\\}"),
            '&' => escaped.push_str("\\&"),
            '%' => escaped.push_str("\\%"),
            '$' => escaped.push_str("\\$"),
            '#' => escaped.push_str("\\#"),
            '^' => escaped.push_str("\\^{}"),
            '~' => escaped.push_str("\\~{}"),
            '\\' => escaped.push_str("\\textbackslash{}"),
            _ => escaped.push(c),
        }
    }
    escaped
}
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
bench_simple_expressions,
|
||||
bench_complex_expressions,
|
||||
bench_ast_traversal,
|
||||
bench_string_building,
|
||||
bench_latex_escaping,
|
||||
bench_latency_target,
|
||||
bench_batch_generation
|
||||
);
|
||||
criterion_main!(benches);
|
||||
437
vendor/ruvector/examples/scipix/benches/memory.rs
vendored
Normal file
437
vendor/ruvector/examples/scipix/benches/memory.rs
vendored
Normal file
@@ -0,0 +1,437 @@
|
||||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||
use std::time::Duration;
|
||||
|
||||
/// Benchmark peak memory during inference
|
||||
fn bench_peak_memory_inference(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("peak_memory_inference");
|
||||
group.measurement_time(Duration::from_secs(10));
|
||||
|
||||
let sizes = [(224, 224), (384, 384), (512, 512)];
|
||||
|
||||
for (w, h) in sizes {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("single_inference", format!("{}x{}", w, h)),
|
||||
&(w, h),
|
||||
|b, &(width, height)| {
|
||||
b.iter_with_large_drop(|| {
|
||||
let memory_tracker = MemoryTracker::new();
|
||||
|
||||
// Simulate model loading
|
||||
let model = load_model();
|
||||
|
||||
// Create input
|
||||
let image = create_image(width, height);
|
||||
|
||||
// Preprocessing
|
||||
let preprocessed = preprocess(image);
|
||||
|
||||
// Inference
|
||||
let output = run_inference(&model, preprocessed);
|
||||
|
||||
// Postprocessing
|
||||
let result = postprocess(output);
|
||||
|
||||
let peak_memory = memory_tracker.peak_usage();
|
||||
black_box((result, peak_memory))
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark memory per image in batch
|
||||
fn bench_memory_per_batch_image(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("memory_per_batch_image");
|
||||
group.measurement_time(Duration::from_secs(15));
|
||||
|
||||
let batch_sizes = [1, 4, 8, 16, 32];
|
||||
let size = (384, 384);
|
||||
|
||||
for batch_size in batch_sizes {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("batch_inference", batch_size),
|
||||
&batch_size,
|
||||
|b, &size| {
|
||||
b.iter_with_large_drop(|| {
|
||||
let memory_tracker = MemoryTracker::new();
|
||||
|
||||
let model = load_model();
|
||||
let batch = create_batch(size, 384, 384);
|
||||
let output = run_batch_inference(&model, batch);
|
||||
|
||||
let total_memory = memory_tracker.peak_usage();
|
||||
let per_image = total_memory / size;
|
||||
|
||||
black_box((output, per_image))
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark model loading memory
|
||||
fn bench_model_loading_memory(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("model_loading_memory");
|
||||
group.measurement_time(Duration::from_secs(10));
|
||||
|
||||
group.bench_function("detection_model", |b| {
|
||||
b.iter_with_large_drop(|| {
|
||||
let tracker = MemoryTracker::new();
|
||||
let model = load_detection_model();
|
||||
let memory = tracker.peak_usage();
|
||||
black_box((model, memory))
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("recognition_model", |b| {
|
||||
b.iter_with_large_drop(|| {
|
||||
let tracker = MemoryTracker::new();
|
||||
let model = load_recognition_model();
|
||||
let memory = tracker.peak_usage();
|
||||
black_box((model, memory))
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("math_model", |b| {
|
||||
b.iter_with_large_drop(|| {
|
||||
let tracker = MemoryTracker::new();
|
||||
let model = load_math_model();
|
||||
let memory = tracker.peak_usage();
|
||||
black_box((model, memory))
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("all_models", |b| {
|
||||
b.iter_with_large_drop(|| {
|
||||
let tracker = MemoryTracker::new();
|
||||
let detection = load_detection_model();
|
||||
let recognition = load_recognition_model();
|
||||
let math = load_math_model();
|
||||
let total_memory = tracker.peak_usage();
|
||||
black_box((detection, recognition, math, total_memory))
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark memory growth over time
|
||||
fn bench_memory_growth(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("memory_growth");
|
||||
group.measurement_time(Duration::from_secs(20));
|
||||
|
||||
group.bench_function("sequential_inferences", |b| {
|
||||
b.iter_with_large_drop(|| {
|
||||
let tracker = MemoryTracker::new();
|
||||
let model = load_model();
|
||||
let mut memory_samples = Vec::new();
|
||||
|
||||
for i in 0..100 {
|
||||
let image = create_image(384, 384);
|
||||
let preprocessed = preprocess(image);
|
||||
let _output = run_inference(&model, preprocessed);
|
||||
|
||||
if i % 10 == 0 {
|
||||
memory_samples.push(tracker.current_usage());
|
||||
}
|
||||
}
|
||||
|
||||
let growth = calculate_memory_growth(&memory_samples);
|
||||
black_box((memory_samples, growth))
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark memory fragmentation
|
||||
fn bench_memory_fragmentation(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("memory_fragmentation");
|
||||
group.measurement_time(Duration::from_secs(10));
|
||||
|
||||
group.bench_function("allocate_deallocate_pattern", |b| {
|
||||
b.iter(|| {
|
||||
let mut allocations = Vec::new();
|
||||
|
||||
// Allocate various sizes
|
||||
for i in 0..100 {
|
||||
let size = (i % 10 + 1) * 1024;
|
||||
allocations.push(vec![0u8; size]);
|
||||
}
|
||||
|
||||
// Deallocate every other allocation
|
||||
allocations = allocations
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, v)| if i % 2 == 0 { Some(v) } else { None })
|
||||
.collect();
|
||||
|
||||
// Allocate more
|
||||
for i in 0..50 {
|
||||
let size = (i % 5 + 1) * 2048;
|
||||
allocations.push(vec![0u8; size]);
|
||||
}
|
||||
|
||||
black_box(allocations)
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark cache memory overhead
|
||||
fn bench_cache_memory(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("cache_memory");
|
||||
group.measurement_time(Duration::from_secs(10));
|
||||
|
||||
let cache_sizes = [100, 1000, 10000];
|
||||
|
||||
for cache_size in cache_sizes {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("embedding_cache", cache_size),
|
||||
&cache_size,
|
||||
|b, &size| {
|
||||
b.iter_with_large_drop(|| {
|
||||
let tracker = MemoryTracker::new();
|
||||
let cache = create_embedding_cache(size);
|
||||
let memory = tracker.peak_usage();
|
||||
black_box((cache, memory))
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark memory pool efficiency
|
||||
fn bench_memory_pools(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("memory_pools");
|
||||
group.measurement_time(Duration::from_secs(8));
|
||||
|
||||
group.bench_function("without_pool", |b| {
|
||||
b.iter(|| {
|
||||
let mut allocations = Vec::new();
|
||||
for _ in 0..100 {
|
||||
let buffer = vec![0u8; 1024 * 1024];
|
||||
allocations.push(buffer);
|
||||
}
|
||||
black_box(allocations)
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("with_pool", |b| {
|
||||
let mut pool = MemoryPool::new(1024 * 1024, 100);
|
||||
b.iter(|| {
|
||||
let mut handles = Vec::new();
|
||||
for _ in 0..100 {
|
||||
let handle = pool.allocate();
|
||||
handles.push(handle);
|
||||
}
|
||||
black_box(handles)
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Benchmark tensor memory layouts
|
||||
fn bench_tensor_layouts(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("tensor_layouts");
|
||||
group.measurement_time(Duration::from_secs(8));
|
||||
|
||||
let size = (384, 384, 3);
|
||||
|
||||
group.bench_function("hwc_layout", |b| {
|
||||
b.iter(|| {
|
||||
let tracker = MemoryTracker::new();
|
||||
let tensor = create_hwc_tensor(size.0, size.1, size.2);
|
||||
let memory = tracker.peak_usage();
|
||||
black_box((tensor, memory))
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("chw_layout", |b| {
|
||||
b.iter(|| {
|
||||
let tracker = MemoryTracker::new();
|
||||
let tensor = create_chw_tensor(size.0, size.1, size.2);
|
||||
let memory = tracker.peak_usage();
|
||||
black_box((tensor, memory))
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("layout_conversion", |b| {
|
||||
let hwc = create_hwc_tensor(size.0, size.1, size.2);
|
||||
b.iter(|| {
|
||||
let tracker = MemoryTracker::new();
|
||||
let chw = convert_hwc_to_chw(&hwc, size.0, size.1, size.2);
|
||||
let memory = tracker.peak_usage();
|
||||
black_box((chw, memory))
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Mock implementations
|
||||
|
||||
/// Tracks memory usage relative to a baseline captured at construction.
///
/// NOTE(review): `get_current_memory_usage()` below is a stub that always
/// returns 0, so every reading from this tracker is currently 0 — confirm
/// before trusting the reported numbers.
struct MemoryTracker {
    // Process memory usage at construction; readings are relative to this.
    initial_usage: usize,
    // Highest usage observed across peak_usage() calls (sampled at call
    // time, not continuously).
    peak: usize,
}
|
||||
|
||||
impl MemoryTracker {
    /// Capture the current usage as the baseline for later readings.
    fn new() -> Self {
        Self {
            initial_usage: get_current_memory_usage(),
            peak: 0,
        }
    }

    /// Usage since construction.
    ///
    /// NOTE(review): this subtraction underflows (panics in debug builds)
    /// if usage ever drops below the baseline; harmless today only
    /// because the stub always returns 0.
    fn current_usage(&self) -> usize {
        get_current_memory_usage() - self.initial_usage
    }

    /// Maximum of the current reading and all previous peak_usage()
    /// samples. This is a sampled peak: allocations freed between calls
    /// are never observed.
    fn peak_usage(&mut self) -> usize {
        let current = self.current_usage();
        self.peak = self.peak.max(current);
        self.peak
    }
}
|
||||
|
||||
fn get_current_memory_usage() -> usize {
|
||||
// In production, this would query actual memory usage
|
||||
// For benchmarking, we'll estimate based on allocations
|
||||
0
|
||||
}
|
||||
|
||||
type Model = Vec<u8>;
|
||||
type Image = Vec<u8>;
|
||||
type Tensor = Vec<f32>;
|
||||
type Output = Vec<f32>;
|
||||
|
||||
fn load_model() -> Model {
|
||||
vec![0u8; 100 * 1024 * 1024] // 100 MB model
|
||||
}
|
||||
|
||||
fn load_detection_model() -> Model {
|
||||
vec![0u8; 150 * 1024 * 1024] // 150 MB
|
||||
}
|
||||
|
||||
fn load_recognition_model() -> Model {
|
||||
vec![0u8; 80 * 1024 * 1024] // 80 MB
|
||||
}
|
||||
|
||||
fn load_math_model() -> Model {
|
||||
vec![0u8; 120 * 1024 * 1024] // 120 MB
|
||||
}
|
||||
|
||||
fn create_image(width: u32, height: u32) -> Image {
|
||||
vec![128u8; (width * height * 3) as usize]
|
||||
}
|
||||
|
||||
fn create_batch(batch_size: usize, width: u32, height: u32) -> Vec<Image> {
|
||||
(0..batch_size)
|
||||
.map(|_| create_image(width, height))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn preprocess(image: Image) -> Tensor {
|
||||
image.iter().map(|&x| x as f32 / 255.0).collect()
|
||||
}
|
||||
|
||||
fn run_inference(_model: &Model, input: Tensor) -> Output {
|
||||
input.iter().map(|&x| x * 2.0).collect()
|
||||
}
|
||||
|
||||
fn run_batch_inference(_model: &Model, batch: Vec<Image>) -> Vec<Output> {
|
||||
batch
|
||||
.into_iter()
|
||||
.map(|img| {
|
||||
let tensor = preprocess(img);
|
||||
tensor.iter().map(|&x| x * 2.0).collect()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn postprocess(output: Output) -> String {
|
||||
format!("result_{:.2}", output[0])
|
||||
}
|
||||
|
||||
/// Relative growth between the first and last memory sample.
///
/// Returns 0.0 when there are fewer than two samples or when the first
/// sample is zero. The original divided by `first` unguarded, producing
/// NaN or infinity whenever the baseline was 0 — which the stub memory
/// tracker in this file always reports.
fn calculate_memory_growth(samples: &[usize]) -> f64 {
    if samples.len() < 2 {
        return 0.0;
    }

    let first = samples[0] as f64;
    let last = samples[samples.len() - 1] as f64;

    if first == 0.0 {
        // No meaningful baseline to measure growth against.
        return 0.0;
    }

    (last - first) / first
}
|
||||
|
||||
/// Allocate `size` mock embedding vectors of dimension 512, every element
/// set to 0.5.
fn create_embedding_cache(size: usize) -> Vec<Vec<f32>> {
    let mut cache = Vec::with_capacity(size);
    for _ in 0..size {
        cache.push(vec![0.5f32; 512]);
    }
    cache
}
|
||||
|
||||
struct MemoryPool {
|
||||
block_size: usize,
|
||||
blocks: Vec<Vec<u8>>,
|
||||
available: Vec<usize>,
|
||||
}
|
||||
|
||||
impl MemoryPool {
    /// Pre-allocate `count` blocks of `block_size` bytes each; every
    /// block index starts out on the available list.
    fn new(block_size: usize, count: usize) -> Self {
        let blocks = (0..count).map(|_| vec![0u8; block_size]).collect();
        let available = (0..count).collect();

        Self {
            block_size,
            blocks,
            available,
        }
    }

    /// Hand out the index of a free block, or None when the pool is
    /// exhausted.
    ///
    /// NOTE(review): there is no release method and nothing in this file
    /// pushes indices back onto `available`, so after `count` allocations
    /// every call returns None — confirm callers account for this.
    fn allocate(&mut self) -> Option<usize> {
        self.available.pop()
    }
}
|
||||
|
||||
fn create_hwc_tensor(height: u32, width: u32, channels: u32) -> Vec<f32> {
|
||||
vec![0.5f32; (height * width * channels) as usize]
|
||||
}
|
||||
|
||||
/// Allocate a constant-valued tensor in CHW (channels, height, width) layout.
/// Same element count as the HWC variant; only the nominal layout differs.
fn create_chw_tensor(height: u32, width: u32, channels: u32) -> Vec<f32> {
    let len = (channels * height * width) as usize;
    std::iter::repeat(0.5f32).take(len).collect()
}
|
||||
|
||||
/// Re-layout a row-major HWC tensor into CHW order.
/// Source index for (h, w, c) is `(h * width + w) * channels + c`; the output
/// is written channel plane by channel plane.
fn convert_hwc_to_chw(hwc: &[f32], height: u32, width: u32, channels: u32) -> Vec<f32> {
    let (h_dim, w_dim, c_dim) = (height as usize, width as usize, channels as usize);
    let mut chw = Vec::with_capacity(hwc.len());

    for c in 0..c_dim {
        for h in 0..h_dim {
            for w in 0..w_dim {
                chw.push(hwc[(h * w_dim + w) * c_dim + c]);
            }
        }
    }

    chw
}
|
||||
|
||||
// Register every memory benchmark in this file with criterion. The bench
// functions named here are defined earlier in the file (before this excerpt).
criterion_group!(
    benches,
    bench_peak_memory_inference,
    bench_memory_per_batch_image,
    bench_model_loading_memory,
    bench_memory_growth,
    bench_memory_fragmentation,
    bench_cache_memory,
    bench_memory_pools,
    bench_tensor_layouts
);
// Expands to the binary's `main`, which runs the `benches` group.
criterion_main!(benches);
|
||||
194
vendor/ruvector/examples/scipix/benches/ocr_latency.rs
vendored
Normal file
194
vendor/ruvector/examples/scipix/benches/ocr_latency.rs
vendored
Normal file
@@ -0,0 +1,194 @@
|
||||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||
use std::time::Duration;
|
||||
|
||||
/// Benchmark single image OCR at various sizes.
///
/// Runs the mock OCR pipeline (preprocess -> features -> recognize) over a
/// range of square resolutions so per-resolution latency can be compared.
fn bench_single_image(c: &mut Criterion) {
    let mut group = c.benchmark_group("single_image_ocr");
    group.measurement_time(Duration::from_secs(10));
    group.sample_size(50);

    // Test various image sizes
    let sizes = [
        (224, 224),   // Small
        (384, 384),   // Medium
        (512, 512),   // Large
        (768, 768),   // Extra large
        (1024, 1024), // Very large
    ];

    for (w, h) in sizes {
        group.bench_with_input(
            BenchmarkId::new("resolution", format!("{}x{}", w, h)),
            &(w, h),
            |b, &(width, height)| {
                // Create synthetic image data outside b.iter so allocation
                // is not part of the timed work.
                let image_data = vec![128u8; (width * height * 3) as usize];

                b.iter(|| {
                    // Simulate OCR processing pipeline.
                    // In production, this would call actual OCR functions.
                    let preprocessed = preprocess_image(black_box(&image_data), width, height);
                    let features = extract_features(black_box(&preprocessed));
                    let text = recognize_text(black_box(&features));
                    black_box(text)
                });
            },
        );
    }

    group.finish();
}
|
||||
|
||||
/// Benchmark batch processing with various batch sizes.
///
/// Throughput scaling check: the same 384x384 mock pipeline is applied to
/// batches of 1..32 images, processed sequentially inside the timed loop.
fn bench_batch_processing(c: &mut Criterion) {
    let mut group = c.benchmark_group("batch_processing");
    group.measurement_time(Duration::from_secs(15));
    group.sample_size(30);

    let batch_sizes = [1, 4, 8, 16, 32];
    let image_size = (384, 384);

    for batch_size in batch_sizes {
        group.bench_with_input(
            BenchmarkId::new("batch_size", batch_size),
            &batch_size,
            |b, &size| {
                // Create batch of synthetic images once, outside the timed loop.
                let images: Vec<Vec<u8>> = (0..size)
                    .map(|_| vec![128u8; (image_size.0 * image_size.1 * 3) as usize])
                    .collect();

                b.iter(|| {
                    // Process entire batch (sequentially; no real batching).
                    let results: Vec<_> = images
                        .iter()
                        .map(|img| {
                            let preprocessed =
                                preprocess_image(black_box(img), image_size.0, image_size.1);
                            let features = extract_features(black_box(&preprocessed));
                            recognize_text(black_box(&features))
                        })
                        .collect();
                    black_box(results)
                });
            },
        );
    }

    group.finish();
}
|
||||
|
||||
/// Benchmark cold start vs warm model performance.
///
/// The cold path re-runs `initialize_model` (which sleeps 50 ms to simulate
/// weight loading) on every iteration, so it is dominated by that fixed cost;
/// the warm path initializes once outside the timed loop.
fn bench_cold_vs_warm(c: &mut Criterion) {
    let mut group = c.benchmark_group("cold_vs_warm");
    group.measurement_time(Duration::from_secs(10));

    let image_data = vec![128u8; (384 * 384 * 3) as usize];

    // Cold start benchmark - model initialization included.
    // iter_with_large_drop keeps the drop of the produced values out of the
    // measured time.
    group.bench_function("cold_start", |b| {
        b.iter_with_large_drop(|| {
            // Simulate model initialization + inference
            let _model = initialize_model();
            let preprocessed = preprocess_image(black_box(&image_data), 384, 384);
            let features = extract_features(black_box(&preprocessed));
            let text = recognize_text(black_box(&features));
            black_box(text)
        });
    });

    // Warm model benchmark - model already initialized
    group.bench_function("warm_inference", |b| {
        let _model = initialize_model(); // Initialize once outside benchmark

        b.iter(|| {
            let preprocessed = preprocess_image(black_box(&image_data), 384, 384);
            let features = extract_features(black_box(&preprocessed));
            let text = recognize_text(black_box(&features));
            black_box(text)
        });
    });

    group.finish();
}
|
||||
|
||||
/// Benchmark P95 and P99 latency targets.
///
/// Runs the 384x384 mock pipeline with a long measurement window and a large
/// sample count so criterion's distribution estimates (and thus tail
/// percentiles) are meaningful. The "100ms" in the benchmark id is the
/// target, not something enforced here.
fn bench_latency_percentiles(c: &mut Criterion) {
    let mut group = c.benchmark_group("latency_percentiles");
    group.measurement_time(Duration::from_secs(20));
    group.sample_size(100); // More samples for better percentile accuracy

    let image_data = vec![128u8; (384 * 384 * 3) as usize];

    group.bench_function("p95_target_100ms", |b| {
        b.iter(|| {
            let preprocessed = preprocess_image(black_box(&image_data), 384, 384);
            let features = extract_features(black_box(&preprocessed));
            let text = recognize_text(black_box(&features));
            black_box(text)
        });
    });

    group.finish();
}
|
||||
|
||||
/// Benchmark throughput (images per second).
///
/// Declares a throughput of one element per iteration so criterion reports
/// results as elements/second in addition to raw time.
fn bench_throughput(c: &mut Criterion) {
    let mut group = c.benchmark_group("throughput");
    group.measurement_time(Duration::from_secs(15));
    group.throughput(criterion::Throughput::Elements(1));

    let image_data = vec![128u8; (384 * 384 * 3) as usize];

    group.bench_function("images_per_second", |b| {
        b.iter(|| {
            let preprocessed = preprocess_image(black_box(&image_data), 384, 384);
            let features = extract_features(black_box(&preprocessed));
            let text = recognize_text(black_box(&features));
            black_box(text)
        });
    });

    group.finish();
}
|
||||
|
||||
// Mock implementations for benchmarking
|
||||
// In production, these would be actual OCR pipeline functions
|
||||
|
||||
/// Mock model load: pay a fixed 50 ms "load from disk" cost, then return a
/// dummy 1 KiB weight buffer.
fn initialize_model() -> Vec<u8> {
    let load_cost = Duration::from_millis(50);
    std::thread::sleep(load_cost);
    vec![0u8; 1024]
}
|
||||
|
||||
/// Mock preprocessing: collapse each RGB triple to its unweighted average
/// (a cheap grayscale stand-in).
///
/// `width`/`height` only pre-size the output buffer; the pixel count is
/// driven by `data`. Trailing bytes that do not form a complete RGB triple
/// are ignored — the previous `chunks(3)` version panicked on them via an
/// out-of-bounds `chunk[1]`/`chunk[2]`.
fn preprocess_image(data: &[u8], width: u32, height: u32) -> Vec<u8> {
    let mut processed = Vec::with_capacity((width * height) as usize);
    for px in data.chunks_exact(3) {
        // Convert to grayscale by simple channel average.
        let gray = (px[0] as u32 + px[1] as u32 + px[2] as u32) / 3;
        processed.push(gray as u8);
    }
    processed
}
|
||||
|
||||
/// Mock feature extraction: map each byte into a [0, 1] float feature.
fn extract_features(data: &[u8]) -> Vec<f32> {
    let mut features = Vec::with_capacity(data.len());
    for &byte in data {
        features.push(byte as f32 / 255.0);
    }
    features
}
|
||||
|
||||
fn recognize_text(features: &[f32]) -> String {
|
||||
// Simulate text recognition
|
||||
let sum: f32 = features.iter().take(100).sum();
|
||||
format!("recognized_text_{:.2}", sum)
|
||||
}
|
||||
|
||||
// Register all OCR-latency benchmarks defined in this file with criterion.
criterion_group!(
    benches,
    bench_single_image,
    bench_batch_processing,
    bench_cold_vs_warm,
    bench_latency_percentiles,
    bench_throughput
);
// Expands to the binary's `main`, which runs the `benches` group.
criterion_main!(benches);
|
||||
224
vendor/ruvector/examples/scipix/benches/optimization_bench.rs
vendored
Normal file
224
vendor/ruvector/examples/scipix/benches/optimization_bench.rs
vendored
Normal file
@@ -0,0 +1,224 @@
|
||||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||
use scipix_ocr::optimize::*;
|
||||
|
||||
/// Compare SIMD vs scalar RGBA->grayscale conversion at several image sizes.
/// The scalar reference uses the same fixed-point weights (77/150/29, >>8)
/// so both variants do equivalent work per pixel.
fn bench_grayscale(c: &mut Criterion) {
    let mut group = c.benchmark_group("grayscale");

    for size in [256, 512, 1024, 2048].iter() {
        let pixels = size * size;
        // Deterministic synthetic RGBA input and a reusable output buffer,
        // allocated outside the timed loops.
        let rgba: Vec<u8> = (0..pixels * 4).map(|i| (i % 256) as u8).collect();
        let mut gray = vec![0u8; pixels];

        group.throughput(Throughput::Elements(pixels as u64));

        // Benchmark SIMD version
        group.bench_with_input(BenchmarkId::new("simd", size), size, |b, _| {
            b.iter(|| {
                simd::simd_grayscale(black_box(&rgba), black_box(&mut gray));
            });
        });

        // Benchmark scalar version
        group.bench_with_input(BenchmarkId::new("scalar", size), size, |b, _| {
            b.iter(|| {
                for (i, chunk) in rgba.chunks_exact(4).enumerate() {
                    let r = chunk[0] as u32;
                    let g = chunk[1] as u32;
                    let b = chunk[2] as u32;
                    gray[i] = ((r * 77 + g * 150 + b * 29) >> 8) as u8;
                }
            });
        });
    }

    group.finish();
}
|
||||
|
||||
/// Compare SIMD vs scalar binary thresholding (cutoff 128 -> 0/255) at
/// several buffer sizes.
fn bench_threshold(c: &mut Criterion) {
    let mut group = c.benchmark_group("threshold");

    for size in [1024, 4096, 16384, 65536].iter() {
        // Deterministic grayscale input and a reusable output buffer.
        let gray: Vec<u8> = (0..*size).map(|i| (i % 256) as u8).collect();
        let mut out = vec![0u8; *size];

        group.throughput(Throughput::Elements(*size as u64));

        // SIMD version
        group.bench_with_input(BenchmarkId::new("simd", size), size, |b, _| {
            b.iter(|| {
                simd::simd_threshold(black_box(&gray), black_box(128), black_box(&mut out));
            });
        });

        // Scalar version (same cutoff semantics: >= 128 maps to 255)
        group.bench_with_input(BenchmarkId::new("scalar", size), size, |b, _| {
            b.iter(|| {
                for (g, o) in gray.iter().zip(out.iter_mut()) {
                    *o = if *g >= 128 { 255 } else { 0 };
                }
            });
        });
    }

    group.finish();
}
|
||||
|
||||
fn bench_normalize(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("normalize");
|
||||
|
||||
for size in [128, 512, 2048, 8192].iter() {
|
||||
let mut data: Vec<f32> = (0..*size).map(|i| i as f32).collect();
|
||||
|
||||
group.throughput(Throughput::Elements(*size as u64));
|
||||
|
||||
// SIMD version
|
||||
group.bench_with_input(BenchmarkId::new("simd", size), size, |b, _| {
|
||||
let mut data_copy = data.clone();
|
||||
b.iter(|| {
|
||||
simd::simd_normalize(black_box(&mut data_copy));
|
||||
});
|
||||
});
|
||||
|
||||
// Scalar version
|
||||
group.bench_with_input(BenchmarkId::new("scalar", size), size, |b, _| {
|
||||
let mut data_copy = data.clone();
|
||||
b.iter(|| {
|
||||
let sum: f32 = data_copy.iter().sum();
|
||||
let mean = sum / data_copy.len() as f32;
|
||||
let variance: f32 = data_copy.iter().map(|x| (x - mean).powi(2)).sum::<f32>()
|
||||
/ data_copy.len() as f32;
|
||||
let std_dev = variance.sqrt() + 1e-8;
|
||||
for x in data_copy.iter_mut() {
|
||||
*x = (*x - mean) / std_dev;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Compare chunked parallel map against a plain sequential iterator map over
/// the same polynomial per-element work.
///
/// NOTE(review): the parallel closure clones `data` inside the timed loop
/// while the sequential one only borrows it, so the comparison includes the
/// clone cost on the parallel side — confirm this is intended.
fn bench_parallel_map(c: &mut Criterion) {
    let mut group = c.benchmark_group("parallel_map");

    for size in [100, 1000, 10000].iter() {
        let data: Vec<i32> = (0..*size).collect();

        group.throughput(Throughput::Elements(*size as u64));

        // Parallel version (chunk size 100)
        group.bench_with_input(BenchmarkId::new("parallel", size), size, |b, _| {
            b.iter(|| {
                parallel::parallel_map_chunked(black_box(data.clone()), 100, |x| x * x + x * 2 + 1)
            });
        });

        // Sequential version
        group.bench_with_input(BenchmarkId::new("sequential", size), size, |b, _| {
            b.iter(|| data.iter().map(|&x| x * x + x * 2 + 1).collect::<Vec<_>>());
        });
    }

    group.finish();
}
|
||||
|
||||
/// Compare acquiring a buffer from a pre-warmed pool against a fresh
/// `Vec::with_capacity` allocation; both variants then write 512 bytes.
fn bench_buffer_pool(c: &mut Criterion) {
    let mut group = c.benchmark_group("buffer_pool");

    // Pool created once, outside the timed loops (factory, 10 warm, cap 100 —
    // argument meanings per scipix_ocr::optimize::memory::BufferPool).
    let pool = memory::BufferPool::new(|| Vec::with_capacity(1024), 10, 100);

    // Benchmark pooled allocation
    group.bench_function("pooled", |b| {
        b.iter(|| {
            let mut buf = pool.acquire();
            buf.extend_from_slice(&[0u8; 512]);
            black_box(&buf);
        });
    });

    // Benchmark direct allocation
    group.bench_function("direct", |b| {
        b.iter(|| {
            let mut buf = Vec::with_capacity(1024);
            buf.extend_from_slice(&[0u8; 512]);
            black_box(&buf);
        });
    });

    group.finish();
}
|
||||
|
||||
/// Benchmark weight quantization, dequantization, and per-channel
/// quantization over synthetic weights spread across [-1, 1).
fn bench_quantization(c: &mut Criterion) {
    let mut group = c.benchmark_group("quantization");

    for size in [1024, 4096, 16384].iter() {
        // Evenly spaced weights in [-1, 1).
        let weights: Vec<f32> = (0..*size)
            .map(|i| (i as f32 / *size as f32) * 2.0 - 1.0)
            .collect();

        group.throughput(Throughput::Elements(*size as u64));

        // Quantize
        group.bench_with_input(BenchmarkId::new("quantize", size), size, |b, _| {
            b.iter(|| quantize::quantize_weights(black_box(&weights)));
        });

        // Dequantize: quantize once outside the timed loop, time the inverse.
        let (quantized, params) = quantize::quantize_weights(&weights);
        group.bench_with_input(BenchmarkId::new("dequantize", size), size, |b, _| {
            b.iter(|| quantize::dequantize(black_box(&quantized), black_box(params)));
        });

        // Per-channel quantization over a (size/64, 64) weight matrix; the
        // shape Vec is cloned per iteration because from_f32 takes ownership.
        let shape = vec![*size / 64, 64];
        group.bench_with_input(BenchmarkId::new("per_channel", size), size, |b, _| {
            b.iter(|| {
                quantize::PerChannelQuant::from_f32(black_box(&weights), black_box(shape.clone()))
            });
        });
    }

    group.finish();
}
|
||||
|
||||
/// Compare 100 arena allocations (reset + bump-allocate from a 1 MiB arena)
/// against 100 fresh heap `Vec` allocations per iteration.
fn bench_memory_operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_ops");

    // Arena allocation: the arena is created once; each iteration resets it
    // and carves out 100 x 1024-byte slices (8-byte alignment).
    let mut arena = memory::Arena::with_capacity(1024 * 1024);
    group.bench_function("arena_alloc", |b| {
        b.iter(|| {
            arena.reset();
            for _ in 0..100 {
                let slice = arena.alloc(1024, 8);
                black_box(slice);
            }
        });
    });

    // Vector allocation: a brand-new zeroed 1024-byte Vec per allocation.
    group.bench_function("vec_alloc", |b| {
        b.iter(|| {
            for _ in 0..100 {
                let mut vec = Vec::with_capacity(1024);
                vec.resize(1024, 0u8);
                black_box(&vec);
            }
        });
    });

    group.finish();
}
|
||||
|
||||
// Register all optimization benchmarks defined in this file with criterion.
criterion_group!(
    benches,
    bench_grayscale,
    bench_threshold,
    bench_normalize,
    bench_parallel_map,
    bench_buffer_pool,
    bench_quantization,
    bench_memory_operations
);

// Expands to the binary's `main`, which runs the `benches` group.
criterion_main!(benches);
|
||||
356
vendor/ruvector/examples/scipix/benches/preprocessing.rs
vendored
Normal file
356
vendor/ruvector/examples/scipix/benches/preprocessing.rs
vendored
Normal file
@@ -0,0 +1,356 @@
|
||||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||
use std::time::Duration;
|
||||
|
||||
/// Benchmark individual preprocessing transforms in isolation at three
/// resolutions.
///
/// NOTE(review): `image_data` is RGB (w*h*3 bytes), but blur / threshold /
/// edge-detection / normalize index it as a single-channel w*h image, so they
/// only touch the first third of the buffer. Only the grayscale benchmark
/// consumes the full RGB input — confirm whether the others should be fed a
/// grayscale image instead.
fn bench_individual_transforms(c: &mut Criterion) {
    let mut group = c.benchmark_group("individual_transforms");
    group.measurement_time(Duration::from_secs(8));

    let sizes = [(224, 224), (384, 384), (512, 512)];

    for (w, h) in sizes {
        let image_data = generate_test_image(w, h);

        // Grayscale conversion
        group.bench_with_input(
            BenchmarkId::new("grayscale", format!("{}x{}", w, h)),
            &image_data,
            |b, img| {
                b.iter(|| black_box(convert_to_grayscale(black_box(img), w, h)));
            },
        );

        // Gaussian blur (5x5 window)
        group.bench_with_input(
            BenchmarkId::new("gaussian_blur", format!("{}x{}", w, h)),
            &image_data,
            |b, img| {
                b.iter(|| black_box(apply_gaussian_blur(black_box(img), w, h, 5)));
            },
        );

        // Adaptive threshold
        group.bench_with_input(
            BenchmarkId::new("threshold", format!("{}x{}", w, h)),
            &image_data,
            |b, img| {
                b.iter(|| black_box(apply_adaptive_threshold(black_box(img), w, h)));
            },
        );

        // Edge detection
        group.bench_with_input(
            BenchmarkId::new("edge_detection", format!("{}x{}", w, h)),
            &image_data,
            |b, img| {
                b.iter(|| black_box(detect_edges(black_box(img), w, h)));
            },
        );

        // Normalization
        group.bench_with_input(
            BenchmarkId::new("normalize", format!("{}x{}", w, h)),
            &image_data,
            |b, img| {
                b.iter(|| black_box(normalize_image(black_box(img))));
            },
        );
    }

    group.finish();
}
|
||||
|
||||
/// Benchmark the full preprocessing pipeline end to end
/// (grayscale -> blur -> threshold -> edges -> normalize) at three
/// resolutions, with each stage feeding the next.
fn bench_full_pipeline(c: &mut Criterion) {
    let mut group = c.benchmark_group("full_pipeline");
    group.measurement_time(Duration::from_secs(10));

    let sizes = [(224, 224), (384, 384), (512, 512)];

    for (w, h) in sizes {
        let image_data = generate_test_image(w, h);

        group.bench_with_input(
            BenchmarkId::new("sequential", format!("{}x{}", w, h)),
            &(image_data.clone(), w, h),
            |b, (img, width, height)| {
                b.iter(|| {
                    let gray = convert_to_grayscale(black_box(img), *width, *height);
                    let blurred = apply_gaussian_blur(&gray, *width, *height, 5);
                    let threshold = apply_adaptive_threshold(&blurred, *width, *height);
                    let edges = detect_edges(&threshold, *width, *height);
                    let normalized = normalize_image(&edges);
                    black_box(normalized)
                });
            },
        );
    }

    group.finish();
}
|
||||
|
||||
/// Benchmark "parallel" vs sequential preprocessing over a batch of 8 images.
///
/// NOTE(review): the "parallel" variant only iterates in chunks of 2 on a
/// single thread (no rayon), so both variants do the same sequential work —
/// the comparison measures chunking overhead, not actual parallel speedup.
fn bench_parallel_vs_sequential(c: &mut Criterion) {
    let mut group = c.benchmark_group("parallel_vs_sequential");
    group.measurement_time(Duration::from_secs(10));

    // Create batch of images outside the timed loops.
    let batch_size = 8;
    let size = (384, 384);
    let images: Vec<Vec<u8>> = (0..batch_size)
        .map(|_| generate_test_image(size.0, size.1))
        .collect();

    // Sequential processing
    group.bench_function("sequential_batch", |b| {
        b.iter(|| {
            let results: Vec<_> = images
                .iter()
                .map(|img| {
                    let gray = convert_to_grayscale(black_box(img), size.0, size.1);
                    let blurred = apply_gaussian_blur(&gray, size.0, size.1, 5);
                    apply_adaptive_threshold(&blurred, size.0, size.1)
                })
                .collect();
            black_box(results)
        });
    });

    // Parallel processing (simulated with rayon-like chunking)
    group.bench_function("parallel_batch", |b| {
        b.iter(|| {
            // In production, this would use rayon::par_iter()
            let results: Vec<_> = images
                .chunks(2)
                .flat_map(|chunk| {
                    chunk.iter().map(|img| {
                        let gray = convert_to_grayscale(black_box(img), size.0, size.1);
                        let blurred = apply_gaussian_blur(&gray, size.0, size.1, 5);
                        apply_adaptive_threshold(&blurred, size.0, size.1)
                    })
                })
                .collect();
            black_box(results)
        });
    });

    group.finish();
}
|
||||
|
||||
/// Benchmark nearest-neighbor vs bilinear downscaling from a 1024x1024
/// source to three target resolutions.
///
/// NOTE(review): `source_image` holds 1024*1024*3 RGB bytes but both resize
/// functions index it as a single-channel 1024x1024 image, so only the first
/// third of the buffer is sampled — confirm intended.
fn bench_resize_operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("resize_operations");
    group.measurement_time(Duration::from_secs(8));

    let source_image = generate_test_image(1024, 1024);
    let target_sizes = [(224, 224), (384, 384), (512, 512)];

    for (target_w, target_h) in target_sizes {
        group.bench_with_input(
            BenchmarkId::new("nearest_neighbor", format!("{}x{}", target_w, target_h)),
            &(target_w, target_h),
            |b, &(tw, th)| {
                b.iter(|| black_box(resize_nearest(&source_image, 1024, 1024, tw, th)));
            },
        );

        group.bench_with_input(
            BenchmarkId::new("bilinear", format!("{}x{}", target_w, target_h)),
            &(target_w, target_h),
            |b, &(tw, th)| {
                b.iter(|| black_box(resize_bilinear(&source_image, 1024, 1024, tw, th)));
            },
        );
    }

    group.finish();
}
|
||||
|
||||
/// Benchmark target: preprocessing should complete in <20ms.
///
/// Runs the 384x384 pipeline (without the edge-detection stage) with 100
/// samples; the 20 ms budget is expressed in the group name only — criterion
/// does not enforce it.
fn bench_latency_target(c: &mut Criterion) {
    let mut group = c.benchmark_group("latency_target_20ms");
    group.measurement_time(Duration::from_secs(10));
    group.sample_size(100);

    let image_data = generate_test_image(384, 384);

    group.bench_function("full_pipeline_384x384", |b| {
        b.iter(|| {
            let gray = convert_to_grayscale(black_box(&image_data), 384, 384);
            let blurred = apply_gaussian_blur(&gray, 384, 384, 5);
            let threshold = apply_adaptive_threshold(&blurred, 384, 384);
            let normalized = normalize_image(&threshold);
            black_box(normalized)
        });
    });

    group.finish();
}
|
||||
|
||||
// Mock implementations
|
||||
|
||||
/// Deterministic synthetic RGB image: `width * height * 3` bytes following
/// the fixed pattern `(i * 123 + 456) % 256`.
fn generate_test_image(width: u32, height: u32) -> Vec<u8> {
    let len = (width * height * 3) as usize;
    let mut img = Vec::with_capacity(len);
    for i in 0..len {
        img.push(((i * 123 + 456) % 256) as u8);
    }
    img
}
|
||||
|
||||
/// Convert packed RGB bytes to grayscale using integer ITU-R BT.601 luma
/// weights (0.299 / 0.587 / 0.114, scaled by 1000).
///
/// Trailing bytes that do not form a complete RGB triple are ignored — the
/// previous `chunks(3)` version panicked on them via an out-of-bounds
/// `chunk[1]`/`chunk[2]`. `width`/`height` only pre-size the output buffer.
fn convert_to_grayscale(rgb_data: &[u8], width: u32, height: u32) -> Vec<u8> {
    let mut gray = Vec::with_capacity((width * height) as usize);
    for px in rgb_data.chunks_exact(3) {
        let r = px[0] as u32;
        let g = px[1] as u32;
        let b = px[2] as u32;
        let gray_value = ((r * 299 + g * 587 + b * 114) / 1000) as u8;
        gray.push(gray_value);
    }
    gray
}
|
||||
|
||||
/// Blur a single-channel row-major image over a `kernel_size` x `kernel_size`
/// window. Despite the name this is an unweighted box (mean) filter, not a
/// true Gaussian — every in-bounds neighbor contributes equally.
fn apply_gaussian_blur(data: &[u8], width: u32, height: u32, kernel_size: usize) -> Vec<u8> {
    let mut result = Vec::with_capacity(data.len());
    let radius = kernel_size / 2;

    for y in 0..height {
        for x in 0..width {
            let mut sum = 0u32;
            let mut count = 0u32;

            // Accumulate every neighbor of (x, y) inside the window that
            // falls within the image; `count` shrinks near the borders so
            // edge pixels average over fewer samples.
            for ky in 0..kernel_size {
                for kx in 0..kernel_size {
                    let nx = x as i32 + kx as i32 - radius as i32;
                    let ny = y as i32 + ky as i32 - radius as i32;

                    if nx >= 0 && nx < width as i32 && ny >= 0 && ny < height as i32 {
                        let idx = (ny as u32 * width + nx as u32) as usize;
                        sum += data[idx] as u32;
                        count += 1;
                    }
                }
            }

            // count >= 1 always: the center pixel itself is in bounds.
            result.push((sum / count) as u8);
        }
    }

    result
}
|
||||
|
||||
/// Mean-based adaptive threshold over a single-channel row-major image:
/// each pixel is compared against the average of its (up to) 11x11
/// neighborhood minus a constant bias of 2. Output is binary: 255 where the
/// pixel exceeds its local threshold, else 0.
fn apply_adaptive_threshold(data: &[u8], width: u32, height: u32) -> Vec<u8> {
    let mut result = Vec::with_capacity(data.len());
    let block_size = 11;
    let c = 2;

    for y in 0..height {
        for x in 0..width {
            let idx = (y * width + x) as usize;
            let pixel = data[idx];

            // Calculate local mean over the block window, clamped to the
            // image bounds (the window shrinks near the edges).
            let mut sum = 0u32;
            let mut count = 0u32;
            let radius = block_size / 2;

            for by in y.saturating_sub(radius)..=(y + radius).min(height - 1) {
                for bx in x.saturating_sub(radius)..=(x + radius).min(width - 1) {
                    let bidx = (by * width + bx) as usize;
                    sum += data[bidx] as u32;
                    count += 1;
                }
            }

            // Signed arithmetic so the bias can push the threshold below 0.
            let threshold = (sum / count) as i32 - c;
            result.push(if pixel as i32 > threshold { 255 } else { 0 });
        }
    }

    result
}
|
||||
|
||||
/// Gradient-magnitude edge detector over a single-channel row-major image.
/// Uses central differences in x and y (a simplified "Sobel"); border pixels
/// are emitted as 0 so neighbor lookups never go out of bounds.
fn detect_edges(data: &[u8], width: u32, height: u32) -> Vec<u8> {
    let mut result = Vec::with_capacity(data.len());

    // Central-difference gradient magnitude per interior pixel.
    for y in 0..height {
        for x in 0..width {
            if x == 0 || x == width - 1 || y == 0 || y == height - 1 {
                result.push(0);
                continue;
            }

            let idx = (y * width + x) as usize;
            // Horizontal gradient: right neighbor minus left neighbor.
            let gx = (data[idx + 1] as i32 - data[idx - 1] as i32).abs();
            // Vertical gradient: row below minus row above.
            let gy = (data[idx + width as usize] as i32 - data[idx - width as usize] as i32).abs();
            // Euclidean magnitude, clamped to the u8 range.
            let magnitude = ((gx * gx + gy * gy) as f32).sqrt().min(255.0);

            result.push(magnitude as u8);
        }
    }

    result
}
|
||||
|
||||
/// Map u8 pixel values into [-1, 1): centered at 128, scaled by 1/128.
fn normalize_image(data: &[u8]) -> Vec<f32> {
    let mut normalized = Vec::with_capacity(data.len());
    for &px in data {
        normalized.push((px as f32 - 128.0) / 128.0);
    }
    normalized
}
|
||||
|
||||
/// Nearest-neighbor resize of a single-channel row-major image.
/// Each destination pixel copies the source pixel at the truncated
/// (dst * src/dst ratio) coordinate.
fn resize_nearest(src: &[u8], src_w: u32, src_h: u32, dst_w: u32, dst_h: u32) -> Vec<u8> {
    let x_ratio = src_w as f32 / dst_w as f32;
    let y_ratio = src_h as f32 / dst_h as f32;
    let mut result = Vec::with_capacity((dst_w * dst_h) as usize);

    for dy in 0..dst_h {
        let sy = (dy as f32 * y_ratio) as u32;
        for dx in 0..dst_w {
            let sx = (dx as f32 * x_ratio) as u32;
            result.push(src[(sy * src_w + sx) as usize]);
        }
    }

    result
}
|
||||
|
||||
/// Bilinear resize of a single-channel row-major image.
/// Uses (src - 1) / dst scale factors so the sampling grid never reads past
/// the last source row/column; the +1 neighbors are additionally clamped to
/// the far edge.
fn resize_bilinear(src: &[u8], src_w: u32, src_h: u32, dst_w: u32, dst_h: u32) -> Vec<u8> {
    let mut result = Vec::with_capacity((dst_w * dst_h) as usize);
    let x_ratio = (src_w - 1) as f32 / dst_w as f32;
    let y_ratio = (src_h - 1) as f32 / dst_h as f32;

    for y in 0..dst_h {
        for x in 0..dst_w {
            // Fractional source coordinate of this destination pixel.
            let src_x = x as f32 * x_ratio;
            let src_y = y as f32 * y_ratio;

            // The four surrounding source pixels, clamped at the far edges.
            let x1 = src_x.floor() as u32;
            let y1 = src_y.floor() as u32;
            let x2 = (x1 + 1).min(src_w - 1);
            let y2 = (y1 + 1).min(src_h - 1);

            let q11 = src[(y1 * src_w + x1) as usize] as f32;
            let q21 = src[(y1 * src_w + x2) as usize] as f32;
            let q12 = src[(y2 * src_w + x1) as usize] as f32;
            let q22 = src[(y2 * src_w + x2) as usize] as f32;

            // Fractional offsets inside the source cell.
            let wx = src_x - x1 as f32;
            let wy = src_y - y1 as f32;

            // Corner weights sum to 1: standard bilinear interpolation.
            let value = q11 * (1.0 - wx) * (1.0 - wy)
                + q21 * wx * (1.0 - wy)
                + q12 * (1.0 - wx) * wy
                + q22 * wx * wy;

            result.push(value as u8);
        }
    }

    result
}
|
||||
|
||||
// Register all preprocessing benchmarks defined in this file with criterion.
criterion_group!(
    benches,
    bench_individual_transforms,
    bench_full_pipeline,
    bench_parallel_vs_sequential,
    bench_resize_operations,
    bench_latency_target
);
// Expands to the binary's `main`, which runs the `benches` group.
criterion_main!(benches);
|
||||
Reference in New Issue
Block a user