Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,455 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::time::Duration;
/// Benchmark API request parsing
fn bench_request_parsing(c: &mut Criterion) {
    let mut group = c.benchmark_group("request_parsing");
    group.measurement_time(Duration::from_secs(5));
    // Payloads of increasing size and nesting depth, labelled for the report.
    let json_payloads = vec![
        ("small", r#"{"image_url": "http://example.com/img.jpg"}"#),
        (
            "medium",
            r#"{
"image_url": "http://example.com/img.jpg",
"options": {
"languages": ["en", "es"],
"format": "latex",
"inline_mode": true
}
}"#,
        ),
        (
            "large",
            r#"{
"image_url": "http://example.com/img.jpg",
"options": {
"languages": ["en", "es", "fr", "de"],
"format": "latex",
"inline_mode": true,
"detect_orientation": true,
"skip_preprocessing": false,
"models": ["text", "math", "table"],
"confidence_threshold": 0.8
},
"metadata": {
"user_id": "12345",
"session_id": "abcde",
"timestamp": 1234567890
}
}"#,
        ),
    ];
    for (name, payload) in json_payloads {
        // black_box prevents the optimizer from const-folding the parse away.
        group.bench_with_input(BenchmarkId::new("parse_json", name), &payload, |b, json| {
            b.iter(|| black_box(parse_ocr_request(black_box(json))));
        });
    }
    group.finish();
}
/// Benchmark response serialization
fn bench_response_serialization(c: &mut Criterion) {
    let mut group = c.benchmark_group("response_serialization");
    group.measurement_time(Duration::from_secs(5));
    // Three response shapes: empty regions, a few regions, and 10 regions.
    let responses = vec![
        ("simple", create_simple_response()),
        ("detailed", create_detailed_response()),
        ("batch", create_batch_response(10)),
    ];
    for (name, response) in responses {
        group.bench_with_input(
            BenchmarkId::new("serialize_json", name),
            &response,
            |b, resp| {
                b.iter(|| black_box(serialize_response(black_box(resp))));
            },
        );
    }
    group.finish();
}
/// Benchmark concurrent request handling
fn bench_concurrent_requests(c: &mut Criterion) {
    let mut group = c.benchmark_group("concurrent_requests");
    group.measurement_time(Duration::from_secs(10));
    let concurrent_levels = [1, 5, 10, 20, 50];
    for concurrency in concurrent_levels {
        group.bench_with_input(
            BenchmarkId::new("handle_requests", concurrency),
            &concurrency,
            |b, &level| {
                b.iter(|| {
                    // NOTE(review): handle_single_request is synchronous, so this
                    // measures `level` sequential calls, not true concurrency.
                    let handles: Vec<_> = (0..level).map(|_| handle_single_request()).collect();
                    black_box(handles)
                });
            },
        );
    }
    group.finish();
}
/// Benchmark middleware overhead
fn bench_middleware_overhead(c: &mut Criterion) {
    let mut group = c.benchmark_group("middleware_overhead");
    group.measurement_time(Duration::from_secs(5));
    let request = create_mock_request();
    // Baseline: handler invoked with no middleware at all.
    group.bench_function("no_middleware", |b| {
        b.iter(|| black_box(handle_request_direct(black_box(&request))));
    });
    group.bench_function("with_auth", |b| {
        b.iter(|| {
            let authed = auth_middleware(black_box(&request));
            black_box(handle_request_direct(black_box(&authed)))
        });
    });
    group.bench_function("with_logging", |b| {
        b.iter(|| {
            let logged = logging_middleware(black_box(&request));
            black_box(handle_request_direct(black_box(&logged)))
        });
    });
    // Full chain: auth -> logging -> validation -> rate limiting -> handler.
    group.bench_function("full_stack", |b| {
        b.iter(|| {
            let req = black_box(&request);
            let authed = auth_middleware(req);
            let logged = logging_middleware(&authed);
            let validated = validation_middleware(&logged);
            let rate_limited = rate_limit_middleware(&validated);
            black_box(handle_request_direct(black_box(&rate_limited)))
        });
    });
    group.finish();
}
/// Benchmark request validation
fn bench_request_validation(c: &mut Criterion) {
    let mut group = c.benchmark_group("request_validation");
    group.measurement_time(Duration::from_secs(5));
    // Measure both the accept path and the early-reject path.
    let valid_request = create_valid_request();
    let invalid_request = create_invalid_request();
    group.bench_function("validate_valid", |b| {
        b.iter(|| black_box(validate_request(black_box(&valid_request))));
    });
    group.bench_function("validate_invalid", |b| {
        b.iter(|| black_box(validate_request(black_box(&invalid_request))));
    });
    group.finish();
}
/// Benchmark rate limiting
fn bench_rate_limiting(c: &mut Criterion) {
    let mut group = c.benchmark_group("rate_limiting");
    group.measurement_time(Duration::from_secs(5));
    let mut limiter = RateLimiter::new(100, Duration::from_secs(60));
    // check_limit prunes expired timestamps, so it mutates limiter state
    // on every iteration.
    group.bench_function("check_limit", |b| {
        b.iter(|| black_box(limiter.check_limit("user_123")));
    });
    // NOTE(review): record_request appends a timestamp per iteration with no
    // pruning here, so the per-user Vec grows for the whole measurement window.
    group.bench_function("update_limit", |b| {
        b.iter(|| {
            limiter.record_request("user_123");
            black_box(&limiter)
        });
    });
    group.finish();
}
/// Benchmark error handling
fn bench_error_handling(c: &mut Criterion) {
    let mut group = c.benchmark_group("error_handling");
    group.measurement_time(Duration::from_secs(5));
    // Cost of formatting an error body alone.
    group.bench_function("create_error_response", |b| {
        b.iter(|| black_box(create_error_response("Invalid request", 400)));
    });
    // Cost of the log-then-respond path (log_error is a no-op stub).
    group.bench_function("log_and_respond", |b| {
        b.iter(|| {
            let error = "Processing failed";
            log_error(error);
            black_box(create_error_response(error, 500))
        });
    });
    group.finish();
}
/// Benchmark end-to-end API request
fn bench_e2e_api_request(c: &mut Criterion) {
    let mut group = c.benchmark_group("e2e_api_request");
    group.measurement_time(Duration::from_secs(15));
    let request_json = r#"{
"image_url": "http://example.com/img.jpg",
"options": {
"format": "latex"
}
}"#;
    // Exercises the whole pipeline: parse -> validate -> auth -> process -> serialize.
    group.bench_function("full_request_cycle", |b| {
        b.iter(|| {
            // Parse
            let request = parse_ocr_request(black_box(request_json));
            // Validate
            let _validated = validate_request(&request);
            // Auth
            let _authed = auth_middleware(&request);
            // Process (simulated)
            let response = process_ocr_request(&request);
            // Serialize
            let json = serialize_response(&response);
            black_box(json)
        });
    });
    group.finish();
}
// Mock types and implementations
/// Parsed OCR request (mock; fields mirror the JSON payloads above).
#[derive(Clone)]
struct OcrRequest {
    image_url: String,
    options: RequestOptions,
}
/// Per-request OCR options.
#[derive(Clone)]
struct RequestOptions {
    format: String,
    languages: Vec<String>,
    // Expected to lie in [0.0, 1.0]; validate_request enforces this.
    confidence_threshold: f32,
}
/// OCR result returned to clients (mock).
#[derive(Clone)]
struct OcrResponse {
    text: String,
    latex: String,
    confidence: f32,
    regions: Vec<Region>,
}
/// One detected text region.
#[derive(Clone)]
struct Region {
    // Presumably [x1, y1, x2, y2] pixel coordinates — TODO confirm with producer.
    bbox: [f32; 4],
    text: String,
    confidence: f32,
}
/// Sliding-window rate limiter used by the rate-limiting benchmarks.
struct RateLimiter {
    /// Maximum requests allowed per user inside one window.
    max_requests: usize,
    /// Length of the sliding window.
    window: Duration,
    /// Per-user timestamps of recorded requests.
    requests: std::collections::HashMap<String, Vec<std::time::Instant>>,
}
impl RateLimiter {
    /// Creates a limiter allowing `max_requests` per `window`.
    fn new(max_requests: usize, window: Duration) -> Self {
        Self {
            max_requests,
            window,
            requests: std::collections::HashMap::new(),
        }
    }
    /// Returns `true` if `user_id` may issue another request.
    /// Prunes timestamps that have fallen outside the window as a side effect.
    fn check_limit(&mut self, user_id: &str) -> bool {
        let now = std::time::Instant::now();
        // `or_default()` is the idiomatic form of `or_insert_with(Vec::new)`.
        let requests = self.requests.entry(user_id.to_string()).or_default();
        requests.retain(|&req_time| now.duration_since(req_time) < self.window);
        requests.len() < self.max_requests
    }
    /// Records a request for `user_id` at the current instant.
    /// Does not prune; pruning happens lazily in `check_limit`.
    fn record_request(&mut self, user_id: &str) {
        self.requests
            .entry(user_id.to_string())
            .or_default()
            .push(std::time::Instant::now());
    }
}
/// Mock parser: returns a fixed request regardless of `json`.
/// NOTE(review): the input is ignored, so the parsing benchmarks above measure
/// only construction cost, not real JSON parsing.
fn parse_ocr_request(json: &str) -> OcrRequest {
    // Simulate JSON parsing
    OcrRequest {
        image_url: "http://example.com/img.jpg".to_string(),
        options: RequestOptions {
            format: "latex".to_string(),
            languages: vec!["en".to_string()],
            confidence_threshold: 0.8,
        },
    }
}
/// Mock serializer: hand-formats the three scalar fields (regions are dropped;
/// no JSON string escaping is performed).
fn serialize_response(response: &OcrResponse) -> String {
    // Simulate JSON serialization
    format!(
        r#"{{"text":"{}","latex":"{}","confidence":{}}}"#,
        response.text, response.latex, response.confidence
    )
}
/// Fixture: minimal response with no regions.
fn create_simple_response() -> OcrResponse {
    OcrResponse {
        text: "E = mc^2".to_string(),
        latex: "E = mc^2".to_string(),
        confidence: 0.95,
        regions: vec![],
    }
}
/// Fixture: response carrying two regions, for the "detailed" serialization case.
fn create_detailed_response() -> OcrResponse {
    OcrResponse {
        text: "Complex equation with multiple terms".to_string(),
        latex: "\\int_0^1 x^2 dx = \\frac{1}{3}".to_string(),
        confidence: 0.92,
        regions: vec![
            Region {
                bbox: [0.0, 0.0, 100.0, 50.0],
                text: "integral".to_string(),
                confidence: 0.95,
            },
            Region {
                bbox: [100.0, 0.0, 200.0, 50.0],
                text: "equals".to_string(),
                confidence: 0.98,
            },
        ],
    }
}
/// Fixture: response with `count` synthetic regions laid out left to right.
fn create_batch_response(count: usize) -> OcrResponse {
    let regions: Vec<_> = (0..count)
        .map(|i| Region {
            bbox: [i as f32 * 10.0, 0.0, (i + 1) as f32 * 10.0, 50.0],
            text: format!("region_{}", i),
            confidence: 0.9,
        })
        .collect();
    OcrResponse {
        text: "Batch text".to_string(),
        latex: "batch latex".to_string(),
        confidence: 0.9,
        regions,
    }
}
/// Mock request handler used by the concurrency benchmark.
fn handle_single_request() -> OcrResponse {
    create_simple_response()
}
/// Fixture: a well-formed request used across the middleware benchmarks.
fn create_mock_request() -> OcrRequest {
    OcrRequest {
        image_url: "http://example.com/img.jpg".to_string(),
        options: RequestOptions {
            format: "latex".to_string(),
            languages: vec!["en".to_string()],
            confidence_threshold: 0.8,
        },
    }
}
/// Handler without any middleware; baseline for the overhead benchmarks.
fn handle_request_direct(request: &OcrRequest) -> OcrResponse {
    process_ocr_request(request)
}
/// Mock middleware: each stage models its cost as one full request clone.
fn auth_middleware(request: &OcrRequest) -> OcrRequest {
    // Simulate auth check
    request.clone()
}
fn logging_middleware(request: &OcrRequest) -> OcrRequest {
    // Simulate logging
    request.clone()
}
fn validation_middleware(request: &OcrRequest) -> OcrRequest {
    // Simulate validation
    request.clone()
}
fn rate_limit_middleware(request: &OcrRequest) -> OcrRequest {
    // Simulate rate limiting
    request.clone()
}
/// Fixture: request that passes validate_request.
fn create_valid_request() -> OcrRequest {
    create_mock_request()
}
/// Fixture: violates both validation rules (empty URL, out-of-range threshold).
fn create_invalid_request() -> OcrRequest {
    OcrRequest {
        image_url: "".to_string(),
        options: RequestOptions {
            format: "invalid".to_string(),
            languages: vec![],
            confidence_threshold: -1.0,
        },
    }
}
/// Validates an OCR request, returning the first failing rule as an error message.
///
/// Rules: `image_url` must be non-empty and `confidence_threshold` must lie
/// in `[0.0, 1.0]`.
fn validate_request(request: &OcrRequest) -> Result<(), String> {
    if request.image_url.is_empty() {
        return Err("Image URL is required".to_string());
    }
    // Inclusive-range `contains` reads clearer than two chained comparisons
    // and is the clippy-recommended form (manual_range_contains).
    if !(0.0..=1.0).contains(&request.options.confidence_threshold) {
        return Err("Invalid confidence threshold".to_string());
    }
    Ok(())
}
/// Builds the minimal JSON error body `{"error":"<message>"}`.
/// The status code is accepted for API symmetry but not embedded in the payload.
fn create_error_response(message: &str, _code: u16) -> String {
    let mut body = String::with_capacity(message.len() + 12);
    body.push_str(r#"{"error":""#);
    body.push_str(message);
    body.push_str(r#""}"#);
    body
}
/// No-op logging stub; exists so benchmarks can include a call in the path.
fn log_error(_message: &str) {
    // Simulate logging
}
/// Mock OCR processor: ignores the request and returns the fixed simple response.
fn process_ocr_request(_request: &OcrRequest) -> OcrResponse {
    // Simulate OCR processing
    create_simple_response()
}
// Register every benchmark group; criterion_main! generates the binary's main().
criterion_group!(
    benches,
    bench_request_parsing,
    bench_response_serialization,
    bench_concurrent_requests,
    bench_middleware_overhead,
    bench_request_validation,
    bench_rate_limiting,
    bench_error_handling,
    bench_e2e_api_request
);
criterion_main!(benches);

View File

@@ -0,0 +1,450 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::collections::HashMap;
use std::time::Duration;
/// Benchmark embedding generation
fn bench_embedding_generation(c: &mut Criterion) {
    let mut group = c.benchmark_group("embedding_generation");
    group.measurement_time(Duration::from_secs(8));
    // Common model input resolutions.
    let image_sizes = [(224, 224), (384, 384), (512, 512)];
    for (w, h) in image_sizes {
        let image_data = generate_test_image(w, h);
        group.bench_with_input(
            BenchmarkId::new("generate", format!("{}x{}", w, h)),
            &image_data,
            |b, img| {
                b.iter(|| black_box(generate_embedding(black_box(img))));
            },
        );
    }
    group.finish();
}
/// Benchmark similarity search (vector search)
fn bench_similarity_search(c: &mut Criterion) {
    let mut group = c.benchmark_group("similarity_search");
    group.measurement_time(Duration::from_secs(10));
    // Create cache with varying sizes
    let cache_sizes = [100, 1000, 10000];
    for cache_size in cache_sizes {
        let cache = create_embedding_cache(cache_size);
        let query_embedding = generate_random_embedding(512);
        // Exhaustive scan: baseline for the ANN variant below.
        group.bench_with_input(
            BenchmarkId::new("linear_search", cache_size),
            &(&cache, &query_embedding),
            |b, (cache, query)| {
                b.iter(|| {
                    black_box(linear_similarity_search(
                        black_box(cache),
                        black_box(query),
                        10,
                    ))
                });
            },
        );
        // Approximate nearest neighbor search
        group.bench_with_input(
            BenchmarkId::new("ann_search", cache_size),
            &(&cache, &query_embedding),
            |b, (cache, query)| {
                b.iter(|| {
                    black_box(ann_similarity_search(
                        black_box(cache),
                        black_box(query),
                        10,
                    ))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark cache hit latency
fn bench_cache_hit_latency(c: &mut Criterion) {
    let mut group = c.benchmark_group("cache_hit_latency");
    group.measurement_time(Duration::from_secs(5));
    let cache = create_embedding_cache(1000);
    let query = generate_random_embedding(512);
    // Guaranteed hit: the probe embedding is taken from the cache itself.
    group.bench_function("exact_match", |b| {
        let cached_embedding = cache.values().next().unwrap();
        b.iter(|| {
            black_box(find_exact_match(
                black_box(&cache),
                black_box(cached_embedding),
            ))
        });
    });
    group.bench_function("similarity_threshold", |b| {
        b.iter(|| {
            black_box(find_by_similarity_threshold(
                black_box(&cache),
                black_box(&query),
                0.95,
            ))
        });
    });
    group.finish();
}
/// Benchmark cache miss latency
fn bench_cache_miss_latency(c: &mut Criterion) {
    let mut group = c.benchmark_group("cache_miss_latency");
    group.measurement_time(Duration::from_secs(8));
    let cache = create_embedding_cache(1000);
    let new_image = generate_test_image(384, 384);
    // Models the full miss path: embed, search, then fall back to processing.
    group.bench_function("miss_with_generation", |b| {
        b.iter(|| {
            let query_embedding = generate_embedding(black_box(&new_image));
            let result = linear_similarity_search(black_box(&cache), &query_embedding, 1);
            if result.is_empty() || result[0].1 < 0.95 {
                // Cache miss - would need to process
                black_box(process_new_image(black_box(&new_image)))
            } else {
                black_box(result[0].2.clone())
            }
        });
    });
    group.finish();
}
/// Benchmark cache insertion
fn bench_cache_insertion(c: &mut Criterion) {
    let mut group = c.benchmark_group("cache_insertion");
    group.measurement_time(Duration::from_secs(8));
    // Plain HashMap: grows without bound during the measurement.
    group.bench_function("insert_new_entry", |b| {
        let mut cache = create_embedding_cache(1000);
        let mut counter = 0;
        b.iter(|| {
            let embedding = generate_random_embedding(512);
            let key = format!("key_{}", counter);
            cache.insert(key.clone(), embedding);
            counter += 1;
            black_box(&cache)
        });
    });
    // LRU cache: same workload but capped at 1000 entries, so eviction cost
    // is included once the cache is warm.
    group.bench_function("insert_with_eviction", |b| {
        let mut cache = LRUCache::new(1000);
        let mut counter = 0;
        b.iter(|| {
            let embedding = generate_random_embedding(512);
            let key = format!("key_{}", counter);
            cache.insert(key, embedding);
            counter += 1;
            black_box(&cache)
        });
    });
    group.finish();
}
/// Benchmark cache update operations
fn bench_cache_updates(c: &mut Criterion) {
    let mut group = c.benchmark_group("cache_updates");
    group.measurement_time(Duration::from_secs(5));
    let mut cache = create_embedding_cache(1000);
    // Snapshot the keys so each iteration overwrites an existing entry.
    let keys: Vec<_> = cache.keys().cloned().collect();
    group.bench_function("update_existing", |b| {
        let mut idx = 0;
        b.iter(|| {
            let key = &keys[idx % keys.len()];
            let new_embedding = generate_random_embedding(512);
            cache.insert(key.clone(), new_embedding);
            idx += 1;
            black_box(&cache)
        });
    });
    group.finish();
}
/// Benchmark batch cache operations
fn bench_batch_cache_ops(c: &mut Criterion) {
    let mut group = c.benchmark_group("batch_cache_operations");
    group.measurement_time(Duration::from_secs(10));
    let batch_sizes = [10, 50, 100];
    for batch_size in batch_sizes {
        let cache = create_embedding_cache(1000);
        let queries: Vec<_> = (0..batch_size)
            .map(|_| generate_random_embedding(512))
            .collect();
        group.bench_with_input(
            BenchmarkId::new("batch_search", batch_size),
            &(&cache, &queries),
            |b, (cache, queries)| {
                b.iter(|| {
                    let results: Vec<_> = queries
                        .iter()
                        .map(|q| linear_similarity_search(black_box(cache), q, 10))
                        .collect();
                    black_box(results)
                });
            },
        );
        group.bench_with_input(
            BenchmarkId::new("batch_insert", batch_size),
            &queries,
            |b, queries| {
                // Fresh cache per iteration so insert cost isn't skewed by growth;
                // setup time is excluded from the measurement.
                b.iter_with_setup(
                    || create_embedding_cache(1000),
                    |mut cache| {
                        for (i, embedding) in queries.iter().enumerate() {
                            cache.insert(format!("batch_{}", i), embedding.clone());
                        }
                        black_box(cache)
                    },
                );
            },
        );
    }
    group.finish();
}
/// Benchmark cache statistics and monitoring
fn bench_cache_statistics(c: &mut Criterion) {
    let mut group = c.benchmark_group("cache_statistics");
    group.measurement_time(Duration::from_secs(5));
    // Large cache so the O(n) stats passes dominate measurement noise.
    let cache = create_embedding_cache(10000);
    group.bench_function("compute_stats", |b| {
        b.iter(|| black_box(compute_cache_statistics(black_box(&cache))));
    });
    group.bench_function("memory_usage", |b| {
        b.iter(|| black_box(estimate_cache_memory(black_box(&cache))));
    });
    group.finish();
}
// Mock implementations
type Embedding = Vec<f32>;
/// Minimal LRU cache used only by the eviction benchmark.
/// `access_order` holds keys from least- to most-recently inserted/updated.
struct LRUCache {
    capacity: usize,
    cache: HashMap<String, Embedding>,
    access_order: Vec<String>,
}
impl LRUCache {
    fn new(capacity: usize) -> Self {
        Self {
            capacity,
            cache: HashMap::new(),
            access_order: Vec::new(),
        }
    }
    /// Inserts `key`; evicts the least-recently-used entry only when adding a
    /// brand-new key to a full cache (updates reuse the existing slot).
    fn insert(&mut self, key: String, value: Embedding) {
        if self.cache.len() >= self.capacity && !self.cache.contains_key(&key) {
            if let Some(lru_key) = self.access_order.first().cloned() {
                self.cache.remove(&lru_key);
                // O(n) front removal — acceptable for a benchmark mock;
                // a VecDeque would make this O(1).
                self.access_order.remove(0);
            }
        }
        self.cache.insert(key.clone(), value);
        // Move the key to the most-recently-used position.
        self.access_order.retain(|k| k != &key);
        self.access_order.push(key);
    }
}
/// Fixture: a mid-gray RGB image (3 bytes per pixel, all 128).
fn generate_test_image(width: u32, height: u32) -> Vec<u8> {
    let byte_count = (width * height * 3) as usize;
    std::iter::repeat(128u8).take(byte_count).collect()
}
/// Deterministic pseudo-random embedding: element `i` is `(i * 0.001) mod 1.0`.
fn generate_random_embedding(dim: usize) -> Embedding {
    let mut values = Vec::with_capacity(dim);
    for i in 0..dim {
        values.push((i as f32 * 0.001) % 1.0);
    }
    values
}
/// Derives a deterministic 512-d unit-norm embedding from raw image bytes by
/// sampling the image evenly and L2-normalizing.
/// (Return type spelled `Vec<f32>`, identical to the `Embedding` alias.)
///
/// Fixes over the original: empty input no longer panics (`% 0`), and an
/// all-zero image no longer produces NaNs from dividing by a zero norm.
fn generate_embedding(image_data: &[u8]) -> Vec<f32> {
    let dim = 512;
    // Guard: the sampling index below computes `% image_data.len()`.
    if image_data.is_empty() {
        return vec![0.0; dim];
    }
    let mut embedding = Vec::with_capacity(dim);
    for i in 0..dim {
        // Sample bytes evenly across the image, mapped into [0, 1].
        let idx = (i * image_data.len() / dim) % image_data.len();
        embedding.push(image_data[idx] as f32 / 255.0);
    }
    // L2-normalize; skip when the norm is zero to avoid NaNs.
    let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm > 0.0 {
        embedding.iter_mut().for_each(|x| *x /= norm);
    }
    embedding
}
/// Fixture: a cache of `size` deterministic 512-d embeddings keyed `image_<i>`.
fn create_embedding_cache(size: usize) -> HashMap<String, Embedding> {
    // Pre-size the map to avoid repeated rehash-and-grow while filling.
    let mut cache = HashMap::with_capacity(size);
    for i in 0..size {
        cache.insert(format!("image_{}", i), generate_random_embedding(512));
    }
    cache
}
/// Cosine similarity of two vectors; returns 0.0 when either norm is zero.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let mut dot = 0.0f32;
    for (x, y) in a.iter().zip(b.iter()) {
        dot += x * y;
    }
    let l2 = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();
    let (norm_a, norm_b) = (l2(a), l2(b));
    if norm_a > 0.0 && norm_b > 0.0 {
        dot / (norm_a * norm_b)
    } else {
        // Degenerate input: treat a zero vector as dissimilar to everything.
        0.0
    }
}
/// Exhaustive top-k search: scores every cached embedding against `query` and
/// returns up to `top_k` `(key, similarity, embedding)` triples, best first.
fn linear_similarity_search(
    cache: &HashMap<String, Embedding>,
    query: &Embedding,
    top_k: usize,
) -> Vec<(String, f32, Embedding)> {
    let mut results: Vec<_> = cache
        .iter()
        .map(|(key, embedding)| {
            let similarity = cosine_similarity(query, embedding);
            (key.clone(), similarity, embedding.clone())
        })
        .collect();
    // f32::total_cmp is a total order, so a NaN score cannot panic the sort
    // the way partial_cmp().unwrap() could; unstable sort skips an allocation
    // (HashMap iteration order is arbitrary anyway, so stability buys nothing).
    results.sort_unstable_by(|a, b| b.1.total_cmp(&a.1));
    results.truncate(top_k);
    results
}
/// Approximate top-k search: scores only an evenly-strided sample of the cache
/// (~10%, floored at 100 entries) instead of every embedding.
fn ann_similarity_search(
    cache: &HashMap<String, Embedding>,
    query: &Embedding,
    top_k: usize,
) -> Vec<(String, f32, Embedding)> {
    // Simplified ANN using deterministic strided sampling.
    let sample_size = (cache.len() / 10).max(100).min(cache.len());
    // Hoisted out of the filter closure: the original recomputed this stride
    // once per cache entry.
    let stride = cache.len() / sample_size.max(1);
    let mut results: Vec<_> = cache
        .iter()
        .enumerate()
        .filter(|(i, _)| i % stride == 0)
        .map(|(_, (key, embedding))| {
            let similarity = cosine_similarity(query, embedding);
            (key.clone(), similarity, embedding.clone())
        })
        .collect();
    // total_cmp avoids the NaN panic of partial_cmp().unwrap().
    results.sort_unstable_by(|a, b| b.1.total_cmp(&a.1));
    results.truncate(top_k);
    results
}
/// Returns the key of the first cached embedding that matches `query`
/// element-wise within 1e-6 (iteration order is the map's arbitrary order).
fn find_exact_match(cache: &HashMap<String, Embedding>, query: &Embedding) -> Option<String> {
    for (key, embedding) in cache {
        if embedding.len() != query.len() {
            continue;
        }
        let identical = embedding
            .iter()
            .zip(query.iter())
            .all(|(a, b)| (a - b).abs() < 1e-6);
        if identical {
            return Some(key.clone());
        }
    }
    None
}
/// Returns the best-scoring cache entry whose cosine similarity to `query`
/// is at least `threshold`, or `None` if nothing clears the bar.
fn find_by_similarity_threshold(
    cache: &HashMap<String, Embedding>,
    query: &Embedding,
    threshold: f32,
) -> Option<(String, f32)> {
    cache
        .iter()
        .filter_map(|(key, embedding)| {
            let similarity = cosine_similarity(query, embedding);
            if similarity >= threshold {
                Some((key.clone(), similarity))
            } else {
                None
            }
        })
        // total_cmp cannot panic, unlike partial_cmp().unwrap() on a NaN score.
        .max_by(|a, b| a.1.total_cmp(&b.1))
}
/// Stand-in for full OCR of an uncached image; the sleep models the ~50 ms
/// processing latency charged on a cache miss.
fn process_new_image(_image_data: &[u8]) -> String {
    std::thread::sleep(Duration::from_millis(50));
    String::from("processed_result")
}
/// Snapshot of cache health computed by `compute_cache_statistics`.
struct CacheStatistics {
    // Number of cached embeddings.
    size: usize,
    // Mean L2 norm across all cached embeddings (0.0 for an empty cache).
    avg_embedding_norm: f32,
    // Estimated footprint from `estimate_cache_memory`.
    memory_bytes: usize,
}
/// Walks the whole cache to produce size, mean embedding norm, and an
/// estimated memory footprint (two O(n) passes).
fn compute_cache_statistics(cache: &HashMap<String, Embedding>) -> CacheStatistics {
    let size = cache.len();
    // Guard against dividing by zero on an empty cache.
    let avg_norm = if size > 0 {
        let total_norm: f32 = cache
            .values()
            .map(|emb| emb.iter().map(|x| x * x).sum::<f32>().sqrt())
            .sum();
        total_norm / size as f32
    } else {
        0.0
    };
    let memory_bytes = estimate_cache_memory(cache);
    CacheStatistics {
        size,
        avg_embedding_norm: avg_norm,
        memory_bytes,
    }
}
/// Rough memory footprint: key bytes + 4 bytes per f32 element + ~64 bytes of
/// assumed per-entry HashMap overhead. An estimate, not an exact accounting.
fn estimate_cache_memory(cache: &HashMap<String, Embedding>) -> usize {
    let mut total = cache.len() * 64; // HashMap overhead
    for (key, embedding) in cache {
        total += key.len() + embedding.len() * 4;
    }
    total
}
// Register every cache benchmark group; criterion_main! generates main().
criterion_group!(
    benches,
    bench_embedding_generation,
    bench_similarity_search,
    bench_cache_hit_latency,
    bench_cache_miss_latency,
    bench_cache_insertion,
    bench_cache_updates,
    bench_batch_cache_ops,
    bench_cache_statistics
);
criterion_main!(benches);

View File

@@ -0,0 +1,413 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::time::Duration;
/// Benchmark text detection model inference
fn bench_text_detection(c: &mut Criterion) {
    let mut group = c.benchmark_group("text_detection_model");
    group.measurement_time(Duration::from_secs(10));
    // Full-page input resolutions (3-channel).
    let sizes = [(224, 224), (384, 384), (512, 512)];
    for (w, h) in sizes {
        let input_tensor = create_input_tensor(w, h, 3);
        group.bench_with_input(
            BenchmarkId::new("inference", format!("{}x{}", w, h)),
            &input_tensor,
            |b, tensor| {
                b.iter(|| black_box(run_detection_model(black_box(tensor))));
            },
        );
    }
    group.finish();
}
/// Benchmark text recognition model inference
fn bench_text_recognition(c: &mut Criterion) {
    let mut group = c.benchmark_group("text_recognition_model");
    group.measurement_time(Duration::from_secs(10));
    // Recognition typically works on smaller cropped regions
    // (single-channel height x width crops).
    let sizes = [(32, 128), (48, 192), (64, 256)];
    for (h, w) in sizes {
        let input_tensor = create_input_tensor(w, h, 1);
        group.bench_with_input(
            BenchmarkId::new("inference", format!("{}x{}", w, h)),
            &input_tensor,
            |b, tensor| {
                b.iter(|| black_box(run_recognition_model(black_box(tensor))));
            },
        );
    }
    group.finish();
}
/// Benchmark math equation model inference
fn bench_math_model(c: &mut Criterion) {
    let mut group = c.benchmark_group("math_model");
    group.measurement_time(Duration::from_secs(10));
    let sizes = [(224, 224), (320, 320), (384, 384)];
    for (w, h) in sizes {
        let input_tensor = create_input_tensor(w, h, 3);
        group.bench_with_input(
            BenchmarkId::new("inference", format!("{}x{}", w, h)),
            &input_tensor,
            |b, tensor| {
                b.iter(|| black_box(run_math_model(black_box(tensor))));
            },
        );
    }
    group.finish();
}
/// Benchmark tensor preprocessing operations
fn bench_tensor_preprocessing(c: &mut Criterion) {
    let mut group = c.benchmark_group("tensor_preprocessing");
    group.measurement_time(Duration::from_secs(8));
    // 384x384 RGB image, the mid-size detection input.
    let image_data = vec![128u8; 384 * 384 * 3];
    group.bench_function("normalization", |b| {
        b.iter(|| black_box(normalize_tensor(black_box(&image_data))));
    });
    group.bench_function("standardization", |b| {
        b.iter(|| black_box(standardize_tensor(black_box(&image_data))));
    });
    group.bench_function("to_chw_layout", |b| {
        b.iter(|| black_box(convert_to_chw(black_box(&image_data), 384, 384)));
    });
    group.bench_function("add_batch_dimension", |b| {
        let tensor = normalize_tensor(&image_data);
        b.iter(|| black_box(add_batch_dim(black_box(&tensor))));
    });
    group.finish();
}
/// Benchmark output postprocessing
fn bench_output_postprocessing(c: &mut Criterion) {
    let mut group = c.benchmark_group("output_postprocessing");
    group.measurement_time(Duration::from_secs(8));
    let detection_output = create_detection_output(1000);
    let recognition_output = create_recognition_output(100);
    group.bench_function("nms_filtering", |b| {
        b.iter(|| black_box(apply_nms(black_box(&detection_output), 0.5)));
    });
    group.bench_function("confidence_filtering", |b| {
        b.iter(|| black_box(filter_by_confidence(black_box(&detection_output), 0.7)));
    });
    // CTC decoding: greedy vs beam search over the same logits.
    group.bench_function("decode_sequence", |b| {
        b.iter(|| black_box(decode_ctc_output(black_box(&recognition_output))));
    });
    group.bench_function("beam_search", |b| {
        b.iter(|| black_box(beam_search_decode(black_box(&recognition_output), 5)));
    });
    group.finish();
}
/// Benchmark batch inference
fn bench_batch_inference(c: &mut Criterion) {
    let mut group = c.benchmark_group("batch_inference");
    group.measurement_time(Duration::from_secs(15));
    let batch_sizes = [1, 4, 8, 16];
    let size = (384, 384);
    for batch_size in batch_sizes {
        let batch_tensor = create_batch_tensor(batch_size, size.0, size.1, 3);
        group.bench_with_input(
            BenchmarkId::new("detection_batch", batch_size),
            &batch_tensor,
            |b, tensor| {
                b.iter(|| black_box(run_detection_model(black_box(tensor))));
            },
        );
    }
    group.finish();
}
/// Benchmark model warm-up time
fn bench_model_warmup(c: &mut Criterion) {
    let mut group = c.benchmark_group("model_warmup");
    group.measurement_time(Duration::from_secs(10));
    // iter_with_large_drop keeps the multi-MB weight buffers' deallocation
    // out of the measured time.
    group.bench_function("detection_model_init", |b| {
        b.iter_with_large_drop(|| black_box(initialize_detection_model()));
    });
    group.bench_function("recognition_model_init", |b| {
        b.iter_with_large_drop(|| black_box(initialize_recognition_model()));
    });
    group.bench_function("math_model_init", |b| {
        b.iter_with_large_drop(|| black_box(initialize_math_model()));
    });
    group.finish();
}
/// Benchmark end-to-end inference pipeline
fn bench_e2e_pipeline(c: &mut Criterion) {
    let mut group = c.benchmark_group("e2e_inference_pipeline");
    group.measurement_time(Duration::from_secs(15));
    let image_data = vec![128u8; 384 * 384 * 3];
    group.bench_function("full_pipeline", |b| {
        b.iter(|| {
            // Preprocessing
            let normalized = normalize_tensor(black_box(&image_data));
            let chw = convert_to_chw(&normalized, 384, 384);
            let batched = add_batch_dim(&chw);
            // Detection
            let detection_output = run_detection_model(&batched);
            let boxes = apply_nms(&detection_output, 0.5);
            // Recognition (simulated for each box); capped at 5 boxes so the
            // measurement stays bounded regardless of detection count.
            let mut results = Vec::new();
            for _box in boxes.iter().take(5) {
                let rec_output = run_recognition_model(&batched);
                let text = decode_ctc_output(&rec_output);
                results.push(text);
            }
            black_box(results)
        });
    });
    group.finish();
}
// Mock implementations
/// Fixture: a flat HWC tensor filled with 0.5.
fn create_input_tensor(width: u32, height: u32, channels: u32) -> Vec<f32> {
    let element_count = (width * height * channels) as usize;
    vec![0.5f32; element_count]
}
/// Fixture: `batch` stacked input tensors, all filled with 0.5.
fn create_batch_tensor(batch: usize, width: u32, height: u32, channels: u32) -> Vec<f32> {
    let per_item = (width * height * channels) as usize;
    vec![0.5f32; batch * per_item]
}
/// Simulates detector inference: emits one synthetic box per 100 input
/// elements, with confidence cycling through 0.80..0.99.
fn run_detection_model(input: &[f32]) -> Vec<Detection> {
    let output_size = input.len() / 100;
    let mut detections = Vec::with_capacity(output_size);
    for i in 0..output_size {
        detections.push(Detection {
            bbox: [i as f32, i as f32, (i + 10) as f32, (i + 10) as f32],
            confidence: 0.8 + (i % 20) as f32 / 100.0,
            class_id: i % 10,
        });
    }
    detections
}
/// Simulates a CTC recognition head: a flat [32 time steps x 64 vocab]
/// logit grid, independent of the input crop (mock).
fn run_recognition_model(input: &[f32]) -> Vec<f32> {
    let _ = input; // mock: output shape is fixed regardless of the crop
    const TIME_STEPS: usize = 32;
    const VOCAB_SIZE: usize = 64;
    vec![0.1f32; TIME_STEPS * VOCAB_SIZE]
}
/// Simulates math-model inference: output is one tenth the input length.
fn run_math_model(input: &[f32]) -> Vec<f32> {
    let output_len = input.len() / 10;
    vec![0.5f32; output_len]
}
/// Warm-up stubs: each sleep models weight-loading latency and the returned
/// buffer models the model's resident size.
fn initialize_detection_model() -> Vec<u8> {
    std::thread::sleep(Duration::from_millis(100));
    vec![0u8; 1024 * 1024]
}
fn initialize_recognition_model() -> Vec<u8> {
    std::thread::sleep(Duration::from_millis(80));
    vec![0u8; 512 * 1024]
}
fn initialize_math_model() -> Vec<u8> {
    std::thread::sleep(Duration::from_millis(120));
    vec![0u8; 2048 * 1024]
}
/// Scales raw u8 pixels into [0.0, 1.0].
fn normalize_tensor(data: &[u8]) -> Vec<f32> {
    let mut out = Vec::with_capacity(data.len());
    for &byte in data {
        out.push(f32::from(byte) / 255.0);
    }
    out
}
/// Standardizes u8 pixels with fixed mean 128 and std 64.
fn standardize_tensor(data: &[u8]) -> Vec<f32> {
    const MEAN: f32 = 128.0;
    const STD: f32 = 64.0;
    data.iter().map(|&x| (f32::from(x) - MEAN) / STD).collect()
}
/// Converts a flat HWC tensor to CHW layout. The channel count is inferred
/// from `data.len() / (width * height)`.
///
/// Fix over the original: a zero-area image (`width * height == 0`) returned
/// via an early guard instead of panicking with a divide-by-zero.
fn convert_to_chw(data: &[f32], width: u32, height: u32) -> Vec<f32> {
    let (w, h) = (width as usize, height as usize);
    let pixels = w * h;
    // Guard: the channel inference below divides by the pixel count.
    if pixels == 0 {
        return Vec::new();
    }
    let channels = data.len() / pixels;
    let mut chw = Vec::with_capacity(data.len());
    for c in 0..channels {
        for y in 0..h {
            for x in 0..w {
                // HWC offset of pixel (y, x), channel c.
                chw.push(data[(y * w + x) * channels + c]);
            }
        }
    }
    chw
}
/// Placeholder for prepending a batch axis: with a flat layout the bytes are
/// unchanged, so this is just an owned copy.
fn add_batch_dim(tensor: &[f32]) -> Vec<f32> {
    Vec::from(tensor)
}
/// One detector output box.
#[derive(Clone)]
struct Detection {
    // [x1, y1, x2, y2], as consumed by calculate_iou below.
    bbox: [f32; 4],
    confidence: f32,
    class_id: usize,
}
/// Fixture: `count` overlapping synthetic boxes with confidence cycling
/// through 0.50..0.99.
fn create_detection_output(count: usize) -> Vec<Detection> {
    let mut detections = Vec::with_capacity(count);
    for i in 0..count {
        detections.push(Detection {
            bbox: [i as f32, i as f32, (i + 10) as f32, (i + 10) as f32],
            confidence: 0.5 + (i % 50) as f32 / 100.0,
            class_id: i % 10,
        });
    }
    detections
}
/// Fixture: flat CTC logits for `time_steps` steps over a 64-entry vocabulary.
fn create_recognition_output(time_steps: usize) -> Vec<f32> {
    std::iter::repeat(0.1f32).take(time_steps * 64).collect()
}
/// Greedy non-maximum suppression: visit boxes in descending confidence and
/// drop any box whose IoU with an already-kept box exceeds `iou_threshold`.
fn apply_nms(detections: &[Detection], iou_threshold: f32) -> Vec<Detection> {
    let mut sorted = detections.to_vec();
    // Stable sort preserves input order among equal confidences;
    // total_cmp avoids the NaN panic of partial_cmp().unwrap().
    sorted.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
    let mut kept: Vec<Detection> = Vec::new();
    for det in sorted {
        let overlaps = kept
            .iter()
            .any(|k| calculate_iou(&det.bbox, &k.bbox) > iou_threshold);
        if !overlaps {
            kept.push(det);
        }
    }
    kept
}
/// Intersection-over-union of two `[x1, y1, x2, y2]` boxes.
/// Returns 0.0 when the union area is zero (degenerate boxes).
fn calculate_iou(box1: &[f32; 4], box2: &[f32; 4]) -> f32 {
    // Clamp negative extents to zero: disjoint boxes have no intersection.
    let inter_w = (box1[2].min(box2[2]) - box1[0].max(box2[0])).max(0.0);
    let inter_h = (box1[3].min(box2[3]) - box1[1].max(box2[1])).max(0.0);
    let intersection = inter_w * inter_h;
    let area1 = (box1[2] - box1[0]) * (box1[3] - box1[1]);
    let area2 = (box2[2] - box2[0]) * (box2[3] - box2[1]);
    let union = area1 + area2 - intersection;
    if union > 0.0 {
        intersection / union
    } else {
        0.0
    }
}
/// Keeps only detections whose confidence meets `threshold`.
fn filter_by_confidence(detections: &[Detection], threshold: f32) -> Vec<Detection> {
    let mut kept = Vec::new();
    for det in detections {
        if det.confidence >= threshold {
            kept.push(det.clone());
        }
    }
    kept
}
/// Greedy CTC decoding over a flat `[time_steps x 64]` logit matrix:
/// argmax per step, collapse consecutive repeats, drop blanks (index 0).
/// Class indices map onto `'a'..='z'` cyclically.
fn decode_ctc_output(logits: &[f32]) -> String {
    const VOCAB_SIZE: usize = 64;
    let mut result = String::new();
    let mut prev_char = None;
    // chunks_exact replaces manual start/end index slicing and lets the
    // optimizer drop per-access bounds checks; a trailing partial chunk is
    // ignored, exactly like the original integer-division loop.
    for step_logits in logits.chunks_exact(VOCAB_SIZE) {
        let (max_idx, _) = step_logits
            .iter()
            .enumerate()
            // total_cmp cannot panic on NaN, unlike partial_cmp().unwrap().
            .max_by(|(_, a), (_, b)| a.total_cmp(b))
            .expect("chunks_exact yields non-empty chunks");
        // CTC collapse: emit only non-blank classes that differ from the
        // previous step's argmax.
        if max_idx > 0 && Some(max_idx) != prev_char {
            result.push((b'a' + max_idx as u8 % 26) as char);
        }
        prev_char = Some(max_idx);
    }
    result
}
/// Simplified beam-search decode over flat `[time_steps x 64]` logits.
/// NOTE(review): only the first `beam_width` vocabulary entries are expanded
/// per step — a simplification retained from the original mock.
///
/// Fix over the original: `beam_width == 0` now returns an empty string
/// instead of panicking on `beams[0]` after the beam set empties.
fn beam_search_decode(logits: &[f32], beam_width: usize) -> String {
    const VOCAB_SIZE: usize = 64;
    if beam_width == 0 {
        return String::new();
    }
    let mut beams: Vec<(String, f32)> = vec![(String::new(), 0.0)];
    for step_logits in logits.chunks_exact(VOCAB_SIZE) {
        let mut new_beams = Vec::new();
        for (text, score) in &beams {
            for (char_idx, &logit) in step_logits.iter().enumerate().take(beam_width) {
                let mut new_text = text.clone();
                // Index 0 is the CTC blank: extend the score but not the text.
                if char_idx > 0 {
                    new_text.push((b'a' + char_idx as u8 % 26) as char);
                }
                new_beams.push((new_text, score + logit));
            }
        }
        // Stable sort keeps tied beams in expansion order;
        // total_cmp avoids the NaN panic of partial_cmp().unwrap().
        new_beams.sort_by(|a, b| b.1.total_cmp(&a.1));
        new_beams.truncate(beam_width);
        beams = new_beams;
    }
    beams[0].0.clone()
}
// Register every inference benchmark group; criterion_main! generates main().
criterion_group!(
    benches,
    bench_text_detection,
    bench_text_recognition,
    bench_math_model,
    bench_tensor_preprocessing,
    bench_output_postprocessing,
    bench_batch_inference,
    bench_model_warmup,
    bench_e2e_pipeline
);
criterion_main!(benches);

View File

@@ -0,0 +1,395 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::time::Duration;
/// Benchmark simple LaTeX expression generation
fn bench_simple_expressions(c: &mut Criterion) {
    let mut group = c.benchmark_group("simple_expressions");
    group.measurement_time(Duration::from_secs(5));
    // One case per basic Expression variant (Expression is defined elsewhere
    // in this file).
    let test_cases = vec![
        (
            "fraction",
            Expression::Fraction(
                Box::new(Expression::Number(1)),
                Box::new(Expression::Number(2)),
            ),
        ),
        (
            "power",
            Expression::Power(
                Box::new(Expression::Variable("x".to_string())),
                Box::new(Expression::Number(2)),
            ),
        ),
        (
            "sum",
            Expression::Sum(
                Box::new(Expression::Number(1)),
                Box::new(Expression::Number(2)),
            ),
        ),
        (
            "product",
            Expression::Product(
                Box::new(Expression::Variable("a".to_string())),
                Box::new(Expression::Variable("b".to_string())),
            ),
        ),
    ];
    for (name, expr) in test_cases {
        group.bench_with_input(BenchmarkId::new("to_latex", name), &expr, |b, expr| {
            b.iter(|| black_box(expr.to_latex()));
        });
    }
    group.finish();
}
/// Benchmark complex LaTeX expression generation
fn bench_complex_expressions(c: &mut Criterion) {
    let mut group = c.benchmark_group("complex_expressions");
    group.measurement_time(Duration::from_secs(8));
    // Create complex nested expressions
    // (factory helpers are defined elsewhere in this file).
    let test_cases = vec![
        ("matrix_2x2", create_matrix(2, 2)),
        ("matrix_3x3", create_matrix(3, 3)),
        ("matrix_4x4", create_matrix(4, 4)),
        ("integral", create_integral()),
        ("summation", create_summation()),
        ("nested_fraction", create_nested_fraction(3)),
        ("polynomial", create_polynomial(5)),
    ];
    for (name, expr) in test_cases {
        group.bench_with_input(BenchmarkId::new("to_latex", name), &expr, |b, expr| {
            b.iter(|| black_box(expr.to_latex()));
        });
    }
    group.finish();
}
/// Benchmark full-tree traversal (node counting) at growing depths.
fn bench_ast_traversal(c: &mut Criterion) {
    let mut group = c.benchmark_group("ast_traversal");
    group.measurement_time(Duration::from_secs(5));
    for &depth in &[3usize, 5, 7, 10] {
        let tree = create_nested_expression(depth);
        group.bench_with_input(BenchmarkId::new("depth", depth), &tree, |b, t| {
            b.iter(|| black_box(count_nodes(black_box(t))));
        });
    }
    group.finish();
}
/// Compare the naive per-node `format!` renderer against the
/// pre-sized-buffer variant on a long polynomial.
fn bench_string_building(c: &mut Criterion) {
    let mut group = c.benchmark_group("string_building");
    group.measurement_time(Duration::from_secs(5));
    // Degree 20: long enough that allocation strategy matters.
    let polynomial = create_polynomial(20);
    group.bench_function("to_latex_default", |b| {
        b.iter(|| black_box(polynomial.to_latex()));
    });
    group.bench_function("to_latex_with_capacity", |b| {
        b.iter(|| black_box(polynomial.to_latex_with_capacity()));
    });
    group.finish();
}
/// Benchmark escaping of LaTeX special characters on representative
/// inputs, from plain ASCII to mixed Unicode/math text.
fn bench_latex_escaping(c: &mut Criterion) {
    let mut group = c.benchmark_group("latex_escaping");
    group.measurement_time(Duration::from_secs(5));
    let samples = [
        ("no_special", "simple text"),
        ("underscores", "var_1 + var_2"),
        ("braces", "{x} + {y}"),
        ("mixed", "α + β_1^2 ∫ dx"),
    ];
    for (label, raw) in samples {
        group.bench_with_input(BenchmarkId::new("escape", label), &raw, |b, s| {
            b.iter(|| black_box(escape_latex(black_box(s))));
        });
    }
    group.finish();
}
/// Latency-target check: a typical OCR-produced expression should
/// render to LaTeX in under 5 ms.
fn bench_latency_target(c: &mut Criterion) {
    let mut group = c.benchmark_group("latency_target_5ms");
    group.measurement_time(Duration::from_secs(10));
    // More samples for a tighter estimate against the target.
    group.sample_size(100);
    let ocr_expr = create_typical_ocr_expression();
    group.bench_function("typical_ocr_expression", |b| {
        b.iter(|| black_box(ocr_expr.to_latex()));
    });
    group.finish();
}
/// Benchmark batch LaTeX generation
fn bench_batch_generation(c: &mut Criterion) {
let mut group = c.benchmark_group("batch_generation");
group.measurement_time(Duration::from_secs(10));
let batch_sizes = [10, 50, 100];
for size in batch_sizes {
let expressions: Vec<_> = (0..size).map(|i| create_polynomial(i % 10 + 1)).collect();
group.bench_with_input(
BenchmarkId::new("batch_size", size),
&expressions,
|b, exprs| {
b.iter(|| {
let results: Vec<_> = exprs.iter().map(|expr| expr.to_latex()).collect();
black_box(results)
});
},
);
}
group.finish();
}
// Mock AST and Expression types
// Mirrors the production math-expression tree closely enough to make the
// LaTeX-rendering benchmarks representative.
#[derive(Clone)]
enum Expression {
    /// Integer literal, rendered as its decimal digits.
    Number(i32),
    /// Named symbol, rendered verbatim.
    Variable(String),
    /// \frac{numerator}{denominator}.
    Fraction(Box<Expression>, Box<Expression>),
    /// {base}^{exponent}.
    Power(Box<Expression>, Box<Expression>),
    /// Binary addition: `a + b`.
    Sum(Box<Expression>, Box<Expression>),
    /// Binary multiplication: `a \cdot b`.
    Product(Box<Expression>, Box<Expression>),
    /// Row-major grid, rendered as a bmatrix environment.
    Matrix(Vec<Vec<Expression>>),
    /// Definite integral: (integrand, variable, lower bound, upper bound).
    Integral(Box<Expression>, String, String, String),
    /// Finite sum: (summand, index variable, lower bound, upper bound).
    Summation(Box<Expression>, String, String, String),
}
impl Expression {
    /// Render to LaTeX with the naive strategy: every node formats its
    /// children into fresh `String`s via recursive `format!` calls.
    fn to_latex(&self) -> String {
        match self {
            Expression::Number(n) => n.to_string(),
            Expression::Variable(v) => v.clone(),
            Expression::Fraction(num, den) => {
                format!("\\frac{{{}}}{{{}}}", num.to_latex(), den.to_latex())
            }
            Expression::Power(base, exp) => {
                format!("{{{}}}^{{{}}}", base.to_latex(), exp.to_latex())
            }
            Expression::Sum(a, b) => {
                format!("{} + {}", a.to_latex(), b.to_latex())
            }
            Expression::Product(a, b) => {
                format!("{} \\cdot {}", a.to_latex(), b.to_latex())
            }
            Expression::Matrix(rows) => {
                let mut result = String::from("\\begin{bmatrix}");
                for (i, row) in rows.iter().enumerate() {
                    for (j, cell) in row.iter().enumerate() {
                        result.push_str(&cell.to_latex());
                        if j < row.len() - 1 {
                            result.push_str(" & ");
                        }
                    }
                    if i < rows.len() - 1 {
                        result.push_str(" \\\\ ");
                    }
                }
                result.push_str("\\end{bmatrix}");
                result
            }
            Expression::Integral(expr, var, lower, upper) => {
                format!(
                    "\\int_{{{}}}^{{{}}} {} \\, d{}",
                    lower,
                    upper,
                    expr.to_latex(),
                    var
                )
            }
            Expression::Summation(expr, var, lower, upper) => {
                format!(
                    "\\sum_{{{}={}}}^{{{}}} {}",
                    var,
                    lower,
                    upper,
                    expr.to_latex()
                )
            }
        }
    }
    /// Render into a single pre-sized buffer. Produces the same string as
    /// `to_latex`, but with one allocation instead of one per node.
    fn to_latex_with_capacity(&self) -> String {
        let mut result = String::with_capacity(256);
        self.append_latex(&mut result);
        result
    }
    /// Append this node's LaTeX directly onto `buffer`.
    ///
    /// The original delegated to `to_latex()`, which allocated the full
    /// intermediate string anyway — making `to_latex_with_capacity`
    /// measure the exact same allocation pattern as `to_latex` and
    /// defeating the `string_building` benchmark comparison.
    fn append_latex(&self, buffer: &mut String) {
        use std::fmt::Write as _;
        match self {
            Expression::Number(n) => {
                // Writing to a String cannot fail.
                let _ = write!(buffer, "{}", n);
            }
            Expression::Variable(v) => buffer.push_str(v),
            Expression::Fraction(num, den) => {
                buffer.push_str("\\frac{");
                num.append_latex(buffer);
                buffer.push_str("}{");
                den.append_latex(buffer);
                buffer.push('}');
            }
            Expression::Power(base, exp) => {
                buffer.push('{');
                base.append_latex(buffer);
                buffer.push_str("}^{");
                exp.append_latex(buffer);
                buffer.push('}');
            }
            Expression::Sum(a, b) => {
                a.append_latex(buffer);
                buffer.push_str(" + ");
                b.append_latex(buffer);
            }
            Expression::Product(a, b) => {
                a.append_latex(buffer);
                buffer.push_str(" \\cdot ");
                b.append_latex(buffer);
            }
            Expression::Matrix(rows) => {
                buffer.push_str("\\begin{bmatrix}");
                for (i, row) in rows.iter().enumerate() {
                    for (j, cell) in row.iter().enumerate() {
                        cell.append_latex(buffer);
                        if j < row.len() - 1 {
                            buffer.push_str(" & ");
                        }
                    }
                    if i < rows.len() - 1 {
                        buffer.push_str(" \\\\ ");
                    }
                }
                buffer.push_str("\\end{bmatrix}");
            }
            Expression::Integral(expr, var, lower, upper) => {
                let _ = write!(buffer, "\\int_{{{}}}^{{{}}} ", lower, upper);
                expr.append_latex(buffer);
                buffer.push_str(" \\, d");
                buffer.push_str(var);
            }
            Expression::Summation(expr, var, lower, upper) => {
                let _ = write!(buffer, "\\sum_{{{}={}}}^{{{}}} ", var, lower, upper);
                expr.append_latex(buffer);
            }
        }
    }
}
/// Build a rows×cols matrix filled row-major with sequential integers.
fn create_matrix(rows: usize, cols: usize) -> Expression {
    let mut grid = Vec::with_capacity(rows);
    for r in 0..rows {
        let mut row = Vec::with_capacity(cols);
        for c in 0..cols {
            row.push(Expression::Number((r * cols + c) as i32));
        }
        grid.push(row);
    }
    Expression::Matrix(grid)
}
/// Build the fixture integral ∫₀¹ x² dx.
fn create_integral() -> Expression {
    let x_squared = Expression::Power(
        Box::new(Expression::Variable("x".to_string())),
        Box::new(Expression::Number(2)),
    );
    Expression::Integral(
        Box::new(x_squared),
        "x".to_string(),
        "0".to_string(),
        "1".to_string(),
    )
}
/// Build the fixture summation Σ_{i=1}^{n} i².
fn create_summation() -> Expression {
    let i_squared = Expression::Power(
        Box::new(Expression::Variable("i".to_string())),
        Box::new(Expression::Number(2)),
    );
    Expression::Summation(
        Box::new(i_squared),
        "i".to_string(),
        "1".to_string(),
        "n".to_string(),
    )
}
/// Build a continued fraction 1/(1/(1/…)) of the given nesting depth;
/// depth 0 is the bare literal 1.
fn create_nested_fraction(depth: usize) -> Expression {
    // Iterative equivalent of the recursive build: wrap from the
    // innermost literal outward.
    let mut expr = Expression::Number(1);
    for _ in 0..depth {
        expr = Expression::Fraction(Box::new(Expression::Number(1)), Box::new(expr));
    }
    expr
}
/// Build Σ_{i=0..=degree} (i+1)·x^i as a left-leaning Sum chain seeded
/// with a literal 0.
fn create_polynomial(degree: usize) -> Expression {
    (0..=degree).fold(Expression::Number(0), |acc, i| {
        let coeff = Expression::Number(i as i32 + 1);
        let power = Expression::Power(
            Box::new(Expression::Variable("x".to_string())),
            Box::new(Expression::Number(i as i32)),
        );
        Expression::Sum(
            Box::new(acc),
            Box::new(Expression::Product(Box::new(coeff), Box::new(power))),
        )
    })
}
/// Build a left-leaning Sum chain of the given depth over the leaf `x`:
/// Sum(Sum(…Sum(x, 1)…, depth-1), depth).
fn create_nested_expression(depth: usize) -> Expression {
    // Iterative equivalent of the recursive build.
    let mut expr = Expression::Variable("x".to_string());
    for level in 1..=depth {
        expr = Expression::Sum(
            Box::new(expr),
            Box::new(Expression::Number(level as i32)),
        );
    }
    expr
}
/// Build the right-hand side of (a + b)² = a² + 2ab + b², a typical
/// expression size produced by the OCR front-end.
fn create_typical_ocr_expression() -> Expression {
    let var = |s: &str| Box::new(Expression::Variable(s.to_string()));
    let a_squared = Expression::Power(var("a"), Box::new(Expression::Number(2)));
    let two_a = Expression::Product(Box::new(Expression::Number(2)), var("a"));
    let two_ab = Expression::Product(Box::new(two_a), var("b"));
    let b_squared = Expression::Power(var("b"), Box::new(Expression::Number(2)));
    Expression::Sum(
        Box::new(Expression::Sum(Box::new(a_squared), Box::new(two_ab))),
        Box::new(b_squared),
    )
}
/// Count every node in the expression tree, leaves included.
fn count_nodes(expr: &Expression) -> usize {
    // Count descendants per shape, then add one for `expr` itself.
    let children: usize = match expr {
        Expression::Number(_) | Expression::Variable(_) => 0,
        Expression::Fraction(a, b)
        | Expression::Power(a, b)
        | Expression::Sum(a, b)
        | Expression::Product(a, b) => count_nodes(a) + count_nodes(b),
        Expression::Matrix(rows) => rows
            .iter()
            .flat_map(|row| row.iter())
            .map(count_nodes)
            .sum(),
        Expression::Integral(inner, _, _, _) | Expression::Summation(inner, _, _, _) => {
            count_nodes(inner)
        }
    };
    children + 1
}
/// Escape LaTeX-reserved characters in `text` for text-mode output.
///
/// Replaces `_ { } & % $ # ^ ~ \` with their LaTeX escape sequences;
/// every other character passes through unchanged.
fn escape_latex(text: &str) -> String {
    // Push into one pre-sized buffer instead of allocating a String per
    // character (the original `.map(..to_string()).collect()` did).
    let mut out = String::with_capacity(text.len());
    for c in text.chars() {
        match c {
            '_' => out.push_str("\\_"),
            '{' => out.push_str("\\{"),
            '}' => out.push_str("\\}"),
            '&' => out.push_str("\\&"),
            '%' => out.push_str("\\%"),
            '$' => out.push_str("\\$"),
            '#' => out.push_str("\\#"),
            '^' => out.push_str("\\^{}"),
            '~' => out.push_str("\\~{}"),
            '\\' => out.push_str("\\textbackslash{}"),
            _ => out.push(c),
        }
    }
    out
}
// Register every LaTeX-generation benchmark with the Criterion harness.
criterion_group!(
    benches,
    bench_simple_expressions,
    bench_complex_expressions,
    bench_ast_traversal,
    bench_string_building,
    bench_latex_escaping,
    bench_latency_target,
    bench_batch_generation
);
// Expands to the `main` entry point that runs the `benches` group.
criterion_main!(benches);

View File

@@ -0,0 +1,437 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::time::Duration;
/// Benchmark peak memory during inference
///
/// Runs the full single-image mock pipeline (model load, preprocessing,
/// inference, postprocessing) while sampling the memory tracker.
fn bench_peak_memory_inference(c: &mut Criterion) {
    let mut group = c.benchmark_group("peak_memory_inference");
    group.measurement_time(Duration::from_secs(10));
    let sizes = [(224, 224), (384, 384), (512, 512)];
    for (w, h) in sizes {
        group.bench_with_input(
            BenchmarkId::new("single_inference", format!("{}x{}", w, h)),
            &(w, h),
            |b, &(width, height)| {
                // iter_with_large_drop keeps deallocation of the big model
                // buffer outside the measured section.
                b.iter_with_large_drop(|| {
                    // Must be `mut`: `peak_usage` takes `&mut self` (the
                    // original immutable binding failed to compile, E0596).
                    let mut memory_tracker = MemoryTracker::new();
                    // Simulate model loading
                    let model = load_model();
                    // Create input
                    let image = create_image(width, height);
                    // Preprocessing
                    let preprocessed = preprocess(image);
                    // Inference
                    let output = run_inference(&model, preprocessed);
                    // Postprocessing
                    let result = postprocess(output);
                    let peak_memory = memory_tracker.peak_usage();
                    black_box((result, peak_memory))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark memory per image in batch
///
/// Measures peak usage for a whole 384x384 batch and derives a per-image
/// figure so batch sizes can be compared directly.
fn bench_memory_per_batch_image(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_per_batch_image");
    group.measurement_time(Duration::from_secs(15));
    let batch_sizes = [1, 4, 8, 16, 32];
    for batch_size in batch_sizes {
        group.bench_with_input(
            BenchmarkId::new("batch_inference", batch_size),
            &batch_size,
            |b, &size| {
                b.iter_with_large_drop(|| {
                    // Must be `mut`: `peak_usage` takes `&mut self` (the
                    // original immutable binding failed to compile, E0596).
                    let mut memory_tracker = MemoryTracker::new();
                    let model = load_model();
                    let batch = create_batch(size, 384, 384);
                    let output = run_batch_inference(&model, batch);
                    let total_memory = memory_tracker.peak_usage();
                    // Normalize to a per-image cost.
                    let per_image = total_memory / size;
                    black_box((output, per_image))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark model loading memory
///
/// Tracks peak allocation while loading each mock model individually,
/// then all three together.
fn bench_model_loading_memory(c: &mut Criterion) {
    let mut group = c.benchmark_group("model_loading_memory");
    group.measurement_time(Duration::from_secs(10));
    group.bench_function("detection_model", |b| {
        b.iter_with_large_drop(|| {
            // `mut` required: `peak_usage` takes `&mut self` (the original
            // immutable bindings failed to compile, E0596).
            let mut tracker = MemoryTracker::new();
            let model = load_detection_model();
            let memory = tracker.peak_usage();
            black_box((model, memory))
        });
    });
    group.bench_function("recognition_model", |b| {
        b.iter_with_large_drop(|| {
            let mut tracker = MemoryTracker::new();
            let model = load_recognition_model();
            let memory = tracker.peak_usage();
            black_box((model, memory))
        });
    });
    group.bench_function("math_model", |b| {
        b.iter_with_large_drop(|| {
            let mut tracker = MemoryTracker::new();
            let model = load_math_model();
            let memory = tracker.peak_usage();
            black_box((model, memory))
        });
    });
    group.bench_function("all_models", |b| {
        b.iter_with_large_drop(|| {
            let mut tracker = MemoryTracker::new();
            let detection = load_detection_model();
            let recognition = load_recognition_model();
            let math = load_math_model();
            let total_memory = tracker.peak_usage();
            black_box((detection, recognition, math, total_memory))
        });
    });
    group.finish();
}
/// Benchmark memory growth across 100 sequential inferences, sampling
/// resident usage every 10th run to estimate leak-like drift.
fn bench_memory_growth(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_growth");
    group.measurement_time(Duration::from_secs(20));
    group.bench_function("sequential_inferences", |b| {
        b.iter_with_large_drop(|| {
            let tracker = MemoryTracker::new();
            let model = load_model();
            let mut memory_samples = Vec::with_capacity(10);
            for run in 0..100 {
                let tensor = preprocess(create_image(384, 384));
                let _output = run_inference(&model, tensor);
                // Sample usage on every 10th inference.
                if run % 10 == 0 {
                    memory_samples.push(tracker.current_usage());
                }
            }
            let growth = calculate_memory_growth(&memory_samples);
            black_box((memory_samples, growth))
        });
    });
    group.finish();
}
/// Benchmark an allocate / partially-free / allocate pattern that tends
/// to fragment the heap.
fn bench_memory_fragmentation(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_fragmentation");
    group.measurement_time(Duration::from_secs(10));
    group.bench_function("allocate_deallocate_pattern", |b| {
        b.iter(|| {
            // Phase 1: 100 buffers of 1..=10 KiB.
            let mut allocations: Vec<Vec<u8>> =
                (0..100).map(|i| vec![0u8; (i % 10 + 1) * 1024]).collect();
            // Phase 2: drop every odd-indexed buffer to punch holes.
            let mut index = 0;
            allocations.retain(|_| {
                let keep = index % 2 == 0;
                index += 1;
                keep
            });
            // Phase 3: allocate 50 more buffers into the fragmented space.
            allocations.extend((0..50).map(|i| vec![0u8; (i % 5 + 1) * 2048]));
            black_box(allocations)
        });
    });
    group.finish();
}
/// Benchmark cache memory overhead
///
/// Measures peak allocation for embedding caches of increasing entry
/// counts (512 floats per entry).
fn bench_cache_memory(c: &mut Criterion) {
    let mut group = c.benchmark_group("cache_memory");
    group.measurement_time(Duration::from_secs(10));
    let cache_sizes = [100, 1000, 10000];
    for cache_size in cache_sizes {
        group.bench_with_input(
            BenchmarkId::new("embedding_cache", cache_size),
            &cache_size,
            |b, &size| {
                b.iter_with_large_drop(|| {
                    // `mut` required: `peak_usage` takes `&mut self` (the
                    // original immutable binding failed to compile, E0596).
                    let mut tracker = MemoryTracker::new();
                    let cache = create_embedding_cache(size);
                    let memory = tracker.peak_usage();
                    black_box((cache, memory))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark memory pool efficiency
///
/// Compares 100 fresh 1 MiB heap allocations per iteration against
/// handing out pre-allocated blocks from a fixed pool.
fn bench_memory_pools(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_pools");
    group.measurement_time(Duration::from_secs(8));
    group.bench_function("without_pool", |b| {
        b.iter(|| {
            let mut allocations = Vec::new();
            for _ in 0..100 {
                let buffer = vec![0u8; 1024 * 1024];
                allocations.push(buffer);
            }
            black_box(allocations)
        });
    });
    group.bench_function("with_pool", |b| {
        let mut pool = MemoryPool::new(1024 * 1024, 100);
        b.iter(|| {
            let mut handles = Vec::new();
            for _ in 0..100 {
                handles.push(pool.allocate());
            }
            black_box(&handles);
            // Return the block indices to the free list: without this the
            // pool is exhausted after the first iteration and every later
            // iteration just measures popping `None` from an empty Vec.
            pool.available.extend(handles.into_iter().flatten());
        });
    });
    group.finish();
}
/// Benchmark tensor memory layouts
///
/// Compares allocation of interleaved (HWC) vs planar (CHW) tensors and
/// the cost of converting between the two layouts.
fn bench_tensor_layouts(c: &mut Criterion) {
    let mut group = c.benchmark_group("tensor_layouts");
    group.measurement_time(Duration::from_secs(8));
    let size = (384, 384, 3);
    group.bench_function("hwc_layout", |b| {
        b.iter(|| {
            // `mut` required: `peak_usage` takes `&mut self` (the original
            // immutable bindings failed to compile, E0596).
            let mut tracker = MemoryTracker::new();
            let tensor = create_hwc_tensor(size.0, size.1, size.2);
            let memory = tracker.peak_usage();
            black_box((tensor, memory))
        });
    });
    group.bench_function("chw_layout", |b| {
        b.iter(|| {
            let mut tracker = MemoryTracker::new();
            let tensor = create_chw_tensor(size.0, size.1, size.2);
            let memory = tracker.peak_usage();
            black_box((tensor, memory))
        });
    });
    group.bench_function("layout_conversion", |b| {
        let hwc = create_hwc_tensor(size.0, size.1, size.2);
        b.iter(|| {
            let mut tracker = MemoryTracker::new();
            let chw = convert_hwc_to_chw(&hwc, size.0, size.1, size.2);
            let memory = tracker.peak_usage();
            black_box((chw, memory))
        });
    });
    group.finish();
}
// Mock implementations
/// Tracks process memory deltas relative to a baseline captured at
/// construction time.
struct MemoryTracker {
    // Process usage observed when the tracker was created.
    initial_usage: usize,
    // High-water mark maintained by `peak_usage`.
    peak: usize,
}
impl MemoryTracker {
    /// Snapshot current process usage as the baseline for later deltas.
    fn new() -> Self {
        Self {
            initial_usage: get_current_memory_usage(),
            peak: 0,
        }
    }
    /// Bytes allocated since construction.
    // NOTE(review): this subtraction underflows if usage ever drops below
    // the baseline; harmless with the mock reader (always 0), but worth a
    // `saturating_sub` if a real memory query is wired in.
    fn current_usage(&self) -> usize {
        get_current_memory_usage() - self.initial_usage
    }
    /// Update and return the high-water mark of `current_usage`.
    fn peak_usage(&mut self) -> usize {
        let current = self.current_usage();
        self.peak = self.peak.max(current);
        self.peak
    }
}
/// Current process memory usage in bytes; the benchmark mock always
/// returns 0, so tracker deltas are 0 as well.
fn get_current_memory_usage() -> usize {
    // In production, this would query actual memory usage
    // For benchmarking, we'll estimate based on allocations
    0
}
// Mock stand-ins for the real pipeline types; the loader functions below
// size them to approximate realistic model/image footprints.
type Model = Vec<u8>;
type Image = Vec<u8>;
type Tensor = Vec<f32>;
type Output = Vec<f32>;
/// Mock generic model load: a 100 MiB zeroed weight blob.
fn load_model() -> Model {
    let bytes = 100 * 1024 * 1024;
    vec![0u8; bytes]
}
/// Mock detection model load: 150 MiB of zeroed weights.
fn load_detection_model() -> Model {
    let bytes = 150 * 1024 * 1024;
    vec![0u8; bytes]
}
/// Mock recognition model load: 80 MiB of zeroed weights.
fn load_recognition_model() -> Model {
    let bytes = 80 * 1024 * 1024;
    vec![0u8; bytes]
}
/// Mock math model load: 120 MiB of zeroed weights.
fn load_math_model() -> Model {
    let bytes = 120 * 1024 * 1024;
    vec![0u8; bytes]
}
/// Mid-gray RGB image buffer of the given dimensions (3 bytes/pixel).
fn create_image(width: u32, height: u32) -> Image {
    let len = (width * height * 3) as usize;
    vec![128u8; len]
}
/// Batch of `batch_size` identical mock images.
fn create_batch(batch_size: usize, width: u32, height: u32) -> Vec<Image> {
    std::iter::repeat_with(|| create_image(width, height))
        .take(batch_size)
        .collect()
}
/// Scale raw image bytes into [0, 1] floats.
fn preprocess(image: Image) -> Tensor {
    image.into_iter().map(|px| px as f32 / 255.0).collect()
}
/// Mock forward pass: doubles every input activation.
fn run_inference(_model: &Model, input: Tensor) -> Output {
    input.into_iter().map(|x| x * 2.0).collect()
}
/// Mock batched inference: preprocess then double each value, per image.
fn run_batch_inference(_model: &Model, batch: Vec<Image>) -> Vec<Output> {
    let mut outputs = Vec::with_capacity(batch.len());
    for img in batch {
        let tensor = preprocess(img);
        outputs.push(tensor.iter().map(|&x| x * 2.0).collect());
    }
    outputs
}
/// Format the first logit as a `"result_X.XX"` label.
///
/// An empty output falls back to 0.0 instead of panicking on the
/// original's unchecked `output[0]` index.
fn postprocess(output: Output) -> String {
    let first = output.first().copied().unwrap_or(0.0);
    format!("result_{:.2}", first)
}
/// Fractional growth between the first and last memory sample.
///
/// Returns 0.0 when fewer than two samples exist, and also when the
/// baseline sample is zero — the original divided by `first` regardless,
/// which yields NaN/inf with the mock tracker (all samples are 0).
fn calculate_memory_growth(samples: &[usize]) -> f64 {
    if samples.len() < 2 {
        return 0.0;
    }
    let first = samples[0] as f64;
    let last = samples[samples.len() - 1] as f64;
    if first == 0.0 {
        // Avoid 0/0 -> NaN (or x/0 -> inf) on a zero baseline.
        return 0.0;
    }
    (last - first) / first
}
/// Mock cache of `size` embeddings, each 512 constant floats.
fn create_embedding_cache(size: usize) -> Vec<Vec<f32>> {
    std::iter::repeat_with(|| vec![0.5f32; 512])
        .take(size)
        .collect()
}
/// Fixed-size pool of pre-allocated byte blocks handed out by index.
struct MemoryPool {
    // Size in bytes of every block in `blocks`.
    block_size: usize,
    // The backing storage; never grows or shrinks after construction.
    blocks: Vec<Vec<u8>>,
    // Free list of indices into `blocks`.
    available: Vec<usize>,
}
impl MemoryPool {
    /// Pre-allocate `count` zeroed blocks of `block_size` bytes each;
    /// every block starts on the free list.
    fn new(block_size: usize, count: usize) -> Self {
        let blocks = (0..count).map(|_| vec![0u8; block_size]).collect();
        let available = (0..count).collect();
        Self {
            block_size,
            blocks,
            available,
        }
    }
    /// Pop a free block index, or `None` when the pool is exhausted.
    // NOTE(review): there is no matching `free`; callers must push indices
    // back onto `available` themselves for blocks to be reused.
    fn allocate(&mut self) -> Option<usize> {
        self.available.pop()
    }
}
/// Allocate an interleaved (HWC) tensor filled with the constant 0.5.
fn create_hwc_tensor(height: u32, width: u32, channels: u32) -> Vec<f32> {
    let element_count = (height * width * channels) as usize;
    std::iter::repeat(0.5f32).take(element_count).collect()
}
/// Allocate a planar (CHW) tensor filled with the constant 0.5.
fn create_chw_tensor(height: u32, width: u32, channels: u32) -> Vec<f32> {
    let element_count = (channels * height * width) as usize;
    std::iter::repeat(0.5f32).take(element_count).collect()
}
/// Transpose an interleaved HWC tensor into planar CHW order.
fn convert_hwc_to_chw(hwc: &[f32], height: u32, width: u32, channels: u32) -> Vec<f32> {
    let (h, w, c) = (height as usize, width as usize, channels as usize);
    let mut chw = Vec::with_capacity(hwc.len());
    // Emit one full H x W plane per channel.
    for channel in 0..c {
        for row in 0..h {
            for col in 0..w {
                chw.push(hwc[(row * w + col) * c + channel]);
            }
        }
    }
    chw
}
// Register every memory-profiling benchmark with the Criterion harness.
criterion_group!(
    benches,
    bench_peak_memory_inference,
    bench_memory_per_batch_image,
    bench_model_loading_memory,
    bench_memory_growth,
    bench_memory_fragmentation,
    bench_cache_memory,
    bench_memory_pools,
    bench_tensor_layouts
);
// Expands to the `main` entry point that runs the `benches` group.
criterion_main!(benches);

View File

@@ -0,0 +1,194 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::time::Duration;
/// Benchmark end-to-end single-image OCR latency across resolutions
/// from small (224²) to very large (1024²).
fn bench_single_image(c: &mut Criterion) {
    let mut group = c.benchmark_group("single_image_ocr");
    group.measurement_time(Duration::from_secs(10));
    group.sample_size(50);
    let resolutions = [
        (224, 224),
        (384, 384),
        (512, 512),
        (768, 768),
        (1024, 1024),
    ];
    for (w, h) in resolutions {
        group.bench_with_input(
            BenchmarkId::new("resolution", format!("{}x{}", w, h)),
            &(w, h),
            |b, &(width, height)| {
                // Synthetic mid-gray RGB frame.
                let image_data = vec![128u8; (width * height * 3) as usize];
                b.iter(|| {
                    // Mock pipeline: preprocess -> features -> recognition.
                    let gray = preprocess_image(black_box(&image_data), width, height);
                    let feats = extract_features(black_box(&gray));
                    black_box(recognize_text(black_box(&feats)))
                });
            },
        );
    }
    group.finish();
}
/// Benchmark sequential OCR over batches of 1..32 images at 384x384.
fn bench_batch_processing(c: &mut Criterion) {
    let mut group = c.benchmark_group("batch_processing");
    group.measurement_time(Duration::from_secs(15));
    group.sample_size(30);
    let (img_w, img_h) = (384u32, 384u32);
    for batch in [1usize, 4, 8, 16, 32] {
        group.bench_with_input(BenchmarkId::new("batch_size", batch), &batch, |b, &n| {
            // One synthetic mid-gray frame per batch slot.
            let frames: Vec<Vec<u8>> = (0..n)
                .map(|_| vec![128u8; (img_w * img_h * 3) as usize])
                .collect();
            b.iter(|| {
                // Run the whole batch through the mock pipeline.
                let texts: Vec<String> = frames
                    .iter()
                    .map(|frame| {
                        let gray = preprocess_image(black_box(frame), img_w, img_h);
                        let feats = extract_features(black_box(&gray));
                        recognize_text(black_box(&feats))
                    })
                    .collect();
                black_box(texts)
            });
        });
    }
    group.finish();
}
/// Compare cold-start latency (model load included in the measurement)
/// against warm inference (model loaded once up front).
fn bench_cold_vs_warm(c: &mut Criterion) {
    let mut group = c.benchmark_group("cold_vs_warm");
    group.measurement_time(Duration::from_secs(10));
    let frame = vec![128u8; (384 * 384 * 3) as usize];
    // Cold: pay the model-initialization cost inside the measured region.
    group.bench_function("cold_start", |b| {
        b.iter_with_large_drop(|| {
            let _model = initialize_model();
            let gray = preprocess_image(black_box(&frame), 384, 384);
            let feats = extract_features(black_box(&gray));
            black_box(recognize_text(black_box(&feats)))
        });
    });
    // Warm: initialize once outside the loop, measure inference only.
    group.bench_function("warm_inference", |b| {
        let _model = initialize_model();
        b.iter(|| {
            let gray = preprocess_image(black_box(&frame), 384, 384);
            let feats = extract_features(black_box(&gray));
            black_box(recognize_text(black_box(&feats)))
        });
    });
    group.finish();
}
/// Latency-distribution run for P95/P99 analysis against the 100 ms
/// target; the larger sample count tightens the percentile estimates.
fn bench_latency_percentiles(c: &mut Criterion) {
    let mut group = c.benchmark_group("latency_percentiles");
    group.measurement_time(Duration::from_secs(20));
    group.sample_size(100);
    let frame = vec![128u8; (384 * 384 * 3) as usize];
    group.bench_function("p95_target_100ms", |b| {
        b.iter(|| {
            let gray = preprocess_image(black_box(&frame), 384, 384);
            let feats = extract_features(black_box(&gray));
            black_box(recognize_text(black_box(&feats)))
        });
    });
    group.finish();
}
/// Throughput run: Criterion reports elements/second, i.e. OCR'd
/// images per second for a single 384x384 frame.
fn bench_throughput(c: &mut Criterion) {
    let mut group = c.benchmark_group("throughput");
    group.measurement_time(Duration::from_secs(15));
    group.throughput(criterion::Throughput::Elements(1));
    let frame = vec![128u8; (384 * 384 * 3) as usize];
    group.bench_function("images_per_second", |b| {
        b.iter(|| {
            let gray = preprocess_image(black_box(&frame), 384, 384);
            let feats = extract_features(black_box(&gray));
            black_box(recognize_text(black_box(&feats)))
        });
    });
    group.finish();
}
// Mock implementations for benchmarking
// In production, these would be actual OCR pipeline functions
/// Mock model load: pays a fixed 50 ms startup cost, returns dummy
/// weights.
fn initialize_model() -> Vec<u8> {
    // Simulated deserialization latency.
    std::thread::sleep(Duration::from_millis(50));
    vec![0u8; 1024]
}
/// Mock preprocessing: average-RGB grayscale conversion.
/// Panics if `data` is not a multiple of 3 bytes, like the original.
fn preprocess_image(data: &[u8], width: u32, height: u32) -> Vec<u8> {
    let mut processed = Vec::with_capacity((width * height) as usize);
    processed.extend(data.chunks(3).map(|px| {
        // Unweighted mean of the three channels.
        let total = px[0] as u32 + px[1] as u32 + px[2] as u32;
        (total / 3) as u8
    }));
    processed
}
/// Mock feature extraction: scale grayscale bytes into [0, 1] floats.
fn extract_features(data: &[u8]) -> Vec<f32> {
    data.iter().map(|&px| f32::from(px) / 255.0).collect()
}
fn recognize_text(features: &[f32]) -> String {
// Simulate text recognition
let sum: f32 = features.iter().take(100).sum();
format!("recognized_text_{:.2}", sum)
}
// Register every end-to-end OCR benchmark with the Criterion harness.
criterion_group!(
    benches,
    bench_single_image,
    bench_batch_processing,
    bench_cold_vs_warm,
    bench_latency_percentiles,
    bench_throughput
);
// Expands to the `main` entry point that runs the `benches` group.
criterion_main!(benches);

View File

@@ -0,0 +1,224 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use scipix_ocr::optimize::*;
/// Compare the SIMD RGBA->gray kernel against a scalar baseline over
/// square images from 256² to 2048² pixels.
fn bench_grayscale(c: &mut Criterion) {
    let mut group = c.benchmark_group("grayscale");
    for &dim in [256usize, 512, 1024, 2048].iter() {
        let pixel_count = dim * dim;
        // Synthetic RGBA input with a repeating byte pattern.
        let rgba: Vec<u8> = (0..pixel_count * 4).map(|i| (i % 256) as u8).collect();
        let mut gray = vec![0u8; pixel_count];
        group.throughput(Throughput::Elements(pixel_count as u64));
        // SIMD kernel.
        group.bench_with_input(BenchmarkId::new("simd", dim), &dim, |b, _| {
            b.iter(|| {
                simd::simd_grayscale(black_box(&rgba), black_box(&mut gray));
            });
        });
        // Scalar baseline with the same BT.601-style integer weights
        // (77/150/29 out of 256).
        group.bench_with_input(BenchmarkId::new("scalar", dim), &dim, |b, _| {
            b.iter(|| {
                for (dst, px) in gray.iter_mut().zip(rgba.chunks_exact(4)) {
                    let (r, g, bl) = (px[0] as u32, px[1] as u32, px[2] as u32);
                    *dst = ((r * 77 + g * 150 + bl * 29) >> 8) as u8;
                }
            });
        });
    }
    group.finish();
}
/// Compare SIMD binarization at threshold 128 against a scalar loop.
fn bench_threshold(c: &mut Criterion) {
    let mut group = c.benchmark_group("threshold");
    for len in [1024usize, 4096, 16384, 65536].iter() {
        let gray: Vec<u8> = (0..*len).map(|i| (i % 256) as u8).collect();
        let mut out = vec![0u8; *len];
        group.throughput(Throughput::Elements(*len as u64));
        // SIMD kernel.
        group.bench_with_input(BenchmarkId::new("simd", len), len, |b, _| {
            b.iter(|| {
                simd::simd_threshold(black_box(&gray), black_box(128), black_box(&mut out));
            });
        });
        // Scalar baseline: >=128 -> 255, else 0.
        group.bench_with_input(BenchmarkId::new("scalar", len), len, |b, _| {
            b.iter(|| {
                for (dst, &src) in out.iter_mut().zip(gray.iter()) {
                    *dst = if src >= 128 { 255 } else { 0 };
                }
            });
        });
    }
    group.finish();
}
/// Compare SIMD mean/std-dev normalization against a scalar baseline.
/// Each variant works on its own clone of the ramp input.
fn bench_normalize(c: &mut Criterion) {
    let mut group = c.benchmark_group("normalize");
    for len in [128usize, 512, 2048, 8192].iter() {
        let data: Vec<f32> = (0..*len).map(|i| i as f32).collect();
        group.throughput(Throughput::Elements(*len as u64));
        // SIMD kernel.
        group.bench_with_input(BenchmarkId::new("simd", len), len, |b, _| {
            let mut work = data.clone();
            b.iter(|| {
                simd::simd_normalize(black_box(&mut work));
            });
        });
        // Scalar baseline: subtract mean, divide by std-dev + epsilon.
        group.bench_with_input(BenchmarkId::new("scalar", len), len, |b, _| {
            let mut work = data.clone();
            b.iter(|| {
                let mean = work.iter().sum::<f32>() / work.len() as f32;
                let variance =
                    work.iter().map(|x| (x - mean).powi(2)).sum::<f32>() / work.len() as f32;
                let std_dev = variance.sqrt() + 1e-8;
                work.iter_mut().for_each(|x| *x = (*x - mean) / std_dev);
            });
        });
    }
    group.finish();
}
/// Compare chunked parallel map against a sequential iterator over the
/// same quadratic kernel.
fn bench_parallel_map(c: &mut Criterion) {
    let mut group = c.benchmark_group("parallel_map");
    for n in [100i32, 1000, 10000].iter() {
        let input: Vec<i32> = (0..*n).collect();
        group.throughput(Throughput::Elements(*n as u64));
        // Parallel path: chunks of 100 elements.
        group.bench_with_input(BenchmarkId::new("parallel", n), n, |b, _| {
            b.iter(|| {
                parallel::parallel_map_chunked(black_box(input.clone()), 100, |x| {
                    x * x + x * 2 + 1
                })
            });
        });
        // Sequential baseline.
        group.bench_with_input(BenchmarkId::new("sequential", n), n, |b, _| {
            b.iter(|| input.iter().map(|&x| x * x + x * 2 + 1).collect::<Vec<_>>());
        });
    }
    group.finish();
}
/// Compare acquiring a recycled buffer from the pool against a fresh
/// heap allocation, each followed by a 512-byte write.
fn bench_buffer_pool(c: &mut Criterion) {
    let mut group = c.benchmark_group("buffer_pool");
    let pool = memory::BufferPool::new(|| Vec::with_capacity(1024), 10, 100);
    // Pooled path.
    group.bench_function("pooled", |b| {
        b.iter(|| {
            let mut buffer = pool.acquire();
            buffer.extend_from_slice(&[0u8; 512]);
            black_box(&buffer);
        });
    });
    // Direct-allocation path.
    group.bench_function("direct", |b| {
        b.iter(|| {
            let mut buffer = Vec::with_capacity(1024);
            buffer.extend_from_slice(&[0u8; 512]);
            black_box(&buffer);
        });
    });
    group.finish();
}
/// Benchmark weight quantization, dequantization, and per-channel
/// quantization over weights spread evenly across [-1, 1).
fn bench_quantization(c: &mut Criterion) {
    let mut group = c.benchmark_group("quantization");
    for n in [1024usize, 4096, 16384].iter() {
        let weights: Vec<f32> = (0..*n)
            .map(|i| (i as f32 / *n as f32) * 2.0 - 1.0)
            .collect();
        group.throughput(Throughput::Elements(*n as u64));
        // Forward quantization.
        group.bench_with_input(BenchmarkId::new("quantize", n), n, |b, _| {
            b.iter(|| quantize::quantize_weights(black_box(&weights)));
        });
        // Inverse transform on pre-quantized data.
        let (quantized, params) = quantize::quantize_weights(&weights);
        group.bench_with_input(BenchmarkId::new("dequantize", n), n, |b, _| {
            b.iter(|| quantize::dequantize(black_box(&quantized), black_box(params)));
        });
        // Per-channel quantization over an (n/64, 64) shape.
        let shape = vec![*n / 64, 64];
        group.bench_with_input(BenchmarkId::new("per_channel", n), n, |b, _| {
            b.iter(|| {
                quantize::PerChannelQuant::from_f32(black_box(&weights), black_box(shape.clone()))
            });
        });
    }
    group.finish();
}
/// Compare bump-arena allocation (reset + 100 aligned 1 KiB slices)
/// against 100 individual heap Vec allocations.
fn bench_memory_operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_ops");
    let mut arena = memory::Arena::with_capacity(1024 * 1024);
    // Arena path: rewind, then carve slices.
    group.bench_function("arena_alloc", |b| {
        b.iter(|| {
            arena.reset();
            for _ in 0..100 {
                black_box(arena.alloc(1024, 8));
            }
        });
    });
    // Heap path: a fresh Vec per slice.
    group.bench_function("vec_alloc", |b| {
        b.iter(|| {
            for _ in 0..100 {
                let mut scratch = Vec::with_capacity(1024);
                scratch.resize(1024, 0u8);
                black_box(&scratch);
            }
        });
    });
    group.finish();
}
// Register every SIMD/memory-optimization benchmark with Criterion.
criterion_group!(
    benches,
    bench_grayscale,
    bench_threshold,
    bench_normalize,
    bench_parallel_map,
    bench_buffer_pool,
    bench_quantization,
    bench_memory_operations
);
// Expands to the `main` entry point that runs the `benches` group.
criterion_main!(benches);

View File

@@ -0,0 +1,356 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::time::Duration;
/// Benchmark individual preprocessing transforms
fn bench_individual_transforms(c: &mut Criterion) {
let mut group = c.benchmark_group("individual_transforms");
group.measurement_time(Duration::from_secs(8));
let sizes = [(224, 224), (384, 384), (512, 512)];
for (w, h) in sizes {
let image_data = generate_test_image(w, h);
// Grayscale conversion
group.bench_with_input(
BenchmarkId::new("grayscale", format!("{}x{}", w, h)),
&image_data,
|b, img| {
b.iter(|| black_box(convert_to_grayscale(black_box(img), w, h)));
},
);
// Gaussian blur
group.bench_with_input(
BenchmarkId::new("gaussian_blur", format!("{}x{}", w, h)),
&image_data,
|b, img| {
b.iter(|| black_box(apply_gaussian_blur(black_box(img), w, h, 5)));
},
);
// Adaptive threshold
group.bench_with_input(
BenchmarkId::new("threshold", format!("{}x{}", w, h)),
&image_data,
|b, img| {
b.iter(|| black_box(apply_adaptive_threshold(black_box(img), w, h)));
},
);
// Edge detection
group.bench_with_input(
BenchmarkId::new("edge_detection", format!("{}x{}", w, h)),
&image_data,
|b, img| {
b.iter(|| black_box(detect_edges(black_box(img), w, h)));
},
);
// Normalization
group.bench_with_input(
BenchmarkId::new("normalize", format!("{}x{}", w, h)),
&image_data,
|b, img| {
b.iter(|| black_box(normalize_image(black_box(img))));
},
);
}
group.finish();
}
/// Benchmark full preprocessing pipeline
fn bench_full_pipeline(c: &mut Criterion) {
let mut group = c.benchmark_group("full_pipeline");
group.measurement_time(Duration::from_secs(10));
let sizes = [(224, 224), (384, 384), (512, 512)];
for (w, h) in sizes {
let image_data = generate_test_image(w, h);
group.bench_with_input(
BenchmarkId::new("sequential", format!("{}x{}", w, h)),
&(image_data.clone(), w, h),
|b, (img, width, height)| {
b.iter(|| {
let gray = convert_to_grayscale(black_box(img), *width, *height);
let blurred = apply_gaussian_blur(&gray, *width, *height, 5);
let threshold = apply_adaptive_threshold(&blurred, *width, *height);
let edges = detect_edges(&threshold, *width, *height);
let normalized = normalize_image(&edges);
black_box(normalized)
});
},
);
}
group.finish();
}
/// Benchmark parallel vs sequential preprocessing
fn bench_parallel_vs_sequential(c: &mut Criterion) {
let mut group = c.benchmark_group("parallel_vs_sequential");
group.measurement_time(Duration::from_secs(10));
// Create batch of images
let batch_size = 8;
let size = (384, 384);
let images: Vec<Vec<u8>> = (0..batch_size)
.map(|_| generate_test_image(size.0, size.1))
.collect();
// Sequential processing
group.bench_function("sequential_batch", |b| {
b.iter(|| {
let results: Vec<_> = images
.iter()
.map(|img| {
let gray = convert_to_grayscale(black_box(img), size.0, size.1);
let blurred = apply_gaussian_blur(&gray, size.0, size.1, 5);
apply_adaptive_threshold(&blurred, size.0, size.1)
})
.collect();
black_box(results)
});
});
// Parallel processing (simulated with rayon-like chunking)
group.bench_function("parallel_batch", |b| {
b.iter(|| {
// In production, this would use rayon::par_iter()
let results: Vec<_> = images
.chunks(2)
.flat_map(|chunk| {
chunk.iter().map(|img| {
let gray = convert_to_grayscale(black_box(img), size.0, size.1);
let blurred = apply_gaussian_blur(&gray, size.0, size.1, 5);
apply_adaptive_threshold(&blurred, size.0, size.1)
})
})
.collect();
black_box(results)
});
});
group.finish();
}
/// Benchmark nearest-neighbour and bilinear downscaling of a 1024x1024
/// source image to each of the common model input sizes.
fn bench_resize_operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("resize_operations");
    group.measurement_time(Duration::from_secs(8));

    let src = generate_test_image(1024, 1024);
    for &(tw, th) in &[(224u32, 224u32), (384, 384), (512, 512)] {
        let label = format!("{}x{}", tw, th);
        group.bench_with_input(
            BenchmarkId::new("nearest_neighbor", label.clone()),
            &(tw, th),
            |b, &(w, h)| {
                b.iter(|| black_box(resize_nearest(&src, 1024, 1024, w, h)));
            },
        );
        group.bench_with_input(
            BenchmarkId::new("bilinear", label),
            &(tw, th),
            |b, &(w, h)| {
                b.iter(|| black_box(resize_bilinear(&src, 1024, 1024, w, h)));
            },
        );
    }
    group.finish();
}
/// Gate benchmark: the full 384x384 preprocessing pipeline is expected to
/// finish in under 20 ms (group name records the target for reports).
fn bench_latency_target(c: &mut Criterion) {
    let mut group = c.benchmark_group("latency_target_20ms");
    group.measurement_time(Duration::from_secs(10));
    group.sample_size(100);

    let input = generate_test_image(384, 384);
    group.bench_function("full_pipeline_384x384", |b| {
        b.iter(|| {
            // grayscale -> blur -> threshold -> f32 normalisation
            let gray = convert_to_grayscale(black_box(&input), 384, 384);
            let blurred = apply_gaussian_blur(&gray, 384, 384, 5);
            let binarized = apply_adaptive_threshold(&blurred, 384, 384);
            black_box(normalize_image(&binarized))
        });
    });
    group.finish();
}
// Mock implementations
/// Build a deterministic pseudo-random RGB buffer (3 bytes per pixel) so
/// benchmark runs are reproducible without any image files on disk.
fn generate_test_image(width: u32, height: u32) -> Vec<u8> {
    let len = (width * height * 3) as usize;
    let mut buf = Vec::with_capacity(len);
    for i in 0..len {
        // Cheap linear congruential-style pattern; wraps into byte range.
        buf.push(((i * 123 + 456) % 256) as u8);
    }
    buf
}
/// Convert interleaved RGB bytes to 8-bit luma using integer BT.601-style
/// weights (0.299 R + 0.587 G + 0.114 B, scaled by 1000).
///
/// `width`/`height` are used only to pre-size the output buffer; the pixel
/// count actually processed is driven by `rgb_data`. Trailing bytes that do
/// not form a complete RGB triple are ignored — the previous `chunks(3)` +
/// direct indexing panicked on such ragged input.
fn convert_to_grayscale(rgb_data: &[u8], width: u32, height: u32) -> Vec<u8> {
    let mut gray = Vec::with_capacity((width * height) as usize);
    // chunks_exact guarantees 3-byte chunks, so indexing below cannot panic
    // and the optimizer can drop the per-access bounds checks.
    for px in rgb_data.chunks_exact(3) {
        let (r, g, b) = (px[0] as u32, px[1] as u32, px[2] as u32);
        gray.push(((r * 299 + g * 587 + b * 114) / 1000) as u8);
    }
    gray
}
/// Smooth a single-channel image with a `kernel_size` x `kernel_size`
/// uniform mean filter (a box blur — despite the name there is no gaussian
/// weighting). Out-of-bounds neighbours are skipped, so edge pixels average
/// over a smaller window rather than clamping or wrapping.
fn apply_gaussian_blur(data: &[u8], width: u32, height: u32, kernel_size: usize) -> Vec<u8> {
    let (w, h) = (width as i32, height as i32);
    let k = kernel_size as i32;
    let r = k / 2;
    let mut out = Vec::with_capacity(data.len());
    for cy in 0..h {
        for cx in 0..w {
            let mut acc = 0u32;
            let mut n = 0u32;
            // Window offsets run -r ..= k-1-r, matching a 0..k kernel scan
            // shifted left by the radius.
            for dy in -r..(k - r) {
                for dx in -r..(k - r) {
                    let px = cx + dx;
                    let py = cy + dy;
                    if (0..w).contains(&px) && (0..h).contains(&py) {
                        acc += data[(py * w + px) as usize] as u32;
                        n += 1;
                    }
                }
            }
            out.push((acc / n) as u8);
        }
    }
    out
}
/// Binarise a grayscale image with an adaptive mean threshold: each pixel is
/// compared against the mean of its (up to) 11x11 neighbourhood minus a
/// small bias, yielding 255 where the pixel is brighter and 0 otherwise.
/// The window is clamped at the image borders, so corner pixels use a
/// smaller neighbourhood. O(pixels * window^2) — fine for benchmarking.
fn apply_adaptive_threshold(data: &[u8], width: u32, height: u32) -> Vec<u8> {
    const BLOCK: u32 = 11; // edge length of the local-mean window
    const BIAS: i32 = 2; // subtracted from the local mean
    let half = BLOCK / 2;
    let mut out = Vec::with_capacity(data.len());
    for row in 0..height {
        let lo_y = row.saturating_sub(half);
        let hi_y = (row + half).min(height - 1);
        for col in 0..width {
            let lo_x = col.saturating_sub(half);
            let hi_x = (col + half).min(width - 1);
            let mut total = 0u32;
            let mut samples = 0u32;
            for wy in lo_y..=hi_y {
                for wx in lo_x..=hi_x {
                    total += data[(wy * width + wx) as usize] as u32;
                    samples += 1;
                }
            }
            let cutoff = (total / samples) as i32 - BIAS;
            let pixel = data[(row * width + col) as usize] as i32;
            out.push(if pixel > cutoff { 255 } else { 0 });
        }
    }
    out
}
/// Gradient-magnitude edge map using central differences of the immediate
/// horizontal and vertical neighbours (a simplified stand-in for Sobel —
/// there is no 3x3 weighted kernel here). Border pixels are set to 0
/// because they lack a full neighbourhood; magnitudes are clamped to 255.
fn detect_edges(data: &[u8], width: u32, height: u32) -> Vec<u8> {
    let stride = width as usize;
    let mut out = Vec::with_capacity(data.len());
    for row in 0..height {
        for col in 0..width {
            let on_border = col == 0 || col + 1 == width || row == 0 || row + 1 == height;
            if on_border {
                out.push(0);
                continue;
            }
            let i = (row * width + col) as usize;
            let dx = (data[i + 1] as i32 - data[i - 1] as i32).abs();
            let dy = (data[i + stride] as i32 - data[i - stride] as i32).abs();
            let mag = ((dx * dx + dy * dy) as f32).sqrt().min(255.0);
            out.push(mag as u8);
        }
    }
    out
}
/// Map u8 pixels into roughly [-1, 1) via (x - 128) / 128, the zero-centred
/// float form model inputs expect.
fn normalize_image(data: &[u8]) -> Vec<f32> {
    let mut out = Vec::with_capacity(data.len());
    for &px in data {
        out.push((f32::from(px) - 128.0) / 128.0);
    }
    out
}
/// Nearest-neighbour resize of a single-channel image: each destination
/// pixel copies the source pixel found by truncating the scaled coordinate.
fn resize_nearest(src: &[u8], src_w: u32, src_h: u32, dst_w: u32, dst_h: u32) -> Vec<u8> {
    let step_x = src_w as f32 / dst_w as f32;
    let step_y = src_h as f32 / dst_h as f32;
    let mut out = Vec::with_capacity((dst_w * dst_h) as usize);
    for dy in 0..dst_h {
        // Row coordinate is invariant across the inner loop, so hoist it.
        let sy = (dy as f32 * step_y) as u32;
        for dx in 0..dst_w {
            let sx = (dx as f32 * step_x) as u32;
            out.push(src[(sy * src_w + sx) as usize]);
        }
    }
    out
}
/// Bilinear resize of a single-channel image: each destination pixel blends
/// the four surrounding source pixels, weighted by the fractional distance
/// to each. Neighbour indices are clamped at the right/bottom edges.
fn resize_bilinear(src: &[u8], src_w: u32, src_h: u32, dst_w: u32, dst_h: u32) -> Vec<u8> {
    let scale_x = (src_w - 1) as f32 / dst_w as f32;
    let scale_y = (src_h - 1) as f32 / dst_h as f32;
    let mut out = Vec::with_capacity((dst_w * dst_h) as usize);
    for dy in 0..dst_h {
        let fy = dy as f32 * scale_y;
        let y0 = fy.floor() as u32;
        let y1 = (y0 + 1).min(src_h - 1);
        let ty = fy - y0 as f32;
        for dx in 0..dst_w {
            let fx = dx as f32 * scale_x;
            let x0 = fx.floor() as u32;
            let x1 = (x0 + 1).min(src_w - 1);
            let tx = fx - x0 as f32;
            let p00 = src[(y0 * src_w + x0) as usize] as f32;
            let p10 = src[(y0 * src_w + x1) as usize] as f32;
            let p01 = src[(y1 * src_w + x0) as usize] as f32;
            let p11 = src[(y1 * src_w + x1) as usize] as f32;
            // Same weighted-sum form as the classic bilinear formula; kept
            // in this exact shape so float rounding matches expectations.
            let blended = p00 * (1.0 - tx) * (1.0 - ty)
                + p10 * tx * (1.0 - ty)
                + p01 * (1.0 - tx) * ty
                + p11 * tx * ty;
            out.push(blended as u8);
        }
    }
    out
}
// Register the preprocessing benchmark suites with Criterion's harness.
// NOTE(review): bench_request_parsing, bench_response_serialization and
// bench_concurrent_requests are defined earlier in this file but are not
// listed here — confirm they are registered in another criterion_group!
// elsewhere, or add them if the omission is unintentional.
criterion_group!(
    benches,
    bench_individual_transforms,
    bench_full_pipeline,
    bench_parallel_vs_sequential,
    bench_resize_operations,
    bench_latency_target
);
criterion_main!(benches);