Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,450 @@
// Accuracy validation tests
//
// Tests OCR accuracy against Im2latex-100k subset and calculates CER, WER, BLEU
use super::*;
use tokio;
// Verifies OCR accuracy on a handful of trivial expressions.
//
// Each equation is rendered to a synthetic image, OCR'd, and scored by
// character error rate (CER) and exact match after LaTeX normalization.
// Gates: average CER < 5%, exact-match accuracy > 90%.
#[tokio::test]
async fn test_accuracy_simple_expressions() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // (input equation, expected LaTeX) pairs.
    let cases = [
        ("x + 1", "x + 1"),
        ("2x - 3", "2x - 3"),
        ("a = b", "a = b"),
        ("f(x)", "f(x)"),
        ("y^2", "y^2"),
    ];
    let mut cer_total = 0.0;
    let mut exact = 0usize;
    for (equation, expected) in &cases {
        let image = images::generate_simple_equation(equation);
        let path = format!("/tmp/accuracy_simple_{}.png", equation.replace(' ', "_"));
        image.save(&path).unwrap();
        let result = test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        let cer = metrics::calculate_cer(expected, &result.latex);
        cer_total += cer;
        // Exact match is judged on normalized LaTeX, not raw strings.
        if latex::normalize(&result.latex) == latex::normalize(expected) {
            exact += 1;
        }
        println!(
            "Equation: {} | CER: {:.4} | Got: {}",
            equation, cer, result.latex
        );
    }
    let avg_cer = cer_total / cases.len() as f64;
    let accuracy = exact as f64 / cases.len() as f64;
    println!(
        "Simple expressions - Avg CER: {:.4}, Accuracy: {:.2}%",
        avg_cer,
        accuracy * 100.0
    );
    assert!(avg_cer < 0.05, "Average CER too high: {:.4}", avg_cer);
    assert!(
        accuracy > 0.90,
        "Accuracy too low: {:.2}%",
        accuracy * 100.0
    );
    test_server.shutdown().await;
}
// Runs OCR over a 50-sample Im2latex-style subset and reports CER, WER,
// BLEU, and the exact-match rate. Gates: avg CER < 3%, avg BLEU > 80.
#[tokio::test]
async fn test_accuracy_im2latex_subset() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Sample of the Im2latex-100k test split (synthetic stand-ins for now).
    let cases = load_im2latex_test_subset(50); // Test 50 samples
    let (mut cer_total, mut wer_total, mut bleu_total) = (0.0, 0.0, 0.0);
    let mut exact = 0usize;
    for (i, case) in cases.iter().enumerate() {
        let result = test_server
            .process_image(&case.image_path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        // Accumulate per-sample metrics against the ground truth.
        cer_total += metrics::calculate_cer(&case.ground_truth, &result.latex);
        wer_total += metrics::calculate_wer(&case.ground_truth, &result.latex);
        bleu_total += metrics::calculate_bleu(&case.ground_truth, &result.latex, 4);
        if latex::normalize(&result.latex) == latex::normalize(&case.ground_truth) {
            exact += 1;
        }
        // Periodic progress output for long runs.
        if i % 10 == 0 {
            println!("Processed {}/{} samples", i + 1, cases.len());
        }
    }
    let n = cases.len() as f64;
    let avg_cer = cer_total / n;
    let avg_wer = wer_total / n;
    let avg_bleu = bleu_total / n;
    let exact_match_rate = exact as f64 / n;
    println!("\nIm2latex subset results:");
    println!(" Average CER: {:.4}", avg_cer);
    println!(" Average WER: {:.4}", avg_wer);
    println!(" Average BLEU: {:.2}", avg_bleu);
    println!(" Exact match rate: {:.2}%", exact_match_rate * 100.0);
    // Quality gates.
    assert!(avg_cer < 0.03, "CER too high: {:.4}", avg_cer);
    assert!(avg_bleu > 80.0, "BLEU too low: {:.2}", avg_bleu);
    test_server.shutdown().await;
}
// Checks that rendered fractions are recognized as \frac constructs.
// Gate: at least 85% of the cases must match structurally.
#[tokio::test]
async fn test_accuracy_fractions() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // ((numerator, denominator), expected LaTeX) pairs.
    let cases = [
        ((1, 2), r"\frac{1}{2}"),
        ((3, 4), r"\frac{3}{4}"),
        ((5, 6), r"\frac{5}{6}"),
        ((10, 3), r"\frac{10}{3}"),
    ];
    let mut matched = 0usize;
    for ((num, den), expected) in &cases {
        let image = images::generate_fraction(*num, *den);
        let path = format!("/tmp/frac_{}_{}.png", num, den);
        image.save(&path).unwrap();
        let result = test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        // Structural comparison rather than string equality.
        if latex::expressions_match(&result.latex, expected) {
            matched += 1;
        } else {
            println!(
                "Fraction {}/{} - Expected: {}, Got: {}",
                num, den, expected, result.latex
            );
        }
    }
    let accuracy = matched as f64 / cases.len() as f64;
    println!("Fraction accuracy: {:.2}%", accuracy * 100.0);
    assert!(
        accuracy >= 0.85,
        "Fraction accuracy too low: {:.2}%",
        accuracy * 100.0
    );
    test_server.shutdown().await;
}
// Checks recognition of common math symbols (Greek letters, sum, integral).
// Gate: at least 80% of the symbols must appear in the OCR output.
#[tokio::test]
async fn test_accuracy_special_symbols() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // (symbol to render, substring the OCR output must contain).
    let cases = [
        (r"\alpha", r"\alpha"),
        (r"\beta", r"\beta"),
        (r"\sum", r"\sum"),
        (r"\int", r"\int"),
        (r"\pi", r"\pi"),
        (r"\infty", r"\infty"),
    ];
    let mut hits = 0usize;
    for (symbol, expected) in &cases {
        let image = images::generate_symbol(symbol);
        let path = format!("/tmp/symbol_{}.png", symbol.replace('\\', ""));
        image.save(&path).unwrap();
        let result = test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        // Containment, not equality: surrounding markup is acceptable.
        if result.latex.contains(expected) {
            hits += 1;
        } else {
            println!(
                "Symbol {} - Expected to contain: {}, Got: {}",
                symbol, expected, result.latex
            );
        }
    }
    let accuracy = hits as f64 / cases.len() as f64;
    println!("Special symbol accuracy: {:.2}%", accuracy * 100.0);
    assert!(
        accuracy >= 0.80,
        "Symbol accuracy too low: {:.2}%",
        accuracy * 100.0
    );
    test_server.shutdown().await;
}
// Detects accuracy regressions against stored baseline results.
//
// Each regression case is re-run and its CER compared against the recorded
// baseline. A case counts as regressed when the current CER exceeds the
// baseline by more than 10% relative PLUS a small absolute slack. The
// absolute term matters: with a perfect baseline (CER == 0.0) the original
// purely-relative threshold (`baseline * 1.10`) was 0.0, so ANY nonzero
// error — however microscopic — was flagged as a regression.
#[tokio::test]
async fn test_accuracy_regression_detection() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Load baseline results keyed by case id.
    let baseline = load_baseline_results();
    // Run the same test cases the baseline was produced from.
    let test_cases = load_regression_test_cases();
    let mut regressions = Vec::new();
    for case in test_cases.iter() {
        let result = test_server
            .process_image(&case.image_path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        // Cases without a recorded baseline are silently skipped.
        if let Some(baseline_result) = baseline.get(&case.id) {
            let current_cer = metrics::calculate_cer(&case.ground_truth, &result.latex);
            let baseline_cer = baseline_result.cer;
            // Regression threshold: 10% relative + 0.005 absolute slack.
            if current_cer > baseline_cer * 1.10 + 0.005 {
                regressions.push((
                    case.id.clone(),
                    baseline_cer,
                    current_cer,
                    baseline_result.latex.clone(),
                    result.latex.clone(),
                ));
            }
        }
    }
    if !regressions.is_empty() {
        println!("Regressions detected:");
        for (id, baseline_cer, current_cer, baseline_latex, current_latex) in &regressions {
            println!(" {} - CER: {:.4} -> {:.4}", id, baseline_cer, current_cer);
            println!(" Baseline: {}", baseline_latex);
            println!(" Current: {}", current_latex);
        }
    }
    assert!(
        regressions.is_empty(),
        "Found {} regressions",
        regressions.len()
    );
    test_server.shutdown().await;
}
// Checks that reported confidence correlates with actual correctness:
// high-confidence (>0.9) results must be correct >95% of the time.
// Mid-range confidences (0.7..=0.9) are intentionally ignored.
#[tokio::test]
async fn test_accuracy_confidence_calibration() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    let cases = load_calibration_test_cases();
    // Tallies for the two confidence buckets.
    let (mut high_ok, mut high_n) = (0usize, 0usize);
    let (mut low_ok, mut low_n) = (0usize, 0usize);
    for case in &cases {
        let result = test_server
            .process_image(&case.image_path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        let is_correct = latex::normalize(&result.latex) == latex::normalize(&case.ground_truth);
        if result.confidence > 0.9 {
            high_n += 1;
            if is_correct {
                high_ok += 1;
            }
        } else if result.confidence < 0.7 {
            low_n += 1;
            if is_correct {
                low_ok += 1;
            }
        }
    }
    // Empty buckets fall back to the neutral value for their assertion.
    let high_conf_accuracy = if high_n > 0 {
        high_ok as f64 / high_n as f64
    } else {
        1.0
    };
    let low_conf_accuracy = if low_n > 0 {
        low_ok as f64 / low_n as f64
    } else {
        0.0
    };
    println!("Confidence calibration:");
    println!(
        " High confidence (>0.9): {:.2}% accuracy ({}/{})",
        high_conf_accuracy * 100.0,
        high_ok,
        high_n
    );
    println!(
        " Low confidence (<0.7): {:.2}% accuracy ({}/{})",
        low_conf_accuracy * 100.0,
        low_ok,
        low_n
    );
    // High confidence should correlate with high accuracy.
    assert!(
        high_conf_accuracy > 0.95,
        "High confidence predictions should be very accurate"
    );
    test_server.shutdown().await;
}
// Helper functions and types
// One accuracy-test sample: a rendered image on disk plus its expected LaTeX.
#[derive(Debug, Clone)]
struct TestCase {
// Stable identifier; also the join key into the regression baseline map.
id: String,
// Filesystem path of the rendered equation image.
image_path: String,
// Expected LaTeX transcription for the image.
ground_truth: String,
}
// A known-good OCR result recorded by a previous run, used for
// regression comparison in test_accuracy_regression_detection.
#[derive(Debug, Clone)]
struct BaselineResult {
// LaTeX produced by the baseline run.
latex: String,
// Character error rate achieved by the baseline run.
cer: f64,
}
// Stand-in for the real Im2latex-100k subset: synthesizes `count`
// deterministic equations cycling through five templates, renders each
// to /tmp, and returns the resulting test cases.
fn load_im2latex_test_subset(count: usize) -> Vec<TestCase> {
    let mut cases = Vec::with_capacity(count);
    for i in 0..count {
        // Pick a template based on the index.
        let eq = match i % 5 {
            0 => format!("x^{}", i),
            1 => format!("a + {}", i),
            2 => format!(r"\frac{{{}}}{{{}}}", i, i + 1),
            3 => format!("{}x + {}", i, i * 2),
            _ => format!("y = {}x", i),
        };
        let image = images::generate_simple_equation(&eq);
        let path = format!("/tmp/im2latex_{}.png", i);
        image.save(&path).unwrap();
        cases.push(TestCase {
            id: format!("im2latex_{}", i),
            image_path: path,
            ground_truth: eq,
        });
    }
    cases
}
// Returns the fixed set of regression cases that are compared against the
// stored baseline. Currently a single hard-coded sample.
fn load_regression_test_cases() -> Vec<TestCase> {
    let mut cases = Vec::new();
    cases.push(TestCase {
        id: "reg_001".to_string(),
        image_path: "/tmp/reg_001.png".to_string(),
        ground_truth: "x + y".to_string(),
    });
    // Add more test cases...
    cases
}
// Returns the known-good baseline results, keyed by test-case id.
// (Would normally be loaded from a file produced by a previous run.)
fn load_baseline_results() -> std::collections::HashMap<String, BaselineResult> {
    std::collections::HashMap::from([(
        "reg_001".to_string(),
        BaselineResult {
            latex: "x + y".to_string(),
            cer: 0.0,
        },
    )])
}
// Builds a mixed-difficulty sample set for confidence calibration:
// ten clean renders (easy) followed by ten noisy renders (hard).
fn load_calibration_test_cases() -> Vec<TestCase> {
    let mut cases = Vec::with_capacity(20);
    // Easy cases: clean renders of simple sums.
    for i in 0..10 {
        let eq = format!("x + {}", i);
        let rendered = images::generate_simple_equation(&eq);
        let path = format!("/tmp/calib_easy_{}.png", i);
        rendered.save(&path).unwrap();
        cases.push(TestCase {
            id: format!("calib_easy_{}", i),
            image_path: path,
            ground_truth: eq,
        });
    }
    // Hard cases: same pipeline with 20% noise injected.
    for i in 0..10 {
        let eq = format!("y^{}", i);
        let mut rendered = images::generate_simple_equation(&eq);
        images::add_noise(&mut rendered, 0.2);
        let path = format!("/tmp/calib_hard_{}.png", i);
        rendered.save(&path).unwrap();
        cases.push(TestCase {
            id: format!("calib_hard_{}", i),
            image_path: path,
            ground_truth: eq,
        });
    }
    cases
}

View File

@@ -0,0 +1,80 @@
// API server integration tests
//
// Tests HTTP API endpoints, authentication, rate limiting, and async processing
use super::*;
use reqwest::{multipart, Client, StatusCode};
use serde_json::json;
use tokio;
// POSTs an image as multipart form data to /v3/text and checks the
// response carries the expected JSON fields.
#[tokio::test]
async fn test_api_post_text_with_file() {
    let test_server = TestServer::start_api()
        .await
        .expect("Failed to start API server");
    let client = Client::new();
    // Render an equation and read it back as raw bytes for the upload.
    let image = images::generate_simple_equation("x + y");
    image.save("/tmp/api_test.png").unwrap();
    let image_bytes = std::fs::read("/tmp/api_test.png").unwrap();
    // Multipart body with a single "file" part, as the v3 API expects.
    let part = multipart::Part::bytes(image_bytes)
        .file_name("equation.png")
        .mime_str("image/png")
        .unwrap();
    let form = multipart::Form::new().part("file", part);
    // POST to /v3/text with test credentials in the headers.
    let response = client
        .post(&format!("{}/v3/text", test_server.base_url()))
        .header("app_id", "test_app_id")
        .header("app_key", "test_app_key")
        .multipart(form)
        .send()
        .await
        .expect("Request failed");
    assert_eq!(response.status(), StatusCode::OK);
    let result: serde_json::Value = response.json().await.unwrap();
    assert!(result.get("request_id").is_some(), "Should have request_id");
    assert!(result.get("text").is_some(), "Should have text field");
    assert!(
        result.get("processing_time_ms").is_some(),
        "Should have processing time"
    );
    test_server.shutdown().await;
}
// A request without app_id/app_key headers must be rejected with 401.
#[tokio::test]
async fn test_api_authentication_validation() {
    let test_server = TestServer::start_api()
        .await
        .expect("Failed to start API server");
    let client = Client::new();
    let payload = json!({
        "src": "base64data"
    });
    // Deliberately omit the auth headers.
    let response = client
        .post(&format!("{}/v3/text", test_server.base_url()))
        .json(&payload)
        .send()
        .await
        .expect("Request failed");
    assert_eq!(
        response.status(),
        StatusCode::UNAUTHORIZED,
        "Should require authentication"
    );
    test_server.shutdown().await;
}

View File

@@ -0,0 +1,300 @@
// Cache integration tests
//
// Tests caching behavior, hit/miss ratios, similarity search, and persistence
//
// Note: These tests use mock test infrastructure.
// Real OCR processing requires ONNX models to be configured.
use super::*;
use crate::common::{CacheStats, OutputFormat};
// Processing the same file twice must yield identical results, with the
// second request served from the cache.
#[tokio::test]
async fn test_cache_hit_miss_behavior() {
    let test_server = TestServer::with_cache()
        .await
        .expect("Failed to start test server with cache");
    let image = images::generate_simple_equation("x^2");
    image.save("/tmp/cache_test_1.png").unwrap();
    // Cold request: populates the cache.
    let first = test_server
        .process_image("/tmp/cache_test_1.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    // Stats are fetched mainly to exercise the endpoint; values unused.
    let _stats = test_server
        .cache_stats()
        .await
        .expect("Failed to get cache stats");
    // Warm request: should be served from the cache.
    let second = test_server
        .process_image("/tmp/cache_test_1.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    assert_eq!(first.latex, second.latex, "Cached result should match");
    test_server.shutdown().await;
}
// Two near-identical renderings of the same equation should OCR to
// near-identical LaTeX (similarity > 0.9), exercising fuzzy cache lookup.
#[tokio::test]
async fn test_cache_similarity_lookup() {
    let test_server = TestServer::with_cache()
        .await
        .expect("Failed to start test server");
    // Original rendering.
    let original = images::generate_simple_equation("a + b");
    original.save("/tmp/similarity_1.png").unwrap();
    // Same equation with a slight perturbation applied.
    let mut variant = images::generate_simple_equation("a + b");
    images::add_slight_variation(&mut variant, 0.05);
    variant.save("/tmp/similarity_2.png").unwrap();
    let first = test_server
        .process_image("/tmp/similarity_1.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    let second = test_server
        .process_image("/tmp/similarity_2.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    // Near-identical inputs should yield near-identical LaTeX.
    let similarity = latex::calculate_similarity(&first.latex, &second.latex);
    assert!(
        similarity > 0.9,
        "Similar images should produce similar results"
    );
    test_server.shutdown().await;
}
// With the cache capped at 3 entries, processing 5 distinct images must
// trigger eviction so the cache never exceeds its configured size.
#[tokio::test]
async fn test_cache_eviction() {
    let test_server = TestServer::with_cache_size(3)
        .await
        .expect("Failed to start test server");
    // Push five distinct images through the three-entry cache.
    for i in 0..5 {
        let path = format!("/tmp/eviction_{}.png", i);
        images::generate_simple_equation(&format!("x + {}", i))
            .save(&path)
            .unwrap();
        test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
    }
    // The cache must have evicted down to its limit.
    let stats = test_server
        .cache_stats()
        .await
        .expect("Failed to get cache stats");
    assert!(stats.current_size <= 3, "Cache should not exceed max size");
    test_server.shutdown().await;
}
// A persistent cache must survive a server restart: a second server
// instance over the same directory returns the same result.
#[tokio::test]
async fn test_cache_persistence() {
    let cache_dir = "/tmp/scipix_cache_persist";
    std::fs::create_dir_all(cache_dir).unwrap();
    // First server instance populates the persistent cache.
    let server_a = TestServer::with_persistent_cache(cache_dir)
        .await
        .expect("Failed to start test server");
    let image = images::generate_simple_equation("persistent");
    image.save("/tmp/persist_test.png").unwrap();
    let first = server_a
        .process_image("/tmp/persist_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    server_a.shutdown().await;
    // Second instance over the same directory must see the cached entry.
    let server_b = TestServer::with_persistent_cache(cache_dir)
        .await
        .expect("Failed to start second test server");
    let second = server_b
        .process_image("/tmp/persist_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    assert_eq!(
        first.latex, second.latex,
        "Persistent cache should restore results"
    );
    server_b.shutdown().await;
}
// After explicit invalidation, reprocessing must recompute from scratch
// yet still produce the same LaTeX output.
#[tokio::test]
async fn test_cache_invalidation() {
    let test_server = TestServer::with_cache()
        .await
        .expect("Failed to start test server");
    let image = images::generate_simple_equation("invalidate");
    image.save("/tmp/invalidate_test.png").unwrap();
    let before = test_server
        .process_image("/tmp/invalidate_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    // Drop every cached entry.
    test_server
        .invalidate_cache()
        .await
        .expect("Cache invalidation failed");
    // Second pass cannot be served from cache; output must be unchanged.
    let after = test_server
        .process_image("/tmp/invalidate_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    assert_eq!(before.latex, after.latex, "Results should still match");
    test_server.shutdown().await;
}
// Exercises the hit/miss accounting path: each of three images is
// processed twice (miss then hit), then the stats endpoint is queried.
// The stats values themselves are not asserted here.
#[tokio::test]
async fn test_cache_hit_ratio() {
    let test_server = TestServer::with_cache()
        .await
        .expect("Failed to start test server");
    let equations = ["a", "b", "c"];
    // Render one image per equation.
    for eq in &equations {
        images::generate_simple_equation(eq)
            .save(&format!("/tmp/ratio_{}.png", eq))
            .unwrap();
    }
    // Process each image twice: first pass misses, second pass hits.
    for eq in &equations {
        let path = format!("/tmp/ratio_{}.png", eq);
        for _ in 0..2 {
            test_server
                .process_image(&path, OutputFormat::LaTeX)
                .await
                .expect("Processing failed");
        }
    }
    // Exercise the stats endpoint.
    let _stats = test_server
        .cache_stats()
        .await
        .expect("Failed to get cache stats");
    test_server.shutdown().await;
}
// Requests inside the TTL window must be served consistently from cache.
// NOTE(review): actual expiry AFTER the 1-second TTL elapses is not
// exercised here — that would require sleeping past the window.
#[tokio::test]
async fn test_cache_ttl_expiration() {
    // Server configured with a 1-second cache TTL.
    let test_server = TestServer::with_cache_ttl(1)
        .await
        .expect("Failed to start test server");
    let image = images::generate_simple_equation("ttl");
    image.save("/tmp/ttl_test.png").unwrap();
    let first = test_server
        .process_image("/tmp/ttl_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    // Immediate re-request: well within the TTL, so same output expected.
    let second = test_server
        .process_image("/tmp/ttl_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    assert_eq!(first.latex, second.latex);
    test_server.shutdown().await;
}
// Ten simultaneous requests for a cached entry must all succeed and all
// return the same LaTeX.
#[tokio::test]
async fn test_cache_concurrent_access() {
    let test_server = TestServer::with_cache()
        .await
        .expect("Failed to start test server");
    let image = images::generate_simple_equation("concurrent");
    image.save("/tmp/concurrent_cache.png").unwrap();
    // Warm the cache with a single request.
    test_server
        .process_image("/tmp/concurrent_cache.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    // Hit the cached entry from ten tasks at once.
    let handles: Vec<_> = (0..10)
        .map(|_| {
            let server = test_server.clone();
            tokio::spawn(async move {
                server
                    .process_image("/tmp/concurrent_cache.png", OutputFormat::LaTeX)
                    .await
            })
        })
        .collect();
    let results = futures::future::join_all(handles).await;
    // Every task must have completed without panicking or erroring.
    assert!(
        results.iter().all(|r| r.is_ok()),
        "All requests should succeed"
    );
    // Every task must have produced identical LaTeX.
    let first_latex = &results[0].as_ref().unwrap().as_ref().unwrap().latex;
    assert!(
        results
            .iter()
            .all(|r| { &r.as_ref().unwrap().as_ref().unwrap().latex == first_latex }),
        "All results should match"
    );
    test_server.shutdown().await;
}
// Re-export CacheStats for backward compatibility
pub use crate::common::CacheStats as CacheStatsCompat;

View File

@@ -0,0 +1,227 @@
// CLI integration tests
//
// Tests command-line interface functionality
use super::*;
use assert_cmd::Command;
use predicates::prelude::*;
use std::process::Stdio;
// `scipix-ocr ocr <file>` must succeed and print the recognized LaTeX.
#[test]
fn test_cli_ocr_command_with_file() {
    // Render a trivial equation for the CLI to consume.
    let image = images::generate_simple_equation("x + 1");
    image.save("/tmp/cli_test.png").unwrap();
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/cli_test.png")
        .assert()
        .success()
        .stdout(predicate::str::contains("x"))
        .stdout(predicate::str::contains("LaTeX:"));
}
// The --format flag selects the serialization of the recognized fraction.
#[test]
fn test_cli_ocr_with_output_format() {
    let image = images::generate_fraction(3, 4);
    image.save("/tmp/cli_fraction.png").unwrap();
    // --format latex must emit a \frac construct.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/cli_fraction.png")
        .arg("--format")
        .arg("latex")
        .assert()
        .success()
        .stdout(predicate::str::contains(r"\frac"));
    // --format mathml must emit the MathML fraction element.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/cli_fraction.png")
        .arg("--format")
        .arg("mathml")
        .assert()
        .success()
        .stdout(predicate::str::contains("<mfrac>"));
}
// `batch <dir> --output <json>` must process a directory of images and
// write a non-trivial results file.
#[test]
fn test_cli_batch_command() {
    // Populate a directory with a few rendered equations.
    std::fs::create_dir_all("/tmp/cli_batch").unwrap();
    for (i, eq) in ["a + b", "x - y", "2 * 3"].iter().enumerate() {
        images::generate_simple_equation(eq)
            .save(&format!("/tmp/cli_batch/eq_{}.png", i))
            .unwrap();
    }
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("batch")
        .arg("/tmp/cli_batch")
        .arg("--output")
        .arg("/tmp/cli_batch_results.json")
        .assert()
        .success();
    // The output file must exist and contain substantial content.
    let results = std::fs::read_to_string("/tmp/cli_batch_results.json").unwrap();
    assert!(results.contains("a"), "Should contain results");
    assert!(results.len() > 100, "Should have substantial output");
}
// Smoke-tests `serve`: spawn the binary, poll /health, then shut it down.
// Marked #[ignore] because it needs a prebuilt binary and a free port.
#[test]
#[ignore] // Requires built binary and available port
fn test_cli_serve_command_startup() {
    // assert_cmd's Command cannot manage long-running children; use std.
    use std::process::Command as StdCommand;
    // Prefer the cargo-provided binary path; fall back to the debug build.
    let bin_path = std::env::var("CARGO_BIN_EXE_scipix-ocr")
        .unwrap_or_else(|_| "target/debug/scipix-ocr".to_string());
    let mut child = StdCommand::new(&bin_path)
        .arg("serve")
        .arg("--port")
        .arg("18080")
        .stdout(Stdio::piped())
        .spawn()
        .expect("Failed to start server");
    // Give the server time to bind its port.
    std::thread::sleep(std::time::Duration::from_secs(2));
    // Probe the health endpoint.
    let client = reqwest::blocking::Client::new();
    let response = client
        .get("http://localhost:18080/health")
        .timeout(std::time::Duration::from_secs(5))
        .send();
    // Tear the server down BEFORE asserting, and reap the child with
    // wait() — kill() alone leaves a zombie process behind on Unix.
    let _ = child.kill();
    let _ = child.wait();
    assert!(response.is_ok(), "Server should respond to health check");
}
// Exercises the `config show` and `config set` subcommands.
#[test]
fn test_cli_config_command() {
    // `config show` prints the active configuration.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("config")
        .arg("show")
        .assert()
        .success()
        .stdout(
            predicate::str::contains("model_path").or(predicate::str::contains("Configuration")),
        );
    // `config set <key> <value>` persists a setting and exits cleanly.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("config")
        .arg("set")
        .arg("preprocessing.enable_deskew")
        .arg("true")
        .assert()
        .success();
}
// A nonexistent input path must fail with a diagnostic on stderr.
#[test]
fn test_cli_invalid_file() {
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/nonexistent/file.png")
        .assert()
        .failure()
        .stderr(predicate::str::contains("not found").or(predicate::str::contains("error")));
}
// Exit codes: 0 on success, non-zero when the input file is missing.
#[test]
fn test_cli_exit_codes() {
    // Success case: a valid image must exit 0.
    let image = images::generate_simple_equation("ok");
    image.save("/tmp/exit_code_test.png").unwrap();
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/exit_code_test.png")
        .assert()
        .code(0);
    // Failure case: missing input must exit non-zero.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/nonexistent.png")
        .assert()
        .code(predicate::ne(0));
}
// --verbose should surface progress/diagnostic details on stdout.
#[test]
fn test_cli_verbose_output() {
    let image = images::generate_simple_equation("verbose");
    image.save("/tmp/verbose_test.png").unwrap();
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/verbose_test.png")
        .arg("--verbose")
        .assert()
        .success()
        .stdout(predicate::str::contains("Processing").or(predicate::str::contains("Confidence")));
}
// `--output-format json` must exit successfully and print a JSON object
// containing at least `latex` and `confidence` fields.
#[test]
fn test_cli_json_output() {
    let image = images::generate_simple_equation("json");
    image.save("/tmp/json_test.png").unwrap();
    let mut cmd = Command::cargo_bin("scipix-ocr").unwrap();
    let output = cmd
        .arg("ocr")
        .arg("/tmp/json_test.png")
        .arg("--output-format")
        .arg("json")
        .output()
        .expect("Failed to execute command");
    // Check the exit status first so a CLI failure is reported as itself
    // (with stderr) rather than as a confusing JSON parse error below.
    assert!(
        output.status.success(),
        "Command failed: {}",
        String::from_utf8_lossy(&output.stderr)
    );
    let stdout = String::from_utf8_lossy(&output.stdout);
    // Verify JSON structure
    let json: serde_json::Value =
        serde_json::from_str(&stdout).expect("Output should be valid JSON");
    assert!(json.get("latex").is_some(), "Should have latex field");
    assert!(
        json.get("confidence").is_some(),
        "Should have confidence field"
    );
}
// Top-level and subcommand help output must carry the usual sections.
#[test]
fn test_cli_help_command() {
    // Top-level help lists usage and subcommands.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("--help")
        .assert()
        .success()
        .stdout(predicate::str::contains("USAGE:"))
        .stdout(predicate::str::contains("COMMANDS:"));
    // Subcommand help lists its options.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("--help")
        .assert()
        .success()
        .stdout(predicate::str::contains("OPTIONS:"));
}
// --version must print the crate version baked in from Cargo.toml.
#[test]
fn test_cli_version_command() {
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("--version")
        .assert()
        .success()
        .stdout(predicate::str::contains(env!("CARGO_PKG_VERSION")));
}

View File

@@ -0,0 +1,14 @@
// Integration test module organization
//
// This module provides integration tests for the ruvector-scipix OCR system.
// Tests are organized by functionality area.
pub mod accuracy_tests;
pub mod api_tests;
pub mod cache_tests;
pub mod cli_tests;
pub mod performance_tests;
pub mod pipeline_tests;
// Re-export common test utilities
pub use crate::common::*;

View File

@@ -0,0 +1,386 @@
// Performance validation tests
//
// Tests latency, memory usage, throughput, and ensures no memory leaks
use super::*;
use std::time::{Duration, Instant};
use tokio;
// A single simple-equation request must complete in under 100ms.
#[tokio::test]
async fn test_performance_latency_within_bounds() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    let image = images::generate_simple_equation("x + y");
    image.save("/tmp/perf_latency.png").unwrap();
    // Time one end-to-end request.
    let timer = Instant::now();
    let result = test_server
        .process_image("/tmp/perf_latency.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    let latency = timer.elapsed();
    println!("Latency: {:?}", latency);
    println!("Confidence: {}", result.confidence);
    // Simple equations should process in under 100ms.
    assert!(latency.as_millis() < 100, "Latency too high: {:?}", latency);
    test_server.shutdown().await;
}
// Processes 100 images and asserts resident memory grows by less than
// 100MB over the run.
#[tokio::test]
async fn test_performance_memory_usage_limits() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Snapshot memory before the workload.
    let initial_memory = get_memory_usage();
    // Process 100 distinct images, deleting each temp file as we go.
    for i in 0..100 {
        let eq = format!("x + {}", i);
        let image = images::generate_simple_equation(&eq);
        let path = format!("/tmp/perf_mem_{}.png", i);
        image.save(&path).unwrap();
        test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        // Clean up
        std::fs::remove_file(&path).unwrap();
    }
    let final_memory = get_memory_usage();
    // Use a saturating difference: if memory shrank (allocator returned
    // pages to the OS), a plain unsigned subtraction would underflow and
    // panic in debug builds instead of reporting zero growth.
    // NOTE(review): assumes get_memory_usage returns an unsigned integer
    // byte count — confirm against its definition.
    let memory_increase = final_memory.saturating_sub(initial_memory);
    println!("Memory increase: {} MB", memory_increase / 1024 / 1024);
    // Assert memory usage is reasonable (<100MB increase)
    assert!(
        memory_increase < 100 * 1024 * 1024,
        "Memory usage too high: {} bytes",
        memory_increase
    );
    test_server.shutdown().await;
}
// Repeatedly processes one image, sampling memory every 100 iterations,
// to detect unbounded (leak-like) growth.
#[tokio::test]
async fn test_performance_no_memory_leaks() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    let image = images::generate_simple_equation("leak test");
    image.save("/tmp/leak_test.png").unwrap();
    // Process same image many times
    let iterations = 1000;
    let mut memory_samples = Vec::new();
    for i in 0..iterations {
        test_server
            .process_image("/tmp/leak_test.png", OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        // Sample memory every 100 iterations (first sample at i == 0).
        if i % 100 == 0 {
            memory_samples.push(get_memory_usage());
        }
    }
    let first_sample = memory_samples[0];
    let last_sample = *memory_samples.last().unwrap();
    // The last sample is taken at iteration (len - 1) * 100, so divide by
    // the actually-observed span — dividing by the full iteration count
    // (as before) understated the growth rate by ~10%. saturating_sub
    // avoids an unsigned-underflow panic if memory shrank between samples.
    let observed_span = ((memory_samples.len() - 1) * 100).max(1);
    let growth_rate = last_sample.saturating_sub(first_sample) as f64 / observed_span as f64;
    println!("Memory growth rate: {} bytes/iteration", growth_rate);
    println!("Samples: {:?}", memory_samples);
    // Growth rate should be minimal (<1KB per iteration)
    assert!(
        growth_rate < 1024.0,
        "Possible memory leak detected: {} bytes/iteration",
        growth_rate
    );
    test_server.shutdown().await;
}
// Sequentially processes 50 images and requires > 5 images/second.
#[tokio::test]
async fn test_performance_throughput() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Render the workload up front so only OCR time is measured.
    let image_count = 50;
    for i in 0..image_count {
        images::generate_simple_equation(&format!("throughput_{}", i))
            .save(&format!("/tmp/throughput_{}.png", i))
            .unwrap();
    }
    // Time the whole sequential run.
    let timer = Instant::now();
    for i in 0..image_count {
        test_server
            .process_image(&format!("/tmp/throughput_{}.png", i), OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
    }
    let duration = timer.elapsed();
    let throughput = image_count as f64 / duration.as_secs_f64();
    println!("Throughput: {:.2} images/second", throughput);
    println!("Total time: {:?} for {} images", duration, image_count);
    // Assert reasonable throughput (>5 images/second).
    assert!(
        throughput > 5.0,
        "Throughput too low: {:.2} images/s",
        throughput
    );
    // Remove the temp images.
    for i in 0..image_count {
        std::fs::remove_file(&format!("/tmp/throughput_{}.png", i)).unwrap();
    }
    test_server.shutdown().await;
}
// Fires 20 concurrent requests at one image; all must succeed and the
// aggregate rate must exceed 10 req/second.
#[tokio::test]
async fn test_performance_concurrent_throughput() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    let image = images::generate_simple_equation("concurrent");
    image.save("/tmp/concurrent_throughput.png").unwrap();
    let concurrent_requests = 20;
    let timer = Instant::now();
    // Launch every request as its own task.
    let handles: Vec<_> = (0..concurrent_requests)
        .map(|_| {
            let server = test_server.clone();
            tokio::spawn(async move {
                server
                    .process_image("/tmp/concurrent_throughput.png", OutputFormat::LaTeX)
                    .await
            })
        })
        .collect();
    let results = futures::future::join_all(handles).await;
    let duration = timer.elapsed();
    let success_count = results.iter().filter(|r| r.is_ok()).count();
    let throughput = concurrent_requests as f64 / duration.as_secs_f64();
    println!("Concurrent throughput: {:.2} req/second", throughput);
    println!("Success rate: {}/{}", success_count, concurrent_requests);
    assert!(
        success_count == concurrent_requests,
        "All requests should succeed"
    );
    assert!(
        throughput > 10.0,
        "Concurrent throughput too low: {:.2}",
        throughput
    );
    test_server.shutdown().await;
}
// Measures per-request latency over 100 runs and checks P50/P95/P99.
#[tokio::test]
async fn test_performance_latency_percentiles() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    let iterations = 100;
    let mut latencies = Vec::new();
    for i in 0..iterations {
        let eq = format!("p{}", i);
        let image = images::generate_simple_equation(&eq);
        let path = format!("/tmp/percentile_{}.png", i);
        image.save(&path).unwrap();
        // Time the request only, not image generation or cleanup.
        let start = Instant::now();
        test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        latencies.push(start.elapsed().as_micros());
        std::fs::remove_file(&path).unwrap();
    }
    // Sort latencies
    latencies.sort();
    // Nearest-rank percentile: index = ceil(p/100 * n) - 1. The previous
    // hard-coded indices [50]/[95]/[99] were off by one (index 99 is the
    // MAXIMUM of 100 samples, not P99) and would panic if `iterations`
    // were ever reduced.
    let n = latencies.len();
    let percentile = |p: usize| latencies[((p * n + 99) / 100).saturating_sub(1).min(n - 1)];
    let p50 = percentile(50);
    let p95 = percentile(95);
    let p99 = percentile(99);
    println!("Latency percentiles:");
    println!(" P50: {} μs ({} ms)", p50, p50 / 1000);
    println!(" P95: {} μs ({} ms)", p95, p95 / 1000);
    println!(" P99: {} μs ({} ms)", p99, p99 / 1000);
    // Assert percentile targets
    assert!(p50 < 100_000, "P50 latency too high: {} μs", p50); // <100ms
    assert!(p95 < 200_000, "P95 latency too high: {} μs", p95); // <200ms
    assert!(p99 < 500_000, "P99 latency too high: {} μs", p99); // <500ms
    test_server.shutdown().await;
}
// Compares the batch API against one-at-a-time processing for the same
// ten images; batching must be strictly faster.
#[tokio::test]
async fn test_performance_batch_efficiency() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Render a small batch of input images.
    let batch_size = 10;
    let paths: Vec<String> = (0..batch_size)
        .map(|i| {
            let path = format!("/tmp/batch_eff_{}.png", i);
            images::generate_simple_equation(&format!("batch_{}", i))
                .save(&path)
                .unwrap();
            path
        })
        .collect();
    // Baseline: process the images one at a time.
    let sequential_timer = Instant::now();
    for path in &paths {
        test_server
            .process_image(path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
    }
    let sequential_time = sequential_timer.elapsed();
    // Same workload through the batch API.
    let path_refs: Vec<&str> = paths.iter().map(|s| s.as_str()).collect();
    let batch_timer = Instant::now();
    test_server
        .process_batch(&path_refs, OutputFormat::LaTeX)
        .await
        .expect("Batch processing failed");
    let batch_time = batch_timer.elapsed();
    println!("Sequential time: {:?}", sequential_time);
    println!("Batch time: {:?}", batch_time);
    println!(
        "Speedup: {:.2}x",
        sequential_time.as_secs_f64() / batch_time.as_secs_f64()
    );
    // Batching should amortize per-request overhead.
    assert!(
        batch_time < sequential_time,
        "Batch processing should be faster"
    );
    // Cleanup
    for path in paths {
        std::fs::remove_file(&path).unwrap();
    }
    test_server.shutdown().await;
}
#[tokio::test]
async fn test_performance_cold_start_warmup() {
    // Time how long the server takes to become available from scratch.
    let boot_timer = Instant::now();
    let server = TestServer::start()
        .await
        .expect("Failed to start test server");
    let boot_elapsed = boot_timer.elapsed();
    println!("Cold start time: {:?}", boot_elapsed);
    // Issue the same request twice: the first pays any lazy warmup cost,
    // the second should hit an already-warm pipeline.
    let warmup_image = images::generate_simple_equation("warmup");
    warmup_image.save("/tmp/warmup.png").unwrap();
    let mut request_times = Vec::with_capacity(2);
    for _ in 0..2 {
        let timer = Instant::now();
        server
            .process_image("/tmp/warmup.png", OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        request_times.push(timer.elapsed());
    }
    let (first_elapsed, second_elapsed) = (request_times[0], request_times[1]);
    println!("First request time: {:?}", first_elapsed);
    println!("Second request time: {:?}", second_elapsed);
    // Startup should finish within five seconds.
    assert!(
        boot_elapsed.as_secs() < 5,
        "Cold start too slow: {:?}",
        boot_elapsed
    );
    // A warmed-up request must beat the cold one.
    assert!(
        second_elapsed < first_elapsed,
        "Warmed up request should be faster"
    );
    server.shutdown().await;
}
/// Best-effort resident-set-size probe for the current process, in bytes.
///
/// On Linux this parses the second whitespace-separated field (resident
/// pages) of `/proc/self/statm` and converts pages to bytes assuming
/// 4 KiB pages (the common default; NOTE(review): not guaranteed on
/// e.g. 16 KiB-page ARM kernels — confirm if exact figures matter).
/// On any other platform, or if the proc entry cannot be read or parsed,
/// returns 0.
fn get_memory_usage() -> usize {
    #[cfg(target_os = "linux")]
    {
        let rss_bytes = std::fs::read_to_string("/proc/self/statm")
            .ok()
            .and_then(|statm| {
                statm
                    .split_whitespace()
                    .nth(1)
                    .and_then(|field| field.parse::<usize>().ok())
            })
            .map(|pages| pages * 4096);
        if let Some(bytes) = rss_bytes {
            return bytes;
        }
    }
    // Unsupported platform or unreadable /proc entry.
    0
}

// ============================================================================
// File boundary (git-subtree squash residue): the following ~248 lines were
// originally a separate source file — full pipeline integration tests.
// ============================================================================
// Full pipeline integration tests
//
// Tests the complete OCR pipeline from image input to final output
//
// Note: These tests use mock test infrastructure.
// Real OCR processing requires ONNX models to be configured.
use super::*;
use crate::common::{OutputFormat, ProcessingOptions};
#[tokio::test]
async fn test_png_to_latex_pipeline() {
    // End-to-end: PNG equation image in, LaTeX string out.
    let server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Render a quadratic expression to a PNG on disk.
    let equation_image = images::generate_simple_equation("x^2 + 2x + 1");
    let png_path = "/tmp/test_equation.png";
    equation_image.save(png_path).unwrap();
    // Run the full OCR pipeline.
    let outcome = server
        .process_image(png_path, OutputFormat::LaTeX)
        .await
        .expect("Pipeline processing failed");
    // The recognized LaTeX must be non-empty, confident, and mention x.
    assert!(!outcome.latex.is_empty(), "LaTeX output should not be empty");
    assert!(
        outcome.confidence > 0.7,
        "Confidence too low: {}",
        outcome.confidence
    );
    assert!(outcome.latex.contains("x"), "Should contain variable x");
    server.shutdown().await;
}
#[tokio::test]
async fn test_jpeg_to_mathml_pipeline() {
    // End-to-end: JPEG fraction image in, MathML out.
    let server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Render 1/2 as a JPEG on disk.
    let jpeg_path = "/tmp/test_fraction.jpg";
    images::generate_fraction(1, 2).save(jpeg_path).unwrap();
    // Ask the pipeline for MathML output.
    let outcome = server
        .process_image(jpeg_path, OutputFormat::MathML)
        .await
        .expect("Pipeline processing failed");
    // The MathML field must be populated.
    assert!(outcome.mathml.is_some(), "MathML output should be present");
    server.shutdown().await;
}
#[tokio::test]
async fn test_webp_to_html_pipeline() {
    // End-to-end: WebP integral image in, HTML out. Falls back to PNG
    // when the WebP encoder is unavailable in this build.
    let server = TestServer::start()
        .await
        .expect("Failed to start test server");
    let integral_image = images::generate_integral("x dx");
    let webp_path = "/tmp/test_integral.webp";
    let png_path = "/tmp/test_integral.png";
    // Try WebP first; on encoder failure, write a PNG instead.
    if integral_image.save(webp_path).is_err() {
        integral_image.save(png_path).unwrap();
    }
    // Feed whichever file actually landed on disk.
    let input_path = if std::path::Path::new(webp_path).exists() {
        webp_path
    } else {
        png_path
    };
    // Pipeline must complete without error for HTML output.
    let _outcome = server
        .process_image(input_path, OutputFormat::HTML)
        .await
        .expect("Pipeline processing failed");
    server.shutdown().await;
}
#[tokio::test]
async fn test_pipeline_timeout_handling() {
    // Server configured with an aggressive 100 ms processing timeout.
    let server = TestServer::with_timeout(100)
        .await
        .expect("Failed to start test server");
    // A dense equation image that may exceed the timeout budget.
    images::generate_complex_equation()
        .save("/tmp/complex.png")
        .unwrap();
    let timer = std::time::Instant::now();
    let _outcome = server
        .process_image("/tmp/complex.png", OutputFormat::LaTeX)
        .await;
    let elapsed = timer.elapsed();
    // Whether it succeeded or timed out, control must return promptly.
    assert!(
        elapsed.as_millis() < 500,
        "Should timeout or complete quickly"
    );
    server.shutdown().await;
}
#[tokio::test]
async fn test_batch_pipeline_processing() {
    // Submit four small equations as one batch and check every result.
    let server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // (equation text, destination path) pairs for the batch fixtures.
    let fixtures = [
        ("x + y", "/tmp/batch_1.png"),
        ("a - b", "/tmp/batch_2.png"),
        ("2 * 3", "/tmp/batch_3.png"),
        ("x / y", "/tmp/batch_4.png"),
    ];
    for (equation, destination) in &fixtures {
        images::generate_simple_equation(equation)
            .save(destination)
            .unwrap();
    }
    // Collect just the paths and run the batch endpoint.
    let batch_paths: Vec<&str> = fixtures.iter().map(|&(_, path)| path).collect();
    let outcomes = server
        .process_batch(&batch_paths, OutputFormat::LaTeX)
        .await
        .expect("Batch processing failed");
    // Every image must yield a non-empty, reasonably confident result.
    assert_eq!(outcomes.len(), 4, "Should process all images");
    for (index, outcome) in outcomes.iter().enumerate() {
        assert!(!outcome.latex.is_empty(), "Result {} should have LaTeX", index);
        assert!(outcome.confidence > 0.5, "Result {} confidence too low", index);
    }
    server.shutdown().await;
}
#[tokio::test]
async fn test_pipeline_with_preprocessing() {
    // Recognition should survive artificial noise when preprocessing,
    // denoising, and deskew are all switched on.
    let server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Render an equation, then corrupt it with 10% noise.
    let mut noisy_image = images::generate_simple_equation("f(x) = x^2");
    images::add_noise(&mut noisy_image, 0.1);
    noisy_image.save("/tmp/noisy.png").unwrap();
    // Enable the full cleanup pipeline for this request.
    let cleanup_options = ProcessingOptions {
        enable_preprocessing: true,
        enable_denoising: true,
        enable_deskew: true,
        ..Default::default()
    };
    let outcome = server
        .process_image_with_options("/tmp/noisy.png", OutputFormat::LaTeX, cleanup_options)
        .await
        .expect("Processing failed");
    // Noise must not prevent extraction entirely.
    assert!(
        !outcome.latex.is_empty(),
        "Should extract LaTeX from noisy image"
    );
    server.shutdown().await;
}
#[tokio::test]
async fn test_multi_format_output() {
    // A single request can ask for every supported output format at once.
    let server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Render 3/4 as the input image.
    images::generate_fraction(3, 4)
        .save("/tmp/fraction.png")
        .unwrap();
    // Opt into all four representations.
    let every_format = ProcessingOptions {
        include_latex: true,
        include_mathml: true,
        include_ascii: true,
        include_text: true,
        ..Default::default()
    };
    let outcome = server
        .process_image_with_options("/tmp/fraction.png", OutputFormat::All, every_format)
        .await
        .expect("Processing failed");
    // At minimum the LaTeX and MathML fields must be populated.
    assert!(!outcome.latex.is_empty(), "Should have LaTeX");
    assert!(outcome.mathml.is_some(), "Should have MathML");
    server.shutdown().await;
}
#[tokio::test]
async fn test_pipeline_caching() {
    // Processing the same image twice must yield identical results, with
    // the second request eligible to be served from the cache.
    let server = TestServer::with_cache()
        .await
        .expect("Failed to start test server");
    // Render the input once.
    images::generate_simple_equation("a + b = c")
        .save("/tmp/cached.png")
        .unwrap();
    // Issue the request twice against the same file.
    let first = server
        .process_image("/tmp/cached.png", OutputFormat::LaTeX)
        .await
        .expect("First processing failed");
    let second = server
        .process_image("/tmp/cached.png", OutputFormat::LaTeX)
        .await
        .expect("Second processing failed");
    // Cache hit or not, the outputs must agree.
    assert_eq!(first.latex, second.latex, "Results should match");
    server.shutdown().await;
}