Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
450
examples/scipix/tests/integration/accuracy_tests.rs
Normal file
450
examples/scipix/tests/integration/accuracy_tests.rs
Normal file
@@ -0,0 +1,450 @@
|
||||
// Accuracy validation tests
|
||||
//
|
||||
// Tests OCR accuracy against Im2latex-100k subset and calculates CER, WER, BLEU
|
||||
|
||||
use super::*;
|
||||
use tokio;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_accuracy_simple_expressions() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
let test_cases = vec![
|
||||
("x + 1", "x + 1"),
|
||||
("2x - 3", "2x - 3"),
|
||||
("a = b", "a = b"),
|
||||
("f(x)", "f(x)"),
|
||||
("y^2", "y^2"),
|
||||
];
|
||||
|
||||
let mut total_cer = 0.0;
|
||||
let mut correct = 0;
|
||||
|
||||
for (equation, expected) in test_cases.iter() {
|
||||
let image = images::generate_simple_equation(equation);
|
||||
let path = format!("/tmp/accuracy_simple_{}.png", equation.replace(' ', "_"));
|
||||
image.save(&path).unwrap();
|
||||
|
||||
let result = test_server
|
||||
.process_image(&path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
let cer = metrics::calculate_cer(expected, &result.latex);
|
||||
total_cer += cer;
|
||||
|
||||
if latex::normalize(&result.latex) == latex::normalize(expected) {
|
||||
correct += 1;
|
||||
}
|
||||
|
||||
println!(
|
||||
"Equation: {} | CER: {:.4} | Got: {}",
|
||||
equation, cer, result.latex
|
||||
);
|
||||
}
|
||||
|
||||
let avg_cer = total_cer / test_cases.len() as f64;
|
||||
let accuracy = correct as f64 / test_cases.len() as f64;
|
||||
|
||||
println!(
|
||||
"Simple expressions - Avg CER: {:.4}, Accuracy: {:.2}%",
|
||||
avg_cer,
|
||||
accuracy * 100.0
|
||||
);
|
||||
|
||||
assert!(avg_cer < 0.05, "Average CER too high: {:.4}", avg_cer);
|
||||
assert!(
|
||||
accuracy > 0.90,
|
||||
"Accuracy too low: {:.2}%",
|
||||
accuracy * 100.0
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_accuracy_im2latex_subset() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Load Im2latex-100k test subset (sample)
|
||||
let test_cases = load_im2latex_test_subset(50); // Test 50 samples
|
||||
|
||||
let mut cer_sum = 0.0;
|
||||
let mut wer_sum = 0.0;
|
||||
let mut bleu_sum = 0.0;
|
||||
let mut exact_matches = 0;
|
||||
|
||||
for (i, case) in test_cases.iter().enumerate() {
|
||||
// Generate or load image
|
||||
let image_path = case.image_path.clone();
|
||||
|
||||
let result = test_server
|
||||
.process_image(&image_path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Calculate metrics
|
||||
let cer = metrics::calculate_cer(&case.ground_truth, &result.latex);
|
||||
let wer = metrics::calculate_wer(&case.ground_truth, &result.latex);
|
||||
let bleu = metrics::calculate_bleu(&case.ground_truth, &result.latex, 4);
|
||||
|
||||
cer_sum += cer;
|
||||
wer_sum += wer;
|
||||
bleu_sum += bleu;
|
||||
|
||||
if latex::normalize(&result.latex) == latex::normalize(&case.ground_truth) {
|
||||
exact_matches += 1;
|
||||
}
|
||||
|
||||
if i % 10 == 0 {
|
||||
println!("Processed {}/{} samples", i + 1, test_cases.len());
|
||||
}
|
||||
}
|
||||
|
||||
let count = test_cases.len() as f64;
|
||||
let avg_cer = cer_sum / count;
|
||||
let avg_wer = wer_sum / count;
|
||||
let avg_bleu = bleu_sum / count;
|
||||
let exact_match_rate = exact_matches as f64 / count;
|
||||
|
||||
println!("\nIm2latex subset results:");
|
||||
println!(" Average CER: {:.4}", avg_cer);
|
||||
println!(" Average WER: {:.4}", avg_wer);
|
||||
println!(" Average BLEU: {:.2}", avg_bleu);
|
||||
println!(" Exact match rate: {:.2}%", exact_match_rate * 100.0);
|
||||
|
||||
// Assert quality thresholds
|
||||
assert!(avg_cer < 0.03, "CER too high: {:.4}", avg_cer);
|
||||
assert!(avg_bleu > 80.0, "BLEU too low: {:.2}", avg_bleu);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_accuracy_fractions() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
let test_cases = vec![
|
||||
((1, 2), r"\frac{1}{2}"),
|
||||
((3, 4), r"\frac{3}{4}"),
|
||||
((5, 6), r"\frac{5}{6}"),
|
||||
((10, 3), r"\frac{10}{3}"),
|
||||
];
|
||||
|
||||
let mut correct = 0;
|
||||
|
||||
for ((num, den), expected) in test_cases.iter() {
|
||||
let image = images::generate_fraction(*num, *den);
|
||||
let path = format!("/tmp/frac_{}_{}.png", num, den);
|
||||
image.save(&path).unwrap();
|
||||
|
||||
let result = test_server
|
||||
.process_image(&path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
if latex::expressions_match(&result.latex, expected) {
|
||||
correct += 1;
|
||||
} else {
|
||||
println!(
|
||||
"Fraction {}/{} - Expected: {}, Got: {}",
|
||||
num, den, expected, result.latex
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let accuracy = correct as f64 / test_cases.len() as f64;
|
||||
println!("Fraction accuracy: {:.2}%", accuracy * 100.0);
|
||||
|
||||
assert!(
|
||||
accuracy >= 0.85,
|
||||
"Fraction accuracy too low: {:.2}%",
|
||||
accuracy * 100.0
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_accuracy_special_symbols() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
let test_cases = vec![
|
||||
(r"\alpha", r"\alpha"),
|
||||
(r"\beta", r"\beta"),
|
||||
(r"\sum", r"\sum"),
|
||||
(r"\int", r"\int"),
|
||||
(r"\pi", r"\pi"),
|
||||
(r"\infty", r"\infty"),
|
||||
];
|
||||
|
||||
let mut correct = 0;
|
||||
|
||||
for (symbol, expected) in test_cases.iter() {
|
||||
let image = images::generate_symbol(symbol);
|
||||
let path = format!("/tmp/symbol_{}.png", symbol.replace('\\', ""));
|
||||
image.save(&path).unwrap();
|
||||
|
||||
let result = test_server
|
||||
.process_image(&path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
if result.latex.contains(expected) {
|
||||
correct += 1;
|
||||
} else {
|
||||
println!(
|
||||
"Symbol {} - Expected to contain: {}, Got: {}",
|
||||
symbol, expected, result.latex
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let accuracy = correct as f64 / test_cases.len() as f64;
|
||||
println!("Special symbol accuracy: {:.2}%", accuracy * 100.0);
|
||||
|
||||
assert!(
|
||||
accuracy >= 0.80,
|
||||
"Symbol accuracy too low: {:.2}%",
|
||||
accuracy * 100.0
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_accuracy_regression_detection() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Load baseline results
|
||||
let baseline = load_baseline_results();
|
||||
|
||||
// Run same test cases
|
||||
let test_cases = load_regression_test_cases();
|
||||
|
||||
let mut regressions = Vec::new();
|
||||
|
||||
for case in test_cases.iter() {
|
||||
let result = test_server
|
||||
.process_image(&case.image_path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Compare with baseline
|
||||
if let Some(baseline_result) = baseline.get(&case.id) {
|
||||
let current_cer = metrics::calculate_cer(&case.ground_truth, &result.latex);
|
||||
let baseline_cer = baseline_result.cer;
|
||||
|
||||
// Check for regression (10% threshold)
|
||||
if current_cer > baseline_cer * 1.10 {
|
||||
regressions.push((
|
||||
case.id.clone(),
|
||||
baseline_cer,
|
||||
current_cer,
|
||||
baseline_result.latex.clone(),
|
||||
result.latex.clone(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !regressions.is_empty() {
|
||||
println!("Regressions detected:");
|
||||
for (id, baseline_cer, current_cer, baseline_latex, current_latex) in ®ressions {
|
||||
println!(" {} - CER: {:.4} -> {:.4}", id, baseline_cer, current_cer);
|
||||
println!(" Baseline: {}", baseline_latex);
|
||||
println!(" Current: {}", current_latex);
|
||||
}
|
||||
}
|
||||
|
||||
assert!(
|
||||
regressions.is_empty(),
|
||||
"Found {} regressions",
|
||||
regressions.len()
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_accuracy_confidence_calibration() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
let test_cases = load_calibration_test_cases();
|
||||
|
||||
let mut high_conf_correct = 0;
|
||||
let mut high_conf_total = 0;
|
||||
let mut low_conf_correct = 0;
|
||||
let mut low_conf_total = 0;
|
||||
|
||||
for case in test_cases.iter() {
|
||||
let result = test_server
|
||||
.process_image(&case.image_path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
let is_correct = latex::normalize(&result.latex) == latex::normalize(&case.ground_truth);
|
||||
|
||||
if result.confidence > 0.9 {
|
||||
high_conf_total += 1;
|
||||
if is_correct {
|
||||
high_conf_correct += 1;
|
||||
}
|
||||
} else if result.confidence < 0.7 {
|
||||
low_conf_total += 1;
|
||||
if is_correct {
|
||||
low_conf_correct += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let high_conf_accuracy = if high_conf_total > 0 {
|
||||
high_conf_correct as f64 / high_conf_total as f64
|
||||
} else {
|
||||
1.0
|
||||
};
|
||||
|
||||
let low_conf_accuracy = if low_conf_total > 0 {
|
||||
low_conf_correct as f64 / low_conf_total as f64
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
println!("Confidence calibration:");
|
||||
println!(
|
||||
" High confidence (>0.9): {:.2}% accuracy ({}/{})",
|
||||
high_conf_accuracy * 100.0,
|
||||
high_conf_correct,
|
||||
high_conf_total
|
||||
);
|
||||
println!(
|
||||
" Low confidence (<0.7): {:.2}% accuracy ({}/{})",
|
||||
low_conf_accuracy * 100.0,
|
||||
low_conf_correct,
|
||||
low_conf_total
|
||||
);
|
||||
|
||||
// High confidence should correlate with high accuracy
|
||||
assert!(
|
||||
high_conf_accuracy > 0.95,
|
||||
"High confidence predictions should be very accurate"
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
// Helper functions and types
|
||||
|
||||
/// One OCR evaluation sample: an input image on disk paired with the
/// LaTeX string it is expected to produce.
#[derive(Debug, Clone)]
struct TestCase {
    // Stable identifier used to join against baseline results.
    id: String,
    // Filesystem path of the rendered equation image.
    image_path: String,
    // Expected LaTeX output for the image.
    ground_truth: String,
}
|
||||
|
||||
/// Recorded output of a previous run, used by the regression test to
/// detect accuracy drops relative to a known-good baseline.
#[derive(Debug, Clone)]
struct BaselineResult {
    // LaTeX produced by the baseline run.
    latex: String,
    // Character error rate achieved by the baseline run.
    cer: f64,
}
|
||||
|
||||
fn load_im2latex_test_subset(count: usize) -> Vec<TestCase> {
|
||||
// Load or generate Im2latex test subset
|
||||
// For now, generate synthetic test cases
|
||||
(0..count)
|
||||
.map(|i| {
|
||||
let eq = match i % 5 {
|
||||
0 => format!("x^{}", i),
|
||||
1 => format!("a + {}", i),
|
||||
2 => format!(r"\frac{{{}}}{{{}}}", i, i + 1),
|
||||
3 => format!("{}x + {}", i, i * 2),
|
||||
_ => format!("y = {}x", i),
|
||||
};
|
||||
|
||||
let image = images::generate_simple_equation(&eq);
|
||||
let path = format!("/tmp/im2latex_{}.png", i);
|
||||
image.save(&path).unwrap();
|
||||
|
||||
TestCase {
|
||||
id: format!("im2latex_{}", i),
|
||||
image_path: path,
|
||||
ground_truth: eq,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn load_regression_test_cases() -> Vec<TestCase> {
|
||||
// Load regression test cases from file or generate
|
||||
vec![
|
||||
TestCase {
|
||||
id: "reg_001".to_string(),
|
||||
image_path: "/tmp/reg_001.png".to_string(),
|
||||
ground_truth: "x + y".to_string(),
|
||||
},
|
||||
// Add more test cases...
|
||||
]
|
||||
}
|
||||
|
||||
fn load_baseline_results() -> std::collections::HashMap<String, BaselineResult> {
|
||||
// Load baseline results from file
|
||||
let mut baseline = std::collections::HashMap::new();
|
||||
|
||||
baseline.insert(
|
||||
"reg_001".to_string(),
|
||||
BaselineResult {
|
||||
latex: "x + y".to_string(),
|
||||
cer: 0.0,
|
||||
},
|
||||
);
|
||||
|
||||
baseline
|
||||
}
|
||||
|
||||
fn load_calibration_test_cases() -> Vec<TestCase> {
|
||||
// Generate test cases with varying difficulty for confidence calibration
|
||||
let mut cases = Vec::new();
|
||||
|
||||
// Easy cases
|
||||
for i in 0..10 {
|
||||
let eq = format!("x + {}", i);
|
||||
let image = images::generate_simple_equation(&eq);
|
||||
let path = format!("/tmp/calib_easy_{}.png", i);
|
||||
image.save(&path).unwrap();
|
||||
|
||||
cases.push(TestCase {
|
||||
id: format!("calib_easy_{}", i),
|
||||
image_path: path,
|
||||
ground_truth: eq,
|
||||
});
|
||||
}
|
||||
|
||||
// Hard cases (noisy)
|
||||
for i in 0..10 {
|
||||
let eq = format!("y^{}", i);
|
||||
let mut image = images::generate_simple_equation(&eq);
|
||||
images::add_noise(&mut image, 0.2);
|
||||
let path = format!("/tmp/calib_hard_{}.png", i);
|
||||
image.save(&path).unwrap();
|
||||
|
||||
cases.push(TestCase {
|
||||
id: format!("calib_hard_{}", i),
|
||||
image_path: path,
|
||||
ground_truth: eq,
|
||||
});
|
||||
}
|
||||
|
||||
cases
|
||||
}
|
||||
80
examples/scipix/tests/integration/api_tests.rs
Normal file
80
examples/scipix/tests/integration/api_tests.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
// API server integration tests
|
||||
//
|
||||
// Tests HTTP API endpoints, authentication, rate limiting, and async processing
|
||||
|
||||
use super::*;
|
||||
use reqwest::{multipart, Client, StatusCode};
|
||||
use serde_json::json;
|
||||
use tokio;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_api_post_text_with_file() {
|
||||
let test_server = TestServer::start_api()
|
||||
.await
|
||||
.expect("Failed to start API server");
|
||||
let client = Client::new();
|
||||
|
||||
// Create test image
|
||||
let image = images::generate_simple_equation("x + y");
|
||||
image.save("/tmp/api_test.png").unwrap();
|
||||
let image_bytes = std::fs::read("/tmp/api_test.png").unwrap();
|
||||
|
||||
// Create multipart form
|
||||
let form = multipart::Form::new().part(
|
||||
"file",
|
||||
multipart::Part::bytes(image_bytes)
|
||||
.file_name("equation.png")
|
||||
.mime_str("image/png")
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
// POST to /v3/text
|
||||
let response = client
|
||||
.post(&format!("{}/v3/text", test_server.base_url()))
|
||||
.header("app_id", "test_app_id")
|
||||
.header("app_key", "test_app_key")
|
||||
.multipart(form)
|
||||
.send()
|
||||
.await
|
||||
.expect("Request failed");
|
||||
|
||||
assert_eq!(response.status(), StatusCode::OK);
|
||||
|
||||
let result: serde_json::Value = response.json().await.unwrap();
|
||||
assert!(result.get("request_id").is_some(), "Should have request_id");
|
||||
assert!(result.get("text").is_some(), "Should have text field");
|
||||
assert!(
|
||||
result.get("processing_time_ms").is_some(),
|
||||
"Should have processing time"
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_api_authentication_validation() {
|
||||
let test_server = TestServer::start_api()
|
||||
.await
|
||||
.expect("Failed to start API server");
|
||||
let client = Client::new();
|
||||
|
||||
let payload = json!({
|
||||
"src": "base64data"
|
||||
});
|
||||
|
||||
// Test missing auth
|
||||
let response = client
|
||||
.post(&format!("{}/v3/text", test_server.base_url()))
|
||||
.json(&payload)
|
||||
.send()
|
||||
.await
|
||||
.expect("Request failed");
|
||||
|
||||
assert_eq!(
|
||||
response.status(),
|
||||
StatusCode::UNAUTHORIZED,
|
||||
"Should require authentication"
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
300
examples/scipix/tests/integration/cache_tests.rs
Normal file
300
examples/scipix/tests/integration/cache_tests.rs
Normal file
@@ -0,0 +1,300 @@
|
||||
// Cache integration tests
|
||||
//
|
||||
// Tests caching behavior, hit/miss ratios, similarity search, and persistence
|
||||
//
|
||||
// Note: These tests use mock test infrastructure.
|
||||
// Real OCR processing requires ONNX models to be configured.
|
||||
|
||||
use super::*;
|
||||
use crate::common::{CacheStats, OutputFormat};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cache_hit_miss_behavior() {
|
||||
let test_server = TestServer::with_cache()
|
||||
.await
|
||||
.expect("Failed to start test server with cache");
|
||||
|
||||
let image = images::generate_simple_equation("x^2");
|
||||
image.save("/tmp/cache_test_1.png").unwrap();
|
||||
|
||||
// First request - should miss cache
|
||||
let result1 = test_server
|
||||
.process_image("/tmp/cache_test_1.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Get cache stats
|
||||
let _stats = test_server
|
||||
.cache_stats()
|
||||
.await
|
||||
.expect("Failed to get cache stats");
|
||||
|
||||
// Second request - should hit cache
|
||||
let result2 = test_server
|
||||
.process_image("/tmp/cache_test_1.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Verify results match
|
||||
assert_eq!(result1.latex, result2.latex, "Cached result should match");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cache_similarity_lookup() {
|
||||
let test_server = TestServer::with_cache()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create original image
|
||||
let image1 = images::generate_simple_equation("a + b");
|
||||
image1.save("/tmp/similarity_1.png").unwrap();
|
||||
|
||||
// Create similar image (slightly different rendering)
|
||||
let mut image2 = images::generate_simple_equation("a + b");
|
||||
images::add_slight_variation(&mut image2, 0.05);
|
||||
image2.save("/tmp/similarity_2.png").unwrap();
|
||||
|
||||
// Process first image
|
||||
let result1 = test_server
|
||||
.process_image("/tmp/similarity_1.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Process similar image
|
||||
let result2 = test_server
|
||||
.process_image("/tmp/similarity_2.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Results should be similar
|
||||
let similarity = latex::calculate_similarity(&result1.latex, &result2.latex);
|
||||
assert!(
|
||||
similarity > 0.9,
|
||||
"Similar images should produce similar results"
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cache_eviction() {
|
||||
// Start server with small cache size
|
||||
let test_server = TestServer::with_cache_size(3)
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create and process 5 different images
|
||||
for i in 0..5 {
|
||||
let eq = format!("x + {}", i);
|
||||
let image = images::generate_simple_equation(&eq);
|
||||
let path = format!("/tmp/eviction_{}.png", i);
|
||||
image.save(&path).unwrap();
|
||||
|
||||
test_server
|
||||
.process_image(&path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
}
|
||||
|
||||
// Get cache stats
|
||||
let stats = test_server
|
||||
.cache_stats()
|
||||
.await
|
||||
.expect("Failed to get cache stats");
|
||||
assert!(stats.current_size <= 3, "Cache should not exceed max size");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cache_persistence() {
|
||||
let cache_dir = "/tmp/scipix_cache_persist";
|
||||
std::fs::create_dir_all(cache_dir).unwrap();
|
||||
|
||||
// Start server with persistent cache
|
||||
let test_server = TestServer::with_persistent_cache(cache_dir)
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Process image
|
||||
let image = images::generate_simple_equation("persistent");
|
||||
image.save("/tmp/persist_test.png").unwrap();
|
||||
|
||||
let result1 = test_server
|
||||
.process_image("/tmp/persist_test.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Shutdown server
|
||||
test_server.shutdown().await;
|
||||
|
||||
// Start new server with same cache directory
|
||||
let test_server2 = TestServer::with_persistent_cache(cache_dir)
|
||||
.await
|
||||
.expect("Failed to start second test server");
|
||||
|
||||
// Process same image - should hit persistent cache
|
||||
let result2 = test_server2
|
||||
.process_image("/tmp/persist_test.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Results should match
|
||||
assert_eq!(
|
||||
result1.latex, result2.latex,
|
||||
"Persistent cache should restore results"
|
||||
);
|
||||
|
||||
test_server2.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cache_invalidation() {
|
||||
let test_server = TestServer::with_cache()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Process image
|
||||
let image = images::generate_simple_equation("invalidate");
|
||||
image.save("/tmp/invalidate_test.png").unwrap();
|
||||
|
||||
let result1 = test_server
|
||||
.process_image("/tmp/invalidate_test.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Invalidate cache
|
||||
test_server
|
||||
.invalidate_cache()
|
||||
.await
|
||||
.expect("Cache invalidation failed");
|
||||
|
||||
// Process again - should miss cache
|
||||
let result2 = test_server
|
||||
.process_image("/tmp/invalidate_test.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Results should match but processing should take time
|
||||
assert_eq!(result1.latex, result2.latex, "Results should still match");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cache_hit_ratio() {
|
||||
let test_server = TestServer::with_cache()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create test images
|
||||
let equations = vec!["a", "b", "c"];
|
||||
for eq in &equations {
|
||||
let image = images::generate_simple_equation(eq);
|
||||
image.save(&format!("/tmp/ratio_{}.png", eq)).unwrap();
|
||||
}
|
||||
|
||||
// Process each image twice
|
||||
for eq in &equations {
|
||||
let path = format!("/tmp/ratio_{}.png", eq);
|
||||
|
||||
// First time (miss)
|
||||
test_server
|
||||
.process_image(&path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Second time (hit)
|
||||
test_server
|
||||
.process_image(&path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
}
|
||||
|
||||
// Get stats
|
||||
let _stats = test_server
|
||||
.cache_stats()
|
||||
.await
|
||||
.expect("Failed to get cache stats");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cache_ttl_expiration() {
|
||||
// Start server with 1-second TTL
|
||||
let test_server = TestServer::with_cache_ttl(1)
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Process image
|
||||
let image = images::generate_simple_equation("ttl");
|
||||
image.save("/tmp/ttl_test.png").unwrap();
|
||||
|
||||
let result1 = test_server
|
||||
.process_image("/tmp/ttl_test.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Immediately reprocess - should hit cache
|
||||
let result2 = test_server
|
||||
.process_image("/tmp/ttl_test.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
assert_eq!(result1.latex, result2.latex);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cache_concurrent_access() {
|
||||
let test_server = TestServer::with_cache()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
let image = images::generate_simple_equation("concurrent");
|
||||
image.save("/tmp/concurrent_cache.png").unwrap();
|
||||
|
||||
// First request to populate cache
|
||||
test_server
|
||||
.process_image("/tmp/concurrent_cache.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Spawn multiple concurrent requests
|
||||
let mut handles = vec![];
|
||||
for _ in 0..10 {
|
||||
let server = test_server.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
server
|
||||
.process_image("/tmp/concurrent_cache.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
});
|
||||
handles.push(handle);
|
||||
}
|
||||
|
||||
// Wait for all to complete
|
||||
let results = futures::future::join_all(handles).await;
|
||||
|
||||
// All should succeed and return same result
|
||||
assert!(
|
||||
results.iter().all(|r| r.is_ok()),
|
||||
"All requests should succeed"
|
||||
);
|
||||
|
||||
let first_latex = &results[0].as_ref().unwrap().as_ref().unwrap().latex;
|
||||
assert!(
|
||||
results
|
||||
.iter()
|
||||
.all(|r| { &r.as_ref().unwrap().as_ref().unwrap().latex == first_latex }),
|
||||
"All results should match"
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
// Re-export CacheStats for backward compatibility
|
||||
pub use crate::common::CacheStats as CacheStatsCompat;
|
||||
227
examples/scipix/tests/integration/cli_tests.rs
Normal file
227
examples/scipix/tests/integration/cli_tests.rs
Normal file
@@ -0,0 +1,227 @@
|
||||
// CLI integration tests
|
||||
//
|
||||
// Tests command-line interface functionality
|
||||
|
||||
use super::*;
|
||||
use assert_cmd::Command;
|
||||
use predicates::prelude::*;
|
||||
use std::process::Stdio;
|
||||
|
||||
#[test]
fn test_cli_ocr_command_with_file() {
    // `scipix-ocr ocr <file>` on a simple equation must succeed and print
    // both the recognized text and a "LaTeX:" label.
    images::generate_simple_equation("x + 1")
        .save("/tmp/cli_test.png")
        .unwrap();

    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/cli_test.png")
        .assert()
        .success()
        .stdout(predicate::str::contains("x"))
        .stdout(predicate::str::contains("LaTeX:"));
}
|
||||
|
||||
#[test]
fn test_cli_ocr_with_output_format() {
    // `--format latex` must emit \frac for a fraction image, and
    // `--format mathml` must emit an <mfrac> element.
    images::generate_fraction(3, 4)
        .save("/tmp/cli_fraction.png")
        .unwrap();

    // LaTeX output.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/cli_fraction.png")
        .arg("--format")
        .arg("latex")
        .assert()
        .success()
        .stdout(predicate::str::contains(r"\frac"));

    // MathML output.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/cli_fraction.png")
        .arg("--format")
        .arg("mathml")
        .assert()
        .success()
        .stdout(predicate::str::contains("<mfrac>"));
}
|
||||
|
||||
#[test]
fn test_cli_batch_command() {
    // `scipix-ocr batch <dir> --output <json>` must process every image in
    // the directory and write a non-trivial JSON results file.
    std::fs::create_dir_all("/tmp/cli_batch").unwrap();

    let equations = vec!["a + b", "x - y", "2 * 3"];
    for (i, eq) in equations.iter().enumerate() {
        images::generate_simple_equation(eq)
            .save(&format!("/tmp/cli_batch/eq_{}.png", i))
            .unwrap();
    }

    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("batch")
        .arg("/tmp/cli_batch")
        .arg("--output")
        .arg("/tmp/cli_batch_results.json")
        .assert()
        .success();

    // Sanity-check the written output file.
    let results = std::fs::read_to_string("/tmp/cli_batch_results.json").unwrap();
    assert!(results.contains("a"), "Should contain results");
    assert!(results.len() > 100, "Should have substantial output");
}
|
||||
|
||||
#[test]
#[ignore] // Requires built binary and available port
fn test_cli_serve_command_startup() {
    // Spawns `scipix-ocr serve --port 18080` as a real child process and
    // probes /health; the server must answer before being killed.
    use std::process::Command as StdCommand;

    // Resolve the binary: prefer the cargo-provided path, fall back to the
    // default debug build location.
    let bin_path = std::env::var("CARGO_BIN_EXE_scipix-ocr")
        .unwrap_or_else(|_| "target/debug/scipix-ocr".to_string());

    let mut server_proc = StdCommand::new(&bin_path)
        .arg("serve")
        .arg("--port")
        .arg("18080")
        .stdout(Stdio::piped())
        .spawn()
        .expect("Failed to start server");

    // Give the server time to bind its port.
    std::thread::sleep(std::time::Duration::from_secs(2));

    let response = reqwest::blocking::Client::new()
        .get("http://localhost:18080/health")
        .timeout(std::time::Duration::from_secs(5))
        .send();

    // Always tear the child down before asserting.
    let _ = server_proc.kill();

    assert!(response.is_ok(), "Server should respond to health check");
}
|
||||
|
||||
#[test]
fn test_cli_config_command() {
    // `config show` must print configuration; `config set` must accept a
    // dotted key/value pair and exit cleanly.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("config")
        .arg("show")
        .assert()
        .success()
        .stdout(
            predicate::str::contains("model_path").or(predicate::str::contains("Configuration")),
        );

    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("config")
        .arg("set")
        .arg("preprocessing.enable_deskew")
        .arg("true")
        .assert()
        .success();
}
|
||||
|
||||
#[test]
fn test_cli_invalid_file() {
    // A nonexistent input path must fail and mention the problem on stderr.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/nonexistent/file.png")
        .assert()
        .failure()
        .stderr(predicate::str::contains("not found").or(predicate::str::contains("error")));
}
|
||||
|
||||
#[test]
fn test_cli_exit_codes() {
    // Exit code contract: 0 on a successful OCR run, non-zero on failure.
    images::generate_simple_equation("ok")
        .save("/tmp/exit_code_test.png")
        .unwrap();

    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/exit_code_test.png")
        .assert()
        .code(0);

    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/nonexistent.png")
        .assert()
        .code(predicate::ne(0));
}
|
||||
|
||||
#[test]
fn test_cli_verbose_output() {
    // `--verbose` must include extra diagnostics in stdout.
    images::generate_simple_equation("verbose")
        .save("/tmp/verbose_test.png")
        .unwrap();

    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/verbose_test.png")
        .arg("--verbose")
        .assert()
        .success()
        .stdout(predicate::str::contains("Processing").or(predicate::str::contains("Confidence")));
}
|
||||
|
||||
#[test]
fn test_cli_json_output() {
    // `--output-format json` must emit valid JSON carrying at least the
    // `latex` and `confidence` fields.
    images::generate_simple_equation("json")
        .save("/tmp/json_test.png")
        .unwrap();

    let output = Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/json_test.png")
        .arg("--output-format")
        .arg("json")
        .output()
        .expect("Failed to execute command");

    let stdout_text = String::from_utf8_lossy(&output.stdout);

    let parsed: serde_json::Value =
        serde_json::from_str(&stdout_text).expect("Output should be valid JSON");

    assert!(parsed.get("latex").is_some(), "Should have latex field");
    assert!(
        parsed.get("confidence").is_some(),
        "Should have confidence field"
    );
}
|
||||
|
||||
#[test]
fn test_cli_help_command() {
    // Top-level `--help` must show usage and the command list; subcommand
    // help must show its options.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("--help")
        .assert()
        .success()
        .stdout(predicate::str::contains("USAGE:"))
        .stdout(predicate::str::contains("COMMANDS:"));

    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("--help")
        .assert()
        .success()
        .stdout(predicate::str::contains("OPTIONS:"));
}
|
||||
|
||||
/// Checks that `--version` prints the crate version baked in at compile time.
#[test]
fn test_cli_version_command() {
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("--version")
        .assert()
        .success()
        .stdout(predicate::str::contains(env!("CARGO_PKG_VERSION")));
}
|
||||
14
examples/scipix/tests/integration/mod.rs
Normal file
14
examples/scipix/tests/integration/mod.rs
Normal file
@@ -0,0 +1,14 @@
|
||||
// Integration test module organization
//
// This module provides integration tests for the ruvector-scipix OCR system.
// Tests are organized by functionality area.

pub mod accuracy_tests; // OCR accuracy metrics (CER/WER/BLEU) on known equations
pub mod api_tests;
pub mod cache_tests;
pub mod cli_tests; // exit codes, flags, help/version output of the scipix-ocr binary
pub mod performance_tests; // latency, throughput, memory usage, leak checks
pub mod pipeline_tests; // end-to-end image -> LaTeX/MathML/HTML pipelines

// Re-export common test utilities
pub use crate::common::*;
|
||||
386
examples/scipix/tests/integration/performance_tests.rs
Normal file
386
examples/scipix/tests/integration/performance_tests.rs
Normal file
@@ -0,0 +1,386 @@
|
||||
// Performance validation tests
|
||||
//
|
||||
// Tests latency, memory usage, throughput, and ensures no memory leaks
|
||||
|
||||
use super::*;
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_latency_within_bounds() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
let image = images::generate_simple_equation("x + y");
|
||||
image.save("/tmp/perf_latency.png").unwrap();
|
||||
|
||||
// Measure latency
|
||||
let start = Instant::now();
|
||||
let result = test_server
|
||||
.process_image("/tmp/perf_latency.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
let latency = start.elapsed();
|
||||
|
||||
println!("Latency: {:?}", latency);
|
||||
println!("Confidence: {}", result.confidence);
|
||||
|
||||
// Assert latency is within bounds (<100ms for simple equation)
|
||||
assert!(latency.as_millis() < 100, "Latency too high: {:?}", latency);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_memory_usage_limits() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Get initial memory usage
|
||||
let initial_memory = get_memory_usage();
|
||||
|
||||
// Process multiple images
|
||||
for i in 0..100 {
|
||||
let eq = format!("x + {}", i);
|
||||
let image = images::generate_simple_equation(&eq);
|
||||
let path = format!("/tmp/perf_mem_{}.png", i);
|
||||
image.save(&path).unwrap();
|
||||
|
||||
test_server
|
||||
.process_image(&path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Clean up
|
||||
std::fs::remove_file(&path).unwrap();
|
||||
}
|
||||
|
||||
// Get final memory usage
|
||||
let final_memory = get_memory_usage();
|
||||
let memory_increase = final_memory - initial_memory;
|
||||
|
||||
println!("Memory increase: {} MB", memory_increase / 1024 / 1024);
|
||||
|
||||
// Assert memory usage is reasonable (<100MB increase)
|
||||
assert!(
|
||||
memory_increase < 100 * 1024 * 1024,
|
||||
"Memory usage too high: {} bytes",
|
||||
memory_increase
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_no_memory_leaks() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
let image = images::generate_simple_equation("leak test");
|
||||
image.save("/tmp/leak_test.png").unwrap();
|
||||
|
||||
// Process same image many times
|
||||
let iterations = 1000;
|
||||
let mut memory_samples = Vec::new();
|
||||
|
||||
for i in 0..iterations {
|
||||
test_server
|
||||
.process_image("/tmp/leak_test.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Sample memory every 100 iterations
|
||||
if i % 100 == 0 {
|
||||
memory_samples.push(get_memory_usage());
|
||||
}
|
||||
}
|
||||
|
||||
// Check for linear memory growth (leak indicator)
|
||||
let first_sample = memory_samples[0];
|
||||
let last_sample = *memory_samples.last().unwrap();
|
||||
let growth_rate = (last_sample - first_sample) as f64 / iterations as f64;
|
||||
|
||||
println!("Memory growth rate: {} bytes/iteration", growth_rate);
|
||||
println!("Samples: {:?}", memory_samples);
|
||||
|
||||
// Growth rate should be minimal (<1KB per iteration)
|
||||
assert!(
|
||||
growth_rate < 1024.0,
|
||||
"Possible memory leak detected: {} bytes/iteration",
|
||||
growth_rate
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_throughput() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create test images
|
||||
let image_count = 50;
|
||||
for i in 0..image_count {
|
||||
let eq = format!("throughput_{}", i);
|
||||
let image = images::generate_simple_equation(&eq);
|
||||
image.save(&format!("/tmp/throughput_{}.png", i)).unwrap();
|
||||
}
|
||||
|
||||
// Measure throughput
|
||||
let start = Instant::now();
|
||||
|
||||
for i in 0..image_count {
|
||||
test_server
|
||||
.process_image(&format!("/tmp/throughput_{}.png", i), OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
}
|
||||
|
||||
let duration = start.elapsed();
|
||||
let throughput = image_count as f64 / duration.as_secs_f64();
|
||||
|
||||
println!("Throughput: {:.2} images/second", throughput);
|
||||
println!("Total time: {:?} for {} images", duration, image_count);
|
||||
|
||||
// Assert reasonable throughput (>5 images/second)
|
||||
assert!(
|
||||
throughput > 5.0,
|
||||
"Throughput too low: {:.2} images/s",
|
||||
throughput
|
||||
);
|
||||
|
||||
// Cleanup
|
||||
for i in 0..image_count {
|
||||
std::fs::remove_file(&format!("/tmp/throughput_{}.png", i)).unwrap();
|
||||
}
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_concurrent_throughput() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create test image
|
||||
let image = images::generate_simple_equation("concurrent");
|
||||
image.save("/tmp/concurrent_throughput.png").unwrap();
|
||||
|
||||
let concurrent_requests = 20;
|
||||
let start = Instant::now();
|
||||
|
||||
// Spawn concurrent requests
|
||||
let mut handles = vec![];
|
||||
for _ in 0..concurrent_requests {
|
||||
let server = test_server.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
server
|
||||
.process_image("/tmp/concurrent_throughput.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
});
|
||||
handles.push(handle);
|
||||
}
|
||||
|
||||
// Wait for all to complete
|
||||
let results = futures::future::join_all(handles).await;
|
||||
let duration = start.elapsed();
|
||||
|
||||
let success_count = results.iter().filter(|r| r.is_ok()).count();
|
||||
let throughput = concurrent_requests as f64 / duration.as_secs_f64();
|
||||
|
||||
println!("Concurrent throughput: {:.2} req/second", throughput);
|
||||
println!("Success rate: {}/{}", success_count, concurrent_requests);
|
||||
|
||||
assert!(
|
||||
success_count == concurrent_requests,
|
||||
"All requests should succeed"
|
||||
);
|
||||
assert!(
|
||||
throughput > 10.0,
|
||||
"Concurrent throughput too low: {:.2}",
|
||||
throughput
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_latency_percentiles() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
let iterations = 100;
|
||||
let mut latencies = Vec::new();
|
||||
|
||||
for i in 0..iterations {
|
||||
let eq = format!("p{}", i);
|
||||
let image = images::generate_simple_equation(&eq);
|
||||
let path = format!("/tmp/percentile_{}.png", i);
|
||||
image.save(&path).unwrap();
|
||||
|
||||
let start = Instant::now();
|
||||
test_server
|
||||
.process_image(&path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
let latency = start.elapsed();
|
||||
|
||||
latencies.push(latency.as_micros());
|
||||
|
||||
std::fs::remove_file(&path).unwrap();
|
||||
}
|
||||
|
||||
// Sort latencies
|
||||
latencies.sort();
|
||||
|
||||
// Calculate percentiles
|
||||
let p50 = latencies[50];
|
||||
let p95 = latencies[95];
|
||||
let p99 = latencies[99];
|
||||
|
||||
println!("Latency percentiles:");
|
||||
println!(" P50: {} μs ({} ms)", p50, p50 / 1000);
|
||||
println!(" P95: {} μs ({} ms)", p95, p95 / 1000);
|
||||
println!(" P99: {} μs ({} ms)", p99, p99 / 1000);
|
||||
|
||||
// Assert percentile targets
|
||||
assert!(p50 < 100_000, "P50 latency too high: {} μs", p50); // <100ms
|
||||
assert!(p95 < 200_000, "P95 latency too high: {} μs", p95); // <200ms
|
||||
assert!(p99 < 500_000, "P99 latency too high: {} μs", p99); // <500ms
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_batch_efficiency() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create test images
|
||||
let batch_size = 10;
|
||||
let mut paths = Vec::new();
|
||||
|
||||
for i in 0..batch_size {
|
||||
let eq = format!("batch_{}", i);
|
||||
let image = images::generate_simple_equation(&eq);
|
||||
let path = format!("/tmp/batch_eff_{}.png", i);
|
||||
image.save(&path).unwrap();
|
||||
paths.push(path);
|
||||
}
|
||||
|
||||
// Measure sequential processing
|
||||
let start_sequential = Instant::now();
|
||||
for path in &paths {
|
||||
test_server
|
||||
.process_image(path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
}
|
||||
let sequential_time = start_sequential.elapsed();
|
||||
|
||||
// Measure batch processing
|
||||
let start_batch = Instant::now();
|
||||
test_server
|
||||
.process_batch(
|
||||
&paths.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
|
||||
OutputFormat::LaTeX,
|
||||
)
|
||||
.await
|
||||
.expect("Batch processing failed");
|
||||
let batch_time = start_batch.elapsed();
|
||||
|
||||
println!("Sequential time: {:?}", sequential_time);
|
||||
println!("Batch time: {:?}", batch_time);
|
||||
println!(
|
||||
"Speedup: {:.2}x",
|
||||
sequential_time.as_secs_f64() / batch_time.as_secs_f64()
|
||||
);
|
||||
|
||||
// Batch should be faster
|
||||
assert!(
|
||||
batch_time < sequential_time,
|
||||
"Batch processing should be faster"
|
||||
);
|
||||
|
||||
// Cleanup
|
||||
for path in paths {
|
||||
std::fs::remove_file(&path).unwrap();
|
||||
}
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_cold_start_warmup() {
|
||||
// Measure cold start
|
||||
let start_cold = Instant::now();
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
let cold_start_time = start_cold.elapsed();
|
||||
|
||||
println!("Cold start time: {:?}", cold_start_time);
|
||||
|
||||
// First request (warmup)
|
||||
let image = images::generate_simple_equation("warmup");
|
||||
image.save("/tmp/warmup.png").unwrap();
|
||||
|
||||
let start_first = Instant::now();
|
||||
test_server
|
||||
.process_image("/tmp/warmup.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
let first_request_time = start_first.elapsed();
|
||||
|
||||
// Second request (warmed up)
|
||||
let start_second = Instant::now();
|
||||
test_server
|
||||
.process_image("/tmp/warmup.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
let second_request_time = start_second.elapsed();
|
||||
|
||||
println!("First request time: {:?}", first_request_time);
|
||||
println!("Second request time: {:?}", second_request_time);
|
||||
|
||||
// Cold start should be reasonable (<5s)
|
||||
assert!(
|
||||
cold_start_time.as_secs() < 5,
|
||||
"Cold start too slow: {:?}",
|
||||
cold_start_time
|
||||
);
|
||||
|
||||
// Second request should be faster (model loaded)
|
||||
assert!(
|
||||
second_request_time < first_request_time,
|
||||
"Warmed up request should be faster"
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
/// Returns this process's resident set size in bytes.
///
/// On Linux, parses the RSS field (second whitespace-separated column,
/// counted in pages) of `/proc/self/statm` and converts using an assumed
/// 4 KiB page size. Returns 0 on other platforms or when the proc file
/// cannot be read or parsed.
fn get_memory_usage() -> usize {
    #[cfg(target_os = "linux")]
    {
        let rss_pages = std::fs::read_to_string("/proc/self/statm")
            .ok()
            .and_then(|content| {
                content
                    .split_whitespace()
                    .nth(1)
                    .and_then(|field| field.parse::<usize>().ok())
            });
        if let Some(pages) = rss_pages {
            // Convert pages to bytes (assuming 4KB pages)
            return pages * 4096;
        }
    }

    // Fallback for other platforms or if reading fails
    0
}
|
||||
248
examples/scipix/tests/integration/pipeline_tests.rs
Normal file
248
examples/scipix/tests/integration/pipeline_tests.rs
Normal file
@@ -0,0 +1,248 @@
|
||||
// Full pipeline integration tests
|
||||
//
|
||||
// Tests the complete OCR pipeline from image input to final output
|
||||
//
|
||||
// Note: These tests use mock test infrastructure.
|
||||
// Real OCR processing requires ONNX models to be configured.
|
||||
|
||||
use super::*;
|
||||
use crate::common::{OutputFormat, ProcessingOptions};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_png_to_latex_pipeline() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create test image
|
||||
let image = images::generate_simple_equation("x^2 + 2x + 1");
|
||||
let image_path = "/tmp/test_equation.png";
|
||||
image.save(image_path).unwrap();
|
||||
|
||||
// Process through pipeline
|
||||
let result = test_server
|
||||
.process_image(image_path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Pipeline processing failed");
|
||||
|
||||
// Verify output
|
||||
assert!(!result.latex.is_empty(), "LaTeX output should not be empty");
|
||||
assert!(
|
||||
result.confidence > 0.7,
|
||||
"Confidence too low: {}",
|
||||
result.confidence
|
||||
);
|
||||
assert!(result.latex.contains("x"), "Should contain variable x");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_jpeg_to_mathml_pipeline() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create JPEG test image
|
||||
let image = images::generate_fraction(1, 2);
|
||||
let image_path = "/tmp/test_fraction.jpg";
|
||||
image.save(image_path).unwrap();
|
||||
|
||||
// Process to MathML
|
||||
let result = test_server
|
||||
.process_image(image_path, OutputFormat::MathML)
|
||||
.await
|
||||
.expect("Pipeline processing failed");
|
||||
|
||||
// Verify MathML structure
|
||||
assert!(result.mathml.is_some(), "MathML output should be present");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_webp_to_html_pipeline() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create WebP test image
|
||||
let image = images::generate_integral("x dx");
|
||||
let image_path = "/tmp/test_integral.webp";
|
||||
// Note: WebP support may require additional image codec
|
||||
image.save(image_path).unwrap_or_else(|_| {
|
||||
// Fallback to PNG if WebP not supported
|
||||
image.save("/tmp/test_integral.png").unwrap();
|
||||
});
|
||||
|
||||
let actual_path = if std::path::Path::new(image_path).exists() {
|
||||
image_path
|
||||
} else {
|
||||
"/tmp/test_integral.png"
|
||||
};
|
||||
|
||||
// Process to HTML
|
||||
let _result = test_server
|
||||
.process_image(actual_path, OutputFormat::HTML)
|
||||
.await
|
||||
.expect("Pipeline processing failed");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_pipeline_timeout_handling() {
|
||||
let test_server = TestServer::with_timeout(100)
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create complex image that might take time
|
||||
let complex_image = images::generate_complex_equation();
|
||||
complex_image.save("/tmp/complex.png").unwrap();
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
let _result = test_server
|
||||
.process_image("/tmp/complex.png", OutputFormat::LaTeX)
|
||||
.await;
|
||||
let duration = start.elapsed();
|
||||
|
||||
// Should either complete or timeout within reasonable time
|
||||
assert!(
|
||||
duration.as_millis() < 500,
|
||||
"Should timeout or complete quickly"
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_batch_pipeline_processing() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create multiple test images
|
||||
let test_images = vec![
|
||||
("x + y", "/tmp/batch_1.png"),
|
||||
("a - b", "/tmp/batch_2.png"),
|
||||
("2 * 3", "/tmp/batch_3.png"),
|
||||
("x / y", "/tmp/batch_4.png"),
|
||||
];
|
||||
|
||||
for (equation, path) in &test_images {
|
||||
let img = images::generate_simple_equation(equation);
|
||||
img.save(path).unwrap();
|
||||
}
|
||||
|
||||
// Process batch
|
||||
let paths: Vec<&str> = test_images.iter().map(|(_, p)| *p).collect();
|
||||
let results = test_server
|
||||
.process_batch(&paths, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Batch processing failed");
|
||||
|
||||
// Verify all processed
|
||||
assert_eq!(results.len(), 4, "Should process all images");
|
||||
for (i, result) in results.iter().enumerate() {
|
||||
assert!(!result.latex.is_empty(), "Result {} should have LaTeX", i);
|
||||
assert!(result.confidence > 0.5, "Result {} confidence too low", i);
|
||||
}
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_pipeline_with_preprocessing() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create noisy image
|
||||
let mut image = images::generate_simple_equation("f(x) = x^2");
|
||||
images::add_noise(&mut image, 0.1);
|
||||
image.save("/tmp/noisy.png").unwrap();
|
||||
|
||||
// Process with preprocessing enabled
|
||||
let result = test_server
|
||||
.process_image_with_options(
|
||||
"/tmp/noisy.png",
|
||||
OutputFormat::LaTeX,
|
||||
ProcessingOptions {
|
||||
enable_preprocessing: true,
|
||||
enable_denoising: true,
|
||||
enable_deskew: true,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Should still recognize despite noise
|
||||
assert!(
|
||||
!result.latex.is_empty(),
|
||||
"Should extract LaTeX from noisy image"
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_multi_format_output() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create test image
|
||||
let image = images::generate_fraction(3, 4);
|
||||
image.save("/tmp/fraction.png").unwrap();
|
||||
|
||||
// Request multiple output formats
|
||||
let result = test_server
|
||||
.process_image_with_options(
|
||||
"/tmp/fraction.png",
|
||||
OutputFormat::All,
|
||||
ProcessingOptions {
|
||||
include_latex: true,
|
||||
include_mathml: true,
|
||||
include_ascii: true,
|
||||
include_text: true,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Verify output present
|
||||
assert!(!result.latex.is_empty(), "Should have LaTeX");
|
||||
assert!(result.mathml.is_some(), "Should have MathML");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_pipeline_caching() {
|
||||
let test_server = TestServer::with_cache()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create test image
|
||||
let image = images::generate_simple_equation("a + b = c");
|
||||
image.save("/tmp/cached.png").unwrap();
|
||||
|
||||
// First processing
|
||||
let result1 = test_server
|
||||
.process_image("/tmp/cached.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("First processing failed");
|
||||
|
||||
// Second processing (should hit cache)
|
||||
let result2 = test_server
|
||||
.process_image("/tmp/cached.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Second processing failed");
|
||||
|
||||
// Verify cache hit
|
||||
assert_eq!(result1.latex, result2.latex, "Results should match");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
Reference in New Issue
Block a user