Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,450 @@
// Accuracy validation tests
//
// Tests OCR accuracy against Im2latex-100k subset and calculates CER, WER, BLEU
use super::*;
use tokio;
// Verifies OCR accuracy on a handful of trivial expressions.
//
// Each equation is rendered to a synthetic image, OCR'd, and scored by
// character error rate (CER) and exact match after LaTeX normalization.
// Gates: average CER < 5%, exact-match accuracy > 90%.
#[tokio::test]
async fn test_accuracy_simple_expressions() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // (input equation, expected LaTeX) pairs.
    let cases = [
        ("x + 1", "x + 1"),
        ("2x - 3", "2x - 3"),
        ("a = b", "a = b"),
        ("f(x)", "f(x)"),
        ("y^2", "y^2"),
    ];
    let mut cer_total = 0.0;
    let mut exact = 0usize;
    for (equation, expected) in &cases {
        let image = images::generate_simple_equation(equation);
        let path = format!("/tmp/accuracy_simple_{}.png", equation.replace(' ', "_"));
        image.save(&path).unwrap();
        let result = test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        let cer = metrics::calculate_cer(expected, &result.latex);
        cer_total += cer;
        // Exact match is judged on normalized LaTeX, not raw strings.
        if latex::normalize(&result.latex) == latex::normalize(expected) {
            exact += 1;
        }
        println!(
            "Equation: {} | CER: {:.4} | Got: {}",
            equation, cer, result.latex
        );
    }
    let avg_cer = cer_total / cases.len() as f64;
    let accuracy = exact as f64 / cases.len() as f64;
    println!(
        "Simple expressions - Avg CER: {:.4}, Accuracy: {:.2}%",
        avg_cer,
        accuracy * 100.0
    );
    assert!(avg_cer < 0.05, "Average CER too high: {:.4}", avg_cer);
    assert!(
        accuracy > 0.90,
        "Accuracy too low: {:.2}%",
        accuracy * 100.0
    );
    test_server.shutdown().await;
}
// Runs OCR over a 50-sample Im2latex-style subset and reports CER, WER,
// BLEU, and the exact-match rate. Gates: avg CER < 3%, avg BLEU > 80.
#[tokio::test]
async fn test_accuracy_im2latex_subset() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Sample of the Im2latex-100k test split (synthetic stand-ins for now).
    let cases = load_im2latex_test_subset(50); // Test 50 samples
    let (mut cer_total, mut wer_total, mut bleu_total) = (0.0, 0.0, 0.0);
    let mut exact = 0usize;
    for (i, case) in cases.iter().enumerate() {
        let result = test_server
            .process_image(&case.image_path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        // Accumulate per-sample metrics against the ground truth.
        cer_total += metrics::calculate_cer(&case.ground_truth, &result.latex);
        wer_total += metrics::calculate_wer(&case.ground_truth, &result.latex);
        bleu_total += metrics::calculate_bleu(&case.ground_truth, &result.latex, 4);
        if latex::normalize(&result.latex) == latex::normalize(&case.ground_truth) {
            exact += 1;
        }
        // Periodic progress output for long runs.
        if i % 10 == 0 {
            println!("Processed {}/{} samples", i + 1, cases.len());
        }
    }
    let n = cases.len() as f64;
    let avg_cer = cer_total / n;
    let avg_wer = wer_total / n;
    let avg_bleu = bleu_total / n;
    let exact_match_rate = exact as f64 / n;
    println!("\nIm2latex subset results:");
    println!(" Average CER: {:.4}", avg_cer);
    println!(" Average WER: {:.4}", avg_wer);
    println!(" Average BLEU: {:.2}", avg_bleu);
    println!(" Exact match rate: {:.2}%", exact_match_rate * 100.0);
    // Quality gates.
    assert!(avg_cer < 0.03, "CER too high: {:.4}", avg_cer);
    assert!(avg_bleu > 80.0, "BLEU too low: {:.2}", avg_bleu);
    test_server.shutdown().await;
}
// Checks that rendered fractions are recognized as \frac constructs.
// Gate: at least 85% of the cases must match structurally.
#[tokio::test]
async fn test_accuracy_fractions() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // ((numerator, denominator), expected LaTeX) pairs.
    let cases = [
        ((1, 2), r"\frac{1}{2}"),
        ((3, 4), r"\frac{3}{4}"),
        ((5, 6), r"\frac{5}{6}"),
        ((10, 3), r"\frac{10}{3}"),
    ];
    let mut matched = 0usize;
    for ((num, den), expected) in &cases {
        let image = images::generate_fraction(*num, *den);
        let path = format!("/tmp/frac_{}_{}.png", num, den);
        image.save(&path).unwrap();
        let result = test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        // Structural comparison rather than string equality.
        if latex::expressions_match(&result.latex, expected) {
            matched += 1;
        } else {
            println!(
                "Fraction {}/{} - Expected: {}, Got: {}",
                num, den, expected, result.latex
            );
        }
    }
    let accuracy = matched as f64 / cases.len() as f64;
    println!("Fraction accuracy: {:.2}%", accuracy * 100.0);
    assert!(
        accuracy >= 0.85,
        "Fraction accuracy too low: {:.2}%",
        accuracy * 100.0
    );
    test_server.shutdown().await;
}
// Checks recognition of common math symbols (Greek letters, sum, integral).
// Gate: at least 80% of the symbols must appear in the OCR output.
#[tokio::test]
async fn test_accuracy_special_symbols() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // (symbol to render, substring the OCR output must contain).
    let cases = [
        (r"\alpha", r"\alpha"),
        (r"\beta", r"\beta"),
        (r"\sum", r"\sum"),
        (r"\int", r"\int"),
        (r"\pi", r"\pi"),
        (r"\infty", r"\infty"),
    ];
    let mut hits = 0usize;
    for (symbol, expected) in &cases {
        let image = images::generate_symbol(symbol);
        let path = format!("/tmp/symbol_{}.png", symbol.replace('\\', ""));
        image.save(&path).unwrap();
        let result = test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        // Containment, not equality: surrounding markup is acceptable.
        if result.latex.contains(expected) {
            hits += 1;
        } else {
            println!(
                "Symbol {} - Expected to contain: {}, Got: {}",
                symbol, expected, result.latex
            );
        }
    }
    let accuracy = hits as f64 / cases.len() as f64;
    println!("Special symbol accuracy: {:.2}%", accuracy * 100.0);
    assert!(
        accuracy >= 0.80,
        "Symbol accuracy too low: {:.2}%",
        accuracy * 100.0
    );
    test_server.shutdown().await;
}
// Detects accuracy regressions against stored baseline results.
//
// Each regression case is re-run and its CER compared against the recorded
// baseline. A case counts as regressed when the current CER exceeds the
// baseline by more than 10% relative PLUS a small absolute slack. The
// absolute term matters: with a perfect baseline (CER == 0.0) the original
// purely-relative threshold (`baseline * 1.10`) was 0.0, so ANY nonzero
// error — however microscopic — was flagged as a regression.
#[tokio::test]
async fn test_accuracy_regression_detection() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Load baseline results keyed by case id.
    let baseline = load_baseline_results();
    // Run the same test cases the baseline was produced from.
    let test_cases = load_regression_test_cases();
    let mut regressions = Vec::new();
    for case in test_cases.iter() {
        let result = test_server
            .process_image(&case.image_path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        // Cases without a recorded baseline are silently skipped.
        if let Some(baseline_result) = baseline.get(&case.id) {
            let current_cer = metrics::calculate_cer(&case.ground_truth, &result.latex);
            let baseline_cer = baseline_result.cer;
            // Regression threshold: 10% relative + 0.005 absolute slack.
            if current_cer > baseline_cer * 1.10 + 0.005 {
                regressions.push((
                    case.id.clone(),
                    baseline_cer,
                    current_cer,
                    baseline_result.latex.clone(),
                    result.latex.clone(),
                ));
            }
        }
    }
    if !regressions.is_empty() {
        println!("Regressions detected:");
        for (id, baseline_cer, current_cer, baseline_latex, current_latex) in &regressions {
            println!(" {} - CER: {:.4} -> {:.4}", id, baseline_cer, current_cer);
            println!(" Baseline: {}", baseline_latex);
            println!(" Current: {}", current_latex);
        }
    }
    assert!(
        regressions.is_empty(),
        "Found {} regressions",
        regressions.len()
    );
    test_server.shutdown().await;
}
// Checks that reported confidence correlates with actual correctness:
// high-confidence (>0.9) results must be correct >95% of the time.
// Mid-range confidences (0.7..=0.9) are intentionally ignored.
#[tokio::test]
async fn test_accuracy_confidence_calibration() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    let cases = load_calibration_test_cases();
    // Tallies for the two confidence buckets.
    let (mut high_ok, mut high_n) = (0usize, 0usize);
    let (mut low_ok, mut low_n) = (0usize, 0usize);
    for case in &cases {
        let result = test_server
            .process_image(&case.image_path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        let is_correct = latex::normalize(&result.latex) == latex::normalize(&case.ground_truth);
        if result.confidence > 0.9 {
            high_n += 1;
            if is_correct {
                high_ok += 1;
            }
        } else if result.confidence < 0.7 {
            low_n += 1;
            if is_correct {
                low_ok += 1;
            }
        }
    }
    // Empty buckets fall back to the neutral value for their assertion.
    let high_conf_accuracy = if high_n > 0 {
        high_ok as f64 / high_n as f64
    } else {
        1.0
    };
    let low_conf_accuracy = if low_n > 0 {
        low_ok as f64 / low_n as f64
    } else {
        0.0
    };
    println!("Confidence calibration:");
    println!(
        " High confidence (>0.9): {:.2}% accuracy ({}/{})",
        high_conf_accuracy * 100.0,
        high_ok,
        high_n
    );
    println!(
        " Low confidence (<0.7): {:.2}% accuracy ({}/{})",
        low_conf_accuracy * 100.0,
        low_ok,
        low_n
    );
    // High confidence should correlate with high accuracy.
    assert!(
        high_conf_accuracy > 0.95,
        "High confidence predictions should be very accurate"
    );
    test_server.shutdown().await;
}
// Helper functions and types
// One accuracy-test sample: a rendered image on disk plus its expected LaTeX.
#[derive(Debug, Clone)]
struct TestCase {
// Stable identifier; also the join key into the regression baseline map.
id: String,
// Filesystem path of the rendered equation image.
image_path: String,
// Expected LaTeX transcription for the image.
ground_truth: String,
}
// A known-good OCR result recorded by a previous run, used for
// regression comparison in test_accuracy_regression_detection.
#[derive(Debug, Clone)]
struct BaselineResult {
// LaTeX produced by the baseline run.
latex: String,
// Character error rate achieved by the baseline run.
cer: f64,
}
// Stand-in for the real Im2latex-100k subset: synthesizes `count`
// deterministic equations cycling through five templates, renders each
// to /tmp, and returns the resulting test cases.
fn load_im2latex_test_subset(count: usize) -> Vec<TestCase> {
    let mut cases = Vec::with_capacity(count);
    for i in 0..count {
        // Pick a template based on the index.
        let eq = match i % 5 {
            0 => format!("x^{}", i),
            1 => format!("a + {}", i),
            2 => format!(r"\frac{{{}}}{{{}}}", i, i + 1),
            3 => format!("{}x + {}", i, i * 2),
            _ => format!("y = {}x", i),
        };
        let image = images::generate_simple_equation(&eq);
        let path = format!("/tmp/im2latex_{}.png", i);
        image.save(&path).unwrap();
        cases.push(TestCase {
            id: format!("im2latex_{}", i),
            image_path: path,
            ground_truth: eq,
        });
    }
    cases
}
// Returns the fixed set of regression cases that are compared against the
// stored baseline. Currently a single hard-coded sample.
fn load_regression_test_cases() -> Vec<TestCase> {
    let mut cases = Vec::new();
    cases.push(TestCase {
        id: "reg_001".to_string(),
        image_path: "/tmp/reg_001.png".to_string(),
        ground_truth: "x + y".to_string(),
    });
    // Add more test cases...
    cases
}
// Returns the known-good baseline results, keyed by test-case id.
// (Would normally be loaded from a file produced by a previous run.)
fn load_baseline_results() -> std::collections::HashMap<String, BaselineResult> {
    std::collections::HashMap::from([(
        "reg_001".to_string(),
        BaselineResult {
            latex: "x + y".to_string(),
            cer: 0.0,
        },
    )])
}
// Builds a mixed-difficulty sample set for confidence calibration:
// ten clean renders (easy) followed by ten noisy renders (hard).
fn load_calibration_test_cases() -> Vec<TestCase> {
    let mut cases = Vec::with_capacity(20);
    // Easy cases: clean renders of simple sums.
    for i in 0..10 {
        let eq = format!("x + {}", i);
        let rendered = images::generate_simple_equation(&eq);
        let path = format!("/tmp/calib_easy_{}.png", i);
        rendered.save(&path).unwrap();
        cases.push(TestCase {
            id: format!("calib_easy_{}", i),
            image_path: path,
            ground_truth: eq,
        });
    }
    // Hard cases: same pipeline with 20% noise injected.
    for i in 0..10 {
        let eq = format!("y^{}", i);
        let mut rendered = images::generate_simple_equation(&eq);
        images::add_noise(&mut rendered, 0.2);
        let path = format!("/tmp/calib_hard_{}.png", i);
        rendered.save(&path).unwrap();
        cases.push(TestCase {
            id: format!("calib_hard_{}", i),
            image_path: path,
            ground_truth: eq,
        });
    }
    cases
}

View File

@@ -0,0 +1,80 @@
// API server integration tests
//
// Tests HTTP API endpoints, authentication, rate limiting, and async processing
use super::*;
use reqwest::{multipart, Client, StatusCode};
use serde_json::json;
use tokio;
// POSTs an image as multipart form data to /v3/text and checks the
// response carries the expected JSON fields.
#[tokio::test]
async fn test_api_post_text_with_file() {
    let test_server = TestServer::start_api()
        .await
        .expect("Failed to start API server");
    let client = Client::new();
    // Render an equation and read it back as raw bytes for the upload.
    let image = images::generate_simple_equation("x + y");
    image.save("/tmp/api_test.png").unwrap();
    let image_bytes = std::fs::read("/tmp/api_test.png").unwrap();
    // Multipart body with a single "file" part, as the v3 API expects.
    let part = multipart::Part::bytes(image_bytes)
        .file_name("equation.png")
        .mime_str("image/png")
        .unwrap();
    let form = multipart::Form::new().part("file", part);
    // POST to /v3/text with test credentials in the headers.
    let response = client
        .post(&format!("{}/v3/text", test_server.base_url()))
        .header("app_id", "test_app_id")
        .header("app_key", "test_app_key")
        .multipart(form)
        .send()
        .await
        .expect("Request failed");
    assert_eq!(response.status(), StatusCode::OK);
    let result: serde_json::Value = response.json().await.unwrap();
    assert!(result.get("request_id").is_some(), "Should have request_id");
    assert!(result.get("text").is_some(), "Should have text field");
    assert!(
        result.get("processing_time_ms").is_some(),
        "Should have processing time"
    );
    test_server.shutdown().await;
}
// A request without app_id/app_key headers must be rejected with 401.
#[tokio::test]
async fn test_api_authentication_validation() {
    let test_server = TestServer::start_api()
        .await
        .expect("Failed to start API server");
    let client = Client::new();
    let payload = json!({
        "src": "base64data"
    });
    // Deliberately omit the auth headers.
    let response = client
        .post(&format!("{}/v3/text", test_server.base_url()))
        .json(&payload)
        .send()
        .await
        .expect("Request failed");
    assert_eq!(
        response.status(),
        StatusCode::UNAUTHORIZED,
        "Should require authentication"
    );
    test_server.shutdown().await;
}

View File

@@ -0,0 +1,300 @@
// Cache integration tests
//
// Tests caching behavior, hit/miss ratios, similarity search, and persistence
//
// Note: These tests use mock test infrastructure.
// Real OCR processing requires ONNX models to be configured.
use super::*;
use crate::common::{CacheStats, OutputFormat};
// Processing the same file twice must yield identical results, with the
// second request served from the cache.
#[tokio::test]
async fn test_cache_hit_miss_behavior() {
    let test_server = TestServer::with_cache()
        .await
        .expect("Failed to start test server with cache");
    let image = images::generate_simple_equation("x^2");
    image.save("/tmp/cache_test_1.png").unwrap();
    // Cold request: populates the cache.
    let first = test_server
        .process_image("/tmp/cache_test_1.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    // Stats are fetched mainly to exercise the endpoint; values unused.
    let _stats = test_server
        .cache_stats()
        .await
        .expect("Failed to get cache stats");
    // Warm request: should be served from the cache.
    let second = test_server
        .process_image("/tmp/cache_test_1.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    assert_eq!(first.latex, second.latex, "Cached result should match");
    test_server.shutdown().await;
}
// Two near-identical renderings of the same equation should OCR to
// near-identical LaTeX (similarity > 0.9), exercising fuzzy cache lookup.
#[tokio::test]
async fn test_cache_similarity_lookup() {
    let test_server = TestServer::with_cache()
        .await
        .expect("Failed to start test server");
    // Original rendering.
    let original = images::generate_simple_equation("a + b");
    original.save("/tmp/similarity_1.png").unwrap();
    // Same equation with a slight perturbation applied.
    let mut variant = images::generate_simple_equation("a + b");
    images::add_slight_variation(&mut variant, 0.05);
    variant.save("/tmp/similarity_2.png").unwrap();
    let first = test_server
        .process_image("/tmp/similarity_1.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    let second = test_server
        .process_image("/tmp/similarity_2.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    // Near-identical inputs should yield near-identical LaTeX.
    let similarity = latex::calculate_similarity(&first.latex, &second.latex);
    assert!(
        similarity > 0.9,
        "Similar images should produce similar results"
    );
    test_server.shutdown().await;
}
// With the cache capped at 3 entries, processing 5 distinct images must
// trigger eviction so the cache never exceeds its configured size.
#[tokio::test]
async fn test_cache_eviction() {
    let test_server = TestServer::with_cache_size(3)
        .await
        .expect("Failed to start test server");
    // Push five distinct images through the three-entry cache.
    for i in 0..5 {
        let path = format!("/tmp/eviction_{}.png", i);
        images::generate_simple_equation(&format!("x + {}", i))
            .save(&path)
            .unwrap();
        test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
    }
    // The cache must have evicted down to its limit.
    let stats = test_server
        .cache_stats()
        .await
        .expect("Failed to get cache stats");
    assert!(stats.current_size <= 3, "Cache should not exceed max size");
    test_server.shutdown().await;
}
// A persistent cache must survive a server restart: a second server
// instance over the same directory returns the same result.
#[tokio::test]
async fn test_cache_persistence() {
    let cache_dir = "/tmp/scipix_cache_persist";
    std::fs::create_dir_all(cache_dir).unwrap();
    // First server instance populates the persistent cache.
    let server_a = TestServer::with_persistent_cache(cache_dir)
        .await
        .expect("Failed to start test server");
    let image = images::generate_simple_equation("persistent");
    image.save("/tmp/persist_test.png").unwrap();
    let first = server_a
        .process_image("/tmp/persist_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    server_a.shutdown().await;
    // Second instance over the same directory must see the cached entry.
    let server_b = TestServer::with_persistent_cache(cache_dir)
        .await
        .expect("Failed to start second test server");
    let second = server_b
        .process_image("/tmp/persist_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    assert_eq!(
        first.latex, second.latex,
        "Persistent cache should restore results"
    );
    server_b.shutdown().await;
}
// After explicit invalidation, reprocessing must recompute from scratch
// yet still produce the same LaTeX output.
#[tokio::test]
async fn test_cache_invalidation() {
    let test_server = TestServer::with_cache()
        .await
        .expect("Failed to start test server");
    let image = images::generate_simple_equation("invalidate");
    image.save("/tmp/invalidate_test.png").unwrap();
    let before = test_server
        .process_image("/tmp/invalidate_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    // Drop every cached entry.
    test_server
        .invalidate_cache()
        .await
        .expect("Cache invalidation failed");
    // Second pass cannot be served from cache; output must be unchanged.
    let after = test_server
        .process_image("/tmp/invalidate_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    assert_eq!(before.latex, after.latex, "Results should still match");
    test_server.shutdown().await;
}
// Exercises the hit/miss accounting path: each of three images is
// processed twice (miss then hit), then the stats endpoint is queried.
// The stats values themselves are not asserted here.
#[tokio::test]
async fn test_cache_hit_ratio() {
    let test_server = TestServer::with_cache()
        .await
        .expect("Failed to start test server");
    let equations = ["a", "b", "c"];
    // Render one image per equation.
    for eq in &equations {
        images::generate_simple_equation(eq)
            .save(&format!("/tmp/ratio_{}.png", eq))
            .unwrap();
    }
    // Process each image twice: first pass misses, second pass hits.
    for eq in &equations {
        let path = format!("/tmp/ratio_{}.png", eq);
        for _ in 0..2 {
            test_server
                .process_image(&path, OutputFormat::LaTeX)
                .await
                .expect("Processing failed");
        }
    }
    // Exercise the stats endpoint.
    let _stats = test_server
        .cache_stats()
        .await
        .expect("Failed to get cache stats");
    test_server.shutdown().await;
}
// Requests inside the TTL window must be served consistently from cache.
// NOTE(review): actual expiry AFTER the 1-second TTL elapses is not
// exercised here — that would require sleeping past the window.
#[tokio::test]
async fn test_cache_ttl_expiration() {
    // Server configured with a 1-second cache TTL.
    let test_server = TestServer::with_cache_ttl(1)
        .await
        .expect("Failed to start test server");
    let image = images::generate_simple_equation("ttl");
    image.save("/tmp/ttl_test.png").unwrap();
    let first = test_server
        .process_image("/tmp/ttl_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    // Immediate re-request: well within the TTL, so same output expected.
    let second = test_server
        .process_image("/tmp/ttl_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    assert_eq!(first.latex, second.latex);
    test_server.shutdown().await;
}
// Ten simultaneous requests for a cached entry must all succeed and all
// return the same LaTeX.
#[tokio::test]
async fn test_cache_concurrent_access() {
    let test_server = TestServer::with_cache()
        .await
        .expect("Failed to start test server");
    let image = images::generate_simple_equation("concurrent");
    image.save("/tmp/concurrent_cache.png").unwrap();
    // Warm the cache with a single request.
    test_server
        .process_image("/tmp/concurrent_cache.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    // Hit the cached entry from ten tasks at once.
    let handles: Vec<_> = (0..10)
        .map(|_| {
            let server = test_server.clone();
            tokio::spawn(async move {
                server
                    .process_image("/tmp/concurrent_cache.png", OutputFormat::LaTeX)
                    .await
            })
        })
        .collect();
    let results = futures::future::join_all(handles).await;
    // Every task must have completed without panicking or erroring.
    assert!(
        results.iter().all(|r| r.is_ok()),
        "All requests should succeed"
    );
    // Every task must have produced identical LaTeX.
    let first_latex = &results[0].as_ref().unwrap().as_ref().unwrap().latex;
    assert!(
        results
            .iter()
            .all(|r| { &r.as_ref().unwrap().as_ref().unwrap().latex == first_latex }),
        "All results should match"
    );
    test_server.shutdown().await;
}
// Re-export CacheStats for backward compatibility
pub use crate::common::CacheStats as CacheStatsCompat;

View File

@@ -0,0 +1,227 @@
// CLI integration tests
//
// Tests command-line interface functionality
use super::*;
use assert_cmd::Command;
use predicates::prelude::*;
use std::process::Stdio;
// `scipix-ocr ocr <file>` must succeed and print the recognized LaTeX.
#[test]
fn test_cli_ocr_command_with_file() {
    // Render a trivial equation for the CLI to consume.
    let image = images::generate_simple_equation("x + 1");
    image.save("/tmp/cli_test.png").unwrap();
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/cli_test.png")
        .assert()
        .success()
        .stdout(predicate::str::contains("x"))
        .stdout(predicate::str::contains("LaTeX:"));
}
// The --format flag selects the serialization of the recognized fraction.
#[test]
fn test_cli_ocr_with_output_format() {
    let image = images::generate_fraction(3, 4);
    image.save("/tmp/cli_fraction.png").unwrap();
    // --format latex must emit a \frac construct.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/cli_fraction.png")
        .arg("--format")
        .arg("latex")
        .assert()
        .success()
        .stdout(predicate::str::contains(r"\frac"));
    // --format mathml must emit the MathML fraction element.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/cli_fraction.png")
        .arg("--format")
        .arg("mathml")
        .assert()
        .success()
        .stdout(predicate::str::contains("<mfrac>"));
}
// `batch <dir> --output <json>` must process a directory of images and
// write a non-trivial results file.
#[test]
fn test_cli_batch_command() {
    // Populate a directory with a few rendered equations.
    std::fs::create_dir_all("/tmp/cli_batch").unwrap();
    for (i, eq) in ["a + b", "x - y", "2 * 3"].iter().enumerate() {
        images::generate_simple_equation(eq)
            .save(&format!("/tmp/cli_batch/eq_{}.png", i))
            .unwrap();
    }
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("batch")
        .arg("/tmp/cli_batch")
        .arg("--output")
        .arg("/tmp/cli_batch_results.json")
        .assert()
        .success();
    // The output file must exist and contain substantial content.
    let results = std::fs::read_to_string("/tmp/cli_batch_results.json").unwrap();
    assert!(results.contains("a"), "Should contain results");
    assert!(results.len() > 100, "Should have substantial output");
}
// Smoke-tests `serve`: spawn the binary, poll /health, then shut it down.
// Marked #[ignore] because it needs a prebuilt binary and a free port.
#[test]
#[ignore] // Requires built binary and available port
fn test_cli_serve_command_startup() {
    // assert_cmd's Command cannot manage long-running children; use std.
    use std::process::Command as StdCommand;
    // Prefer the cargo-provided binary path; fall back to the debug build.
    let bin_path = std::env::var("CARGO_BIN_EXE_scipix-ocr")
        .unwrap_or_else(|_| "target/debug/scipix-ocr".to_string());
    let mut child = StdCommand::new(&bin_path)
        .arg("serve")
        .arg("--port")
        .arg("18080")
        .stdout(Stdio::piped())
        .spawn()
        .expect("Failed to start server");
    // Give the server time to bind its port.
    std::thread::sleep(std::time::Duration::from_secs(2));
    // Probe the health endpoint.
    let client = reqwest::blocking::Client::new();
    let response = client
        .get("http://localhost:18080/health")
        .timeout(std::time::Duration::from_secs(5))
        .send();
    // Tear the server down BEFORE asserting, and reap the child with
    // wait() — kill() alone leaves a zombie process behind on Unix.
    let _ = child.kill();
    let _ = child.wait();
    assert!(response.is_ok(), "Server should respond to health check");
}
// Exercises the `config show` and `config set` subcommands.
#[test]
fn test_cli_config_command() {
    // `config show` prints the active configuration.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("config")
        .arg("show")
        .assert()
        .success()
        .stdout(
            predicate::str::contains("model_path").or(predicate::str::contains("Configuration")),
        );
    // `config set <key> <value>` persists a setting and exits cleanly.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("config")
        .arg("set")
        .arg("preprocessing.enable_deskew")
        .arg("true")
        .assert()
        .success();
}
// A nonexistent input path must fail with a diagnostic on stderr.
#[test]
fn test_cli_invalid_file() {
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/nonexistent/file.png")
        .assert()
        .failure()
        .stderr(predicate::str::contains("not found").or(predicate::str::contains("error")));
}
// Exit codes: 0 on success, non-zero when the input file is missing.
#[test]
fn test_cli_exit_codes() {
    // Success case: a valid image must exit 0.
    let image = images::generate_simple_equation("ok");
    image.save("/tmp/exit_code_test.png").unwrap();
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/exit_code_test.png")
        .assert()
        .code(0);
    // Failure case: missing input must exit non-zero.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/nonexistent.png")
        .assert()
        .code(predicate::ne(0));
}
// --verbose should surface progress/diagnostic details on stdout.
#[test]
fn test_cli_verbose_output() {
    let image = images::generate_simple_equation("verbose");
    image.save("/tmp/verbose_test.png").unwrap();
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/verbose_test.png")
        .arg("--verbose")
        .assert()
        .success()
        .stdout(predicate::str::contains("Processing").or(predicate::str::contains("Confidence")));
}
// `--output-format json` must exit successfully and print a JSON object
// containing at least `latex` and `confidence` fields.
#[test]
fn test_cli_json_output() {
    let image = images::generate_simple_equation("json");
    image.save("/tmp/json_test.png").unwrap();
    let mut cmd = Command::cargo_bin("scipix-ocr").unwrap();
    let output = cmd
        .arg("ocr")
        .arg("/tmp/json_test.png")
        .arg("--output-format")
        .arg("json")
        .output()
        .expect("Failed to execute command");
    // Check the exit status first so a CLI failure is reported as itself
    // (with stderr) rather than as a confusing JSON parse error below.
    assert!(
        output.status.success(),
        "Command failed: {}",
        String::from_utf8_lossy(&output.stderr)
    );
    let stdout = String::from_utf8_lossy(&output.stdout);
    // Verify JSON structure
    let json: serde_json::Value =
        serde_json::from_str(&stdout).expect("Output should be valid JSON");
    assert!(json.get("latex").is_some(), "Should have latex field");
    assert!(
        json.get("confidence").is_some(),
        "Should have confidence field"
    );
}
// Top-level and subcommand help output must carry the usual sections.
#[test]
fn test_cli_help_command() {
    // Top-level help lists usage and subcommands.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("--help")
        .assert()
        .success()
        .stdout(predicate::str::contains("USAGE:"))
        .stdout(predicate::str::contains("COMMANDS:"));
    // Subcommand help lists its options.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("--help")
        .assert()
        .success()
        .stdout(predicate::str::contains("OPTIONS:"));
}
// --version must print the crate version baked in from Cargo.toml.
#[test]
fn test_cli_version_command() {
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("--version")
        .assert()
        .success()
        .stdout(predicate::str::contains(env!("CARGO_PKG_VERSION")));
}

View File

@@ -0,0 +1,14 @@
// Integration test module organization
//
// This module provides integration tests for the ruvector-scipix OCR system.
// Tests are organized by functionality area.
pub mod accuracy_tests;
pub mod api_tests;
pub mod cache_tests;
pub mod cli_tests;
pub mod performance_tests;
pub mod pipeline_tests;
// Re-export common test utilities
pub use crate::common::*;

View File

@@ -0,0 +1,386 @@
// Performance validation tests
//
// Tests latency, memory usage, throughput, and ensures no memory leaks
use super::*;
use std::time::{Duration, Instant};
use tokio;
// A single simple-equation request must complete in under 100ms.
#[tokio::test]
async fn test_performance_latency_within_bounds() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    let image = images::generate_simple_equation("x + y");
    image.save("/tmp/perf_latency.png").unwrap();
    // Time one end-to-end request.
    let timer = Instant::now();
    let result = test_server
        .process_image("/tmp/perf_latency.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");
    let latency = timer.elapsed();
    println!("Latency: {:?}", latency);
    println!("Confidence: {}", result.confidence);
    // Simple equations should process in under 100ms.
    assert!(latency.as_millis() < 100, "Latency too high: {:?}", latency);
    test_server.shutdown().await;
}
// Processes 100 images and asserts resident memory grows by less than
// 100MB over the run.
#[tokio::test]
async fn test_performance_memory_usage_limits() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Snapshot memory before the workload.
    let initial_memory = get_memory_usage();
    // Process 100 distinct images, deleting each temp file as we go.
    for i in 0..100 {
        let eq = format!("x + {}", i);
        let image = images::generate_simple_equation(&eq);
        let path = format!("/tmp/perf_mem_{}.png", i);
        image.save(&path).unwrap();
        test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        // Clean up
        std::fs::remove_file(&path).unwrap();
    }
    let final_memory = get_memory_usage();
    // Use a saturating difference: if memory shrank (allocator returned
    // pages to the OS), a plain unsigned subtraction would underflow and
    // panic in debug builds instead of reporting zero growth.
    // NOTE(review): assumes get_memory_usage returns an unsigned integer
    // byte count — confirm against its definition.
    let memory_increase = final_memory.saturating_sub(initial_memory);
    println!("Memory increase: {} MB", memory_increase / 1024 / 1024);
    // Assert memory usage is reasonable (<100MB increase)
    assert!(
        memory_increase < 100 * 1024 * 1024,
        "Memory usage too high: {} bytes",
        memory_increase
    );
    test_server.shutdown().await;
}
// Repeatedly processes one image, sampling memory every 100 iterations,
// to detect unbounded (leak-like) growth.
#[tokio::test]
async fn test_performance_no_memory_leaks() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    let image = images::generate_simple_equation("leak test");
    image.save("/tmp/leak_test.png").unwrap();
    // Process same image many times
    let iterations = 1000;
    let mut memory_samples = Vec::new();
    for i in 0..iterations {
        test_server
            .process_image("/tmp/leak_test.png", OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        // Sample memory every 100 iterations (first sample at i == 0).
        if i % 100 == 0 {
            memory_samples.push(get_memory_usage());
        }
    }
    let first_sample = memory_samples[0];
    let last_sample = *memory_samples.last().unwrap();
    // The last sample is taken at iteration (len - 1) * 100, so divide by
    // the actually-observed span — dividing by the full iteration count
    // (as before) understated the growth rate by ~10%. saturating_sub
    // avoids an unsigned-underflow panic if memory shrank between samples.
    let observed_span = ((memory_samples.len() - 1) * 100).max(1);
    let growth_rate = last_sample.saturating_sub(first_sample) as f64 / observed_span as f64;
    println!("Memory growth rate: {} bytes/iteration", growth_rate);
    println!("Samples: {:?}", memory_samples);
    // Growth rate should be minimal (<1KB per iteration)
    assert!(
        growth_rate < 1024.0,
        "Possible memory leak detected: {} bytes/iteration",
        growth_rate
    );
    test_server.shutdown().await;
}
// Sequentially processes 50 images and requires > 5 images/second.
#[tokio::test]
async fn test_performance_throughput() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Render the workload up front so only OCR time is measured.
    let image_count = 50;
    for i in 0..image_count {
        images::generate_simple_equation(&format!("throughput_{}", i))
            .save(&format!("/tmp/throughput_{}.png", i))
            .unwrap();
    }
    // Time the whole sequential run.
    let timer = Instant::now();
    for i in 0..image_count {
        test_server
            .process_image(&format!("/tmp/throughput_{}.png", i), OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
    }
    let duration = timer.elapsed();
    let throughput = image_count as f64 / duration.as_secs_f64();
    println!("Throughput: {:.2} images/second", throughput);
    println!("Total time: {:?} for {} images", duration, image_count);
    // Assert reasonable throughput (>5 images/second).
    assert!(
        throughput > 5.0,
        "Throughput too low: {:.2} images/s",
        throughput
    );
    // Remove the temp images.
    for i in 0..image_count {
        std::fs::remove_file(&format!("/tmp/throughput_{}.png", i)).unwrap();
    }
    test_server.shutdown().await;
}
// Fires 20 concurrent requests at one image; all must succeed and the
// aggregate rate must exceed 10 req/second.
#[tokio::test]
async fn test_performance_concurrent_throughput() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    let image = images::generate_simple_equation("concurrent");
    image.save("/tmp/concurrent_throughput.png").unwrap();
    let concurrent_requests = 20;
    let timer = Instant::now();
    // Launch every request as its own task.
    let handles: Vec<_> = (0..concurrent_requests)
        .map(|_| {
            let server = test_server.clone();
            tokio::spawn(async move {
                server
                    .process_image("/tmp/concurrent_throughput.png", OutputFormat::LaTeX)
                    .await
            })
        })
        .collect();
    let results = futures::future::join_all(handles).await;
    let duration = timer.elapsed();
    let success_count = results.iter().filter(|r| r.is_ok()).count();
    let throughput = concurrent_requests as f64 / duration.as_secs_f64();
    println!("Concurrent throughput: {:.2} req/second", throughput);
    println!("Success rate: {}/{}", success_count, concurrent_requests);
    assert!(
        success_count == concurrent_requests,
        "All requests should succeed"
    );
    assert!(
        throughput > 10.0,
        "Concurrent throughput too low: {:.2}",
        throughput
    );
    test_server.shutdown().await;
}
// Measures per-request latency over 100 runs and checks P50/P95/P99.
#[tokio::test]
async fn test_performance_latency_percentiles() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    let iterations = 100;
    let mut latencies = Vec::new();
    for i in 0..iterations {
        let eq = format!("p{}", i);
        let image = images::generate_simple_equation(&eq);
        let path = format!("/tmp/percentile_{}.png", i);
        image.save(&path).unwrap();
        // Time the request only, not image generation or cleanup.
        let start = Instant::now();
        test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        latencies.push(start.elapsed().as_micros());
        std::fs::remove_file(&path).unwrap();
    }
    // Sort latencies
    latencies.sort();
    // Nearest-rank percentile: index = ceil(p/100 * n) - 1. The previous
    // hard-coded indices [50]/[95]/[99] were off by one (index 99 is the
    // MAXIMUM of 100 samples, not P99) and would panic if `iterations`
    // were ever reduced.
    let n = latencies.len();
    let percentile = |p: usize| latencies[((p * n + 99) / 100).saturating_sub(1).min(n - 1)];
    let p50 = percentile(50);
    let p95 = percentile(95);
    let p99 = percentile(99);
    println!("Latency percentiles:");
    println!(" P50: {} μs ({} ms)", p50, p50 / 1000);
    println!(" P95: {} μs ({} ms)", p95, p95 / 1000);
    println!(" P99: {} μs ({} ms)", p99, p99 / 1000);
    // Assert percentile targets
    assert!(p50 < 100_000, "P50 latency too high: {} μs", p50); // <100ms
    assert!(p95 < 200_000, "P95 latency too high: {} μs", p95); // <200ms
    assert!(p99 < 500_000, "P99 latency too high: {} μs", p99); // <500ms
    test_server.shutdown().await;
}
// Compares the batch API against one-at-a-time processing for the same
// ten images; batching must be strictly faster.
#[tokio::test]
async fn test_performance_batch_efficiency() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Render a small batch of input images.
    let batch_size = 10;
    let paths: Vec<String> = (0..batch_size)
        .map(|i| {
            let path = format!("/tmp/batch_eff_{}.png", i);
            images::generate_simple_equation(&format!("batch_{}", i))
                .save(&path)
                .unwrap();
            path
        })
        .collect();
    // Baseline: process the images one at a time.
    let sequential_timer = Instant::now();
    for path in &paths {
        test_server
            .process_image(path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
    }
    let sequential_time = sequential_timer.elapsed();
    // Same workload through the batch API.
    let path_refs: Vec<&str> = paths.iter().map(|s| s.as_str()).collect();
    let batch_timer = Instant::now();
    test_server
        .process_batch(&path_refs, OutputFormat::LaTeX)
        .await
        .expect("Batch processing failed");
    let batch_time = batch_timer.elapsed();
    println!("Sequential time: {:?}", sequential_time);
    println!("Batch time: {:?}", batch_time);
    println!(
        "Speedup: {:.2}x",
        sequential_time.as_secs_f64() / batch_time.as_secs_f64()
    );
    // Batching should amortize per-request overhead.
    assert!(
        batch_time < sequential_time,
        "Batch processing should be faster"
    );
    // Cleanup
    for path in paths {
        std::fs::remove_file(&path).unwrap();
    }
    test_server.shutdown().await;
}
#[tokio::test]
async fn test_performance_cold_start_warmup() {
    // Time how long the server takes to become available from scratch.
    let boot_timer = Instant::now();
    let server = TestServer::start()
        .await
        .expect("Failed to start test server");
    let boot_elapsed = boot_timer.elapsed();
    println!("Cold start time: {:?}", boot_elapsed);
    // Issue the same request twice: the first pays any lazy warmup cost,
    // the second should hit an already-warm pipeline.
    let warmup_image = images::generate_simple_equation("warmup");
    warmup_image.save("/tmp/warmup.png").unwrap();
    let mut request_times = Vec::with_capacity(2);
    for _ in 0..2 {
        let timer = Instant::now();
        server
            .process_image("/tmp/warmup.png", OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
        request_times.push(timer.elapsed());
    }
    let (first_elapsed, second_elapsed) = (request_times[0], request_times[1]);
    println!("First request time: {:?}", first_elapsed);
    println!("Second request time: {:?}", second_elapsed);
    // Startup should finish within five seconds.
    assert!(
        boot_elapsed.as_secs() < 5,
        "Cold start too slow: {:?}",
        boot_elapsed
    );
    // A warmed-up request must beat the cold one.
    assert!(
        second_elapsed < first_elapsed,
        "Warmed up request should be faster"
    );
    server.shutdown().await;
}
/// Best-effort resident-set-size probe for the current process, in bytes.
///
/// On Linux this parses the second whitespace-separated field (resident
/// pages) of `/proc/self/statm` and converts pages to bytes assuming
/// 4 KiB pages (the common default; NOTE(review): not guaranteed on
/// e.g. 16 KiB-page ARM kernels — confirm if exact figures matter).
/// On any other platform, or if the proc entry cannot be read or parsed,
/// returns 0.
fn get_memory_usage() -> usize {
    #[cfg(target_os = "linux")]
    {
        let rss_bytes = std::fs::read_to_string("/proc/self/statm")
            .ok()
            .and_then(|statm| {
                statm
                    .split_whitespace()
                    .nth(1)
                    .and_then(|field| field.parse::<usize>().ok())
            })
            .map(|pages| pages * 4096);
        if let Some(bytes) = rss_bytes {
            return bytes;
        }
    }
    // Unsupported platform or unreadable /proc entry.
    0
}

// ============================================================================
// File boundary (git-subtree squash residue): the following ~248 lines were
// originally a separate source file — full pipeline integration tests.
// ============================================================================
// Full pipeline integration tests
//
// Tests the complete OCR pipeline from image input to final output
//
// Note: These tests use mock test infrastructure.
// Real OCR processing requires ONNX models to be configured.
use super::*;
use crate::common::{OutputFormat, ProcessingOptions};
#[tokio::test]
async fn test_png_to_latex_pipeline() {
    // End-to-end: PNG equation image in, LaTeX string out.
    let server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Render a quadratic expression to a PNG on disk.
    let equation_image = images::generate_simple_equation("x^2 + 2x + 1");
    let png_path = "/tmp/test_equation.png";
    equation_image.save(png_path).unwrap();
    // Run the full OCR pipeline.
    let outcome = server
        .process_image(png_path, OutputFormat::LaTeX)
        .await
        .expect("Pipeline processing failed");
    // The recognized LaTeX must be non-empty, confident, and mention x.
    assert!(!outcome.latex.is_empty(), "LaTeX output should not be empty");
    assert!(
        outcome.confidence > 0.7,
        "Confidence too low: {}",
        outcome.confidence
    );
    assert!(outcome.latex.contains("x"), "Should contain variable x");
    server.shutdown().await;
}
#[tokio::test]
async fn test_jpeg_to_mathml_pipeline() {
    // End-to-end: JPEG fraction image in, MathML out.
    let server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Render 1/2 as a JPEG on disk.
    let jpeg_path = "/tmp/test_fraction.jpg";
    images::generate_fraction(1, 2).save(jpeg_path).unwrap();
    // Ask the pipeline for MathML output.
    let outcome = server
        .process_image(jpeg_path, OutputFormat::MathML)
        .await
        .expect("Pipeline processing failed");
    // The MathML field must be populated.
    assert!(outcome.mathml.is_some(), "MathML output should be present");
    server.shutdown().await;
}
#[tokio::test]
async fn test_webp_to_html_pipeline() {
    // End-to-end: WebP integral image in, HTML out. Falls back to PNG
    // when the WebP encoder is unavailable in this build.
    let server = TestServer::start()
        .await
        .expect("Failed to start test server");
    let integral_image = images::generate_integral("x dx");
    let webp_path = "/tmp/test_integral.webp";
    let png_path = "/tmp/test_integral.png";
    // Try WebP first; on encoder failure, write a PNG instead.
    if integral_image.save(webp_path).is_err() {
        integral_image.save(png_path).unwrap();
    }
    // Feed whichever file actually landed on disk.
    let input_path = if std::path::Path::new(webp_path).exists() {
        webp_path
    } else {
        png_path
    };
    // Pipeline must complete without error for HTML output.
    let _outcome = server
        .process_image(input_path, OutputFormat::HTML)
        .await
        .expect("Pipeline processing failed");
    server.shutdown().await;
}
#[tokio::test]
async fn test_pipeline_timeout_handling() {
    // Server configured with an aggressive 100 ms processing timeout.
    let server = TestServer::with_timeout(100)
        .await
        .expect("Failed to start test server");
    // A dense equation image that may exceed the timeout budget.
    images::generate_complex_equation()
        .save("/tmp/complex.png")
        .unwrap();
    let timer = std::time::Instant::now();
    let _outcome = server
        .process_image("/tmp/complex.png", OutputFormat::LaTeX)
        .await;
    let elapsed = timer.elapsed();
    // Whether it succeeded or timed out, control must return promptly.
    assert!(
        elapsed.as_millis() < 500,
        "Should timeout or complete quickly"
    );
    server.shutdown().await;
}
#[tokio::test]
async fn test_batch_pipeline_processing() {
    // Submit four small equations as one batch and check every result.
    let server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // (equation text, destination path) pairs for the batch fixtures.
    let fixtures = [
        ("x + y", "/tmp/batch_1.png"),
        ("a - b", "/tmp/batch_2.png"),
        ("2 * 3", "/tmp/batch_3.png"),
        ("x / y", "/tmp/batch_4.png"),
    ];
    for (equation, destination) in &fixtures {
        images::generate_simple_equation(equation)
            .save(destination)
            .unwrap();
    }
    // Collect just the paths and run the batch endpoint.
    let batch_paths: Vec<&str> = fixtures.iter().map(|&(_, path)| path).collect();
    let outcomes = server
        .process_batch(&batch_paths, OutputFormat::LaTeX)
        .await
        .expect("Batch processing failed");
    // Every image must yield a non-empty, reasonably confident result.
    assert_eq!(outcomes.len(), 4, "Should process all images");
    for (index, outcome) in outcomes.iter().enumerate() {
        assert!(!outcome.latex.is_empty(), "Result {} should have LaTeX", index);
        assert!(outcome.confidence > 0.5, "Result {} confidence too low", index);
    }
    server.shutdown().await;
}
#[tokio::test]
async fn test_pipeline_with_preprocessing() {
    // Recognition should survive artificial noise when preprocessing,
    // denoising, and deskew are all switched on.
    let server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Render an equation, then corrupt it with 10% noise.
    let mut noisy_image = images::generate_simple_equation("f(x) = x^2");
    images::add_noise(&mut noisy_image, 0.1);
    noisy_image.save("/tmp/noisy.png").unwrap();
    // Enable the full cleanup pipeline for this request.
    let cleanup_options = ProcessingOptions {
        enable_preprocessing: true,
        enable_denoising: true,
        enable_deskew: true,
        ..Default::default()
    };
    let outcome = server
        .process_image_with_options("/tmp/noisy.png", OutputFormat::LaTeX, cleanup_options)
        .await
        .expect("Processing failed");
    // Noise must not prevent extraction entirely.
    assert!(
        !outcome.latex.is_empty(),
        "Should extract LaTeX from noisy image"
    );
    server.shutdown().await;
}
#[tokio::test]
async fn test_multi_format_output() {
    // A single request can ask for every supported output format at once.
    let server = TestServer::start()
        .await
        .expect("Failed to start test server");
    // Render 3/4 as the input image.
    images::generate_fraction(3, 4)
        .save("/tmp/fraction.png")
        .unwrap();
    // Opt into all four representations.
    let every_format = ProcessingOptions {
        include_latex: true,
        include_mathml: true,
        include_ascii: true,
        include_text: true,
        ..Default::default()
    };
    let outcome = server
        .process_image_with_options("/tmp/fraction.png", OutputFormat::All, every_format)
        .await
        .expect("Processing failed");
    // At minimum the LaTeX and MathML fields must be populated.
    assert!(!outcome.latex.is_empty(), "Should have LaTeX");
    assert!(outcome.mathml.is_some(), "Should have MathML");
    server.shutdown().await;
}
#[tokio::test]
async fn test_pipeline_caching() {
    // Processing the same image twice must yield identical results, with
    // the second request eligible to be served from the cache.
    let server = TestServer::with_cache()
        .await
        .expect("Failed to start test server");
    // Render the input once.
    images::generate_simple_equation("a + b = c")
        .save("/tmp/cached.png")
        .unwrap();
    // Issue the request twice against the same file.
    let first = server
        .process_image("/tmp/cached.png", OutputFormat::LaTeX)
        .await
        .expect("First processing failed");
    let second = server
        .process_image("/tmp/cached.png", OutputFormat::LaTeX)
        .await
        .expect("Second processing failed");
    // Cache hit or not, the outputs must agree.
    assert_eq!(first.latex, second.latex, "Results should match");
    server.shutdown().await;
}