Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,372 @@
// Configuration tests for ruvector-scipix
//
// Tests configuration loading, serialization, validation, and defaults.
// Target: 90%+ coverage of config module
#[cfg(test)]
mod config_tests {
use std::path::PathBuf;
// Mock configuration structures for testing
/// Mock preprocessing settings used only by the tests in this module.
///
/// The serde derives let the TOML and JSON tests serialize and deserialize it.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
struct PreprocessConfig {
/// `Default` sets 300; `test_config_validation_valid` expects a value above zero.
target_dpi: u32,
/// `Default` sets 4096; `test_config_validation_valid` expects a value above zero.
max_dimension: u32,
/// `Default` sets 0.5; `test_config_validation_valid` expects it within 0.0..=1.0.
denoise_strength: f32,
/// `Default` sets `true`.
contrast_enhancement: bool,
/// `Default` sets `true`.
auto_rotate: bool,
/// Free-form method name: `Default` sets "adaptive", and `test_binarization_methods`
/// also assigns "otsu", "sauvola" and "niblack". Nothing in this module validates it.
binarization_method: String,
}
/// Mock OCR model settings used only by the tests in this module.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
struct OcrModelConfig {
/// `Default` sets "models/scipix_model.onnx".
model_path: PathBuf,
/// Free-form device name: `Default` sets "cpu", and `test_device_configurations`
/// also assigns "cuda", "cuda:0" and "cuda:1".
device: String,
/// `Default` sets 4; `test_config_validation_valid` expects a value above zero.
batch_size: usize,
/// `Default` sets 0.7; `test_config_validation_valid` expects it within 0.0..=1.0.
confidence_threshold: f32,
}
/// Mock output settings used only by the tests in this module.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
struct OutputConfig {
/// Free-form format name: `Default` sets "latex", and `test_output_formats`
/// also assigns "mathml", "mmd", "ascii" and "unicode".
format: String,
/// `Default` sets `true`.
include_confidence: bool,
/// `Default` sets `false`.
include_geometry: bool,
}
/// Mock top-level configuration that groups the three sections below.
///
/// The field names double as the TOML table names (`[preprocessing]`, `[model]`,
/// `[output]`) and JSON object keys used by the (de)serialization tests.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
struct ScipixConfig {
/// Image preprocessing section.
preprocessing: PreprocessConfig,
/// OCR model section.
model: OcrModelConfig,
/// Output formatting section.
output: OutputConfig,
}
impl Default for PreprocessConfig {
fn default() -> Self {
Self {
target_dpi: 300,
max_dimension: 4096,
denoise_strength: 0.5,
contrast_enhancement: true,
auto_rotate: true,
binarization_method: "adaptive".to_string(),
}
}
}
impl Default for OcrModelConfig {
fn default() -> Self {
Self {
model_path: PathBuf::from("models/scipix_model.onnx"),
device: "cpu".to_string(),
batch_size: 4,
confidence_threshold: 0.7,
}
}
}
impl Default for OutputConfig {
fn default() -> Self {
Self {
format: "latex".to_string(),
include_confidence: true,
include_geometry: false,
}
}
}
impl Default for ScipixConfig {
fn default() -> Self {
Self {
preprocessing: PreprocessConfig::default(),
model: OcrModelConfig::default(),
output: OutputConfig::default(),
}
}
}
#[test]
fn test_default_config_creation() {
let config = ScipixConfig::default();
assert_eq!(config.preprocessing.target_dpi, 300);
assert_eq!(config.model.device, "cpu");
assert_eq!(config.output.format, "latex");
}
#[test]
fn test_preprocessing_config_defaults() {
let config = PreprocessConfig::default();
assert_eq!(config.target_dpi, 300);
assert_eq!(config.max_dimension, 4096);
assert_eq!(config.denoise_strength, 0.5);
assert!(config.contrast_enhancement);
assert!(config.auto_rotate);
assert_eq!(config.binarization_method, "adaptive");
}
#[test]
fn test_model_config_defaults() {
let config = OcrModelConfig::default();
assert_eq!(config.model_path, PathBuf::from("models/scipix_model.onnx"));
assert_eq!(config.device, "cpu");
assert_eq!(config.batch_size, 4);
assert_eq!(config.confidence_threshold, 0.7);
}
#[test]
fn test_output_config_defaults() {
let config = OutputConfig::default();
assert_eq!(config.format, "latex");
assert!(config.include_confidence);
assert!(!config.include_geometry);
}
#[test]
fn test_toml_serialization() {
let config = ScipixConfig::default();
let toml_str = toml::to_string(&config).expect("Failed to serialize to TOML");
assert!(toml_str.contains("target_dpi = 300"));
assert!(toml_str.contains("device = \"cpu\""));
assert!(toml_str.contains("format = \"latex\""));
}
#[test]
fn test_toml_deserialization() {
let toml_str = r#"
[preprocessing]
target_dpi = 300
max_dimension = 4096
denoise_strength = 0.5
contrast_enhancement = true
auto_rotate = true
binarization_method = "adaptive"
[model]
model_path = "models/scipix_model.onnx"
device = "cpu"
batch_size = 4
confidence_threshold = 0.7
[output]
format = "latex"
include_confidence = true
include_geometry = false
"#;
let config: ScipixConfig = toml::from_str(toml_str).expect("Failed to deserialize TOML");
assert_eq!(config.preprocessing.target_dpi, 300);
assert_eq!(config.model.device, "cpu");
assert_eq!(config.output.format, "latex");
}
#[test]
fn test_json_serialization() {
let config = ScipixConfig::default();
let json_str = serde_json::to_string(&config).expect("Failed to serialize to JSON");
assert!(json_str.contains("\"target_dpi\":300"));
assert!(json_str.contains("\"device\":\"cpu\""));
}
#[test]
fn test_json_deserialization() {
let json_str = r#"{
"preprocessing": {
"target_dpi": 300,
"max_dimension": 4096,
"denoise_strength": 0.5,
"contrast_enhancement": true,
"auto_rotate": true,
"binarization_method": "adaptive"
},
"model": {
"model_path": "models/scipix_model.onnx",
"device": "cpu",
"batch_size": 4,
"confidence_threshold": 0.7
},
"output": {
"format": "latex",
"include_confidence": true,
"include_geometry": false
}
}"#;
let config: ScipixConfig =
serde_json::from_str(json_str).expect("Failed to deserialize JSON");
assert_eq!(config.preprocessing.target_dpi, 300);
assert_eq!(config.model.device, "cpu");
}
#[test]
fn test_preset_configurations() {
// High quality preset
let high_quality = ScipixConfig {
preprocessing: PreprocessConfig {
target_dpi: 600,
denoise_strength: 0.8,
..Default::default()
},
model: OcrModelConfig {
confidence_threshold: 0.9,
..Default::default()
},
..Default::default()
};
assert_eq!(high_quality.preprocessing.target_dpi, 600);
assert_eq!(high_quality.model.confidence_threshold, 0.9);
// Fast preset
let fast = ScipixConfig {
preprocessing: PreprocessConfig {
target_dpi: 150,
contrast_enhancement: false,
auto_rotate: false,
..Default::default()
},
model: OcrModelConfig {
batch_size: 8,
confidence_threshold: 0.5,
..Default::default()
},
..Default::default()
};
assert_eq!(fast.preprocessing.target_dpi, 150);
assert_eq!(fast.model.batch_size, 8);
}
#[test]
fn test_config_validation_valid() {
    // Take the two sections that carry numeric constraints out of the defaults.
    let ScipixConfig {
        preprocessing,
        model,
        ..
    } = ScipixConfig::default();

    // Sizes and counts must be strictly positive.
    assert!(preprocessing.target_dpi > 0);
    assert!(preprocessing.max_dimension > 0);
    assert!(model.batch_size > 0);

    // Ratios must lie in the closed unit interval.
    assert!(preprocessing.denoise_strength >= 0.0 && preprocessing.denoise_strength <= 1.0);
    assert!(model.confidence_threshold >= 0.0 && model.confidence_threshold <= 1.0);
}
#[test]
fn test_config_validation_invalid_values() {
    // Validation predicate mirroring the checks in `test_config_validation_valid`.
    // The previous version of this test only re-read the value it had just written,
    // so it could never fail; here the invalid values must actually be rejected.
    fn is_valid(config: &ScipixConfig) -> bool {
        let pre = &config.preprocessing;
        let model = &config.model;
        pre.target_dpi > 0
            && pre.max_dimension > 0
            && pre.denoise_strength >= 0.0
            && pre.denoise_strength <= 1.0
            && model.batch_size > 0
            && model.confidence_threshold >= 0.0
            && model.confidence_threshold <= 1.0
    }

    // Sanity check: the predicate accepts the defaults.
    assert!(is_valid(&ScipixConfig::default()));

    // A DPI of zero is invalid.
    let mut config = ScipixConfig::default();
    config.preprocessing.target_dpi = 0;
    assert!(!is_valid(&config));

    // A confidence threshold above 1.0 is invalid.
    let mut config = ScipixConfig::default();
    config.model.confidence_threshold = 1.5;
    assert!(!is_valid(&config));
}
#[test]
fn test_environment_variable_overrides() {
    // Layers optional raw override values on top of `config`.
    // A missing device keeps the current one; a missing or unparsable batch size
    // keeps the current one. This is the same rule the test previously applied
    // directly to the ambient environment.
    fn apply_overrides(
        config: &mut ScipixConfig,
        device: Option<String>,
        batch_size: Option<String>,
    ) {
        if let Some(device) = device {
            config.model.device = device;
        }
        if let Some(size) = batch_size.and_then(|raw| raw.parse::<usize>().ok()) {
            config.model.batch_size = size;
        }
    }

    let defaults = ScipixConfig::default();

    // No overrides: the configuration is untouched.
    let mut config = ScipixConfig::default();
    apply_overrides(&mut config, None, None);
    assert_eq!(config, defaults);

    // Both overrides present: only the two targeted fields change.
    let mut config = ScipixConfig::default();
    apply_overrides(
        &mut config,
        Some("cuda:0".to_string()),
        Some("16".to_string()),
    );
    assert_eq!(config.model.device, "cuda:0");
    assert_eq!(config.model.batch_size, 16);
    assert_eq!(config.preprocessing, defaults.preprocessing);
    assert_eq!(config.output, defaults.output);

    // An unparsable batch size is ignored.
    let mut config = ScipixConfig::default();
    apply_overrides(&mut config, None, Some("not-a-number".to_string()));
    assert_eq!(config.model.batch_size, defaults.model.batch_size);

    // Ambient environment: applying whatever is set must not panic. Values are
    // asserted only when the variable is absent, because a developer's shell
    // (e.g. MATHPIX_BATCH_SIZE=0) is outside this test's control.
    let mut config = ScipixConfig::default();
    apply_overrides(
        &mut config,
        std::env::var("MATHPIX_DEVICE").ok(),
        std::env::var("MATHPIX_BATCH_SIZE").ok(),
    );
    if std::env::var_os("MATHPIX_DEVICE").is_none() {
        assert_eq!(config.model.device, "cpu");
    }
    if std::env::var_os("MATHPIX_BATCH_SIZE").is_none() {
        assert_eq!(config.model.batch_size, defaults.model.batch_size);
    }
}
#[test]
fn test_config_cloning() {
let config1 = ScipixConfig::default();
let config2 = config1.clone();
assert_eq!(config1, config2);
assert_eq!(
config1.preprocessing.target_dpi,
config2.preprocessing.target_dpi
);
}
#[test]
fn test_partial_config_update() {
let mut config = ScipixConfig::default();
// Update only preprocessing settings
config.preprocessing.target_dpi = 450;
config.preprocessing.denoise_strength = 0.7;
assert_eq!(config.preprocessing.target_dpi, 450);
assert_eq!(config.preprocessing.denoise_strength, 0.7);
// Other settings should remain default
assert_eq!(config.model.device, "cpu");
assert_eq!(config.output.format, "latex");
}
#[test]
fn test_binarization_methods() {
    // Every supported method name must be storable verbatim.
    let methods = ["otsu", "adaptive", "sauvola", "niblack"];
    for name in methods {
        let config = PreprocessConfig {
            binarization_method: name.to_string(),
            ..Default::default()
        };
        assert_eq!(config.binarization_method, name);
    }
}
#[test]
fn test_output_formats() {
    // Every supported format name must be storable verbatim.
    let formats = ["latex", "mathml", "mmd", "ascii", "unicode"];
    for name in formats {
        let config = OutputConfig {
            format: name.to_string(),
            ..Default::default()
        };
        assert_eq!(config.format, name);
    }
}
#[test]
fn test_device_configurations() {
    // Every supported device name must be storable verbatim.
    let devices = ["cpu", "cuda", "cuda:0", "cuda:1"];
    for name in devices {
        let config = OcrModelConfig {
            device: name.to_string(),
            ..Default::default()
        };
        assert_eq!(config.device, name);
    }
}
#[test]
fn test_config_roundtrip_toml() {
let original = ScipixConfig::default();
let toml_str = toml::to_string(&original).unwrap();
let deserialized: ScipixConfig = toml::from_str(&toml_str).unwrap();
assert_eq!(original, deserialized);
}
#[test]
fn test_config_roundtrip_json() {
let original = ScipixConfig::default();
let json_str = serde_json::to_string(&original).unwrap();
let deserialized: ScipixConfig = serde_json::from_str(&json_str).unwrap();
assert_eq!(original, deserialized);
}
}

View File

@@ -0,0 +1,344 @@
// Error handling tests for ruvector-scipix
//
// Tests error types, conversions, display messages, and retry logic.
// Target: 95%+ coverage of error handling code
#[cfg(test)]
mod error_tests {
use std::fmt;
use std::io;
// Mock error types for testing
/// Mock error type used by the tests in this module.
///
/// Variants are grouped by the stage that produced the failure; see
/// [`ScipixError::is_retryable`] and [`ScipixError::status_code`] for how each
/// one is classified.
#[derive(Debug, Clone, PartialEq)]
enum ScipixError {
    // Image errors
    InvalidImageFormat(String),
    ImageTooLarge { size: u64, max: u64 },
    ImagePreprocessingFailed(String),
    ImageLoadError(String),
    // Model errors
    ModelNotFound(String),
    ModelLoadError(String),
    InferenceError(String),
    // OCR errors
    TextDetectionFailed(String),
    TextRecognitionFailed(String),
    LowConfidence { score: f32, threshold: f32 },
    // Math parsing errors
    ParseError(String),
    InvalidExpression(String),
    // I/O errors
    IoError(String),
    // API errors
    ApiError { status: u16, message: String },
    RateLimitExceeded,
    // System errors
    Timeout(std::time::Duration),
    OutOfMemory,
    Internal(String),
}

impl fmt::Display for ScipixError {
    /// Writes the human-readable message for each variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidImageFormat(format) => {
                write!(f, "Invalid image format: {}", format)
            }
            Self::ImageTooLarge { size, max } => {
                write!(f, "Image too large: {} bytes (max: {} bytes)", size, max)
            }
            Self::ImagePreprocessingFailed(reason) => {
                write!(f, "Image preprocessing failed: {}", reason)
            }
            Self::ImageLoadError(msg) => write!(f, "Failed to load image: {}", msg),
            Self::ModelNotFound(model) => write!(f, "Model not found: {}", model),
            Self::ModelLoadError(msg) => write!(f, "Failed to load model: {}", msg),
            Self::InferenceError(msg) => write!(f, "Model inference failed: {}", msg),
            Self::TextDetectionFailed(msg) => write!(f, "Text detection failed: {}", msg),
            Self::TextRecognitionFailed(msg) => {
                write!(f, "Text recognition failed: {}", msg)
            }
            // Scores are shown with two decimals.
            Self::LowConfidence { score, threshold } => write!(
                f,
                "Low confidence score: {:.2} (threshold: {:.2})",
                score, threshold
            ),
            Self::ParseError(msg) => write!(f, "Parse error: {}", msg),
            Self::InvalidExpression(expr) => write!(f, "Invalid expression: {}", expr),
            Self::IoError(msg) => write!(f, "I/O error: {}", msg),
            Self::ApiError { status, message } => {
                write!(f, "API error {}: {}", status, message)
            }
            Self::RateLimitExceeded => write!(f, "Rate limit exceeded"),
            // `Duration`'s Debug form is used, e.g. "30s".
            Self::Timeout(duration) => write!(f, "Operation timed out after {:?}", duration),
            Self::OutOfMemory => write!(f, "Out of memory"),
            Self::Internal(msg) => write!(f, "Internal error: {}", msg),
        }
    }
}

impl std::error::Error for ScipixError {}

impl From<io::Error> for ScipixError {
    /// Converts an I/O error into `IoError`, keeping only its message text.
    fn from(err: io::Error) -> Self {
        Self::IoError(err.to_string())
    }
}

impl ScipixError {
    /// Returns `true` for failures that may succeed on a later attempt:
    /// timeouts, rate limiting (either `RateLimitExceeded` or an `ApiError`
    /// with status 429), an `ApiError` with status 503, and inference errors.
    ///
    /// `RateLimitExceeded` is included so that it agrees with the equivalent
    /// `ApiError { status: 429, .. }`, which was already retryable.
    fn is_retryable(&self) -> bool {
        matches!(
            self,
            Self::Timeout(_)
                | Self::RateLimitExceeded
                | Self::ApiError { status: 503, .. }
                | Self::ApiError { status: 429, .. }
                | Self::InferenceError(_)
        )
    }

    /// Maps the error to an HTTP-style status code.
    ///
    /// Always returns `Some`; variants without a dedicated code map to 500.
    /// The match is exhaustive on purpose, so adding a variant forces a
    /// decision here instead of silently becoming a 500.
    fn status_code(&self) -> Option<u16> {
        let code = match self {
            Self::InvalidImageFormat(_) => 400,
            Self::ImageTooLarge { .. } => 413,
            Self::ModelNotFound(_) => 404,
            Self::RateLimitExceeded => 429,
            Self::ApiError { status, .. } => *status,
            Self::Timeout(_) => 408,
            Self::OutOfMemory => 507,
            Self::ImagePreprocessingFailed(_)
            | Self::ImageLoadError(_)
            | Self::ModelLoadError(_)
            | Self::InferenceError(_)
            | Self::TextDetectionFailed(_)
            | Self::TextRecognitionFailed(_)
            | Self::LowConfidence { .. }
            | Self::ParseError(_)
            | Self::InvalidExpression(_)
            | Self::IoError(_)
            | Self::Internal(_) => 500,
        };
        Some(code)
    }
}
#[test]
fn test_error_creation() {
let err = ScipixError::InvalidImageFormat("svg".to_string());
assert_eq!(
err,
ScipixError::InvalidImageFormat("svg".to_string())
);
}
#[test]
fn test_error_display_invalid_format() {
let err = ScipixError::InvalidImageFormat("svg".to_string());
assert_eq!(err.to_string(), "Invalid image format: svg");
}
#[test]
fn test_error_display_image_too_large() {
let err = ScipixError::ImageTooLarge {
size: 15_000_000,
max: 10_000_000,
};
assert_eq!(
err.to_string(),
"Image too large: 15000000 bytes (max: 10000000 bytes)"
);
}
#[test]
fn test_error_display_low_confidence() {
let err = ScipixError::LowConfidence {
score: 0.65,
threshold: 0.8,
};
assert_eq!(
err.to_string(),
"Low confidence score: 0.65 (threshold: 0.80)"
);
}
#[test]
fn test_error_display_api_error() {
let err = ScipixError::ApiError {
status: 404,
message: "Not found".to_string(),
};
assert_eq!(err.to_string(), "API error 404: Not found");
}
#[test]
fn test_error_display_timeout() {
let err = ScipixError::Timeout(std::time::Duration::from_secs(30));
assert_eq!(err.to_string(), "Operation timed out after 30s");
}
#[test]
fn test_io_error_conversion() {
    // An `io::Error` must convert into the `IoError` variant and keep its message.
    let source = io::Error::new(io::ErrorKind::NotFound, "file not found");
    let converted = ScipixError::from(source);
    if let ScipixError::IoError(msg) = converted {
        assert!(msg.contains("file not found"));
    } else {
        panic!("Expected IoError variant");
    }
}
#[test]
fn test_is_retryable_timeout() {
let err = ScipixError::Timeout(std::time::Duration::from_secs(10));
assert!(err.is_retryable());
}
#[test]
fn test_is_retryable_503() {
let err = ScipixError::ApiError {
status: 503,
message: "Service Unavailable".to_string(),
};
assert!(err.is_retryable());
}
#[test]
fn test_is_retryable_429() {
let err = ScipixError::ApiError {
status: 429,
message: "Too Many Requests".to_string(),
};
assert!(err.is_retryable());
}
#[test]
fn test_is_not_retryable_404() {
let err = ScipixError::ApiError {
status: 404,
message: "Not Found".to_string(),
};
assert!(!err.is_retryable());
}
#[test]
fn test_is_not_retryable_invalid_format() {
let err = ScipixError::InvalidImageFormat("svg".to_string());
assert!(!err.is_retryable());
}
#[test]
fn test_status_code_invalid_format() {
let err = ScipixError::InvalidImageFormat("svg".to_string());
assert_eq!(err.status_code(), Some(400));
}
#[test]
fn test_status_code_image_too_large() {
let err = ScipixError::ImageTooLarge {
size: 15_000_000,
max: 10_000_000,
};
assert_eq!(err.status_code(), Some(413));
}
#[test]
fn test_status_code_not_found() {
let err = ScipixError::ModelNotFound("model.onnx".to_string());
assert_eq!(err.status_code(), Some(404));
}
#[test]
fn test_status_code_rate_limit() {
let err = ScipixError::RateLimitExceeded;
assert_eq!(err.status_code(), Some(429));
}
#[test]
fn test_status_code_timeout() {
let err = ScipixError::Timeout(std::time::Duration::from_secs(30));
assert_eq!(err.status_code(), Some(408));
}
#[test]
fn test_status_code_out_of_memory() {
let err = ScipixError::OutOfMemory;
assert_eq!(err.status_code(), Some(507));
}
#[test]
fn test_status_code_internal() {
let err = ScipixError::Internal("something went wrong".to_string());
assert_eq!(err.status_code(), Some(500));
}
#[test]
fn test_error_cloning() {
let err1 = ScipixError::InvalidImageFormat("svg".to_string());
let err2 = err1.clone();
assert_eq!(err1, err2);
}
#[test]
fn test_multiple_error_types() {
let errors = vec![
ScipixError::InvalidImageFormat("svg".to_string()),
ScipixError::ImageTooLarge {
size: 15_000_000,
max: 10_000_000,
},
ScipixError::ModelNotFound("model.onnx".to_string()),
ScipixError::RateLimitExceeded,
ScipixError::Timeout(std::time::Duration::from_secs(30)),
];
assert_eq!(errors.len(), 5);
for err in &errors {
assert!(!err.to_string().is_empty());
}
}
#[test]
fn test_error_categorization() {
let image_errors = vec![
ScipixError::InvalidImageFormat("svg".to_string()),
ScipixError::ImageTooLarge {
size: 15_000_000,
max: 10_000_000,
},
ScipixError::ImagePreprocessingFailed("deskew failed".to_string()),
];
for err in &image_errors {
match err {
ScipixError::InvalidImageFormat(_)
| ScipixError::ImageTooLarge { .. }
| ScipixError::ImagePreprocessingFailed(_) => {
// Image-related errors
assert!(err.status_code().is_some());
}
_ => panic!("Expected image error"),
}
}
}
#[test]
fn test_retryable_errors_collection() {
let errors = vec![
ScipixError::Timeout(std::time::Duration::from_secs(30)),
ScipixError::ApiError {
status: 503,
message: "Service Unavailable".to_string(),
},
ScipixError::InferenceError("temporary failure".to_string()),
];
let retryable_count = errors.iter().filter(|e| e.is_retryable()).count();
assert_eq!(retryable_count, 3);
}
#[test]
fn test_non_retryable_errors_collection() {
let errors = vec![
ScipixError::InvalidImageFormat("svg".to_string()),
ScipixError::ModelNotFound("model.onnx".to_string()),
ScipixError::ParseError("invalid latex".to_string()),
];
let retryable_count = errors.iter().filter(|e| e.is_retryable()).count();
assert_eq!(retryable_count, 0);
}
}

View File

@@ -0,0 +1,596 @@
// Math parsing tests for ruvector-scipix
//
// Tests symbol recognition, AST construction, and LaTeX/MathML/AsciiMath generation
// for various mathematical expressions including fractions, roots, matrices, integrals, etc.
// Target: 90%+ coverage of math parsing module
#[cfg(test)]
mod math_tests {
// Mock math structures for testing
/// Mock set of individual math symbols used by the tests in this module.
#[derive(Debug, Clone, PartialEq)]
enum MathSymbol {
    // Numbers
    Digit(char),
    // Variables
    Variable(char),
    // Greek letters
    Alpha,
    Beta,
    Gamma,
    Delta,
    Epsilon,
    Pi,
    Sigma,
    Omega,
    // Operators
    Plus,
    Minus,
    Times,
    Divide,
    Equals,
    // Relations
    LessThan,
    GreaterThan,
    LessEqual,
    GreaterEqual,
    NotEqual,
    // Special symbols
    Infinity,
    Partial,
    Nabla,
    Integral,
    Sum,
    Product,
    Root,
    Sqrt,
    // Brackets
    LeftParen,
    RightParen,
    LeftBracket,
    RightBracket,
    LeftBrace,
    RightBrace,
}

/// Mock expression tree rendered by `to_latex`, `to_mathml` and `to_asciimath`.
#[derive(Debug, Clone, PartialEq)]
enum MathNode {
    Number(String),
    Variable(String),
    Symbol(MathSymbol),
    BinaryOp {
        op: String,
        left: Box<MathNode>,
        right: Box<MathNode>,
    },
    Fraction {
        numerator: Box<MathNode>,
        denominator: Box<MathNode>,
    },
    Superscript {
        base: Box<MathNode>,
        exponent: Box<MathNode>,
    },
    Subscript {
        base: Box<MathNode>,
        index: Box<MathNode>,
    },
    Root {
        /// `None` means a square root.
        degree: Option<Box<MathNode>>,
        radicand: Box<MathNode>,
    },
    Matrix {
        rows: usize,
        cols: usize,
        elements: Vec<Vec<MathNode>>,
    },
    Integral {
        lower: Option<Box<MathNode>>,
        upper: Option<Box<MathNode>>,
        integrand: Box<MathNode>,
    },
    Summation {
        lower: Option<Box<MathNode>>,
        upper: Option<Box<MathNode>>,
        term: Box<MathNode>,
    },
}

impl MathNode {
    /// Renders the node as LaTeX.
    ///
    /// Every variant and every symbol has a rendering; previously any symbol
    /// other than `+ - \times \div \pi \alpha \infty` silently produced an empty
    /// string. Matrices use `bmatrix` and ignore the `rows`/`cols` fields, and
    /// integrals always end in a literal ` dx`.
    fn to_latex(&self) -> String {
        match self {
            Self::Number(n) => n.clone(),
            Self::Variable(v) => v.clone(),
            Self::Symbol(symbol) => Self::symbol_latex(symbol),
            Self::BinaryOp { op, left, right } => {
                format!("{} {} {}", left.to_latex(), op, right.to_latex())
            }
            Self::Fraction {
                numerator,
                denominator,
            } => format!(
                r"\frac{{{}}}{{{}}}",
                numerator.to_latex(),
                denominator.to_latex()
            ),
            Self::Superscript { base, exponent } => {
                format!("{}^{{{}}}", base.to_latex(), exponent.to_latex())
            }
            Self::Subscript { base, index } => {
                format!("{}_{{{}}}", base.to_latex(), index.to_latex())
            }
            Self::Root {
                degree: None,
                radicand,
            } => format!(r"\sqrt{{{}}}", radicand.to_latex()),
            // The degree keeps its inner braces (`\sqrt[{3}]{x}`) so that a degree
            // containing `]` cannot end the optional argument early.
            Self::Root {
                degree: Some(n),
                radicand,
            } => format!(r"\sqrt[{{{}}}]{{{}}}", n.to_latex(), radicand.to_latex()),
            Self::Matrix { elements, .. } => {
                let body = elements
                    .iter()
                    .map(|row| {
                        row.iter()
                            .map(|cell| cell.to_latex())
                            .collect::<Vec<_>>()
                            .join(" & ")
                    })
                    .collect::<Vec<_>>()
                    .join(r" \\ ");
                format!(r"\begin{{bmatrix}}{} \end{{bmatrix}}", body)
            }
            Self::Integral {
                lower,
                upper,
                integrand,
            } => format!(
                r"\int{} {} dx",
                Self::latex_limits(lower, upper),
                integrand.to_latex()
            ),
            Self::Summation { lower, upper, term } => format!(
                r"\sum{} {}",
                Self::latex_limits(lower, upper),
                term.to_latex()
            ),
        }
    }

    /// Renders the node as MathML.
    ///
    /// Text content (numbers, identifiers, operators) is XML-escaped, so an
    /// operator such as `<` no longer yields malformed markup. Symbols, matrices,
    /// integrals and summations are not supported and still render as an empty
    /// string.
    fn to_mathml(&self) -> String {
        match self {
            Self::Number(n) => format!("<mn>{}</mn>", Self::escape_xml(n)),
            Self::Variable(v) => format!("<mi>{}</mi>", Self::escape_xml(v)),
            Self::BinaryOp { op, left, right } => format!(
                "<mrow>{}<mo>{}</mo>{}</mrow>",
                left.to_mathml(),
                Self::escape_xml(op),
                right.to_mathml()
            ),
            Self::Fraction {
                numerator,
                denominator,
            } => format!(
                "<mfrac>{}{}</mfrac>",
                numerator.to_mathml(),
                denominator.to_mathml()
            ),
            Self::Superscript { base, exponent } => {
                format!("<msup>{}{}</msup>", base.to_mathml(), exponent.to_mathml())
            }
            Self::Subscript { base, index } => {
                format!("<msub>{}{}</msub>", base.to_mathml(), index.to_mathml())
            }
            Self::Root {
                degree: None,
                radicand,
            } => format!("<msqrt>{}</msqrt>", radicand.to_mathml()),
            // `<mroot>` takes the radicand first and the index second.
            Self::Root {
                degree: Some(n),
                radicand,
            } => format!("<mroot>{}{}</mroot>", radicand.to_mathml(), n.to_mathml()),
            Self::Symbol(_)
            | Self::Matrix { .. }
            | Self::Integral { .. }
            | Self::Summation { .. } => String::new(),
        }
    }

    /// Renders the node as AsciiMath.
    ///
    /// Compound bases and scripts are wrapped in parentheses so that, for
    /// example, `x` raised to `a + 1` becomes `x^(a + 1)` rather than
    /// `x^a + 1`. Symbols, matrices, integrals and summations are not supported
    /// and still render as an empty string.
    fn to_asciimath(&self) -> String {
        match self {
            Self::Number(n) => n.clone(),
            Self::Variable(v) => v.clone(),
            Self::BinaryOp { op, left, right } => {
                format!("{} {} {}", left.to_asciimath(), op, right.to_asciimath())
            }
            Self::Fraction {
                numerator,
                denominator,
            } => format!(
                "({})/({})",
                numerator.to_asciimath(),
                denominator.to_asciimath()
            ),
            Self::Superscript { base, exponent } => {
                format!("{}^{}", base.asciimath_base(), exponent.asciimath_script())
            }
            Self::Subscript { base, index } => {
                format!("{}_{}", base.asciimath_base(), index.asciimath_script())
            }
            Self::Root {
                degree: None,
                radicand,
            } => format!("sqrt({})", radicand.to_asciimath()),
            Self::Root {
                degree: Some(n),
                radicand,
            } => format!("root({})({})", n.to_asciimath(), radicand.to_asciimath()),
            Self::Symbol(_)
            | Self::Matrix { .. }
            | Self::Integral { .. }
            | Self::Summation { .. } => String::new(),
        }
    }

    /// LaTeX spelling of a single symbol.
    fn symbol_latex(symbol: &MathSymbol) -> String {
        let text = match symbol {
            MathSymbol::Digit(c) | MathSymbol::Variable(c) => return c.to_string(),
            MathSymbol::Alpha => r"\alpha",
            MathSymbol::Beta => r"\beta",
            MathSymbol::Gamma => r"\gamma",
            MathSymbol::Delta => r"\delta",
            MathSymbol::Epsilon => r"\epsilon",
            MathSymbol::Pi => r"\pi",
            // Upper-case, matching the glyphs paired with these symbols in
            // `test_symbol_recognition_greek`.
            MathSymbol::Sigma => r"\Sigma",
            MathSymbol::Omega => r"\Omega",
            MathSymbol::Plus => "+",
            MathSymbol::Minus => "-",
            MathSymbol::Times => r"\times",
            MathSymbol::Divide => r"\div",
            MathSymbol::Equals => "=",
            MathSymbol::LessThan => "<",
            MathSymbol::GreaterThan => ">",
            MathSymbol::LessEqual => r"\leq",
            MathSymbol::GreaterEqual => r"\geq",
            MathSymbol::NotEqual => r"\neq",
            MathSymbol::Infinity => r"\infty",
            MathSymbol::Partial => r"\partial",
            MathSymbol::Nabla => r"\nabla",
            MathSymbol::Integral => r"\int",
            MathSymbol::Sum => r"\sum",
            MathSymbol::Product => r"\prod",
            // Stand-alone radical sign; structured roots go through `MathNode::Root`.
            MathSymbol::Root | MathSymbol::Sqrt => r"\surd",
            MathSymbol::LeftParen => "(",
            MathSymbol::RightParen => ")",
            MathSymbol::LeftBracket => "[",
            MathSymbol::RightBracket => "]",
            MathSymbol::LeftBrace => r"\{",
            MathSymbol::RightBrace => r"\}",
        };
        text.to_string()
    }

    /// Builds the `_{lower}^{upper}` suffix shared by integrals and summations.
    fn latex_limits(lower: &Option<Box<MathNode>>, upper: &Option<Box<MathNode>>) -> String {
        let mut limits = String::new();
        if let Some(bound) = lower {
            limits.push_str(&format!("_{{{}}}", bound.to_latex()));
        }
        if let Some(bound) = upper {
            limits.push_str(&format!("^{{{}}}", bound.to_latex()));
        }
        limits
    }

    /// Escapes the characters that are special in XML text content.
    fn escape_xml(text: &str) -> String {
        let mut escaped = String::with_capacity(text.len());
        for ch in text.chars() {
            match ch {
                '&' => escaped.push_str("&amp;"),
                '<' => escaped.push_str("&lt;"),
                '>' => escaped.push_str("&gt;"),
                other => escaped.push(other),
            }
        }
        escaped
    }

    /// AsciiMath for a node used as the base of a script; compound bases are
    /// parenthesized so the script applies to the whole expression.
    fn asciimath_base(&self) -> String {
        match self {
            Self::BinaryOp { .. } | Self::Fraction { .. } | Self::Superscript { .. } => {
                format!("({})", self.to_asciimath())
            }
            _ => self.to_asciimath(),
        }
    }

    /// AsciiMath for a node used as an exponent or index; anything other than a
    /// plain number or variable is parenthesized.
    fn asciimath_script(&self) -> String {
        match self {
            Self::Number(_) | Self::Variable(_) => self.to_asciimath(),
            _ => format!("({})", self.to_asciimath()),
        }
    }
}
#[test]
fn test_symbol_recognition_numbers() {
let symbols = vec![
MathSymbol::Digit('0'),
MathSymbol::Digit('1'),
MathSymbol::Digit('9'),
];
for symbol in symbols {
assert!(matches!(symbol, MathSymbol::Digit(_)));
}
}
#[test]
fn test_symbol_recognition_variables() {
let symbols = vec![
MathSymbol::Variable('x'),
MathSymbol::Variable('y'),
MathSymbol::Variable('z'),
];
for symbol in symbols {
assert!(matches!(symbol, MathSymbol::Variable(_)));
}
}
#[test]
fn test_symbol_recognition_greek() {
let greeks = vec![
(MathSymbol::Alpha, "α"),
(MathSymbol::Beta, "β"),
(MathSymbol::Gamma, "γ"),
(MathSymbol::Delta, "δ"),
(MathSymbol::Pi, "π"),
(MathSymbol::Sigma, "Σ"),
(MathSymbol::Omega, "Ω"),
];
assert_eq!(greeks.len(), 7);
}
#[test]
fn test_symbol_recognition_operators() {
let ops = vec![
MathSymbol::Plus,
MathSymbol::Minus,
MathSymbol::Times,
MathSymbol::Divide,
MathSymbol::Equals,
];
assert_eq!(ops.len(), 5);
}
#[test]
fn test_ast_construction_simple_addition() {
let expr = MathNode::BinaryOp {
op: "+".to_string(),
left: Box::new(MathNode::Variable("x".to_string())),
right: Box::new(MathNode::Variable("y".to_string())),
};
assert!(matches!(expr, MathNode::BinaryOp { .. }));
}
#[test]
fn test_ast_construction_simple_multiplication() {
let expr = MathNode::BinaryOp {
op: "*".to_string(),
left: Box::new(MathNode::Number("2".to_string())),
right: Box::new(MathNode::Variable("x".to_string())),
};
match expr {
MathNode::BinaryOp { op, .. } => assert_eq!(op, "*"),
_ => panic!("Expected BinaryOp"),
}
}
#[test]
fn test_latex_generation_simple_addition() {
let expr = MathNode::BinaryOp {
op: "+".to_string(),
left: Box::new(MathNode::Variable("x".to_string())),
right: Box::new(MathNode::Variable("y".to_string())),
};
let latex = expr.to_latex();
assert_eq!(latex, "x + y");
}
#[test]
fn test_latex_generation_fraction_simple() {
let frac = MathNode::Fraction {
numerator: Box::new(MathNode::Number("1".to_string())),
denominator: Box::new(MathNode::Number("2".to_string())),
};
let latex = frac.to_latex();
assert_eq!(latex, r"\frac{1}{2}");
}
#[test]
fn test_latex_generation_fraction_variables() {
let frac = MathNode::Fraction {
numerator: Box::new(MathNode::Variable("a".to_string())),
denominator: Box::new(MathNode::Variable("b".to_string())),
};
let latex = frac.to_latex();
assert_eq!(latex, r"\frac{a}{b}");
}
#[test]
fn test_latex_generation_fraction_complex() {
let numerator = MathNode::BinaryOp {
op: "+".to_string(),
left: Box::new(MathNode::Variable("a".to_string())),
right: Box::new(MathNode::Number("1".to_string())),
};
let frac = MathNode::Fraction {
numerator: Box::new(numerator),
denominator: Box::new(MathNode::Variable("b".to_string())),
};
let latex = frac.to_latex();
assert_eq!(latex, r"\frac{a + 1}{b}");
}
#[test]
fn test_latex_generation_root_square() {
let root = MathNode::Root {
degree: None,
radicand: Box::new(MathNode::Variable("x".to_string())),
};
let latex = root.to_latex();
assert_eq!(latex, r"\sqrt{x}");
}
#[test]
fn test_latex_generation_root_nth() {
let root = MathNode::Root {
degree: Some(Box::new(MathNode::Number("3".to_string()))),
radicand: Box::new(MathNode::Variable("x".to_string())),
};
let latex = root.to_latex();
assert_eq!(latex, r"\sqrt[{3}]{x}");
}
#[test]
fn test_latex_generation_superscript() {
let power = MathNode::Superscript {
base: Box::new(MathNode::Variable("x".to_string())),
exponent: Box::new(MathNode::Number("2".to_string())),
};
let latex = power.to_latex();
assert_eq!(latex, "x^{2}");
}
#[test]
fn test_latex_generation_subscript() {
let sub = MathNode::Subscript {
base: Box::new(MathNode::Variable("x".to_string())),
index: Box::new(MathNode::Number("1".to_string())),
};
let latex = sub.to_latex();
assert_eq!(latex, "x_{1}");
}
#[test]
fn test_latex_generation_subscript_and_superscript() {
let base = MathNode::Variable("x".to_string());
let with_sub = MathNode::Subscript {
base: Box::new(base),
index: Box::new(MathNode::Number("1".to_string())),
};
let with_both = MathNode::Superscript {
base: Box::new(with_sub),
exponent: Box::new(MathNode::Number("2".to_string())),
};
let latex = with_both.to_latex();
assert_eq!(latex, "x_{1}^{2}");
}
#[test]
fn test_latex_generation_matrix_2x2() {
let matrix = MathNode::Matrix {
rows: 2,
cols: 2,
elements: vec![
vec![
MathNode::Number("1".to_string()),
MathNode::Number("2".to_string()),
],
vec![
MathNode::Number("3".to_string()),
MathNode::Number("4".to_string()),
],
],
};
let latex = matrix.to_latex();
assert!(latex.contains(r"\begin{bmatrix}"));
assert!(latex.contains(r"\end{bmatrix}"));
assert!(latex.contains("1 & 2"));
assert!(latex.contains("3 & 4"));
}
#[test]
fn test_latex_generation_matrix_3x3() {
let matrix = MathNode::Matrix {
rows: 3,
cols: 3,
elements: vec![
vec![
MathNode::Number("1".to_string()),
MathNode::Number("2".to_string()),
MathNode::Number("3".to_string()),
],
vec![
MathNode::Number("4".to_string()),
MathNode::Number("5".to_string()),
MathNode::Number("6".to_string()),
],
vec![
MathNode::Number("7".to_string()),
MathNode::Number("8".to_string()),
MathNode::Number("9".to_string()),
],
],
};
let latex = matrix.to_latex();
assert!(latex.contains(r"\begin{bmatrix}"));
assert!(latex.contains("1 & 2 & 3"));
}
#[test]
fn test_latex_generation_integral_simple() {
let integral = MathNode::Integral {
lower: None,
upper: None,
integrand: Box::new(MathNode::Variable("x".to_string())),
};
let latex = integral.to_latex();
assert!(latex.contains(r"\int"));
assert!(latex.contains("x dx"));
}
#[test]
fn test_latex_generation_integral_with_limits() {
let integral = MathNode::Integral {
lower: Some(Box::new(MathNode::Number("0".to_string()))),
upper: Some(Box::new(MathNode::Number("1".to_string()))),
integrand: Box::new(MathNode::Variable("x".to_string())),
};
let latex = integral.to_latex();
assert!(latex.contains(r"\int_{0}^{1}"));
}
#[test]
fn test_latex_generation_summation() {
let sum = MathNode::Summation {
lower: Some(Box::new(MathNode::BinaryOp {
op: "=".to_string(),
left: Box::new(MathNode::Variable("i".to_string())),
right: Box::new(MathNode::Number("1".to_string())),
})),
upper: Some(Box::new(MathNode::Variable("n".to_string()))),
term: Box::new(MathNode::Variable("i".to_string())),
};
let latex = sum.to_latex();
assert!(latex.contains(r"\sum"));
}
#[test]
fn test_mathml_generation_number() {
let num = MathNode::Number("42".to_string());
let mathml = num.to_mathml();
assert_eq!(mathml, "<mn>42</mn>");
}
#[test]
fn test_mathml_generation_variable() {
let var = MathNode::Variable("x".to_string());
let mathml = var.to_mathml();
assert_eq!(mathml, "<mi>x</mi>");
}
#[test]
fn test_mathml_generation_fraction() {
let frac = MathNode::Fraction {
numerator: Box::new(MathNode::Number("1".to_string())),
denominator: Box::new(MathNode::Number("2".to_string())),
};
let mathml = frac.to_mathml();
assert!(mathml.contains("<mfrac>"));
assert!(mathml.contains("<mn>1</mn>"));
assert!(mathml.contains("<mn>2</mn>"));
}
#[test]
fn test_mathml_generation_superscript() {
let power = MathNode::Superscript {
base: Box::new(MathNode::Variable("x".to_string())),
exponent: Box::new(MathNode::Number("2".to_string())),
};
let mathml = power.to_mathml();
assert!(mathml.contains("<msup>"));
assert!(mathml.contains("<mi>x</mi>"));
assert!(mathml.contains("<mn>2</mn>"));
}
#[test]
fn test_asciimath_generation_simple() {
let expr = MathNode::BinaryOp {
op: "+".to_string(),
left: Box::new(MathNode::Variable("x".to_string())),
right: Box::new(MathNode::Number("1".to_string())),
};
let ascii = expr.to_asciimath();
assert_eq!(ascii, "x + 1");
}
#[test]
fn test_asciimath_generation_fraction() {
let frac = MathNode::Fraction {
numerator: Box::new(MathNode::Variable("a".to_string())),
denominator: Box::new(MathNode::Variable("b".to_string())),
};
let ascii = frac.to_asciimath();
assert_eq!(ascii, "(a)/(b)");
}
#[test]
fn test_asciimath_generation_power() {
let power = MathNode::Superscript {
base: Box::new(MathNode::Variable("x".to_string())),
exponent: Box::new(MathNode::Number("2".to_string())),
};
let ascii = power.to_asciimath();
assert_eq!(ascii, "x^2");
}
}

View File

@@ -0,0 +1,68 @@
// Unit test module organization for ruvector-scipix
//
// This module organizes all unit tests following Rust testing best practices.
// Each submodule tests a specific component in isolation with comprehensive coverage.
/// Configuration tests - Test config loading, validation, defaults
pub mod config_tests;
/// Error handling tests - Test error types, conversions, display
pub mod error_tests;
/// Preprocessing tests - Test image preprocessing pipeline
pub mod preprocess_tests;
/// Math parsing tests - Test mathematical expression parsing and recognition
pub mod math_tests;
/// Output formatting tests - Test LaTeX, MathML, and other format generation
pub mod output_tests;
/// OCR engine tests - Test OCR model loading and inference
pub mod ocr_tests;
#[cfg(test)]
mod common {
use std::path::PathBuf;
/// Returns `<CARGO_MANIFEST_DIR>/tests/fixtures`, where the manifest directory
/// is the value captured by `env!` when this file is compiled.
pub fn fixtures_dir() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    dir.push("tests");
    dir.push("fixtures");
    dir
}
/// Returns the path of the fixture called `name` inside `fixtures_dir()`.
/// The path is built without checking that the file exists.
pub fn fixture_path(name: &str) -> PathBuf {
    let mut path = fixtures_dir();
    path.push(name);
    path
}
/// Reports whether the fixture called `name` is present on disk.
pub fn has_fixture(name: &str) -> bool {
    let candidate = fixture_path(name);
    candidate.exists()
}
/// Normalizes a LaTeX string for loose comparison by removing every whitespace
/// character and lowercasing the result.
///
/// Because of the lowercasing, inputs that differ only in letter case (for
/// example `\Delta` and `\delta`) normalize to the same string.
pub fn normalize_latex(latex: &str) -> String {
    let compact: String = latex.split_whitespace().collect();
    compact.to_lowercase()
}
/// Calculates a simple positional similarity between two strings, from 0.0 to 1.0.
///
/// The score is the number of positions at which both strings hold the same
/// character, divided by the length of the longer string. Identical strings
/// score 1.0; if exactly one string is empty the score is 0.0.
///
/// Lengths are measured in characters, the same unit used to count matches.
/// The previous version divided a character count by a byte length, which
/// under-scored any non-ASCII input ("αβ" vs "αγ" gave 0.25 instead of 0.5).
pub fn string_similarity(a: &str, b: &str) -> f64 {
    if a == b {
        return 1.0;
    }
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }
    let longest = a.chars().count().max(b.chars().count());
    let matching = a.chars().zip(b.chars()).filter(|(x, y)| x == y).count();
    matching as f64 / longest as f64
}
}

View File

@@ -0,0 +1,385 @@
// OCR engine tests for ruvector-scipix
//
// Tests OCR engine initialization, model loading, inference options,
// and batch processing capabilities.
// Target: 85%+ coverage of OCR engine module
#[cfg(test)]
mod ocr_tests {
use std::path::PathBuf;
// Mock OCR engine structures
/// Mock stand-in for the OCR engine: it records its configuration and whether `load` has run.
#[derive(Debug, Clone)]
struct OcrEngine {
/// Model file location; `OcrEngine::new` only accepts `.onnx` files.
model_path: PathBuf,
/// Device label stored verbatim from the constructor argument (the tests use "cpu", "cuda", "cuda:0", ...).
device: String,
/// Batch size; set to 4 by `new` and overwritten by `set_batch_size`.
batch_size: usize,
/// `false` after `new`, `true` once `load` has succeeded.
loaded: bool,
}
/// Per-call options handed to `OcrEngine::process` and `process_batch`.
#[derive(Debug, Clone)]
struct OcrOptions {
/// Confidence cut-off; the `Default` impl uses 0.7.
confidence_threshold: f32,
/// Rotation-detection switch; enabled by default.
detect_rotation: bool,
/// Preprocessing switch; enabled by default.
preprocessing: bool,
/// Language tag; defaults to "en" (the tests also use "math", "mixed", "es", "fr", "de").
language: String,
}
/// Result returned by the mock `OcrEngine::process`.
#[derive(Debug, Clone)]
struct OcrResult {
/// Recognised text.
text: String,
/// Overall confidence; the tests expect a value in (0.0, 1.0].
confidence: f32,
/// Regions reported for the result.
bounding_boxes: Vec<BoundingBox>,
/// Processing time in milliseconds, per the field name.
processing_time_ms: u64,
}
/// Rectangle attached to an `OcrResult`, with its own confidence score.
// NOTE(review): coordinates are presumably pixels with a top-left origin - confirm against the real engine.
#[derive(Debug, Clone)]
struct BoundingBox {
/// Horizontal position of the box.
x: u32,
/// Vertical position of the box.
y: u32,
/// Horizontal extent of the box.
width: u32,
/// Vertical extent of the box.
height: u32,
/// Confidence score for this box.
confidence: f32,
}
impl Default for OcrOptions {
fn default() -> Self {
Self {
confidence_threshold: 0.7,
detect_rotation: true,
preprocessing: true,
language: "en".to_string(),
}
}
}
impl OcrEngine {
    /// Creates an engine for `model_path` on `device`; the model is not loaded yet.
    ///
    /// # Errors
    /// Returns `"Model must be .onnx format"` unless the path's extension is
    /// exactly `onnx`. A file named just `.onnx` has no extension and is
    /// rejected.
    fn new(model_path: PathBuf, device: &str) -> Result<Self, String> {
        // Inspect the extension component rather than string-matching a
        // lossily converted path.
        let is_onnx = model_path.extension().map_or(false, |ext| ext == "onnx");
        if !is_onnx {
            return Err("Model must be .onnx format".to_string());
        }
        Ok(Self {
            model_path,
            device: device.to_string(),
            batch_size: 4,
            loaded: false,
        })
    }

    /// Marks the model as loaded.
    ///
    /// # Errors
    /// Returns `"Model already loaded"` when called a second time.
    fn load(&mut self) -> Result<(), String> {
        if self.loaded {
            return Err("Model already loaded".to_string());
        }
        self.loaded = true;
        Ok(())
    }

    /// Reports whether `load` has succeeded.
    fn is_loaded(&self) -> bool {
        self.loaded
    }

    /// Returns a fixed mock result for one image.
    ///
    /// # Errors
    /// Returns `"Model not loaded"` if `load` has not been called.
    fn process(&self, _image_data: &[u8], options: &OcrOptions) -> Result<OcrResult, String> {
        if !self.loaded {
            return Err("Model not loaded".to_string());
        }
        // The mock output is constant; `options` is accepted for API parity only.
        let _ = options;
        Ok(OcrResult {
            text: "x^2 + 1".to_string(),
            confidence: 0.95,
            bounding_boxes: vec![BoundingBox {
                x: 10,
                y: 20,
                width: 100,
                height: 50,
                confidence: 0.95,
            }],
            processing_time_ms: 123,
        })
    }

    /// Runs `process` on every image, stopping at the first error.
    ///
    /// # Errors
    /// Returns `"Model not loaded"` if `load` has not been called, even for an
    /// empty batch.
    fn process_batch(
        &self,
        images: &[Vec<u8>],
        options: &OcrOptions,
    ) -> Result<Vec<OcrResult>, String> {
        if !self.loaded {
            return Err("Model not loaded".to_string());
        }
        images
            .iter()
            .map(|img| self.process(img, options))
            .collect()
    }

    /// Overwrites the stored batch size; no validation is performed.
    fn set_batch_size(&mut self, size: usize) {
        self.batch_size = size;
    }
}
#[test]
fn test_engine_creation() {
    // An `.onnx` model path is accepted by the constructor.
    let model = PathBuf::from("model.onnx");
    assert!(OcrEngine::new(model, "cpu").is_ok());
}
#[test]
fn test_engine_creation_invalid_model() {
    // A model file that is not `.onnx` must be rejected.
    let model = PathBuf::from("model.txt");
    assert!(OcrEngine::new(model, "cpu").is_err());
}
#[test]
fn test_engine_creation_cpu_device() {
    // The device label is stored as given.
    let created = OcrEngine::new(PathBuf::from("model.onnx"), "cpu");
    let engine = created.unwrap();
    assert_eq!(engine.device, "cpu");
}
#[test]
fn test_engine_creation_cuda_device() {
    // The device label is stored as given.
    let created = OcrEngine::new(PathBuf::from("model.onnx"), "cuda");
    let engine = created.unwrap();
    assert_eq!(engine.device, "cuda");
}
#[test]
fn test_model_loading() {
    let model = PathBuf::from("model.onnx");
    let mut engine = OcrEngine::new(model, "cpu").unwrap();
    // Freshly constructed engines start unloaded.
    assert!(!engine.is_loaded());
    let outcome = engine.load();
    assert!(outcome.is_ok());
    assert!(engine.is_loaded());
}
#[test]
fn test_model_loading_twice() {
    let model = PathBuf::from("model.onnx");
    let mut engine = OcrEngine::new(model, "cpu").unwrap();
    engine.load().unwrap();
    // A second load attempt is reported as an error.
    assert!(engine.load().is_err());
}
#[test]
fn test_ocr_options_default() {
    // Destructure so every field of the default value is checked.
    let OcrOptions {
        confidence_threshold,
        detect_rotation,
        preprocessing,
        language,
    } = OcrOptions::default();
    assert_eq!(confidence_threshold, 0.7);
    assert!(detect_rotation);
    assert!(preprocessing);
    assert_eq!(language, "en");
}
#[test]
fn test_ocr_options_custom() {
    // Explicitly supplied values are kept as-is.
    let custom = OcrOptions {
        language: String::from("math"),
        preprocessing: true,
        detect_rotation: false,
        confidence_threshold: 0.9,
    };
    assert_eq!(custom.confidence_threshold, 0.9);
    assert!(!custom.detect_rotation);
}
#[test]
fn test_process_without_loading() {
    let engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
    let options = OcrOptions::default();
    let image = [0u8; 100];
    // Processing before `load` must fail.
    assert!(engine.process(&image, &options).is_err());
}
#[test]
fn test_process_after_loading() {
    let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
    engine.load().unwrap();
    let options = OcrOptions::default();
    let image = [0u8; 100];
    // Once loaded, processing succeeds.
    assert!(engine.process(&image, &options).is_ok());
}
#[test]
fn test_process_result_structure() {
    let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
    engine.load().unwrap();
    let image = [0u8; 100];
    let outcome = engine.process(&image, &OcrOptions::default());
    let result = outcome.unwrap();
    // Every part of the result must be populated with a plausible value.
    assert!(!result.text.is_empty());
    assert!(result.confidence > 0.0 && result.confidence <= 1.0);
    assert!(!result.bounding_boxes.is_empty());
    assert!(result.processing_time_ms > 0);
}
#[test]
fn test_batch_processing() {
    let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
    engine.load().unwrap();
    // Three 100-byte images filled with 0, 1 and 2 respectively.
    let images: Vec<Vec<u8>> = (0u8..3).map(|fill| vec![fill; 100]).collect();
    let options = OcrOptions::default();
    let results = engine.process_batch(&images, &options).unwrap();
    assert_eq!(results.len(), 3);
}
#[test]
fn test_batch_processing_empty() {
    let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
    engine.load().unwrap();
    let no_images: Vec<Vec<u8>> = Vec::new();
    let options = OcrOptions::default();
    // An empty batch yields an empty result list.
    let results = engine.process_batch(&no_images, &options).unwrap();
    assert_eq!(results.len(), 0);
}
#[test]
fn test_batch_processing_single_image() {
    let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
    engine.load().unwrap();
    let single = vec![vec![0u8; 100]];
    let options = OcrOptions::default();
    // One image in, one result out.
    let results = engine.process_batch(&single, &options).unwrap();
    assert_eq!(results.len(), 1);
}
#[test]
fn test_batch_size_configuration() {
    let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
    // The constructor default is 4.
    assert_eq!(engine.batch_size, 4);
    // Each new size replaces the previous one.
    for size in [8usize, 16] {
        engine.set_batch_size(size);
        assert_eq!(engine.batch_size, size);
    }
}
#[test]
fn test_bounding_box_structure() {
    // Build a box and read every field back.
    let BoundingBox {
        x,
        y,
        width,
        height,
        confidence,
    } = BoundingBox {
        x: 10,
        y: 20,
        width: 100,
        height: 50,
        confidence: 0.95,
    };
    assert_eq!(x, 10);
    assert_eq!(y, 20);
    assert_eq!(width, 100);
    assert_eq!(height, 50);
    assert_eq!(confidence, 0.95);
}
#[test]
fn test_multiple_bounding_boxes() {
    // Both boxes share the same row (y = 20) and height (30).
    let make_box = |x, width, confidence| BoundingBox {
        x,
        y: 20,
        width,
        height: 30,
        confidence,
    };
    let boxes = vec![make_box(10, 50, 0.95), make_box(70, 60, 0.93)];
    assert_eq!(boxes.len(), 2);
    assert!(boxes.iter().all(|bbox| bbox.confidence > 0.9));
}
#[test]
fn test_options_language_variants() {
    // Any language tag can be set while the other fields keep their defaults.
    for lang in ["en", "math", "mixed", "es", "fr", "de"] {
        let options = OcrOptions {
            language: lang.to_owned(),
            ..OcrOptions::default()
        };
        assert_eq!(options.language, lang);
    }
}
#[test]
fn test_options_confidence_thresholds() {
    // Each threshold is stored unchanged.
    for threshold in [0.5f32, 0.7, 0.8, 0.9, 0.95] {
        let options = OcrOptions {
            confidence_threshold: threshold,
            ..OcrOptions::default()
        };
        assert_eq!(options.confidence_threshold, threshold);
    }
}
#[test]
fn test_options_preprocessing_toggle() {
    let mut opts = OcrOptions::default();
    // On by default, and can be switched off.
    assert!(opts.preprocessing);
    opts.preprocessing = false;
    assert!(!opts.preprocessing);
}
#[test]
fn test_options_rotation_detection_toggle() {
    let mut opts = OcrOptions::default();
    // On by default, and can be switched off.
    assert!(opts.detect_rotation);
    opts.detect_rotation = false;
    assert!(!opts.detect_rotation);
}
#[test]
fn test_engine_with_different_devices() {
    // Every device label is accepted and stored verbatim.
    for device in ["cpu", "cuda", "cuda:0", "cuda:1"] {
        let created = OcrEngine::new(PathBuf::from("model.onnx"), device);
        assert!(created.is_ok());
        let engine = created.unwrap();
        assert_eq!(engine.device, device);
    }
}
#[test]
fn test_ocr_result_cloning() {
    let original = OcrResult {
        text: String::from("test"),
        confidence: 0.95,
        bounding_boxes: Vec::new(),
        processing_time_ms: 100,
    };
    // The clone carries the same text and confidence.
    let copy = original.clone();
    assert_eq!(original.text, copy.text);
    assert_eq!(original.confidence, copy.confidence);
}
}

View File

@@ -0,0 +1,409 @@
// Output formatting tests for ruvector-scipix
//
// Tests output format conversion between LaTeX, MathML, AsciiMath, etc.
// and MMD delimiter handling, JSON serialization.
// Target: 85%+ coverage of output formatting module
#[cfg(test)]
mod output_tests {
use serde::{Deserialize, Serialize};
// Mock output format types
/// Output formats known to the mock converter.
///
/// With the derived serde impls each variant serializes as its name (e.g. "Latex").
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
enum OutputFormat {
/// LaTeX source.
Latex,
/// MathML markup.
MathML,
/// AsciiMath notation.
AsciiMath,
MMD, // Scipix Markdown
/// LaTeX commands replaced by Unicode symbols.
Unicode,
/// Plain-text rendering.
PlainText,
}
/// Converted content paired with its format and optional metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FormattedOutput {
/// Format that `content` is written in.
format: OutputFormat,
/// The formatted text itself.
content: String,
/// Optional statistics; `None` serializes as JSON `null`.
metadata: Option<OutputMetadata>,
}
/// Statistics attached to a `FormattedOutput`.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct OutputMetadata {
/// Confidence score (presumably in 0.0..=1.0 - confirm against the producer).
confidence: f32,
/// Processing time in milliseconds, per the field name.
processing_time_ms: u64,
/// Symbol count, per the field name.
num_symbols: usize,
}
/// Mock format converter.
///
/// Dispatches on the `(from, to)` pair to one of the mock helpers. Any pair
/// without a dedicated helper returns the input unchanged. The result is
/// always `Ok`.
fn convert_format(input: &str, from: OutputFormat, to: OutputFormat) -> Result<String, String> {
    let converted = match (from, to) {
        (OutputFormat::Latex, OutputFormat::MathML) => latex_to_mathml(input),
        (OutputFormat::Latex, OutputFormat::AsciiMath) => latex_to_asciimath(input),
        (OutputFormat::Latex, OutputFormat::Unicode) => latex_to_unicode(input),
        (OutputFormat::Latex, OutputFormat::PlainText) => latex_to_plaintext(input),
        (OutputFormat::MathML, OutputFormat::Latex) => mathml_to_latex(input),
        // Unsupported pairs pass the text through untouched.
        _ => input.to_string(),
    };
    Ok(converted)
}
/// Mock LaTeX to MathML conversion.
///
/// Input containing `\frac` is wrapped in `<mfrac>` with the command removed;
/// otherwise input containing `^` becomes an empty `<msup>` element; anything
/// else is wrapped in `<math>`.
fn latex_to_mathml(latex: &str) -> String {
    if latex.contains(r"\frac") {
        let operands = latex.replace(r"\frac", "");
        return format!("<mfrac>{}</mfrac>", operands);
    }
    if latex.contains('^') {
        return String::from("<msup></msup>");
    }
    format!("<math>{}</math>", latex)
}
/// Mock LaTeX to AsciiMath conversion.
///
/// `\frac{a}{b}` becomes `(a)/(b)`, and every remaining brace group becomes a
/// parenthesised group, so `x^{2}` yields `x^(2)`. `^` needs no translation
/// because AsciiMath uses the same operator.
fn latex_to_asciimath(latex: &str) -> String {
    latex
        .replace(r"\frac{", "(")
        .replace("}{", ")/(")
        .replace('}', ")")
        // Opening braces not consumed by `\frac{` would otherwise be left
        // behind, producing unbalanced output such as `x^{2)`.
        .replace('{', "(")
}
/// Mock LaTeX to Unicode conversion.
///
/// Replaces each supported command with its Unicode symbol and leaves all
/// other text untouched. Replacement is plain substring substitution, applied
/// in table order.
fn latex_to_unicode(latex: &str) -> String {
    // Command / symbol pairs. No command is a prefix of another, so the
    // order of application does not change the result.
    const SYMBOLS: [(&str, &str); 10] = [
        (r"\alpha", "α"),
        (r"\beta", "β"),
        (r"\gamma", "γ"),
        (r"\delta", "δ"),
        (r"\pi", "π"),
        (r"\sigma", "σ"),
        (r"\omega", "ω"),
        (r"\infty", "∞"),
        (r"\partial", "∂"),
        (r"\nabla", "∇"),
    ];
    SYMBOLS
        .iter()
        .fold(latex.to_string(), |text, (command, symbol)| {
            text.replace(command, symbol)
        })
}
/// Mock LaTeX to plain-text conversion.
///
/// `\frac{a}{b}` becomes `(a)/(b)`, remaining brace groups become
/// parenthesised groups, `^` becomes `**`, and `_` is dropped.
fn latex_to_plaintext(latex: &str) -> String {
    latex
        .replace(r"\frac{", "(")
        .replace("}{", ")/(")
        .replace('}', ")")
        // Opening braces not consumed by `\frac{` would otherwise be left
        // behind, producing unbalanced output such as `x**{2)`.
        .replace('{', "(")
        .replace('^', "**")
        .replace('_', "")
}
/// Mock MathML to LaTeX conversion.
///
/// Input containing an `<mfrac>` tag yields the fixed string `\frac{a}{b}`;
/// everything else yields `x`. The operands are not parsed.
fn mathml_to_latex(mathml: &str) -> String {
    let latex = match mathml.contains("<mfrac>") {
        true => r"\frac{a}{b}",
        false => "x",
    };
    String::from(latex)
}
/// Wrap LaTeX in math delimiters.
///
/// Inline math is wrapped in single `$`; display math is wrapped in `$$`,
/// with the content on its own line.
fn apply_mmd_delimiters(latex: &str, inline: bool) -> String {
    if !inline {
        return format!("$$\n{}\n$$", latex);
    }
    format!("${}$", latex)
}
#[test]
fn test_format_conversion_latex_to_mathml() {
    // A fraction must come back containing an <mfrac> element.
    let converted = convert_format(r"\frac{1}{2}", OutputFormat::Latex, OutputFormat::MathML);
    let mathml = converted.unwrap();
    assert!(mathml.contains("<mfrac>"));
}
#[test]
fn test_format_conversion_latex_to_asciimath() {
    // The superscript survives the conversion.
    let converted = convert_format(r"x^2 + 1", OutputFormat::Latex, OutputFormat::AsciiMath);
    let ascii = converted.unwrap();
    assert!(ascii.contains("x^2"));
}
#[test]
fn test_format_conversion_latex_to_unicode() {
    // Greek-letter commands become the matching Unicode letters.
    let converted = convert_format(r"\alpha + \beta", OutputFormat::Latex, OutputFormat::Unicode);
    let unicode = converted.unwrap();
    assert!(unicode.contains("α"));
    assert!(unicode.contains("β"));
}
#[test]
fn test_format_conversion_latex_to_plaintext() {
    // Either the parenthesised or the bare fraction form is acceptable.
    let converted = convert_format(r"\frac{a}{b}", OutputFormat::Latex, OutputFormat::PlainText);
    let text = converted.unwrap();
    let parenthesised = text.contains("(a)/(b)");
    assert!(parenthesised || text.contains("a/b"));
}
#[test]
fn test_format_conversion_mathml_to_latex() {
    // A MathML fraction converts to either `\frac` or a slash form.
    let source = "<mfrac><mn>1</mn><mn>2</mn></mfrac>";
    let latex = convert_format(source, OutputFormat::MathML, OutputFormat::Latex).unwrap();
    let has_frac = latex.contains(r"\frac");
    assert!(has_frac || latex.contains("/"));
}
#[test]
fn test_mmd_delimiter_inline() {
    // Inline math is wrapped in single dollar signs.
    let wrapped = apply_mmd_delimiters("x^2", true);
    assert_eq!(wrapped, "$x^2$");
}
#[test]
fn test_mmd_delimiter_display() {
    // Display math is fenced by `$$` on both sides and keeps the content intact.
    let source = r"\int_0^1 x dx";
    let block = apply_mmd_delimiters(source, false);
    assert!(block.starts_with("$$"));
    assert!(block.ends_with("$$"));
    assert!(block.contains(source));
}
#[test]
fn test_mmd_delimiter_multiple_inline() {
    // Every inline equation gets a leading and a trailing `$`.
    for equation in ["x + 1", "y - 2", "z * 3"] {
        let wrapped = apply_mmd_delimiters(equation, true);
        assert!(wrapped.starts_with("$"));
        assert!(wrapped.ends_with("$"));
    }
}
#[test]
fn test_json_serialization_formatted_output() {
    let metadata = OutputMetadata {
        confidence: 0.95,
        processing_time_ms: 123,
        num_symbols: 5,
    };
    let output = FormattedOutput {
        format: OutputFormat::Latex,
        content: String::from(r"\frac{1}{2}"),
        metadata: Some(metadata),
    };
    // The format name, the content and the confidence all appear in the JSON text.
    let json = serde_json::to_string(&output).unwrap();
    assert!(json.contains("Latex"));
    assert!(json.contains(r"\frac"));
    assert!(json.contains("0.95"));
}
#[test]
fn test_json_deserialization_formatted_output() {
    // The literal below is reproduced exactly as it appears in the file.
    let payload = r#"{
"format": "Latex",
"content": "x^2 + 1",
"metadata": {
"confidence": 0.92,
"processing_time_ms": 87,
"num_symbols": 4
}
}"#;
    let parsed: FormattedOutput = serde_json::from_str(payload).unwrap();
    assert_eq!(parsed.format, OutputFormat::Latex);
    assert_eq!(parsed.content, "x^2 + 1");
    assert!(parsed.metadata.is_some());
}
#[test]
fn test_json_serialization_all_formats() {
    let formats = [
        OutputFormat::Latex,
        OutputFormat::MathML,
        OutputFormat::AsciiMath,
        OutputFormat::MMD,
        OutputFormat::Unicode,
        OutputFormat::PlainText,
    ];
    // Every variant must serialize to non-empty JSON.
    for format in formats.iter() {
        let output = FormattedOutput {
            format: format.clone(),
            content: String::from("test"),
            metadata: None,
        };
        let json = serde_json::to_string(&output).unwrap();
        assert!(!json.is_empty());
    }
}
#[test]
fn test_scipix_api_compatibility_response() {
    // Local response type used only by this test.
    #[derive(Serialize, Deserialize)]
    struct ScipixResponse {
        latex: String,
        mathml: Option<String>,
        text: String,
        confidence: f32,
        #[serde(rename = "confidence_rate")]
        confidence_rate: f32,
    }
    let mathml = String::from("<mfrac><mn>1</mn><mn>2</mn></mfrac>");
    let response = ScipixResponse {
        latex: String::from(r"\frac{1}{2}"),
        mathml: Some(mathml),
        text: String::from("1/2"),
        confidence: 0.95,
        confidence_rate: 0.93,
    };
    // The expected keys appear in the serialized JSON.
    let json = serde_json::to_string(&response).unwrap();
    assert!(json.contains("latex"));
    assert!(json.contains("confidence_rate"));
}
#[test]
fn test_scipix_api_compatibility_request() {
    // Local request type used only by this test; `ocr_types` is sent as "ocr".
    #[derive(Serialize, Deserialize)]
    struct ScipixRequest {
        src: String,
        formats: Vec<String>,
        #[serde(rename = "ocr")]
        ocr_types: Vec<String>,
    }
    let formats = vec![String::from("latex"), String::from("mathml")];
    let ocr_types = vec![String::from("math"), String::from("text")];
    let request = ScipixRequest {
        src: String::from("data:image/png;base64,iVBORw0KGgo..."),
        formats,
        ocr_types,
    };
    let json = serde_json::to_string(&request).unwrap();
    assert!(json.contains("src"));
    assert!(json.contains("formats"));
    assert!(json.contains("ocr"));
}
#[test]
fn test_unicode_symbol_conversion() {
    // Each LaTeX command must map to its Unicode symbol. The expected symbols
    // for `\infty`, `\partial` and `\nabla` are spelled out explicitly: an
    // empty expectation would make `contains` trivially true.
    let conversions = [
        (r"\alpha", "α"),
        (r"\beta", "β"),
        (r"\gamma", "γ"),
        (r"\delta", "δ"),
        (r"\pi", "π"),
        (r"\sigma", "σ"),
        (r"\omega", "ω"),
        (r"\infty", "∞"),
        (r"\partial", "∂"),
        (r"\nabla", "∇"),
    ];
    for (latex, expected_unicode) in conversions {
        let unicode = latex_to_unicode(latex);
        assert!(
            unicode.contains(expected_unicode),
            "Failed to convert {} to {}",
            latex,
            expected_unicode
        );
    }
}
#[test]
fn test_output_format_enumeration() {
    // One entry per variant: six formats in total.
    let all_formats = [
        OutputFormat::Latex,
        OutputFormat::MathML,
        OutputFormat::AsciiMath,
        OutputFormat::MMD,
        OutputFormat::Unicode,
        OutputFormat::PlainText,
    ];
    assert_eq!(all_formats.len(), 6);
}
#[test]
fn test_formatted_output_with_no_metadata() {
    let bare = FormattedOutput {
        format: OutputFormat::Latex,
        content: String::from("x + y"),
        metadata: None,
    };
    assert!(bare.metadata.is_none());
    // A missing metadata block is serialized as JSON `null`.
    let json = serde_json::to_string(&bare).unwrap();
    assert!(json.contains("null"));
}
#[test]
fn test_formatted_output_cloning() {
    let original = FormattedOutput {
        format: OutputFormat::Latex,
        content: String::from("test"),
        metadata: None,
    };
    // The clone carries the same format and content.
    let copy = original.clone();
    assert_eq!(original.format, copy.format);
    assert_eq!(original.content, copy.content);
}
#[test]
fn test_multiple_format_conversions_chain() {
    let latex = r"\frac{1}{2}";
    // Latex -> MathML
    let mathml = convert_format(latex, OutputFormat::Latex, OutputFormat::MathML).unwrap();
    assert!(mathml.contains("<mfrac>"));
    // MathML -> Latex. The fraction must survive the round trip. The earlier
    // `contains(..) || !is_empty()` form accepted any non-empty string.
    let back_to_latex = convert_format(&mathml, OutputFormat::MathML, OutputFormat::Latex).unwrap();
    assert!(back_to_latex.contains(r"\frac"));
}
#[test]
fn test_special_latex_commands_preservation() {
    // Storing LaTeX in a `FormattedOutput` must not alter it.
    let commands = [
        r"\sum_{i=1}^{n}",
        r"\int_0^1",
        r"\prod_{k=1}^{m}",
        r"\lim_{x \to \infty}",
    ];
    for latex in commands {
        let output = FormattedOutput {
            format: OutputFormat::Latex,
            content: latex.to_owned(),
            metadata: None,
        };
        assert_eq!(output.content, latex);
    }
}
#[test]
fn test_output_with_confidence_metadata() {
    let stats = OutputMetadata {
        confidence: 0.98,
        processing_time_ms: 45,
        num_symbols: 3,
    };
    let output = FormattedOutput {
        format: OutputFormat::Latex,
        content: String::from(r"x^2"),
        metadata: Some(stats),
    };
    // The metadata is read back exactly as stored.
    let OutputMetadata {
        confidence,
        processing_time_ms,
        num_symbols,
    } = output.metadata.unwrap();
    assert_eq!(confidence, 0.98);
    assert_eq!(processing_time_ms, 45);
    assert_eq!(num_symbols, 3);
}
}

View File

@@ -0,0 +1,377 @@
// Preprocessing tests for ruvector-scipix
//
// Tests image preprocessing functions including grayscale conversion,
// Gaussian blur, Otsu thresholding, rotation detection, deskewing,
// CLAHE enhancement, and pipeline chaining.
// Target: 90%+ coverage of preprocessing module
#[cfg(test)]
mod preprocess_tests {
use std::f32::consts::PI;
// Mock image structures for testing
/// Minimal 8-bit grayscale image used by the preprocessing tests.
#[derive(Debug, Clone, PartialEq)]
struct GrayImage {
    /// Number of pixels per row.
    width: u32,
    /// Number of rows.
    height: u32,
    /// Row-major pixel values: pixel `(x, y)` lives at `y * width + x`.
    data: Vec<u8>,
}

impl GrayImage {
    /// Creates a `width` x `height` image with every pixel set to 0.
    fn new(width: u32, height: u32) -> Self {
        Self {
            width,
            height,
            data: vec![0; Self::pixel_count(width, height)],
        }
    }

    /// Creates an image whose pixel `(x, y)` is `f(x, y)`.
    ///
    /// `f` is called row by row, left to right.
    fn from_fn<F>(width: u32, height: u32, f: F) -> Self
    where
        F: Fn(u32, u32) -> u8,
    {
        let mut data = Vec::with_capacity(Self::pixel_count(width, height));
        for y in 0..height {
            for x in 0..width {
                data.push(f(x, y));
            }
        }
        Self {
            width,
            height,
            data,
        }
    }

    /// Returns the pixel at `(x, y)`.
    ///
    /// # Panics
    /// Panics if `x >= width` or `y >= height`. Without the explicit check an
    /// out-of-range `x` would silently read from the following row.
    fn get_pixel(&self, x: u32, y: u32) -> u8 {
        assert!(
            x < self.width && y < self.height,
            "pixel ({}, {}) is outside a {}x{} image",
            x,
            y,
            self.width,
            self.height
        );
        self.data[y as usize * self.width as usize + x as usize]
    }

    /// Number of pixels in a `width` x `height` image.
    ///
    /// Each factor is widened to `usize` before multiplying; a `u32` product
    /// would overflow for large dimensions.
    fn pixel_count(width: u32, height: u32) -> usize {
        width as usize * height as usize
    }
}
// Mock preprocessing functions
/// Convert an RGB triple to a single luma value.
///
/// Uses the weights 0.299 (R), 0.587 (G) and 0.114 (B) and rounds to the
/// nearest integer. A plain truncating cast would bias every result downward
/// (pure green would give 149 instead of 150).
fn to_grayscale(rgb: &[u8; 3]) -> u8 {
    let [r, g, b] = *rgb;
    let luma = 0.299 * f32::from(r) + 0.587 * f32::from(g) + 0.114 * f32::from(b);
    // The float-to-int cast saturates, so a value a hair above 255.0 still maps to 255.
    luma.round() as u8
}
/// Mock Gaussian blur: returns an unmodified copy of `image`.
fn gaussian_blur(image: &GrayImage, sigma: f32) -> GrayImage {
    // `sigma` is accepted for signature parity only; the mock does no filtering.
    let _ = sigma;
    image.clone()
}
/// Mock Otsu threshold: returns the mean pixel value, rounded down.
///
/// An image without pixels yields 0 instead of dividing by zero.
fn otsu_threshold(image: &GrayImage) -> u8 {
    let pixels = &image.data;
    if pixels.is_empty() {
        return 0;
    }
    // Accumulate in u64: a u32 sum overflows once the image holds a little
    // over 16.8 million full-white pixels.
    let total: u64 = pixels.iter().map(|&value| u64::from(value)).sum();
    // The mean of u8 samples always fits in a u8.
    (total / pixels.len() as u64) as u8
}
/// Binarize `image`: pixels strictly above `threshold` become 255, all others 0.
fn apply_threshold(image: &GrayImage, threshold: u8) -> GrayImage {
    let binarize = |x: u32, y: u32| -> u8 {
        match image.get_pixel(x, y) > threshold {
            true => 255,
            false => 0,
        }
    };
    GrayImage::from_fn(image.width, image.height, binarize)
}
/// Mock rotation detection: always reports 0.0.
fn detect_rotation_angle(image: &GrayImage) -> f32 {
    // The mock never inspects the image.
    let _ = image;
    0.0
}
/// Mock skew estimation: always reports a fixed small angle of 2.5.
fn deskew_angle(image: &GrayImage) -> f32 {
    // The mock never inspects the image. The value is a constant, not random.
    let _ = image;
    2.5
}
/// Mock CLAHE: multiplies every pixel by 1.2, capped at 255.
fn apply_clahe(image: &GrayImage, clip_limit: f32) -> GrayImage {
    // `clip_limit` is accepted for signature parity only; the gain is fixed.
    let _ = clip_limit;
    GrayImage::from_fn(image.width, image.height, |x, y| {
        let pixel = image.get_pixel(x, y);
        (f32::from(pixel) * 1.2).min(255.0) as u8
    })
}
#[test]
fn test_grayscale_conversion_white() {
    // Full white maps to the maximum luma.
    let luma = to_grayscale(&[255u8, 255, 255]);
    assert_eq!(luma, 255);
}
#[test]
fn test_grayscale_conversion_black() {
    // Full black maps to zero luma.
    let luma = to_grayscale(&[0u8, 0, 0]);
    assert_eq!(luma, 0);
}
#[test]
fn test_grayscale_conversion_red() {
    let luma = to_grayscale(&[255u8, 0, 0]);
    // 0.299 * 255 is about 76; allow a small tolerance band.
    assert!((70..=80).contains(&luma));
}
#[test]
fn test_grayscale_conversion_green() {
    let luma = to_grayscale(&[0u8, 255, 0]);
    // 0.587 * 255 is about 150; allow a small tolerance band.
    assert!((145..=155).contains(&luma));
}
#[test]
fn test_grayscale_conversion_blue() {
    let luma = to_grayscale(&[0u8, 0, 255]);
    // 0.114 * 255 is about 29; allow a small tolerance band.
    assert!((25..=35).contains(&luma));
}
#[test]
fn test_gaussian_blur_preserves_dimensions() {
    // Blurring must not resize the image.
    let source = GrayImage::new(100, 100);
    let output = gaussian_blur(&source, 1.0);
    assert_eq!(output.width, 100);
    assert_eq!(output.height, 100);
}
#[test]
fn test_gaussian_blur_multiple_sigmas() {
    let source = GrayImage::new(50, 50);
    // The output size is independent of the blur strength.
    for sigma in [0.5f32, 1.0, 1.5, 2.0, 3.0] {
        let output = gaussian_blur(&source, sigma);
        assert_eq!(output.width, source.width);
        assert_eq!(output.height, source.height);
    }
}
#[test]
fn test_otsu_thresholding_uniform_image() {
    // A flat image yields its single gray level as the threshold.
    let flat = GrayImage::from_fn(50, 50, |_, _| 128);
    assert_eq!(otsu_threshold(&flat), 128);
}
#[test]
fn test_otsu_thresholding_bimodal_image() {
    // Checkerboard of two distinct gray levels.
    let checkerboard = GrayImage::from_fn(100, 100, |x, y| match (x + y) % 2 {
        0 => 50,
        _ => 200,
    });
    let threshold = otsu_threshold(&checkerboard);
    // The threshold must fall strictly between the two levels.
    assert!(50 < threshold && threshold < 200);
}
#[test]
fn test_apply_threshold_creates_binary_image() {
    let gradient = GrayImage::from_fn(50, 50, |x, y| ((x + y) % 256) as u8);
    let binary = apply_threshold(&gradient, 128);
    // Only the two extreme values may remain after thresholding.
    for &pixel in &binary.data {
        assert!(pixel == 0 || pixel == 255);
    }
}
#[test]
fn test_apply_threshold_low_threshold() {
    let flat = GrayImage::from_fn(50, 50, |_, _| 100);
    let binary = apply_threshold(&flat, 50);
    // Every pixel is above the threshold, so all become 255.
    assert!(binary.data.iter().all(|&value| value == 255));
}
#[test]
fn test_apply_threshold_high_threshold() {
    let flat = GrayImage::from_fn(50, 50, |_, _| 100);
    let binary = apply_threshold(&flat, 150);
    // Every pixel is below the threshold, so all become 0.
    assert!(binary.data.iter().all(|&value| value == 0));
}
#[test]
fn test_rotation_detection_zero() {
    let blank = GrayImage::new(100, 100);
    // The detected angle must be within one unit of zero.
    let deviation = detect_rotation_angle(&blank) - 0.0;
    assert!(deviation.abs() < 1.0);
}
#[test]
fn test_rotation_detection_90_degrees() {
    // Horizontal gradient as input.
    let gradient = GrayImage::from_fn(100, 100, |x, _| x as u8);
    let angle = detect_rotation_angle(&gradient);
    // In real implementation, should detect 0, 90, 180, or 270
    assert!((-180.0..=180.0).contains(&angle));
}
#[test]
fn test_rotation_detection_180_degrees() {
    // Diagonal gradient as input; only the angle range is checked.
    let gradient = GrayImage::from_fn(100, 100, |x, y| ((x + y) % 256) as u8);
    let angle = detect_rotation_angle(&gradient);
    assert!((-180.0..=180.0).contains(&angle));
}
#[test]
fn test_rotation_detection_270_degrees() {
    // Blank input; only the angle range is checked.
    let blank = GrayImage::new(100, 100);
    let angle = detect_rotation_angle(&blank);
    assert!((-180.0..=180.0).contains(&angle));
}
#[test]
fn test_deskew_angle_detection() {
    let blank = GrayImage::new(100, 100);
    // Skew angle should typically be small (< 45 degrees)
    let skew = deskew_angle(&blank);
    assert!(skew.abs() < 45.0);
}
#[test]
fn test_deskew_angle_horizontal_lines() {
    // White horizontal line on every tenth row, black elsewhere.
    let ruled = GrayImage::from_fn(100, 100, |_, y| match y % 10 {
        0 => 255,
        _ => 0,
    });
    let skew = deskew_angle(&ruled);
    // Should detect minimal skew for horizontal lines
    assert!(skew.abs() < 5.0);
}
#[test]
fn test_clahe_enhancement() {
    let source = GrayImage::from_fn(100, 100, |x, y| ((x + y) % 128) as u8);
    // Enhancement keeps the image size.
    let output = apply_clahe(&source, 2.0);
    assert_eq!(output.width, source.width);
    assert_eq!(output.height, source.height);
}
#[test]
fn test_clahe_increases_contrast() {
    // Narrow-band input: values 100..=119 only.
    let source = GrayImage::from_fn(50, 50, |x, _| (100 + x % 20) as u8);
    let output = apply_clahe(&source, 2.0);
    // Use the min-to-max spread as a simple contrast measure.
    let before = calculate_range(&source);
    let after = calculate_range(&output);
    // Enhanced image should have equal or greater range
    assert!(after >= before);
}
#[test]
fn test_clahe_preserves_dimensions() {
    let source = GrayImage::new(256, 256);
    let output = apply_clahe(&source, 2.0);
    // Width and height are unchanged.
    assert_eq!(output.width, 256);
    assert_eq!(output.height, 256);
}
#[test]
fn test_clahe_different_clip_limits() {
    let source = GrayImage::from_fn(50, 50, |x, y| ((x + y) % 256) as u8);
    // The output size does not depend on the clip limit.
    for limit in [1.0f32, 2.0, 3.0, 4.0] {
        let output = apply_clahe(&source, limit);
        assert_eq!(output.width, source.width);
        assert_eq!(output.height, source.height);
    }
}
#[test]
fn test_pipeline_chaining_blur_then_threshold() {
    let source = GrayImage::from_fn(100, 100, |x, y| ((x + y) % 256) as u8);
    // Blur first, then binarize at the computed threshold.
    let blurred = gaussian_blur(&source, 1.0);
    let binary = apply_threshold(&blurred, otsu_threshold(&blurred));
    // Verify final result is binary
    assert!(binary.data.iter().all(|&value| value == 0 || value == 255));
}
#[test]
fn test_pipeline_chaining_enhance_then_threshold() {
    let source = GrayImage::from_fn(100, 100, |x, y| ((x + y) % 128) as u8);
    // Enhance first, then binarize at the computed threshold.
    let enhanced = apply_clahe(&source, 2.0);
    let binary = apply_threshold(&enhanced, otsu_threshold(&enhanced));
    assert!(binary.data.iter().all(|&value| value == 0 || value == 255));
}
#[test]
fn test_pipeline_full_preprocessing() {
    let source = GrayImage::from_fn(100, 100, |x, y| ((x + y) % 256) as u8);
    // Full pipeline: blur -> enhance -> threshold
    let enhanced = apply_clahe(&gaussian_blur(&source, 1.0), 2.0);
    let cutoff = otsu_threshold(&enhanced);
    let binary = apply_threshold(&enhanced, cutoff);
    // Same size as the input, and strictly two-valued.
    assert_eq!(binary.width, source.width);
    assert_eq!(binary.height, source.height);
    assert!(binary.data.iter().all(|&value| value == 0 || value == 255));
}
#[test]
fn test_pipeline_preserves_dimensions_throughout() {
    let expected = (200, 150);
    let source = GrayImage::new(200, 150);
    // Check the size after every stage, not only at the end.
    let blurred = gaussian_blur(&source, 1.5);
    assert_eq!((blurred.width, blurred.height), expected);
    let enhanced = apply_clahe(&blurred, 2.0);
    assert_eq!((enhanced.width, enhanced.height), expected);
    let binary = apply_threshold(&enhanced, 128);
    assert_eq!((binary.width, binary.height), expected);
}
// Helper functions
/// Difference between the brightest and darkest pixel of `image`.
///
/// An image without pixels has a range of 0. Separate fallbacks for the
/// minimum and maximum would report a spurious 255 in that case.
fn calculate_range(image: &GrayImage) -> u8 {
    let darkest = image.data.iter().copied().min();
    let brightest = image.data.iter().copied().max();
    match (darkest, brightest) {
        // max >= min, so the subtraction cannot underflow.
        (Some(lo), Some(hi)) => hi - lo,
        _ => 0,
    }
}
#[test]
fn test_edge_case_empty_like_image() {
    // Smallest non-empty image: a single pixel.
    let GrayImage { width, height, .. } = GrayImage::new(1, 1);
    assert_eq!(width, 1);
    assert_eq!(height, 1);
}
#[test]
fn test_edge_case_large_image_dimensions() {
    // A 4096 x 4096 image can be constructed and reports its size.
    let GrayImage { width, height, .. } = GrayImage::new(4096, 4096);
    assert_eq!(width, 4096);
    assert_eq!(height, 4096);
}
}