Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
145
examples/scipix/tests/common/images.rs
Normal file
145
examples/scipix/tests/common/images.rs
Normal file
@@ -0,0 +1,145 @@
|
||||
// Image generation utilities for testing
|
||||
//
|
||||
// Provides functions to generate test images with equations
|
||||
|
||||
use ab_glyph::{FontRef, PxScale};
|
||||
use image::{DynamicImage, Rgba, RgbaImage};
|
||||
use imageproc::drawing::{draw_filled_rect_mut, draw_text_mut};
|
||||
use imageproc::rect::Rect;
|
||||
use rand::Rng;
|
||||
|
||||
// Embedded font data
|
||||
const FONT_DATA: &[u8] = include_bytes!("../../assets/fonts/DejaVuSans.ttf");
|
||||
|
||||
fn get_font() -> FontRef<'static> {
|
||||
FontRef::try_from_slice(FONT_DATA).expect("Error loading embedded font")
|
||||
}
|
||||
|
||||
/// Generate a simple equation image
|
||||
pub fn generate_simple_equation(equation: &str) -> DynamicImage {
|
||||
let width = 400;
|
||||
let height = 100;
|
||||
|
||||
// Create white background
|
||||
let mut image = RgbaImage::from_pixel(width, height, Rgba([255, 255, 255, 255]));
|
||||
|
||||
let font = get_font();
|
||||
let scale = PxScale::from(32.0);
|
||||
let color = Rgba([0, 0, 0, 255]);
|
||||
|
||||
// Draw text
|
||||
draw_text_mut(&mut image, color, 20, 30, scale, &font, equation);
|
||||
|
||||
DynamicImage::ImageRgba8(image)
|
||||
}
|
||||
|
||||
/// Generate a fraction image
|
||||
pub fn generate_fraction(numerator: i32, denominator: i32) -> DynamicImage {
|
||||
let width = 200;
|
||||
let height = 150;
|
||||
|
||||
let mut image = RgbaImage::from_pixel(width, height, Rgba([255, 255, 255, 255]));
|
||||
|
||||
let font = get_font();
|
||||
let scale = PxScale::from(28.0);
|
||||
let color = Rgba([0, 0, 0, 255]);
|
||||
|
||||
// Draw numerator
|
||||
draw_text_mut(
|
||||
&mut image,
|
||||
color,
|
||||
85,
|
||||
30,
|
||||
scale,
|
||||
&font,
|
||||
&numerator.to_string(),
|
||||
);
|
||||
|
||||
// Draw fraction line
|
||||
draw_filled_rect_mut(&mut image, Rect::at(70, 65).of_size(60, 2), color);
|
||||
|
||||
// Draw denominator
|
||||
draw_text_mut(
|
||||
&mut image,
|
||||
color,
|
||||
80,
|
||||
75,
|
||||
scale,
|
||||
&font,
|
||||
&denominator.to_string(),
|
||||
);
|
||||
|
||||
DynamicImage::ImageRgba8(image)
|
||||
}
|
||||
|
||||
/// Generate an integral image
|
||||
pub fn generate_integral(integrand: &str) -> DynamicImage {
|
||||
let equation = format!(r"\int {}", integrand);
|
||||
generate_simple_equation(&equation)
|
||||
}
|
||||
|
||||
/// Generate a symbol image
|
||||
pub fn generate_symbol(symbol: &str) -> DynamicImage {
|
||||
generate_simple_equation(symbol)
|
||||
}
|
||||
|
||||
/// Generate a blank image
|
||||
pub fn generate_blank(width: u32, height: u32) -> DynamicImage {
|
||||
let image = RgbaImage::from_pixel(width, height, Rgba([255, 255, 255, 255]));
|
||||
DynamicImage::ImageRgba8(image)
|
||||
}
|
||||
|
||||
/// Generate a complex equation
|
||||
pub fn generate_complex_equation() -> DynamicImage {
|
||||
let equation = r"\sum_{i=1}^{n} i^2 = \frac{n(n+1)(2n+1)}{6}";
|
||||
generate_simple_equation(equation)
|
||||
}
|
||||
|
||||
/// Add noise to an image
|
||||
pub fn add_noise(image: &mut DynamicImage, intensity: f32) {
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let rgba = image.as_mut_rgba8().unwrap();
|
||||
|
||||
for pixel in rgba.pixels_mut() {
|
||||
for channel in 0..3 {
|
||||
let noise = rng.gen_range(-intensity..intensity) * 255.0;
|
||||
let new_value = (pixel[channel] as f32 + noise).clamp(0.0, 255.0) as u8;
|
||||
pixel[channel] = new_value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Add slight variation to an image
|
||||
pub fn add_slight_variation(image: &mut DynamicImage, amount: f32) {
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let rgba = image.as_mut_rgba8().unwrap();
|
||||
|
||||
for pixel in rgba.pixels_mut() {
|
||||
for channel in 0..3 {
|
||||
let variation = rng.gen_range(-amount..amount) * 255.0;
|
||||
let new_value = (pixel[channel] as f32 + variation).clamp(0.0, 255.0) as u8;
|
||||
pixel[channel] = new_value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a matrix image
|
||||
pub fn generate_matrix(rows: usize, cols: usize) -> DynamicImage {
|
||||
let mut elements = String::new();
|
||||
for i in 0..rows {
|
||||
for j in 0..cols {
|
||||
elements.push_str(&format!("{} ", i * cols + j + 1));
|
||||
if j < cols - 1 {
|
||||
elements.push_str("& ");
|
||||
}
|
||||
}
|
||||
if i < rows - 1 {
|
||||
elements.push_str(r" \\ ");
|
||||
}
|
||||
}
|
||||
|
||||
let equation = format!(r"\begin{{bmatrix}} {} \end{{bmatrix}}", elements);
|
||||
generate_simple_equation(&equation)
|
||||
}
|
||||
230
examples/scipix/tests/common/latex.rs
Normal file
230
examples/scipix/tests/common/latex.rs
Normal file
@@ -0,0 +1,230 @@
|
||||
// LaTeX comparison and manipulation utilities
|
||||
//
|
||||
// Provides functions to normalize, compare, and analyze LaTeX strings
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Normalize LaTeX string for comparison
|
||||
/// Normalize a LaTeX string for comparison: drop all whitespace and
/// lowercase the rest.
pub fn normalize(latex: &str) -> String {
    // `split_whitespace` + collect concatenates the non-whitespace
    // runs, which is equivalent to filtering out whitespace chars.
    let compact: String = latex.split_whitespace().collect();
    compact.to_lowercase()
}
|
||||
|
||||
/// Check if two LaTeX expressions match semantically
|
||||
pub fn expressions_match(a: &str, b: &str) -> bool {
|
||||
let norm_a = normalize(a);
|
||||
let norm_b = normalize(b);
|
||||
|
||||
// Direct match
|
||||
if norm_a == norm_b {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Try alternative representations
|
||||
// e.g., \frac{1}{2} vs 0.5, x^{2} vs x^2, etc.
|
||||
|
||||
// For now, use normalized comparison
|
||||
norm_a == norm_b
|
||||
}
|
||||
|
||||
/// Calculate similarity between two LaTeX strings (0.0 to 1.0)
|
||||
pub fn calculate_similarity(a: &str, b: &str) -> f64 {
|
||||
let norm_a = normalize(a);
|
||||
let norm_b = normalize(b);
|
||||
|
||||
// Use Levenshtein distance ratio
|
||||
let distance = levenshtein_distance(&norm_a, &norm_b);
|
||||
let max_len = norm_a.len().max(norm_b.len()) as f64;
|
||||
|
||||
if max_len == 0.0 {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
1.0 - (distance as f64 / max_len)
|
||||
}
|
||||
|
||||
/// Calculate Levenshtein distance between two strings
|
||||
/// Levenshtein edit distance between two strings, measured in Unicode
/// scalar values.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    let a_chars: Vec<char> = a.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();

    if a_chars.is_empty() {
        return b_chars.len();
    }
    if b_chars.is_empty() {
        return a_chars.len();
    }

    // Rolling-row dynamic program: `prev[j]` is the distance between
    // the first i chars of `a` and the first j chars of `b`.
    let mut prev: Vec<usize> = (0..=b_chars.len()).collect();
    for (i, &ca) in a_chars.iter().enumerate() {
        let mut current = Vec::with_capacity(b_chars.len() + 1);
        current.push(i + 1);
        for (j, &cb) in b_chars.iter().enumerate() {
            let substitution = prev[j] + usize::from(ca != cb);
            let deletion = prev[j + 1] + 1;
            let insertion = current[j] + 1;
            current.push(substitution.min(deletion).min(insertion));
        }
        prev = current;
    }

    prev[b_chars.len()]
}
|
||||
|
||||
/// Extract LaTeX commands from string
|
||||
/// Extract the set of LaTeX commands (backslash followed by one or
/// more alphabetic characters) occurring in `latex`.
///
/// A lone backslash with no alphabetic tail is not reported.
pub fn extract_commands(latex: &str) -> HashSet<String> {
    let mut found = HashSet::new();
    let mut cursor = latex.chars().peekable();

    while let Some(c) = cursor.next() {
        if c != '\\' {
            continue;
        }
        // Consume the alphabetic run following the backslash.
        let mut name = String::from("\\");
        while matches!(cursor.peek(), Some(next) if next.is_alphabetic()) {
            name.push(cursor.next().unwrap());
        }
        if name.len() > 1 {
            found.insert(name);
        }
    }

    found
}
|
||||
|
||||
/// Count LaTeX elements (fractions, superscripts, etc.)
|
||||
/// Count notable LaTeX elements (fractions, integrals, etc.) in `latex`.
///
/// Superscript and subscript counts are raw `^`/`_` occurrences;
/// `matrices` is a presence flag (0 or 1) for a `matrix`/`bmatrix`
/// environment, not an occurrence count.
pub fn count_elements(latex: &str) -> ElementCounts {
    // `matches(..).count()` is already 0 when the pattern is absent,
    // so the original `contains` pre-checks were redundant.
    ElementCounts {
        fractions: latex.matches(r"\frac").count(),
        integrals: latex.matches(r"\int").count(),
        sums: latex.matches(r"\sum").count(),
        superscripts: latex.matches('^').count(),
        subscripts: latex.matches('_').count(),
        matrices: usize::from(
            latex.contains(r"\begin{matrix}") || latex.contains(r"\begin{bmatrix}"),
        ),
    }
}

/// Per-kind tallies produced by `count_elements`.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct ElementCounts {
    pub fractions: usize,
    pub integrals: usize,
    pub sums: usize,
    pub superscripts: usize,
    pub subscripts: usize,
    pub matrices: usize,
}
|
||||
|
||||
/// Validate LaTeX syntax (basic check)
|
||||
/// Basic LaTeX syntax validation: verifies that `{}` and `[]` pairs
/// are balanced. Returns a descriptive error for the first violation.
pub fn validate_syntax(latex: &str) -> Result<(), String> {
    // depth[0] tracks `{}` nesting, depth[1] tracks `[]` nesting.
    let mut depth = [0i32, 0i32];

    for c in latex.chars() {
        let (which, delta): (usize, i32) = match c {
            '{' => (0, 1),
            '}' => (0, -1),
            '[' => (1, 1),
            ']' => (1, -1),
            _ => continue,
        };
        depth[which] += delta;
        if depth[which] < 0 {
            let kind = if which == 0 { "brace" } else { "bracket" };
            return Err(format!("Unmatched closing {}", kind));
        }
    }

    if depth[0] != 0 {
        return Err(format!("Unmatched braces: {} unclosed", depth[0]));
    }
    if depth[1] != 0 {
        return Err(format!("Unmatched brackets: {} unclosed", depth[1]));
    }
    Ok(())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_normalize() {
        let cases = [("x + y", "x+y"), (" a b ", "ab"), (r"\frac{1}{2}", r"\frac{1}{2}")];
        for (input, want) in cases {
            assert_eq!(normalize(input), want);
        }
    }

    #[test]
    fn test_expressions_match() {
        assert!(expressions_match("x+y", "x + y"));
        assert!(expressions_match(r"\frac{1}{2}", r"\frac{1}{2}"));
        assert!(!expressions_match("x+y", "x-y"));
    }

    #[test]
    fn test_calculate_similarity() {
        assert_eq!(calculate_similarity("abc", "abc"), 1.0);
        assert!(calculate_similarity("abc", "abd") > 0.6);
        assert!(calculate_similarity("abc", "xyz") < 0.5);
    }

    #[test]
    fn test_extract_commands() {
        let commands = extract_commands(r"\frac{1}{2} + \sqrt{x}");
        assert!(commands.contains(r"\frac"));
        assert!(commands.contains(r"\sqrt"));
    }

    #[test]
    fn test_validate_syntax() {
        assert!(validate_syntax(r"\frac{1}{2}").is_ok());
        assert!(validate_syntax(r"\frac{1}{2").is_err());
        assert!(validate_syntax(r"\frac{1}2}").is_err());
    }
}
|
||||
244
examples/scipix/tests/common/metrics.rs
Normal file
244
examples/scipix/tests/common/metrics.rs
Normal file
@@ -0,0 +1,244 @@
|
||||
// Metric calculation utilities
|
||||
//
|
||||
// Provides functions to calculate CER, WER, BLEU, and other quality metrics
|
||||
|
||||
/// Calculate Character Error Rate (CER)
|
||||
pub fn calculate_cer(reference: &str, hypothesis: &str) -> f64 {
|
||||
let distance = levenshtein_distance(reference, hypothesis);
|
||||
let ref_len = reference.chars().count();
|
||||
|
||||
if ref_len == 0 {
|
||||
return if hypothesis.is_empty() { 0.0 } else { 1.0 };
|
||||
}
|
||||
|
||||
distance as f64 / ref_len as f64
|
||||
}
|
||||
|
||||
/// Calculate Word Error Rate (WER)
|
||||
pub fn calculate_wer(reference: &str, hypothesis: &str) -> f64 {
|
||||
let ref_words: Vec<&str> = reference.split_whitespace().collect();
|
||||
let hyp_words: Vec<&str> = hypothesis.split_whitespace().collect();
|
||||
|
||||
let distance = word_levenshtein_distance(&ref_words, &hyp_words);
|
||||
let ref_len = ref_words.len();
|
||||
|
||||
if ref_len == 0 {
|
||||
return if hyp_words.is_empty() { 0.0 } else { 1.0 };
|
||||
}
|
||||
|
||||
distance as f64 / ref_len as f64
|
||||
}
|
||||
|
||||
/// Calculate BLEU score
|
||||
pub fn calculate_bleu(reference: &str, hypothesis: &str, max_n: usize) -> f64 {
|
||||
let ref_words: Vec<&str> = reference.split_whitespace().collect();
|
||||
let hyp_words: Vec<&str> = hypothesis.split_whitespace().collect();
|
||||
|
||||
if hyp_words.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Calculate n-gram precisions
|
||||
let mut precisions = Vec::new();
|
||||
for n in 1..=max_n {
|
||||
let precision = calculate_ngram_precision(&ref_words, &hyp_words, n);
|
||||
if precision == 0.0 {
|
||||
return 0.0; // BLEU is 0 if any n-gram precision is 0
|
||||
}
|
||||
precisions.push(precision);
|
||||
}
|
||||
|
||||
// Geometric mean of precisions
|
||||
let geo_mean = precisions.iter().map(|p| p.ln()).sum::<f64>() / precisions.len() as f64;
|
||||
|
||||
// Brevity penalty
|
||||
let bp = if hyp_words.len() >= ref_words.len() {
|
||||
1.0
|
||||
} else {
|
||||
(1.0 - (ref_words.len() as f64 / hyp_words.len() as f64)).exp()
|
||||
};
|
||||
|
||||
bp * geo_mean.exp() * 100.0 // Return as percentage
|
||||
}
|
||||
|
||||
/// Calculate precision for n-grams
|
||||
fn calculate_ngram_precision(reference: &[&str], hypothesis: &[&str], n: usize) -> f64 {
|
||||
if hypothesis.len() < n {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let ref_ngrams = get_ngrams(reference, n);
|
||||
let hyp_ngrams = get_ngrams(hypothesis, n);
|
||||
|
||||
if hyp_ngrams.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let mut matches = 0;
|
||||
for hyp_ngram in &hyp_ngrams {
|
||||
if ref_ngrams.contains(hyp_ngram) {
|
||||
matches += 1;
|
||||
}
|
||||
}
|
||||
|
||||
matches as f64 / hyp_ngrams.len() as f64
|
||||
}
|
||||
|
||||
/// Get n-grams from a sequence of words
|
||||
/// All contiguous n-grams of `words` as owned string vectors; empty
/// when the input is shorter than `n`.
fn get_ngrams(words: &[&str], n: usize) -> Vec<Vec<String>> {
    // checked_sub handles both the too-short case (None) and n == 0
    // (which yields len+1 empty n-grams, matching the slice version).
    match words.len().checked_sub(n) {
        None => Vec::new(),
        Some(last_start) => (0..=last_start)
            .map(|start| {
                words[start..start + n]
                    .iter()
                    .map(|word| word.to_string())
                    .collect()
            })
            .collect(),
    }
}
|
||||
|
||||
/// Calculate Levenshtein distance for characters
|
||||
/// Character-level Levenshtein edit distance between two strings.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    let left: Vec<char> = a.chars().collect();
    let right: Vec<char> = b.chars().collect();

    if left.is_empty() {
        return right.len();
    }
    if right.is_empty() {
        return left.len();
    }

    // Single rolling row instead of the full matrix; prev_row[j] holds
    // the distance between left[..i] and right[..j].
    let mut prev_row: Vec<usize> = (0..=right.len()).collect();
    for (i, &lc) in left.iter().enumerate() {
        let mut row = Vec::with_capacity(right.len() + 1);
        row.push(i + 1);
        for (j, &rc) in right.iter().enumerate() {
            let substitution = prev_row[j] + usize::from(lc != rc);
            let best = substitution.min(prev_row[j + 1] + 1).min(row[j] + 1);
            row.push(best);
        }
        prev_row = row;
    }

    prev_row[right.len()]
}
|
||||
|
||||
/// Calculate Levenshtein distance for words
|
||||
/// Word-level Levenshtein edit distance between two word sequences.
fn word_levenshtein_distance(a: &[&str], b: &[&str]) -> usize {
    if a.is_empty() {
        return b.len();
    }
    if b.is_empty() {
        return a.len();
    }

    // Rolling-row dynamic program over word positions.
    let mut prev_row: Vec<usize> = (0..=b.len()).collect();
    for (i, wa) in a.iter().enumerate() {
        let mut row = Vec::with_capacity(b.len() + 1);
        row.push(i + 1);
        for (j, wb) in b.iter().enumerate() {
            let substitution = prev_row[j] + usize::from(wa != wb);
            let best = substitution.min(prev_row[j + 1] + 1).min(row[j] + 1);
            row.push(best);
        }
        prev_row = row;
    }

    prev_row[b.len()]
}
|
||||
|
||||
/// Calculate precision
|
||||
/// Precision = TP / (TP + FP); 0.0 when nothing was predicted.
pub fn calculate_precision(tp: usize, fp: usize) -> f64 {
    let predicted = tp + fp;
    if predicted == 0 {
        0.0
    } else {
        tp as f64 / predicted as f64
    }
}
|
||||
|
||||
/// Calculate recall
|
||||
/// Recall = TP / (TP + FN); 0.0 when there were no actual positives.
pub fn calculate_recall(tp: usize, fn_count: usize) -> f64 {
    let actual = tp + fn_count;
    if actual == 0 {
        0.0
    } else {
        tp as f64 / actual as f64
    }
}
|
||||
|
||||
/// Calculate F1 score
|
||||
/// F1 score: harmonic mean of precision and recall; 0.0 when both are
/// zero (avoids dividing by zero).
pub fn calculate_f1(precision: f64, recall: f64) -> f64 {
    let denominator = precision + recall;
    if denominator == 0.0 {
        0.0
    } else {
        2.0 * precision * recall / denominator
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_cer() {
        assert_eq!(calculate_cer("abc", "abc"), 0.0);
        assert_eq!(calculate_cer("abc", "abd"), 1.0 / 3.0);
        assert_eq!(calculate_cer("abc", ""), 1.0);
    }

    #[test]
    fn test_wer() {
        assert_eq!(calculate_wer("hello world", "hello world"), 0.0);
        assert_eq!(calculate_wer("hello world", "hello earth"), 0.5);
    }

    #[test]
    fn test_bleu() {
        let bleu = calculate_bleu("the cat sat on the mat", "the cat sat on the mat", 4);
        assert!(bleu > 99.0);

        // The previous hypothesis ("the dog sat" vs "the cat sat" at
        // max_n = 2) shared no bigram with the reference, so its 2-gram
        // precision was 0 and calculate_bleu short-circuits to 0.0,
        // contradicting the `> 0.0` assertion. Use a hypothesis that
        // keeps some bigram overlap instead.
        let bleu = calculate_bleu("the cat sat on the mat", "the cat sat on a mat", 2);
        assert!(bleu > 0.0 && bleu < 100.0);
    }

    #[test]
    fn test_precision_recall_f1() {
        let precision = calculate_precision(8, 2);
        assert_eq!(precision, 0.8);

        let recall = calculate_recall(8, 1);
        assert!((recall - 8.0 / 9.0).abs() < 0.001);

        let f1 = calculate_f1(precision, recall);
        assert!(f1 > 0.8);
    }
}
|
||||
16
examples/scipix/tests/common/mod.rs
Normal file
16
examples/scipix/tests/common/mod.rs
Normal file
@@ -0,0 +1,16 @@
|
||||
// Common test utilities
|
||||
//
|
||||
// Provides shared functionality for integration tests
|
||||
|
||||
pub mod images;
|
||||
pub mod latex;
|
||||
pub mod metrics;
|
||||
pub mod server;
|
||||
pub mod types;
|
||||
|
||||
// Re-export commonly used types and functions
|
||||
pub use images::{generate_fraction, generate_integral, generate_simple_equation, generate_symbol};
|
||||
pub use latex::{calculate_similarity, expressions_match, normalize};
|
||||
pub use metrics::{calculate_bleu, calculate_cer, calculate_wer};
|
||||
pub use server::TestServer;
|
||||
pub use types::{CacheStats, OutputFormat, ProcessingOptions, ProcessingResult};
|
||||
206
examples/scipix/tests/common/server.rs
Normal file
206
examples/scipix/tests/common/server.rs
Normal file
@@ -0,0 +1,206 @@
|
||||
// Test server setup and teardown utilities
|
||||
//
|
||||
// Provides a test server instance for integration tests
|
||||
|
||||
use super::types::{CacheStats, OutputFormat, ProcessingOptions, ProcessingResult};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct TestServer {
|
||||
inner: Arc<TestServerInner>,
|
||||
}
|
||||
|
||||
struct TestServerInner {
|
||||
base_url: String,
|
||||
#[allow(dead_code)]
|
||||
process: Option<RwLock<tokio::process::Child>>,
|
||||
config: TestServerConfig,
|
||||
}
|
||||
|
||||
/// Configuration knobs for a test-server instance.
#[derive(Debug, Clone)]
pub struct TestServerConfig {
    /// TCP port the server's base URL points at.
    pub port: u16,
    /// Whether the result cache is enabled.
    pub enable_cache: bool,
    /// Maximum number of cached entries, when caching is enabled.
    pub cache_size: Option<usize>,
    /// Cache entry time-to-live, in seconds.
    pub cache_ttl_seconds: Option<u64>,
    /// Request rate limit, when rate limiting is enabled.
    pub rate_limit: Option<u64>,
    /// Per-request timeout, in milliseconds.
    pub timeout_ms: Option<u64>,
    /// Directory backing a persistent cache, if any.
    pub cache_dir: Option<String>,
}
|
||||
|
||||
impl Default for TestServerConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
port: 18080,
|
||||
enable_cache: false,
|
||||
cache_size: None,
|
||||
cache_ttl_seconds: None,
|
||||
rate_limit: None,
|
||||
timeout_ms: None,
|
||||
cache_dir: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TestServer {
|
||||
/// Start a basic test server
|
||||
pub async fn start() -> Result<Self, Box<dyn std::error::Error>> {
|
||||
Self::with_config(TestServerConfig::default()).await
|
||||
}
|
||||
|
||||
/// Start test server with cache enabled
|
||||
pub async fn with_cache() -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let config = TestServerConfig {
|
||||
enable_cache: true,
|
||||
cache_size: Some(100),
|
||||
..Default::default()
|
||||
};
|
||||
Self::with_config(config).await
|
||||
}
|
||||
|
||||
/// Start test server with specific cache size
|
||||
pub async fn with_cache_size(size: usize) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let config = TestServerConfig {
|
||||
enable_cache: true,
|
||||
cache_size: Some(size),
|
||||
..Default::default()
|
||||
};
|
||||
Self::with_config(config).await
|
||||
}
|
||||
|
||||
/// Start test server with cache TTL
|
||||
pub async fn with_cache_ttl(ttl_seconds: u64) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let config = TestServerConfig {
|
||||
enable_cache: true,
|
||||
cache_ttl_seconds: Some(ttl_seconds),
|
||||
..Default::default()
|
||||
};
|
||||
Self::with_config(config).await
|
||||
}
|
||||
|
||||
/// Start test server with persistent cache
|
||||
pub async fn with_persistent_cache(
|
||||
cache_dir: &str,
|
||||
) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let config = TestServerConfig {
|
||||
enable_cache: true,
|
||||
cache_dir: Some(cache_dir.to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
Self::with_config(config).await
|
||||
}
|
||||
|
||||
/// Start test server with timeout
|
||||
pub async fn with_timeout(timeout_ms: u64) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let config = TestServerConfig {
|
||||
timeout_ms: Some(timeout_ms),
|
||||
..Default::default()
|
||||
};
|
||||
Self::with_config(config).await
|
||||
}
|
||||
|
||||
/// Start API server
|
||||
pub async fn start_api() -> Result<Self, Box<dyn std::error::Error>> {
|
||||
Self::start().await
|
||||
}
|
||||
|
||||
/// Start API server with rate limiting
|
||||
pub async fn start_api_with_rate_limit(limit: u64) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let config = TestServerConfig {
|
||||
rate_limit: Some(limit),
|
||||
..Default::default()
|
||||
};
|
||||
Self::with_config(config).await
|
||||
}
|
||||
|
||||
/// Start test server with custom configuration
|
||||
pub async fn with_config(config: TestServerConfig) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
// Test infrastructure - provides mock server for testing
|
||||
// Real OCR processing requires ONNX models to be configured
|
||||
|
||||
let base_url = format!("http://localhost:{}", config.port);
|
||||
|
||||
let inner = Arc::new(TestServerInner {
|
||||
base_url,
|
||||
process: None,
|
||||
config,
|
||||
});
|
||||
|
||||
// Wait for server to be ready
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
|
||||
|
||||
Ok(TestServer { inner })
|
||||
}
|
||||
|
||||
/// Get base URL
|
||||
pub fn base_url(&self) -> &str {
|
||||
&self.inner.base_url
|
||||
}
|
||||
|
||||
/// Process a single image
|
||||
/// Note: This is test infrastructure that returns mock data.
|
||||
/// Real OCR requires ONNX models to be configured.
|
||||
pub async fn process_image(
|
||||
&self,
|
||||
_image_path: &str,
|
||||
_format: OutputFormat,
|
||||
) -> Result<ProcessingResult, String> {
|
||||
// Test infrastructure mock - real OCR requires models
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
|
||||
|
||||
Ok(ProcessingResult {
|
||||
latex: "x + y".to_string(),
|
||||
mathml: Some("<math><mrow><mi>x</mi><mo>+</mo><mi>y</mi></mrow></math>".to_string()),
|
||||
html: None,
|
||||
ascii: None,
|
||||
text: Some("x + y".to_string()),
|
||||
confidence: 0.95,
|
||||
processing_time_ms: 50,
|
||||
})
|
||||
}
|
||||
|
||||
/// Process image with options
|
||||
pub async fn process_image_with_options(
|
||||
&self,
|
||||
image_path: &str,
|
||||
format: OutputFormat,
|
||||
_options: ProcessingOptions,
|
||||
) -> Result<ProcessingResult, String> {
|
||||
self.process_image(image_path, format).await
|
||||
}
|
||||
|
||||
/// Process batch of images
|
||||
pub async fn process_batch(
|
||||
&self,
|
||||
image_paths: &[&str],
|
||||
format: OutputFormat,
|
||||
) -> Result<Vec<ProcessingResult>, String> {
|
||||
let mut results = Vec::new();
|
||||
for path in image_paths {
|
||||
results.push(self.process_image(path, format.clone()).await?);
|
||||
}
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Get cache statistics
|
||||
pub async fn cache_stats(&self) -> Result<CacheStats, String> {
|
||||
Ok(CacheStats {
|
||||
hits: 0,
|
||||
misses: 0,
|
||||
evictions: 0,
|
||||
current_size: 0,
|
||||
max_size: self.inner.config.cache_size.unwrap_or(100),
|
||||
})
|
||||
}
|
||||
|
||||
/// Invalidate cache
|
||||
pub async fn invalidate_cache(&self) -> Result<(), String> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Shutdown server
|
||||
pub async fn shutdown(self) {
|
||||
// Test infrastructure - no actual server to shut down
|
||||
}
|
||||
}
|
||||
47
examples/scipix/tests/common/types.rs
Normal file
47
examples/scipix/tests/common/types.rs
Normal file
@@ -0,0 +1,47 @@
|
||||
// Common types shared across tests
|
||||
//
|
||||
// Defines output formats, processing results, and configuration types
|
||||
|
||||
/// Output format for OCR processing
|
||||
/// Output format requested for OCR processing.
///
/// Unit-only enum, so `Copy`, `PartialEq`, and `Eq` are free and let
/// callers pass it by value and compare variants instead of cloning.
/// (`Clone` is kept for existing `format.clone()` call sites.)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    LaTeX,
    MathML,
    HTML,
    ASCII,
    All,
}
|
||||
|
||||
/// Processing options configuration
|
||||
/// Toggles for preprocessing steps and which output representations a
/// processing request should include. All flags default to `false`.
#[derive(Debug, Clone, Default)]
pub struct ProcessingOptions {
    /// Enable the preprocessing stage.
    pub enable_preprocessing: bool,
    /// Enable denoising.
    pub enable_denoising: bool,
    /// Enable deskewing.
    pub enable_deskew: bool,
    /// Include LaTeX output.
    pub include_latex: bool,
    /// Include MathML output.
    pub include_mathml: bool,
    /// Include ASCII output.
    pub include_ascii: bool,
    /// Include plain-text output.
    pub include_text: bool,
}
|
||||
|
||||
/// Processing result from OCR
|
||||
/// Result of an OCR processing request.
///
/// `PartialEq` is derived (consistent with `ElementCounts` elsewhere in
/// these test utilities) so tests can compare whole results directly.
#[derive(Debug, Clone, PartialEq)]
pub struct ProcessingResult {
    /// LaTeX rendition of the recognized content (always present).
    pub latex: String,
    /// Optional MathML rendition.
    pub mathml: Option<String>,
    /// Optional HTML rendition.
    pub html: Option<String>,
    /// Optional ASCII rendition.
    pub ascii: Option<String>,
    /// Optional plain-text rendition.
    pub text: Option<String>,
    /// Recognition confidence score.
    pub confidence: f32,
    /// Wall-clock processing time, in milliseconds.
    pub processing_time_ms: u64,
}
|
||||
|
||||
/// Cache statistics
|
||||
/// Cache statistics snapshot.
///
/// `Default` (all zeros), `PartialEq`, and `Eq` are derived so tests
/// can build and compare expected snapshots directly; all fields are
/// plain integers, so the derives are free.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CacheStats {
    /// Number of cache hits.
    pub hits: u64,
    /// Number of cache misses.
    pub misses: u64,
    /// Number of entries evicted.
    pub evictions: u64,
    /// Current number of cached entries.
    pub current_size: usize,
    /// Configured maximum number of entries.
    pub max_size: usize,
}
|
||||
Reference in New Issue
Block a user