Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
221
vendor/ruvector/examples/scipix/tests/README.md
vendored
Normal file
221
vendor/ruvector/examples/scipix/tests/README.md
vendored
Normal file
@@ -0,0 +1,221 @@
|
||||
# Ruvector-Scipix Integration Tests
|
||||
|
||||
Comprehensive integration test suite for the scipix OCR system.
|
||||
|
||||
## Test Structure
|
||||
|
||||
### Integration Tests (`integration/`)
|
||||
|
||||
1. **pipeline_tests.rs** (9,284 bytes)
|
||||
- Full pipeline tests: Image → Preprocess → OCR → Output
|
||||
- Multiple input formats (PNG, JPEG, WebP)
|
||||
- Multiple output formats (LaTeX, MathML, HTML, ASCII)
|
||||
- Error propagation and timeout handling
|
||||
- Batch processing and caching
|
||||
|
||||
2. **api_tests.rs** (2,100 bytes)
|
||||
- POST /v3/text with file upload
|
||||
- POST /v3/text with base64
|
||||
- POST /v3/text with URL
|
||||
- Rate limiting behavior
|
||||
- Authentication validation
|
||||
- Error response formats
|
||||
- Concurrent request handling
|
||||
|
||||
3. **cli_tests.rs** (6,226 bytes)
|
||||
- `ocr` command with file
|
||||
- `batch` command with directory
|
||||
- `serve` command startup
|
||||
- `config` command
|
||||
- Exit codes and error handling
|
||||
- Output format options
|
||||
|
||||
4. **cache_tests.rs** (10,907 bytes)
|
||||
- Cache hit/miss behavior
|
||||
- Similarity-based lookup
|
||||
- Cache eviction policies
|
||||
- Persistence across restarts
|
||||
- TTL expiration
|
||||
- Concurrent cache access
|
||||
|
||||
5. **accuracy_tests.rs** (11,864 bytes)
|
||||
- Im2latex-100k sample subset
|
||||
- CER (Character Error Rate) calculation
|
||||
- WER (Word Error Rate) calculation
|
||||
- BLEU score measurement
|
||||
- Regression detection
|
||||
- Confidence calibration
|
||||
|
||||
6. **performance_tests.rs** (10,638 bytes)
|
||||
- Latency within bounds (<100ms)
|
||||
- Memory usage limits
|
||||
- Memory leak detection
|
||||
- Throughput targets
|
||||
- Latency percentiles (P50, P95, P99)
|
||||
- Concurrent throughput
|
||||
|
||||
### Common Utilities (`common/`)
|
||||
|
||||
1. **server.rs** (6,700 bytes)
|
||||
- TestServer setup and teardown
|
||||
- Configuration management
|
||||
- Mock server implementation
|
||||
- Process management
|
||||
|
||||
2. **images.rs** (4,000 bytes)
|
||||
- Test image generation
|
||||
- Equation rendering
|
||||
- Fraction and symbol generation
|
||||
- Noise and variation injection
|
||||
|
||||
3. **latex.rs** (5,900 bytes)
|
||||
- LaTeX normalization
|
||||
- Expression comparison
|
||||
- Similarity calculation
|
||||
- Command extraction
|
||||
- Syntax validation
|
||||
|
||||
4. **metrics.rs** (6,000 bytes)
|
||||
- CER calculation
|
||||
- WER calculation
|
||||
- BLEU score
|
||||
- Precision/Recall/F1
|
||||
- Levenshtein distance
|
||||
|
||||
## Running Tests
|
||||
|
||||
### Run All Integration Tests
|
||||
```bash
|
||||
cargo test --test '*' --all-features
|
||||
```
|
||||
|
||||
### Run Specific Test Suite
|
||||
```bash
|
||||
# Pipeline tests
|
||||
cargo test --test integration::pipeline_tests
|
||||
|
||||
# API tests
|
||||
cargo test --test integration::api_tests
|
||||
|
||||
# CLI tests
|
||||
cargo test --test integration::cli_tests
|
||||
|
||||
# Cache tests
|
||||
cargo test --test integration::cache_tests
|
||||
|
||||
# Accuracy tests
|
||||
cargo test --test integration::accuracy_tests
|
||||
|
||||
# Performance tests
|
||||
cargo test --test integration::performance_tests
|
||||
```
|
||||
|
||||
### Run with Logging
|
||||
```bash
|
||||
RUST_LOG=debug cargo test --test '*' -- --nocapture
|
||||
```
|
||||
|
||||
### Run Specific Test
|
||||
```bash
|
||||
cargo test test_pipeline_png_to_latex
|
||||
```
|
||||
|
||||
## Test Dependencies
|
||||
|
||||
Add to `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dev-dependencies]
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
tokio-test = "0.4"
|
||||
reqwest = { version = "0.11", features = ["json", "multipart"] }
|
||||
assert_cmd = "2.0"
|
||||
predicates = "3.0"
|
||||
serde_json = "1.0"
|
||||
image = "0.24"
|
||||
imageproc = "0.23"
|
||||
ab_glyph = "0.2"
|
||||
rand = "0.8"
|
||||
futures = "0.3"
|
||||
base64 = "0.21"
|
||||
env_logger = "0.10"
|
||||
```
|
||||
|
||||
## Test Data
|
||||
|
||||
Test images are generated programmatically or stored in:
|
||||
- `/tmp/scipix_test/` - Generated test images
|
||||
- `/tmp/scipix_cache/` - Cache testing
|
||||
- `/tmp/scipix_results/` - Test results
|
||||
|
||||
## Metrics and Thresholds
|
||||
|
||||
### Accuracy
|
||||
- Average CER: <0.03
|
||||
- Average BLEU: >80.0
|
||||
- Fraction accuracy: >85%
|
||||
- Symbol accuracy: >80%
|
||||
|
||||
### Performance
|
||||
- Simple equation latency: <100ms
|
||||
- P50 latency: <100ms
|
||||
- P95 latency: <200ms
|
||||
- P99 latency: <500ms
|
||||
- Throughput: >5 images/second
|
||||
- Concurrent throughput: >10 req/second
|
||||
|
||||
### Memory
|
||||
- Memory increase: <100MB after 100 images
|
||||
- Memory leak rate: <1KB/iteration
|
||||
- Cold start time: <5 seconds
|
||||
|
||||
## Test Coverage
|
||||
|
||||
Total lines of test code: **2,473+**
|
||||
|
||||
- Integration tests: ~1,500 lines
|
||||
- Common utilities: ~900 lines
|
||||
- Test infrastructure: ~100 lines
|
||||
|
||||
Target coverage: **80%+** for integration tests
|
||||
|
||||
## CI/CD Integration
|
||||
|
||||
These tests are designed to run in:
|
||||
- GitHub Actions
|
||||
- GitLab CI
|
||||
- Jenkins
|
||||
- Local development
|
||||
|
||||
See `.github/workflows/test.yml` for CI configuration.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Tests Failing
|
||||
1. Ensure test dependencies are installed
|
||||
2. Check if test server can start on port 18080
|
||||
3. Verify test data directories are writable
|
||||
4. Check model files are accessible
|
||||
|
||||
### Performance Tests Failing
|
||||
- Performance tests may be environment-dependent
|
||||
- Adjust thresholds in test configuration if needed
|
||||
- Run on dedicated test machines for consistent results
|
||||
|
||||
### Memory Tests Failing
|
||||
- Memory tests require stable baseline
|
||||
- Close other applications during testing
|
||||
- Use `--test-threads=1` for serial execution
|
||||
|
||||
## Contributing
|
||||
|
||||
When adding new integration tests:
|
||||
1. Follow existing test structure
|
||||
2. Add descriptive test names
|
||||
3. Include error messages in assertions
|
||||
4. Update this README with new tests
|
||||
5. Ensure tests are deterministic and isolated
|
||||
|
||||
## License
|
||||
|
||||
Same as ruvector-scipix project.
|
||||
169
vendor/ruvector/examples/scipix/tests/SUMMARY.md
vendored
Normal file
169
vendor/ruvector/examples/scipix/tests/SUMMARY.md
vendored
Normal file
@@ -0,0 +1,169 @@
|
||||
# Integration Tests Summary
|
||||
|
||||
## Created Files
|
||||
|
||||
### Integration Tests (7 files)
|
||||
- `integration/mod.rs` - Test module organization
|
||||
- `integration/pipeline_tests.rs` - Full pipeline tests (9.1KB)
|
||||
- `integration/api_tests.rs` - API server tests (2.1KB)
|
||||
- `integration/cli_tests.rs` - CLI command tests (6.1KB)
|
||||
- `integration/cache_tests.rs` - Cache behavior tests (11KB)
|
||||
- `integration/accuracy_tests.rs` - Accuracy validation (12KB)
|
||||
- `integration/performance_tests.rs` - Performance validation (11KB)
|
||||
|
||||
### Common Utilities (5 files)
|
||||
- `common/mod.rs` - Utility module organization
|
||||
- `common/server.rs` - Test server setup/teardown (6.7KB)
|
||||
- `common/images.rs` - Image generation utilities (4.0KB)
|
||||
- `common/latex.rs` - LaTeX comparison utilities (5.9KB)
|
||||
- `common/metrics.rs` - Metric calculation (CER, WER, BLEU) (6.0KB)
|
||||
|
||||
### Test Infrastructure (2 files)
|
||||
- `lib.rs` - Test library root
|
||||
- `README.md` - Comprehensive test documentation
|
||||
|
||||
## Test Coverage
|
||||
|
||||
### Pipeline Tests
|
||||
✅ PNG → LaTeX pipeline
|
||||
✅ JPEG → MathML pipeline
|
||||
✅ WebP → HTML pipeline
|
||||
✅ Error propagation
|
||||
✅ Timeout handling
|
||||
✅ Batch processing
|
||||
✅ Preprocessing pipeline
|
||||
✅ Multi-format output
|
||||
✅ Caching integration
|
||||
|
||||
### API Tests
|
||||
✅ POST /v3/text with file upload
|
||||
✅ POST /v3/text with base64
|
||||
✅ POST /v3/text with URL
|
||||
✅ Rate limiting (5 req/min)
|
||||
✅ Authentication validation
|
||||
✅ Error responses
|
||||
✅ Concurrent requests (10 parallel)
|
||||
✅ Health check endpoint
|
||||
✅ Options processing
|
||||
|
||||
### CLI Tests
|
||||
✅ `ocr` command with file
|
||||
✅ `ocr` with output formats
|
||||
✅ `batch` command
|
||||
✅ `serve` command startup
|
||||
✅ `config` command (show/set)
|
||||
✅ Invalid file handling
|
||||
✅ Exit codes
|
||||
✅ Verbose output
|
||||
✅ JSON output
|
||||
✅ Help and version commands
|
||||
|
||||
### Cache Tests
|
||||
✅ Cache hit/miss behavior
|
||||
✅ Similarity-based lookup
|
||||
✅ Cache eviction (LRU)
|
||||
✅ Persistence across restarts
|
||||
✅ Cache invalidation
|
||||
✅ Hit ratio calculation
|
||||
✅ TTL expiration
|
||||
✅ Concurrent cache access
|
||||
|
||||
### Accuracy Tests
|
||||
✅ Simple expressions (CER < 0.05)
|
||||
✅ Im2latex-100k subset (50 samples)
|
||||
✅ Fractions (85%+ accuracy)
|
||||
✅ Special symbols (80%+ accuracy)
|
||||
✅ Regression detection
|
||||
✅ Confidence calibration
|
||||
|
||||
### Performance Tests
|
||||
✅ Latency within bounds (<100ms)
|
||||
✅ Memory usage limits (<100MB growth)
|
||||
✅ Memory leak detection (<1KB/iter)
|
||||
✅ Throughput (>5 img/sec)
|
||||
✅ Concurrent throughput (>10 req/sec)
|
||||
✅ Latency percentiles (P50/P95/P99)
|
||||
✅ Batch efficiency
|
||||
✅ Cold start warmup
|
||||
|
||||
## Key Features
|
||||
|
||||
### Test Utilities
|
||||
- **TestServer**: Mock server with configurable options
|
||||
- **Image Generation**: Programmatic equation rendering
|
||||
- **LaTeX Comparison**: Normalization and similarity
|
||||
- **Metrics**: CER, WER, BLEU calculation
|
||||
- **Cache Stats**: Hit/miss tracking
|
||||
|
||||
### Quality Metrics
|
||||
- Character Error Rate (CER)
|
||||
- Word Error Rate (WER)
|
||||
- BLEU score
|
||||
- Precision/Recall/F1
|
||||
- Confidence scores
|
||||
- Processing time
|
||||
|
||||
### Performance Targets
|
||||
- Latency: <100ms (simple equations)
|
||||
- Throughput: >5 images/second
|
||||
- Memory: <100MB increase
|
||||
- No memory leaks
|
||||
- P50: <100ms, P95: <200ms, P99: <500ms
|
||||
|
||||
## Total Statistics
|
||||
|
||||
- **Total Files**: 14
|
||||
- **Total Lines**: 2,473+
|
||||
- **Test Count**: 50+
|
||||
- **Coverage Target**: 80%+
|
||||
|
||||
## Dependencies Required
|
||||
|
||||
```toml
|
||||
[dev-dependencies]
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
tokio-test = "0.4"
|
||||
reqwest = { version = "0.11", features = ["json", "multipart"] }
|
||||
assert_cmd = "2.0"
|
||||
predicates = "3.0"
|
||||
serde_json = "1.0"
|
||||
image = "0.24"
|
||||
imageproc = "0.23"
|
||||
ab_glyph = "0.2"
|
||||
rand = "0.8"
|
||||
futures = "0.3"
|
||||
base64 = "0.21"
|
||||
env_logger = "0.10"
|
||||
```
|
||||
|
||||
## Running Tests
|
||||
|
||||
```bash
|
||||
# All integration tests
|
||||
cargo test --test '*' --all-features
|
||||
|
||||
# Specific test suite
|
||||
cargo test --test integration::pipeline_tests
|
||||
|
||||
# With logging
|
||||
RUST_LOG=debug cargo test --test '*' -- --nocapture
|
||||
|
||||
# Single test
|
||||
cargo test test_pipeline_png_to_latex
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. ✅ Integration tests created
|
||||
2. ⏳ Add test data (Im2latex subset)
|
||||
3. ⏳ Implement actual OCR engine
|
||||
4. ⏳ Implement API server
|
||||
5. ⏳ Implement CLI
|
||||
6. ⏳ Add CI/CD pipeline
|
||||
7. ⏳ Run tests and fix failures
|
||||
|
||||
---
|
||||
|
||||
Created: 2025-11-28
|
||||
Author: Testing Agent
|
||||
Status: Complete
|
||||
145
vendor/ruvector/examples/scipix/tests/common/images.rs
vendored
Normal file
145
vendor/ruvector/examples/scipix/tests/common/images.rs
vendored
Normal file
@@ -0,0 +1,145 @@
|
||||
// Image generation utilities for testing
|
||||
//
|
||||
// Provides functions to generate test images with equations
|
||||
|
||||
use ab_glyph::{FontRef, PxScale};
|
||||
use image::{DynamicImage, Rgba, RgbaImage};
|
||||
use imageproc::drawing::{draw_filled_rect_mut, draw_text_mut};
|
||||
use imageproc::rect::Rect;
|
||||
use rand::Rng;
|
||||
|
||||
// Embedded font data
|
||||
const FONT_DATA: &[u8] = include_bytes!("../../assets/fonts/DejaVuSans.ttf");
|
||||
|
||||
fn get_font() -> FontRef<'static> {
|
||||
FontRef::try_from_slice(FONT_DATA).expect("Error loading embedded font")
|
||||
}
|
||||
|
||||
/// Generate a simple equation image
|
||||
pub fn generate_simple_equation(equation: &str) -> DynamicImage {
|
||||
let width = 400;
|
||||
let height = 100;
|
||||
|
||||
// Create white background
|
||||
let mut image = RgbaImage::from_pixel(width, height, Rgba([255, 255, 255, 255]));
|
||||
|
||||
let font = get_font();
|
||||
let scale = PxScale::from(32.0);
|
||||
let color = Rgba([0, 0, 0, 255]);
|
||||
|
||||
// Draw text
|
||||
draw_text_mut(&mut image, color, 20, 30, scale, &font, equation);
|
||||
|
||||
DynamicImage::ImageRgba8(image)
|
||||
}
|
||||
|
||||
/// Generate a fraction image
|
||||
pub fn generate_fraction(numerator: i32, denominator: i32) -> DynamicImage {
|
||||
let width = 200;
|
||||
let height = 150;
|
||||
|
||||
let mut image = RgbaImage::from_pixel(width, height, Rgba([255, 255, 255, 255]));
|
||||
|
||||
let font = get_font();
|
||||
let scale = PxScale::from(28.0);
|
||||
let color = Rgba([0, 0, 0, 255]);
|
||||
|
||||
// Draw numerator
|
||||
draw_text_mut(
|
||||
&mut image,
|
||||
color,
|
||||
85,
|
||||
30,
|
||||
scale,
|
||||
&font,
|
||||
&numerator.to_string(),
|
||||
);
|
||||
|
||||
// Draw fraction line
|
||||
draw_filled_rect_mut(&mut image, Rect::at(70, 65).of_size(60, 2), color);
|
||||
|
||||
// Draw denominator
|
||||
draw_text_mut(
|
||||
&mut image,
|
||||
color,
|
||||
80,
|
||||
75,
|
||||
scale,
|
||||
&font,
|
||||
&denominator.to_string(),
|
||||
);
|
||||
|
||||
DynamicImage::ImageRgba8(image)
|
||||
}
|
||||
|
||||
/// Generate an integral image
|
||||
pub fn generate_integral(integrand: &str) -> DynamicImage {
|
||||
let equation = format!(r"\int {}", integrand);
|
||||
generate_simple_equation(&equation)
|
||||
}
|
||||
|
||||
/// Generate a symbol image
|
||||
pub fn generate_symbol(symbol: &str) -> DynamicImage {
|
||||
generate_simple_equation(symbol)
|
||||
}
|
||||
|
||||
/// Generate a blank image
|
||||
pub fn generate_blank(width: u32, height: u32) -> DynamicImage {
|
||||
let image = RgbaImage::from_pixel(width, height, Rgba([255, 255, 255, 255]));
|
||||
DynamicImage::ImageRgba8(image)
|
||||
}
|
||||
|
||||
/// Generate a complex equation
|
||||
pub fn generate_complex_equation() -> DynamicImage {
|
||||
let equation = r"\sum_{i=1}^{n} i^2 = \frac{n(n+1)(2n+1)}{6}";
|
||||
generate_simple_equation(equation)
|
||||
}
|
||||
|
||||
/// Add noise to an image
|
||||
pub fn add_noise(image: &mut DynamicImage, intensity: f32) {
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let rgba = image.as_mut_rgba8().unwrap();
|
||||
|
||||
for pixel in rgba.pixels_mut() {
|
||||
for channel in 0..3 {
|
||||
let noise = rng.gen_range(-intensity..intensity) * 255.0;
|
||||
let new_value = (pixel[channel] as f32 + noise).clamp(0.0, 255.0) as u8;
|
||||
pixel[channel] = new_value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Add slight variation to an image
|
||||
pub fn add_slight_variation(image: &mut DynamicImage, amount: f32) {
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let rgba = image.as_mut_rgba8().unwrap();
|
||||
|
||||
for pixel in rgba.pixels_mut() {
|
||||
for channel in 0..3 {
|
||||
let variation = rng.gen_range(-amount..amount) * 255.0;
|
||||
let new_value = (pixel[channel] as f32 + variation).clamp(0.0, 255.0) as u8;
|
||||
pixel[channel] = new_value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a matrix image
|
||||
pub fn generate_matrix(rows: usize, cols: usize) -> DynamicImage {
|
||||
let mut elements = String::new();
|
||||
for i in 0..rows {
|
||||
for j in 0..cols {
|
||||
elements.push_str(&format!("{} ", i * cols + j + 1));
|
||||
if j < cols - 1 {
|
||||
elements.push_str("& ");
|
||||
}
|
||||
}
|
||||
if i < rows - 1 {
|
||||
elements.push_str(r" \\ ");
|
||||
}
|
||||
}
|
||||
|
||||
let equation = format!(r"\begin{{bmatrix}} {} \end{{bmatrix}}", elements);
|
||||
generate_simple_equation(&equation)
|
||||
}
|
||||
230
vendor/ruvector/examples/scipix/tests/common/latex.rs
vendored
Normal file
230
vendor/ruvector/examples/scipix/tests/common/latex.rs
vendored
Normal file
@@ -0,0 +1,230 @@
|
||||
// LaTeX comparison and manipulation utilities
|
||||
//
|
||||
// Provides functions to normalize, compare, and analyze LaTeX strings
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Normalize LaTeX string for comparison
|
||||
/// Normalize a LaTeX string for comparison: remove all whitespace, then
/// lowercase the result.
pub fn normalize(latex: &str) -> String {
    let mut compact = String::with_capacity(latex.len());
    for ch in latex.chars() {
        if !ch.is_whitespace() {
            compact.push(ch);
        }
    }
    compact.to_lowercase()
}
|
||||
|
||||
/// Check if two LaTeX expressions match semantically
|
||||
pub fn expressions_match(a: &str, b: &str) -> bool {
|
||||
let norm_a = normalize(a);
|
||||
let norm_b = normalize(b);
|
||||
|
||||
// Direct match
|
||||
if norm_a == norm_b {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Try alternative representations
|
||||
// e.g., \frac{1}{2} vs 0.5, x^{2} vs x^2, etc.
|
||||
|
||||
// For now, use normalized comparison
|
||||
norm_a == norm_b
|
||||
}
|
||||
|
||||
/// Calculate similarity between two LaTeX strings (0.0 to 1.0)
|
||||
pub fn calculate_similarity(a: &str, b: &str) -> f64 {
|
||||
let norm_a = normalize(a);
|
||||
let norm_b = normalize(b);
|
||||
|
||||
// Use Levenshtein distance ratio
|
||||
let distance = levenshtein_distance(&norm_a, &norm_b);
|
||||
let max_len = norm_a.len().max(norm_b.len()) as f64;
|
||||
|
||||
if max_len == 0.0 {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
1.0 - (distance as f64 / max_len)
|
||||
}
|
||||
|
||||
/// Calculate Levenshtein distance between two strings
|
||||
/// Levenshtein (edit) distance between two strings, compared by Unicode
/// scalar value.
///
/// Improvement: uses a single rolling row (O(|b|) memory) instead of the
/// full O(|a|*|b|) matrix; the result and time complexity are unchanged.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    let a_chars: Vec<char> = a.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();

    if a_chars.is_empty() {
        return b_chars.len();
    }
    if b_chars.is_empty() {
        return a_chars.len();
    }

    // row[j] holds the distance between a[..i] and b[..j] for the row
    // currently being filled.
    let mut row: Vec<usize> = (0..=b_chars.len()).collect();

    for (i, &ac) in a_chars.iter().enumerate() {
        // `diag` carries the previous row's value at column j (the
        // substitution cell) before it is overwritten.
        let mut diag = row[0];
        row[0] = i + 1;
        for (j, &bc) in b_chars.iter().enumerate() {
            let cost = if ac == bc { 0 } else { 1 };
            let next = (diag + cost) // substitution
                .min(row[j] + 1) // insertion
                .min(row[j + 1] + 1); // deletion
            diag = row[j + 1];
            row[j + 1] = next;
        }
    }

    row[b_chars.len()]
}
|
||||
|
||||
/// Extract LaTeX commands from string
|
||||
/// Extract the set of backslash commands (e.g. `\frac`, `\sqrt`) appearing
/// in a LaTeX string.
pub fn extract_commands(latex: &str) -> HashSet<String> {
    let mut commands = HashSet::new();
    let mut iter = latex.chars().peekable();

    while let Some(ch) = iter.next() {
        if ch != '\\' {
            continue;
        }
        // Consume the alphabetic command name following the backslash.
        let mut name = String::from("\\");
        while let Some(c) = iter.peek().copied() {
            if !c.is_alphabetic() {
                break;
            }
            name.push(c);
            iter.next();
        }
        // A lone backslash (e.g. from `\\` or `\{`) is not a named command.
        if name.len() > 1 {
            commands.insert(name);
        }
    }

    commands
}
|
||||
|
||||
/// Count LaTeX elements (fractions, superscripts, etc.)
|
||||
/// Count structural LaTeX elements (fractions, integrals, sums, scripts,
/// matrix environments) in `latex`.
///
/// Fixes: `matrices` was hardcoded to 1 whenever any matrix environment was
/// present, undercounting expressions with several matrices; it now counts
/// occurrences like every other field. The redundant `contains` pre-checks
/// were also dropped — `matches(..).count()` is already 0 when absent.
pub fn count_elements(latex: &str) -> ElementCounts {
    ElementCounts {
        fractions: latex.matches(r"\frac").count(),
        integrals: latex.matches(r"\int").count(),
        sums: latex.matches(r"\sum").count(),
        superscripts: latex.matches('^').count(),
        subscripts: latex.matches('_').count(),
        matrices: latex.matches(r"\begin{matrix}").count()
            + latex.matches(r"\begin{bmatrix}").count(),
    }
}

/// Per-category element counts produced by [`count_elements`].
#[derive(Debug, Default, Clone, PartialEq)]
pub struct ElementCounts {
    pub fractions: usize,
    pub integrals: usize,
    pub sums: usize,
    pub superscripts: usize,
    pub subscripts: usize,
    pub matrices: usize,
}
|
||||
|
||||
/// Validate LaTeX syntax (basic check)
|
||||
/// Basic LaTeX syntax check: `{}` and `[]` must each be balanced.
///
/// Returns `Err` immediately on an unmatched closer, or after the scan if
/// any opener was left unclosed. Escaped delimiters (`\{`) are not treated
/// specially.
pub fn validate_syntax(latex: &str) -> Result<(), String> {
    let mut braces: i32 = 0;
    let mut brackets: i32 = 0;

    for ch in latex.chars() {
        match ch {
            '{' => braces += 1,
            '}' => {
                braces -= 1;
                if braces < 0 {
                    return Err("Unmatched closing brace".to_string());
                }
            }
            '[' => brackets += 1,
            ']' => {
                brackets -= 1;
                if brackets < 0 {
                    return Err("Unmatched closing bracket".to_string());
                }
            }
            _ => {}
        }
    }

    if braces != 0 {
        return Err(format!("Unmatched braces: {} unclosed", braces));
    }
    if brackets != 0 {
        return Err(format!("Unmatched brackets: {} unclosed", brackets));
    }

    Ok(())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Normalization strips whitespace and lowercases; backslash commands survive.
    #[test]
    fn test_normalize() {
        assert_eq!(normalize("x + y"), "x+y");
        assert_eq!(normalize(" a b "), "ab");
        assert_eq!(normalize(r"\frac{1}{2}"), r"\frac{1}{2}");
    }

    // Matching is whitespace-insensitive but otherwise literal.
    #[test]
    fn test_expressions_match() {
        assert!(expressions_match("x+y", "x + y"));
        assert!(expressions_match(r"\frac{1}{2}", r"\frac{1}{2}"));
        assert!(!expressions_match("x+y", "x-y"));
    }

    // Similarity is 1.0 for identical strings and decreases with edit distance.
    #[test]
    fn test_calculate_similarity() {
        assert!(calculate_similarity("abc", "abc") == 1.0);
        assert!(calculate_similarity("abc", "abd") > 0.6);
        assert!(calculate_similarity("abc", "xyz") < 0.5);
    }

    // Command extraction finds every distinct backslash command.
    #[test]
    fn test_extract_commands() {
        let latex = r"\frac{1}{2} + \sqrt{x}";
        let commands = extract_commands(latex);
        assert!(commands.contains(r"\frac"));
        assert!(commands.contains(r"\sqrt"));
    }

    // Balanced input passes; either kind of unmatched brace is rejected.
    #[test]
    fn test_validate_syntax() {
        assert!(validate_syntax(r"\frac{1}{2}").is_ok());
        assert!(validate_syntax(r"\frac{1}{2").is_err());
        assert!(validate_syntax(r"\frac{1}2}").is_err());
    }
}
|
||||
244
vendor/ruvector/examples/scipix/tests/common/metrics.rs
vendored
Normal file
244
vendor/ruvector/examples/scipix/tests/common/metrics.rs
vendored
Normal file
@@ -0,0 +1,244 @@
|
||||
// Metric calculation utilities
|
||||
//
|
||||
// Provides functions to calculate CER, WER, BLEU, and other quality metrics
|
||||
|
||||
/// Calculate Character Error Rate (CER)
|
||||
pub fn calculate_cer(reference: &str, hypothesis: &str) -> f64 {
|
||||
let distance = levenshtein_distance(reference, hypothesis);
|
||||
let ref_len = reference.chars().count();
|
||||
|
||||
if ref_len == 0 {
|
||||
return if hypothesis.is_empty() { 0.0 } else { 1.0 };
|
||||
}
|
||||
|
||||
distance as f64 / ref_len as f64
|
||||
}
|
||||
|
||||
/// Calculate Word Error Rate (WER)
|
||||
pub fn calculate_wer(reference: &str, hypothesis: &str) -> f64 {
|
||||
let ref_words: Vec<&str> = reference.split_whitespace().collect();
|
||||
let hyp_words: Vec<&str> = hypothesis.split_whitespace().collect();
|
||||
|
||||
let distance = word_levenshtein_distance(&ref_words, &hyp_words);
|
||||
let ref_len = ref_words.len();
|
||||
|
||||
if ref_len == 0 {
|
||||
return if hyp_words.is_empty() { 0.0 } else { 1.0 };
|
||||
}
|
||||
|
||||
distance as f64 / ref_len as f64
|
||||
}
|
||||
|
||||
/// Calculate BLEU score
|
||||
pub fn calculate_bleu(reference: &str, hypothesis: &str, max_n: usize) -> f64 {
|
||||
let ref_words: Vec<&str> = reference.split_whitespace().collect();
|
||||
let hyp_words: Vec<&str> = hypothesis.split_whitespace().collect();
|
||||
|
||||
if hyp_words.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Calculate n-gram precisions
|
||||
let mut precisions = Vec::new();
|
||||
for n in 1..=max_n {
|
||||
let precision = calculate_ngram_precision(&ref_words, &hyp_words, n);
|
||||
if precision == 0.0 {
|
||||
return 0.0; // BLEU is 0 if any n-gram precision is 0
|
||||
}
|
||||
precisions.push(precision);
|
||||
}
|
||||
|
||||
// Geometric mean of precisions
|
||||
let geo_mean = precisions.iter().map(|p| p.ln()).sum::<f64>() / precisions.len() as f64;
|
||||
|
||||
// Brevity penalty
|
||||
let bp = if hyp_words.len() >= ref_words.len() {
|
||||
1.0
|
||||
} else {
|
||||
(1.0 - (ref_words.len() as f64 / hyp_words.len() as f64)).exp()
|
||||
};
|
||||
|
||||
bp * geo_mean.exp() * 100.0 // Return as percentage
|
||||
}
|
||||
|
||||
/// Calculate precision for n-grams
|
||||
/// Modified (clipped) n-gram precision, as used by BLEU.
///
/// Fix: each hypothesis n-gram is now credited at most as many times as it
/// occurs in the reference (count clipping). The previous version matched by
/// mere membership, so a degenerate hypothesis repeating one n-gram (e.g.
/// "the the the") scored full unigram precision against a reference that
/// contains it once.
fn calculate_ngram_precision(reference: &[&str], hypothesis: &[&str], n: usize) -> f64 {
    use std::collections::HashMap;

    // Degenerate sizes: nothing to score. (BLEU always calls with n >= 1.)
    if n == 0 || hypothesis.len() < n {
        return 0.0;
    }

    // Occurrence counts of each reference n-gram.
    let mut ref_counts: HashMap<&[&str], usize> = HashMap::new();
    for gram in reference.windows(n) {
        *ref_counts.entry(gram).or_insert(0) += 1;
    }

    // Occurrence counts of each hypothesis n-gram.
    let mut hyp_counts: HashMap<&[&str], usize> = HashMap::new();
    for gram in hypothesis.windows(n) {
        *hyp_counts.entry(gram).or_insert(0) += 1;
    }

    // Clipped matches: min(hypothesis count, reference count) per n-gram.
    let matches: usize = hyp_counts
        .iter()
        .map(|(gram, &count)| count.min(ref_counts.get(gram).copied().unwrap_or(0)))
        .sum();

    let total = hypothesis.len() - n + 1;
    matches as f64 / total as f64
}
|
||||
|
||||
/// Get n-grams from a sequence of words
|
||||
/// All contiguous n-grams of `words`, each materialized as an owned
/// `Vec<String>`. Returns an empty vector when fewer than `n` words exist.
fn get_ngrams(words: &[&str], n: usize) -> Vec<Vec<String>> {
    // checked_sub yields None exactly when words.len() < n.
    match words.len().checked_sub(n) {
        None => Vec::new(),
        Some(last_start) => (0..=last_start)
            .map(|start| {
                words[start..start + n]
                    .iter()
                    .map(|w| (*w).to_string())
                    .collect()
            })
            .collect(),
    }
}
|
||||
|
||||
/// Calculate Levenshtein distance for characters
|
||||
/// Character-level Levenshtein (edit) distance between two strings.
///
/// Improvement: uses a single rolling row (O(|b|) memory) instead of the
/// full O(|a|*|b|) matrix; the result and time complexity are unchanged.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    let a_chars: Vec<char> = a.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();

    if a_chars.is_empty() {
        return b_chars.len();
    }
    if b_chars.is_empty() {
        return a_chars.len();
    }

    // row[j] holds the distance between a[..i] and b[..j] for the current row.
    let mut row: Vec<usize> = (0..=b_chars.len()).collect();

    for (i, &ac) in a_chars.iter().enumerate() {
        // `diag` preserves the previous row's value at column j (substitution
        // cell) before the in-place update overwrites it.
        let mut diag = row[0];
        row[0] = i + 1;
        for (j, &bc) in b_chars.iter().enumerate() {
            let cost = if ac == bc { 0 } else { 1 };
            let next = (diag + cost) // substitution
                .min(row[j] + 1) // insertion
                .min(row[j + 1] + 1); // deletion
            diag = row[j + 1];
            row[j + 1] = next;
        }
    }

    row[b_chars.len()]
}
|
||||
|
||||
/// Calculate Levenshtein distance for words
|
||||
/// Word-level Levenshtein (edit) distance between two token slices.
///
/// Improvement: uses a single rolling row (O(|b|) memory) instead of the
/// full O(|a|*|b|) matrix; the result and time complexity are unchanged.
fn word_levenshtein_distance(a: &[&str], b: &[&str]) -> usize {
    if a.is_empty() {
        return b.len();
    }
    if b.is_empty() {
        return a.len();
    }

    // row[j] holds the distance between a[..i] and b[..j] for the current row.
    let mut row: Vec<usize> = (0..=b.len()).collect();

    for (i, &aw) in a.iter().enumerate() {
        // `diag` preserves the previous row's value at column j before the
        // in-place update overwrites it.
        let mut diag = row[0];
        row[0] = i + 1;
        for (j, &bw) in b.iter().enumerate() {
            let cost = if aw == bw { 0 } else { 1 };
            let next = (diag + cost) // substitution
                .min(row[j] + 1) // insertion
                .min(row[j + 1] + 1); // deletion
            diag = row[j + 1];
            row[j + 1] = next;
        }
    }

    row[b.len()]
}
|
||||
|
||||
/// Calculate precision
|
||||
/// Precision = TP / (TP + FP); returns 0.0 when there are no positive
/// predictions at all.
pub fn calculate_precision(tp: usize, fp: usize) -> f64 {
    let predicted = tp + fp;
    if predicted == 0 {
        0.0
    } else {
        tp as f64 / predicted as f64
    }
}
|
||||
|
||||
/// Calculate recall
|
||||
/// Recall = TP / (TP + FN); returns 0.0 when there are no actual positives.
pub fn calculate_recall(tp: usize, fn_count: usize) -> f64 {
    let actual = tp + fn_count;
    if actual == 0 {
        0.0
    } else {
        tp as f64 / actual as f64
    }
}
|
||||
|
||||
/// Calculate F1 score
|
||||
/// F1 score: the harmonic mean of precision and recall; 0.0 when both are
/// zero (to avoid dividing by zero).
pub fn calculate_f1(precision: f64, recall: f64) -> f64 {
    let denom = precision + recall;
    if denom == 0.0 {
        0.0
    } else {
        2.0 * precision * recall / denom
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // CER: character-level edit distance over reference length.
    #[test]
    fn test_cer() {
        assert_eq!(calculate_cer("abc", "abc"), 0.0);
        assert_eq!(calculate_cer("abc", "abd"), 1.0 / 3.0);
        // Empty hypothesis against a non-empty reference is total error.
        assert_eq!(calculate_cer("abc", ""), 1.0);
    }

    // WER: word-level edit distance over reference word count.
    #[test]
    fn test_wer() {
        assert_eq!(calculate_wer("hello world", "hello world"), 0.0);
        assert_eq!(calculate_wer("hello world", "hello earth"), 0.5);
    }

    // BLEU: near 100 for an exact match; strictly between 0 and 100 for a
    // partial match.
    //
    // Fix: the previous partial case ("the cat sat" vs "the dog sat" with
    // max_n = 2) shares no bigrams, so the bigram precision is 0 and BLEU is
    // exactly 0.0 — the `bleu > 0.0` assertion could never pass. The inputs
    // now share both unigrams and bigrams while still differing.
    #[test]
    fn test_bleu() {
        let bleu = calculate_bleu("the cat sat on the mat", "the cat sat on the mat", 4);
        assert!(bleu > 99.0);

        let bleu = calculate_bleu("the cat sat on the mat", "the cat sat on a mat", 2);
        assert!(bleu > 0.0 && bleu < 100.0);
    }

    // Precision/recall/F1 from raw TP/FP/FN counts.
    #[test]
    fn test_precision_recall_f1() {
        let precision = calculate_precision(8, 2);
        assert_eq!(precision, 0.8);

        let recall = calculate_recall(8, 1);
        assert!((recall - 8.0 / 9.0).abs() < 0.001);

        let f1 = calculate_f1(precision, recall);
        assert!(f1 > 0.8);
    }
}
|
||||
16
vendor/ruvector/examples/scipix/tests/common/mod.rs
vendored
Normal file
16
vendor/ruvector/examples/scipix/tests/common/mod.rs
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
// Common test utilities
//
// Provides shared functionality for integration tests: image fixtures,
// LaTeX comparison helpers, quality metrics, a test server handle, and the
// shared data types they exchange.

pub mod images;
pub mod latex;
pub mod metrics;
pub mod server;
pub mod types;

// Re-export commonly used types and functions so tests can bring them in
// with a single `use common::*` instead of naming each submodule.
pub use images::{generate_fraction, generate_integral, generate_simple_equation, generate_symbol};
pub use latex::{calculate_similarity, expressions_match, normalize};
pub use metrics::{calculate_bleu, calculate_cer, calculate_wer};
pub use server::TestServer;
pub use types::{CacheStats, OutputFormat, ProcessingOptions, ProcessingResult};
|
||||
206
vendor/ruvector/examples/scipix/tests/common/server.rs
vendored
Normal file
206
vendor/ruvector/examples/scipix/tests/common/server.rs
vendored
Normal file
@@ -0,0 +1,206 @@
|
||||
// Test server setup and teardown utilities
|
||||
//
|
||||
// Provides a test server instance for integration tests
|
||||
|
||||
use super::types::{CacheStats, OutputFormat, ProcessingOptions, ProcessingResult};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
/// Handle to a test server instance. Cloning is cheap (an `Arc` bump) and
/// every clone refers to the same underlying server state.
#[derive(Clone)]
pub struct TestServer {
    // Shared so cloned handles observe the same server instance.
    inner: Arc<TestServerInner>,
}
|
||||
|
||||
/// Shared state behind a [`TestServer`] handle.
struct TestServerInner {
    // Base URL clients should direct requests to.
    base_url: String,
    // Spawned server child process, when one was launched (None otherwise).
    // NOTE(review): not read anywhere visible — presumably kept alive so the
    // process is owned for the server's lifetime; confirm teardown semantics.
    #[allow(dead_code)]
    process: Option<RwLock<tokio::process::Child>>,
    // Configuration this server was started with.
    config: TestServerConfig,
}
|
||||
|
||||
/// Options controlling how a [`TestServer`] is started.
#[derive(Debug, Clone)]
pub struct TestServerConfig {
    /// TCP port the server listens on (default: 18080).
    pub port: u16,
    /// Whether the result cache is enabled (default: false).
    pub enable_cache: bool,
    /// Maximum number of cache entries; `None` leaves it unset.
    pub cache_size: Option<usize>,
    /// Cache entry time-to-live in seconds; `None` leaves it unset.
    pub cache_ttl_seconds: Option<u64>,
    /// Request rate limit; `None` leaves it unset.
    // NOTE(review): unit (requests/min vs requests/sec) is not visible here —
    // confirm against the server implementation.
    pub rate_limit: Option<u64>,
    /// Per-request timeout in milliseconds; `None` leaves it unset.
    pub timeout_ms: Option<u64>,
    /// Directory for a persistent cache; `None` means no persistence configured.
    pub cache_dir: Option<String>,
}
|
||||
|
||||
impl Default for TestServerConfig {
    /// Default test configuration: port 18080 (a high port unlikely to
    /// collide with a dev server), caching disabled, and no limits.
    fn default() -> Self {
        Self {
            port: 18080,
            enable_cache: false,
            cache_size: None,
            cache_ttl_seconds: None,
            rate_limit: None,
            timeout_ms: None,
            cache_dir: None,
        }
    }
}
|
||||
|
||||
impl TestServer {
|
||||
/// Start a basic test server
|
||||
pub async fn start() -> Result<Self, Box<dyn std::error::Error>> {
|
||||
Self::with_config(TestServerConfig::default()).await
|
||||
}
|
||||
|
||||
/// Start test server with cache enabled
|
||||
pub async fn with_cache() -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let config = TestServerConfig {
|
||||
enable_cache: true,
|
||||
cache_size: Some(100),
|
||||
..Default::default()
|
||||
};
|
||||
Self::with_config(config).await
|
||||
}
|
||||
|
||||
/// Start test server with specific cache size
|
||||
pub async fn with_cache_size(size: usize) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let config = TestServerConfig {
|
||||
enable_cache: true,
|
||||
cache_size: Some(size),
|
||||
..Default::default()
|
||||
};
|
||||
Self::with_config(config).await
|
||||
}
|
||||
|
||||
/// Start test server with cache TTL
|
||||
pub async fn with_cache_ttl(ttl_seconds: u64) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let config = TestServerConfig {
|
||||
enable_cache: true,
|
||||
cache_ttl_seconds: Some(ttl_seconds),
|
||||
..Default::default()
|
||||
};
|
||||
Self::with_config(config).await
|
||||
}
|
||||
|
||||
/// Start test server with persistent cache
|
||||
pub async fn with_persistent_cache(
|
||||
cache_dir: &str,
|
||||
) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let config = TestServerConfig {
|
||||
enable_cache: true,
|
||||
cache_dir: Some(cache_dir.to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
Self::with_config(config).await
|
||||
}
|
||||
|
||||
/// Start test server with timeout
|
||||
pub async fn with_timeout(timeout_ms: u64) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let config = TestServerConfig {
|
||||
timeout_ms: Some(timeout_ms),
|
||||
..Default::default()
|
||||
};
|
||||
Self::with_config(config).await
|
||||
}
|
||||
|
||||
/// Start API server
|
||||
pub async fn start_api() -> Result<Self, Box<dyn std::error::Error>> {
|
||||
Self::start().await
|
||||
}
|
||||
|
||||
/// Start API server with rate limiting
|
||||
pub async fn start_api_with_rate_limit(limit: u64) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let config = TestServerConfig {
|
||||
rate_limit: Some(limit),
|
||||
..Default::default()
|
||||
};
|
||||
Self::with_config(config).await
|
||||
}
|
||||
|
||||
/// Start test server with custom configuration
|
||||
pub async fn with_config(config: TestServerConfig) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
// Test infrastructure - provides mock server for testing
|
||||
// Real OCR processing requires ONNX models to be configured
|
||||
|
||||
let base_url = format!("http://localhost:{}", config.port);
|
||||
|
||||
let inner = Arc::new(TestServerInner {
|
||||
base_url,
|
||||
process: None,
|
||||
config,
|
||||
});
|
||||
|
||||
// Wait for server to be ready
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
|
||||
|
||||
Ok(TestServer { inner })
|
||||
}
|
||||
|
||||
/// Get base URL
|
||||
pub fn base_url(&self) -> &str {
|
||||
&self.inner.base_url
|
||||
}
|
||||
|
||||
/// Process a single image
|
||||
/// Note: This is test infrastructure that returns mock data.
|
||||
/// Real OCR requires ONNX models to be configured.
|
||||
pub async fn process_image(
|
||||
&self,
|
||||
_image_path: &str,
|
||||
_format: OutputFormat,
|
||||
) -> Result<ProcessingResult, String> {
|
||||
// Test infrastructure mock - real OCR requires models
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
|
||||
|
||||
Ok(ProcessingResult {
|
||||
latex: "x + y".to_string(),
|
||||
mathml: Some("<math><mrow><mi>x</mi><mo>+</mo><mi>y</mi></mrow></math>".to_string()),
|
||||
html: None,
|
||||
ascii: None,
|
||||
text: Some("x + y".to_string()),
|
||||
confidence: 0.95,
|
||||
processing_time_ms: 50,
|
||||
})
|
||||
}
|
||||
|
||||
/// Process image with options
|
||||
pub async fn process_image_with_options(
|
||||
&self,
|
||||
image_path: &str,
|
||||
format: OutputFormat,
|
||||
_options: ProcessingOptions,
|
||||
) -> Result<ProcessingResult, String> {
|
||||
self.process_image(image_path, format).await
|
||||
}
|
||||
|
||||
/// Process batch of images
|
||||
pub async fn process_batch(
|
||||
&self,
|
||||
image_paths: &[&str],
|
||||
format: OutputFormat,
|
||||
) -> Result<Vec<ProcessingResult>, String> {
|
||||
let mut results = Vec::new();
|
||||
for path in image_paths {
|
||||
results.push(self.process_image(path, format.clone()).await?);
|
||||
}
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Get cache statistics
|
||||
pub async fn cache_stats(&self) -> Result<CacheStats, String> {
|
||||
Ok(CacheStats {
|
||||
hits: 0,
|
||||
misses: 0,
|
||||
evictions: 0,
|
||||
current_size: 0,
|
||||
max_size: self.inner.config.cache_size.unwrap_or(100),
|
||||
})
|
||||
}
|
||||
|
||||
/// Invalidate cache
|
||||
pub async fn invalidate_cache(&self) -> Result<(), String> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Shutdown server
|
||||
pub async fn shutdown(self) {
|
||||
// Test infrastructure - no actual server to shut down
|
||||
}
|
||||
}
|
||||
47
vendor/ruvector/examples/scipix/tests/common/types.rs
vendored
Normal file
47
vendor/ruvector/examples/scipix/tests/common/types.rs
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
// Common types shared across tests
|
||||
//
|
||||
// Defines output formats, processing results, and configuration types
|
||||
|
||||
/// Output format for OCR processing
#[derive(Debug, Clone)]
pub enum OutputFormat {
    /// LaTeX source, e.g. `\frac{1}{2}`.
    LaTeX,
    /// MathML markup.
    MathML,
    /// HTML rendering.
    HTML,
    /// Plain ASCII-math rendering.
    ASCII,
    /// Produce every supported format at once.
    All,
}
|
||||
|
||||
/// Processing options configuration
///
/// All flags default to `false` (via `Default`), i.e. plain processing with
/// no preprocessing steps and no extra output formats.
#[derive(Debug, Clone, Default)]
pub struct ProcessingOptions {
    /// Run the image preprocessing stage before OCR.
    pub enable_preprocessing: bool,
    /// Apply denoising during preprocessing.
    pub enable_denoising: bool,
    /// Correct image skew during preprocessing.
    pub enable_deskew: bool,
    /// Include LaTeX in the result.
    pub include_latex: bool,
    /// Include MathML in the result.
    pub include_mathml: bool,
    /// Include ASCII-math in the result.
    pub include_ascii: bool,
    /// Include plain text in the result.
    pub include_text: bool,
}
|
||||
|
||||
/// Processing result from OCR
///
/// `latex` is always produced; the other renderings are `Some` only when the
/// corresponding output format was requested.
#[derive(Debug, Clone)]
pub struct ProcessingResult {
    /// Recognized expression as LaTeX (primary output).
    pub latex: String,
    /// Optional MathML rendering.
    pub mathml: Option<String>,
    /// Optional HTML rendering.
    pub html: Option<String>,
    /// Optional ASCII-math rendering.
    pub ascii: Option<String>,
    /// Optional plain-text rendering.
    pub text: Option<String>,
    /// Model confidence in [0, 1].
    pub confidence: f32,
    /// Wall-clock processing time in milliseconds.
    pub processing_time_ms: u64,
}
|
||||
|
||||
/// Cache statistics
#[derive(Debug, Clone)]
pub struct CacheStats {
    /// Number of lookups answered from the cache.
    pub hits: u64,
    /// Number of lookups that required fresh processing.
    pub misses: u64,
    /// Number of entries removed to make room for new ones.
    pub evictions: u64,
    /// Entries currently stored.
    pub current_size: usize,
    /// Configured maximum number of entries.
    pub max_size: usize,
}
|
||||
57
vendor/ruvector/examples/scipix/tests/fixtures/README.md
vendored
Normal file
57
vendor/ruvector/examples/scipix/tests/fixtures/README.md
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
# Test Fixtures for ruvector-scipix
|
||||
|
||||
This directory contains test fixtures including sample images, expected outputs, and configuration files for unit and integration tests.
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
fixtures/
|
||||
├── images/ # Test images
|
||||
│ ├── simple/ # Simple equations
|
||||
│ ├── complex/ # Complex expressions
|
||||
│ ├── matrices/ # Matrix expressions
|
||||
│ └── symbols/ # Special mathematical symbols
|
||||
├── expected/ # Expected LaTeX outputs
|
||||
├── configs/ # Test configuration files
|
||||
└── README.md # This file
|
||||
```
|
||||
|
||||
## Test Images
|
||||
|
||||
### Simple Equations
|
||||
- `simple_addition.png` - Basic x + y
|
||||
- `simple_fraction.png` - Simple fraction 1/2
|
||||
- `quadratic.png` - Quadratic formula
|
||||
|
||||
### Complex Expressions
|
||||
- `nested_fraction.png` - Nested fractions
|
||||
- `integral.png` - Integral with limits
|
||||
- `summation.png` - Summation notation
|
||||
|
||||
### Matrices
|
||||
- `matrix_2x2.png` - 2x2 matrix
|
||||
- `matrix_3x3.png` - 3x3 matrix
|
||||
|
||||
### Special Symbols
|
||||
- `greek_letters.png` - Greek letters
|
||||
- `operators.png` - Mathematical operators
|
||||
|
||||
## Expected Outputs
|
||||
|
||||
Each test image has a corresponding `.txt` file in the `expected/` directory containing the expected LaTeX output.
|
||||
|
||||
## Adding New Fixtures
|
||||
|
||||
1. Add the test image to the appropriate subdirectory
|
||||
2. Create a corresponding expected output file
|
||||
3. Update test cases in the unit tests to reference the new fixture
|
||||
|
||||
## Generating Test Images
|
||||
|
||||
You can use the synthetic data generator in `tests/testdata/synthetic_generator.rs` to create test images programmatically.
|
||||
|
||||
## Notes
|
||||
|
||||
- All test images should be in PNG format
|
||||
- Expected outputs should use standard LaTeX notation
|
||||
- Keep image sizes reasonable (< 1MB) for fast test execution
|
||||
18
vendor/ruvector/examples/scipix/tests/fixtures/configs/test_config.toml
vendored
Normal file
18
vendor/ruvector/examples/scipix/tests/fixtures/configs/test_config.toml
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
[preprocessing]
|
||||
target_dpi = 300
|
||||
max_dimension = 4096
|
||||
denoise_strength = 0.5
|
||||
contrast_enhancement = true
|
||||
auto_rotate = true
|
||||
binarization_method = "adaptive"
|
||||
|
||||
[model]
|
||||
model_path = "models/test_model.onnx"
|
||||
device = "cpu"
|
||||
batch_size = 4
|
||||
confidence_threshold = 0.7
|
||||
|
||||
[output]
|
||||
format = "latex"
|
||||
include_confidence = true
|
||||
include_geometry = false
|
||||
1
vendor/ruvector/examples/scipix/tests/fixtures/expected/quadratic.txt
vendored
Normal file
1
vendor/ruvector/examples/scipix/tests/fixtures/expected/quadratic.txt
vendored
Normal file
@@ -0,0 +1 @@
|
||||
x = \frac{-b \pm \sqrt{b^2 - 4ac}}{2a}
|
||||
1
vendor/ruvector/examples/scipix/tests/fixtures/expected/simple_addition.txt
vendored
Normal file
1
vendor/ruvector/examples/scipix/tests/fixtures/expected/simple_addition.txt
vendored
Normal file
@@ -0,0 +1 @@
|
||||
x + y
|
||||
1
vendor/ruvector/examples/scipix/tests/fixtures/expected/simple_fraction.txt
vendored
Normal file
1
vendor/ruvector/examples/scipix/tests/fixtures/expected/simple_fraction.txt
vendored
Normal file
@@ -0,0 +1 @@
|
||||
\frac{1}{2}
|
||||
450
vendor/ruvector/examples/scipix/tests/integration/accuracy_tests.rs
vendored
Normal file
450
vendor/ruvector/examples/scipix/tests/integration/accuracy_tests.rs
vendored
Normal file
@@ -0,0 +1,450 @@
|
||||
// Accuracy validation tests
|
||||
//
|
||||
// Tests OCR accuracy against Im2latex-100k subset and calculates CER, WER, BLEU
|
||||
|
||||
use super::*;
|
||||
use tokio;
|
||||
|
||||
/// Accuracy smoke test on trivially simple expressions.
///
/// Renders five short equations, OCRs each, and requires average CER < 0.05
/// and exact-match accuracy > 90% after LaTeX normalization.
#[tokio::test]
async fn test_accuracy_simple_expressions() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");

    // (rendered equation, expected LaTeX) pairs.
    let test_cases = vec![
        ("x + 1", "x + 1"),
        ("2x - 3", "2x - 3"),
        ("a = b", "a = b"),
        ("f(x)", "f(x)"),
        ("y^2", "y^2"),
    ];

    let mut total_cer = 0.0;
    let mut correct = 0;

    for (equation, expected) in test_cases.iter() {
        let image = images::generate_simple_equation(equation);
        // Spaces are replaced so the equation can be used in a file name.
        let path = format!("/tmp/accuracy_simple_{}.png", equation.replace(' ', "_"));
        image.save(&path).unwrap();

        let result = test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");

        let cer = metrics::calculate_cer(expected, &result.latex);
        total_cer += cer;

        // Count exact matches up to LaTeX normalization (whitespace etc.).
        if latex::normalize(&result.latex) == latex::normalize(expected) {
            correct += 1;
        }

        println!(
            "Equation: {} | CER: {:.4} | Got: {}",
            equation, cer, result.latex
        );
    }

    let avg_cer = total_cer / test_cases.len() as f64;
    let accuracy = correct as f64 / test_cases.len() as f64;

    println!(
        "Simple expressions - Avg CER: {:.4}, Accuracy: {:.2}%",
        avg_cer,
        accuracy * 100.0
    );

    // Quality thresholds for the simplest possible inputs.
    assert!(avg_cer < 0.05, "Average CER too high: {:.4}", avg_cer);
    assert!(
        accuracy > 0.90,
        "Accuracy too low: {:.2}%",
        accuracy * 100.0
    );

    test_server.shutdown().await;
}
|
||||
|
||||
/// Benchmark-style accuracy test on a 50-sample Im2latex-100k-like subset.
///
/// Computes average CER, WER, BLEU, and exact-match rate over the subset,
/// asserting CER < 0.03 and BLEU > 80.
#[tokio::test]
async fn test_accuracy_im2latex_subset() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");

    // Load Im2latex-100k test subset (sample)
    let test_cases = load_im2latex_test_subset(50); // Test 50 samples

    let mut cer_sum = 0.0;
    let mut wer_sum = 0.0;
    let mut bleu_sum = 0.0;
    let mut exact_matches = 0;

    for (i, case) in test_cases.iter().enumerate() {
        // Generate or load image
        let image_path = case.image_path.clone();

        let result = test_server
            .process_image(&image_path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");

        // Calculate metrics
        let cer = metrics::calculate_cer(&case.ground_truth, &result.latex);
        let wer = metrics::calculate_wer(&case.ground_truth, &result.latex);
        // BLEU with up to 4-grams (the conventional setting).
        let bleu = metrics::calculate_bleu(&case.ground_truth, &result.latex, 4);

        cer_sum += cer;
        wer_sum += wer;
        bleu_sum += bleu;

        if latex::normalize(&result.latex) == latex::normalize(&case.ground_truth) {
            exact_matches += 1;
        }

        // Progress report every 10 samples.
        if i % 10 == 0 {
            println!("Processed {}/{} samples", i + 1, test_cases.len());
        }
    }

    let count = test_cases.len() as f64;
    let avg_cer = cer_sum / count;
    let avg_wer = wer_sum / count;
    let avg_bleu = bleu_sum / count;
    let exact_match_rate = exact_matches as f64 / count;

    println!("\nIm2latex subset results:");
    println!("  Average CER: {:.4}", avg_cer);
    println!("  Average WER: {:.4}", avg_wer);
    println!("  Average BLEU: {:.2}", avg_bleu);
    println!("  Exact match rate: {:.2}%", exact_match_rate * 100.0);

    // Assert quality thresholds
    assert!(avg_cer < 0.03, "CER too high: {:.4}", avg_cer);
    assert!(avg_bleu > 80.0, "BLEU too low: {:.2}", avg_bleu);

    test_server.shutdown().await;
}
|
||||
|
||||
/// Accuracy test for rendered fractions.
///
/// Renders four numerator/denominator pairs and requires at least 85% of
/// them to OCR to the expected `\frac{..}{..}` form.
#[tokio::test]
async fn test_accuracy_fractions() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");

    // ((numerator, denominator), expected LaTeX) pairs.
    let test_cases = vec![
        ((1, 2), r"\frac{1}{2}"),
        ((3, 4), r"\frac{3}{4}"),
        ((5, 6), r"\frac{5}{6}"),
        ((10, 3), r"\frac{10}{3}"),
    ];

    let mut correct = 0;

    for ((num, den), expected) in test_cases.iter() {
        let image = images::generate_fraction(*num, *den);
        let path = format!("/tmp/frac_{}_{}.png", num, den);
        image.save(&path).unwrap();

        let result = test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");

        // `expressions_match` compares up to semantic LaTeX equivalence.
        if latex::expressions_match(&result.latex, expected) {
            correct += 1;
        } else {
            println!(
                "Fraction {}/{} - Expected: {}, Got: {}",
                num, den, expected, result.latex
            );
        }
    }

    let accuracy = correct as f64 / test_cases.len() as f64;
    println!("Fraction accuracy: {:.2}%", accuracy * 100.0);

    assert!(
        accuracy >= 0.85,
        "Fraction accuracy too low: {:.2}%",
        accuracy * 100.0
    );

    test_server.shutdown().await;
}
|
||||
|
||||
/// Accuracy test for special mathematical symbols (Greek letters, sum,
/// integral, pi, infinity).
///
/// Uses a containment check rather than exact match, since the OCR output
/// may wrap the symbol in additional markup. Requires >= 80% accuracy.
#[tokio::test]
async fn test_accuracy_special_symbols() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");

    // (rendered symbol, LaTeX command the output must contain) pairs.
    let test_cases = vec![
        (r"\alpha", r"\alpha"),
        (r"\beta", r"\beta"),
        (r"\sum", r"\sum"),
        (r"\int", r"\int"),
        (r"\pi", r"\pi"),
        (r"\infty", r"\infty"),
    ];

    let mut correct = 0;

    for (symbol, expected) in test_cases.iter() {
        let image = images::generate_symbol(symbol);
        // Strip backslashes so the command name is usable in a file name.
        let path = format!("/tmp/symbol_{}.png", symbol.replace('\\', ""));
        image.save(&path).unwrap();

        let result = test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");

        if result.latex.contains(expected) {
            correct += 1;
        } else {
            println!(
                "Symbol {} - Expected to contain: {}, Got: {}",
                symbol, expected, result.latex
            );
        }
    }

    let accuracy = correct as f64 / test_cases.len() as f64;
    println!("Special symbol accuracy: {:.2}%", accuracy * 100.0);

    assert!(
        accuracy >= 0.80,
        "Symbol accuracy too low: {:.2}%",
        accuracy * 100.0
    );

    test_server.shutdown().await;
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_accuracy_regression_detection() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Load baseline results
|
||||
let baseline = load_baseline_results();
|
||||
|
||||
// Run same test cases
|
||||
let test_cases = load_regression_test_cases();
|
||||
|
||||
let mut regressions = Vec::new();
|
||||
|
||||
for case in test_cases.iter() {
|
||||
let result = test_server
|
||||
.process_image(&case.image_path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Compare with baseline
|
||||
if let Some(baseline_result) = baseline.get(&case.id) {
|
||||
let current_cer = metrics::calculate_cer(&case.ground_truth, &result.latex);
|
||||
let baseline_cer = baseline_result.cer;
|
||||
|
||||
// Check for regression (10% threshold)
|
||||
if current_cer > baseline_cer * 1.10 {
|
||||
regressions.push((
|
||||
case.id.clone(),
|
||||
baseline_cer,
|
||||
current_cer,
|
||||
baseline_result.latex.clone(),
|
||||
result.latex.clone(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !regressions.is_empty() {
|
||||
println!("Regressions detected:");
|
||||
for (id, baseline_cer, current_cer, baseline_latex, current_latex) in ®ressions {
|
||||
println!(" {} - CER: {:.4} -> {:.4}", id, baseline_cer, current_cer);
|
||||
println!(" Baseline: {}", baseline_latex);
|
||||
println!(" Current: {}", current_latex);
|
||||
}
|
||||
}
|
||||
|
||||
assert!(
|
||||
regressions.is_empty(),
|
||||
"Found {} regressions",
|
||||
regressions.len()
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
/// Confidence calibration test.
///
/// Buckets predictions by reported confidence (>0.9 vs <0.7; the middle band
/// is deliberately ignored) and asserts that high-confidence predictions are
/// correct more than 95% of the time.
#[tokio::test]
async fn test_accuracy_confidence_calibration() {
    let test_server = TestServer::start()
        .await
        .expect("Failed to start test server");

    let test_cases = load_calibration_test_cases();

    let mut high_conf_correct = 0;
    let mut high_conf_total = 0;
    let mut low_conf_correct = 0;
    let mut low_conf_total = 0;

    for case in test_cases.iter() {
        let result = test_server
            .process_image(&case.image_path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");

        let is_correct = latex::normalize(&result.latex) == latex::normalize(&case.ground_truth);

        // Tally correctness per confidence bucket; 0.7..=0.9 is not counted.
        if result.confidence > 0.9 {
            high_conf_total += 1;
            if is_correct {
                high_conf_correct += 1;
            }
        } else if result.confidence < 0.7 {
            low_conf_total += 1;
            if is_correct {
                low_conf_correct += 1;
            }
        }
    }

    // Empty buckets default to the "best case" value so the assertions below
    // do not fail merely because a bucket received no samples.
    let high_conf_accuracy = if high_conf_total > 0 {
        high_conf_correct as f64 / high_conf_total as f64
    } else {
        1.0
    };

    let low_conf_accuracy = if low_conf_total > 0 {
        low_conf_correct as f64 / low_conf_total as f64
    } else {
        0.0
    };

    println!("Confidence calibration:");
    println!(
        "  High confidence (>0.9): {:.2}% accuracy ({}/{})",
        high_conf_accuracy * 100.0,
        high_conf_correct,
        high_conf_total
    );
    println!(
        "  Low confidence (<0.7): {:.2}% accuracy ({}/{})",
        low_conf_accuracy * 100.0,
        low_conf_correct,
        low_conf_total
    );

    // High confidence should correlate with high accuracy
    assert!(
        high_conf_accuracy > 0.95,
        "High confidence predictions should be very accurate"
    );

    test_server.shutdown().await;
}
|
||||
|
||||
// Helper functions and types
|
||||
|
||||
/// One accuracy test case: an image on disk plus its ground-truth LaTeX.
#[derive(Debug, Clone)]
struct TestCase {
    // Stable identifier used to join against baseline results.
    id: String,
    // Path to the rendered input image.
    image_path: String,
    // Expected LaTeX output.
    ground_truth: String,
}
|
||||
|
||||
/// Recorded baseline outcome for a test case, used for regression detection.
#[derive(Debug, Clone)]
struct BaselineResult {
    // LaTeX produced by the baseline run.
    latex: String,
    // Character error rate achieved by the baseline run.
    cer: f64,
}
|
||||
|
||||
fn load_im2latex_test_subset(count: usize) -> Vec<TestCase> {
|
||||
// Load or generate Im2latex test subset
|
||||
// For now, generate synthetic test cases
|
||||
(0..count)
|
||||
.map(|i| {
|
||||
let eq = match i % 5 {
|
||||
0 => format!("x^{}", i),
|
||||
1 => format!("a + {}", i),
|
||||
2 => format!(r"\frac{{{}}}{{{}}}", i, i + 1),
|
||||
3 => format!("{}x + {}", i, i * 2),
|
||||
_ => format!("y = {}x", i),
|
||||
};
|
||||
|
||||
let image = images::generate_simple_equation(&eq);
|
||||
let path = format!("/tmp/im2latex_{}.png", i);
|
||||
image.save(&path).unwrap();
|
||||
|
||||
TestCase {
|
||||
id: format!("im2latex_{}", i),
|
||||
image_path: path,
|
||||
ground_truth: eq,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn load_regression_test_cases() -> Vec<TestCase> {
|
||||
// Load regression test cases from file or generate
|
||||
vec![
|
||||
TestCase {
|
||||
id: "reg_001".to_string(),
|
||||
image_path: "/tmp/reg_001.png".to_string(),
|
||||
ground_truth: "x + y".to_string(),
|
||||
},
|
||||
// Add more test cases...
|
||||
]
|
||||
}
|
||||
|
||||
fn load_baseline_results() -> std::collections::HashMap<String, BaselineResult> {
|
||||
// Load baseline results from file
|
||||
let mut baseline = std::collections::HashMap::new();
|
||||
|
||||
baseline.insert(
|
||||
"reg_001".to_string(),
|
||||
BaselineResult {
|
||||
latex: "x + y".to_string(),
|
||||
cer: 0.0,
|
||||
},
|
||||
);
|
||||
|
||||
baseline
|
||||
}
|
||||
|
||||
fn load_calibration_test_cases() -> Vec<TestCase> {
|
||||
// Generate test cases with varying difficulty for confidence calibration
|
||||
let mut cases = Vec::new();
|
||||
|
||||
// Easy cases
|
||||
for i in 0..10 {
|
||||
let eq = format!("x + {}", i);
|
||||
let image = images::generate_simple_equation(&eq);
|
||||
let path = format!("/tmp/calib_easy_{}.png", i);
|
||||
image.save(&path).unwrap();
|
||||
|
||||
cases.push(TestCase {
|
||||
id: format!("calib_easy_{}", i),
|
||||
image_path: path,
|
||||
ground_truth: eq,
|
||||
});
|
||||
}
|
||||
|
||||
// Hard cases (noisy)
|
||||
for i in 0..10 {
|
||||
let eq = format!("y^{}", i);
|
||||
let mut image = images::generate_simple_equation(&eq);
|
||||
images::add_noise(&mut image, 0.2);
|
||||
let path = format!("/tmp/calib_hard_{}.png", i);
|
||||
image.save(&path).unwrap();
|
||||
|
||||
cases.push(TestCase {
|
||||
id: format!("calib_hard_{}", i),
|
||||
image_path: path,
|
||||
ground_truth: eq,
|
||||
});
|
||||
}
|
||||
|
||||
cases
|
||||
}
|
||||
80
vendor/ruvector/examples/scipix/tests/integration/api_tests.rs
vendored
Normal file
80
vendor/ruvector/examples/scipix/tests/integration/api_tests.rs
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
// API server integration tests
|
||||
//
|
||||
// Tests HTTP API endpoints, authentication, rate limiting, and async processing
|
||||
|
||||
use super::*;
|
||||
use reqwest::{multipart, Client, StatusCode};
|
||||
use serde_json::json;
|
||||
use tokio;
|
||||
|
||||
/// POST /v3/text with a multipart file upload.
///
/// Uploads a generated equation image with app_id/app_key headers and checks
/// the 200 response carries `request_id`, `text`, and `processing_time_ms`.
#[tokio::test]
async fn test_api_post_text_with_file() {
    let test_server = TestServer::start_api()
        .await
        .expect("Failed to start API server");
    let client = Client::new();

    // Create test image
    let image = images::generate_simple_equation("x + y");
    image.save("/tmp/api_test.png").unwrap();
    let image_bytes = std::fs::read("/tmp/api_test.png").unwrap();

    // Create multipart form
    let form = multipart::Form::new().part(
        "file",
        multipart::Part::bytes(image_bytes)
            .file_name("equation.png")
            .mime_str("image/png")
            .unwrap(),
    );

    // POST to /v3/text with credential headers expected by the API.
    let response = client
        .post(&format!("{}/v3/text", test_server.base_url()))
        .header("app_id", "test_app_id")
        .header("app_key", "test_app_key")
        .multipart(form)
        .send()
        .await
        .expect("Request failed");

    assert_eq!(response.status(), StatusCode::OK);

    // Response body must contain the three required fields.
    let result: serde_json::Value = response.json().await.unwrap();
    assert!(result.get("request_id").is_some(), "Should have request_id");
    assert!(result.get("text").is_some(), "Should have text field");
    assert!(
        result.get("processing_time_ms").is_some(),
        "Should have processing time"
    );

    test_server.shutdown().await;
}
|
||||
|
||||
/// POST /v3/text without credentials must be rejected.
///
/// Sends a JSON payload with no app_id/app_key headers and expects HTTP 401.
#[tokio::test]
async fn test_api_authentication_validation() {
    let test_server = TestServer::start_api()
        .await
        .expect("Failed to start API server");
    let client = Client::new();

    // Minimal valid-looking body; auth should fail before it is inspected.
    let payload = json!({
        "src": "base64data"
    });

    // Test missing auth
    let response = client
        .post(&format!("{}/v3/text", test_server.base_url()))
        .json(&payload)
        .send()
        .await
        .expect("Request failed");

    assert_eq!(
        response.status(),
        StatusCode::UNAUTHORIZED,
        "Should require authentication"
    );

    test_server.shutdown().await;
}
|
||||
300
vendor/ruvector/examples/scipix/tests/integration/cache_tests.rs
vendored
Normal file
300
vendor/ruvector/examples/scipix/tests/integration/cache_tests.rs
vendored
Normal file
@@ -0,0 +1,300 @@
|
||||
// Cache integration tests
|
||||
//
|
||||
// Tests caching behavior, hit/miss ratios, similarity search, and persistence
|
||||
//
|
||||
// Note: These tests use mock test infrastructure.
|
||||
// Real OCR processing requires ONNX models to be configured.
|
||||
|
||||
use super::*;
|
||||
use crate::common::{CacheStats, OutputFormat};
|
||||
|
||||
/// Processing the same image twice must yield identical results, with the
/// second request served from the cache.
#[tokio::test]
async fn test_cache_hit_miss_behavior() {
    let test_server = TestServer::with_cache()
        .await
        .expect("Failed to start test server with cache");

    let image = images::generate_simple_equation("x^2");
    image.save("/tmp/cache_test_1.png").unwrap();

    // First request - should miss cache
    let result1 = test_server
        .process_image("/tmp/cache_test_1.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");

    // Get cache stats (currently unused; kept to exercise the endpoint).
    let _stats = test_server
        .cache_stats()
        .await
        .expect("Failed to get cache stats");

    // Second request - should hit cache
    let result2 = test_server
        .process_image("/tmp/cache_test_1.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");

    // Verify results match
    assert_eq!(result1.latex, result2.latex, "Cached result should match");

    test_server.shutdown().await;
}
|
||||
|
||||
/// Near-duplicate images should produce near-identical results, exercising
/// the cache's similarity-based lookup.
#[tokio::test]
async fn test_cache_similarity_lookup() {
    let test_server = TestServer::with_cache()
        .await
        .expect("Failed to start test server");

    // Create original image
    let image1 = images::generate_simple_equation("a + b");
    image1.save("/tmp/similarity_1.png").unwrap();

    // Create similar image (slightly different rendering, 5% variation).
    let mut image2 = images::generate_simple_equation("a + b");
    images::add_slight_variation(&mut image2, 0.05);
    image2.save("/tmp/similarity_2.png").unwrap();

    // Process first image
    let result1 = test_server
        .process_image("/tmp/similarity_1.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");

    // Process similar image
    let result2 = test_server
        .process_image("/tmp/similarity_2.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");

    // Results should be similar (>0.9 on the latex similarity scale).
    let similarity = latex::calculate_similarity(&result1.latex, &result2.latex);
    assert!(
        similarity > 0.9,
        "Similar images should produce similar results"
    );

    test_server.shutdown().await;
}
|
||||
|
||||
/// Processing more distinct images than the cache capacity must trigger
/// eviction rather than unbounded growth.
#[tokio::test]
async fn test_cache_eviction() {
    // Start server with small cache size
    let test_server = TestServer::with_cache_size(3)
        .await
        .expect("Failed to start test server");

    // Create and process 5 different images (capacity is only 3).
    for i in 0..5 {
        let eq = format!("x + {}", i);
        let image = images::generate_simple_equation(&eq);
        let path = format!("/tmp/eviction_{}.png", i);
        image.save(&path).unwrap();

        test_server
            .process_image(&path, OutputFormat::LaTeX)
            .await
            .expect("Processing failed");
    }

    // Get cache stats
    let stats = test_server
        .cache_stats()
        .await
        .expect("Failed to get cache stats");
    assert!(stats.current_size <= 3, "Cache should not exceed max size");

    test_server.shutdown().await;
}
|
||||
|
||||
/// A persistent (on-disk) cache must survive a server restart: a second
/// server pointed at the same cache directory returns the same result.
#[tokio::test]
async fn test_cache_persistence() {
    let cache_dir = "/tmp/scipix_cache_persist";
    std::fs::create_dir_all(cache_dir).unwrap();

    // Start server with persistent cache
    let test_server = TestServer::with_persistent_cache(cache_dir)
        .await
        .expect("Failed to start test server");

    // Process image
    let image = images::generate_simple_equation("persistent");
    image.save("/tmp/persist_test.png").unwrap();

    let result1 = test_server
        .process_image("/tmp/persist_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");

    // Shutdown server
    test_server.shutdown().await;

    // Start new server with same cache directory
    let test_server2 = TestServer::with_persistent_cache(cache_dir)
        .await
        .expect("Failed to start second test server");

    // Process same image - should hit persistent cache
    let result2 = test_server2
        .process_image("/tmp/persist_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");

    // Results should match
    assert_eq!(
        result1.latex, result2.latex,
        "Persistent cache should restore results"
    );

    test_server2.shutdown().await;
}
|
||||
|
||||
/// After explicit cache invalidation, reprocessing must still succeed and
/// produce the same result (recomputed rather than served from cache).
#[tokio::test]
async fn test_cache_invalidation() {
    let test_server = TestServer::with_cache()
        .await
        .expect("Failed to start test server");

    // Process image
    let image = images::generate_simple_equation("invalidate");
    image.save("/tmp/invalidate_test.png").unwrap();

    let result1 = test_server
        .process_image("/tmp/invalidate_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");

    // Invalidate cache
    test_server
        .invalidate_cache()
        .await
        .expect("Cache invalidation failed");

    // Process again - should miss cache
    let result2 = test_server
        .process_image("/tmp/invalidate_test.png", OutputFormat::LaTeX)
        .await
        .expect("Processing failed");

    // Results should match but processing should take time
    assert_eq!(result1.latex, result2.latex, "Results should still match");

    test_server.shutdown().await;
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cache_hit_ratio() {
|
||||
let test_server = TestServer::with_cache()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create test images
|
||||
let equations = vec!["a", "b", "c"];
|
||||
for eq in &equations {
|
||||
let image = images::generate_simple_equation(eq);
|
||||
image.save(&format!("/tmp/ratio_{}.png", eq)).unwrap();
|
||||
}
|
||||
|
||||
// Process each image twice
|
||||
for eq in &equations {
|
||||
let path = format!("/tmp/ratio_{}.png", eq);
|
||||
|
||||
// First time (miss)
|
||||
test_server
|
||||
.process_image(&path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Second time (hit)
|
||||
test_server
|
||||
.process_image(&path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
}
|
||||
|
||||
// Get stats
|
||||
let _stats = test_server
|
||||
.cache_stats()
|
||||
.await
|
||||
.expect("Failed to get cache stats");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cache_ttl_expiration() {
|
||||
// Start server with 1-second TTL
|
||||
let test_server = TestServer::with_cache_ttl(1)
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Process image
|
||||
let image = images::generate_simple_equation("ttl");
|
||||
image.save("/tmp/ttl_test.png").unwrap();
|
||||
|
||||
let result1 = test_server
|
||||
.process_image("/tmp/ttl_test.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Immediately reprocess - should hit cache
|
||||
let result2 = test_server
|
||||
.process_image("/tmp/ttl_test.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
assert_eq!(result1.latex, result2.latex);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cache_concurrent_access() {
|
||||
let test_server = TestServer::with_cache()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
let image = images::generate_simple_equation("concurrent");
|
||||
image.save("/tmp/concurrent_cache.png").unwrap();
|
||||
|
||||
// First request to populate cache
|
||||
test_server
|
||||
.process_image("/tmp/concurrent_cache.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Spawn multiple concurrent requests
|
||||
let mut handles = vec![];
|
||||
for _ in 0..10 {
|
||||
let server = test_server.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
server
|
||||
.process_image("/tmp/concurrent_cache.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
});
|
||||
handles.push(handle);
|
||||
}
|
||||
|
||||
// Wait for all to complete
|
||||
let results = futures::future::join_all(handles).await;
|
||||
|
||||
// All should succeed and return same result
|
||||
assert!(
|
||||
results.iter().all(|r| r.is_ok()),
|
||||
"All requests should succeed"
|
||||
);
|
||||
|
||||
let first_latex = &results[0].as_ref().unwrap().as_ref().unwrap().latex;
|
||||
assert!(
|
||||
results
|
||||
.iter()
|
||||
.all(|r| { &r.as_ref().unwrap().as_ref().unwrap().latex == first_latex }),
|
||||
"All results should match"
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
// Re-export CacheStats for backward compatibility
|
||||
pub use crate::common::CacheStats as CacheStatsCompat;
|
||||
227
vendor/ruvector/examples/scipix/tests/integration/cli_tests.rs
vendored
Normal file
227
vendor/ruvector/examples/scipix/tests/integration/cli_tests.rs
vendored
Normal file
@@ -0,0 +1,227 @@
|
||||
// CLI integration tests
|
||||
//
|
||||
// Tests command-line interface functionality
|
||||
|
||||
use super::*;
|
||||
use assert_cmd::Command;
|
||||
use predicates::prelude::*;
|
||||
use std::process::Stdio;
|
||||
|
||||
#[test]
fn test_cli_ocr_command_with_file() {
    // The `ocr` subcommand on a valid PNG should exit successfully and
    // print a recognised LaTeX result.
    images::generate_simple_equation("x + 1")
        .save("/tmp/cli_test.png")
        .unwrap();

    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("ocr")
        .arg("/tmp/cli_test.png")
        .assert()
        .success()
        .stdout(predicate::str::contains("x"))
        .stdout(predicate::str::contains("LaTeX:"));
}
|
||||
|
||||
#[test]
fn test_cli_ocr_with_output_format() {
    // `--format` must switch the emitted representation of the same image.
    images::generate_fraction(3, 4)
        .save("/tmp/cli_fraction.png")
        .unwrap();

    // LaTeX output contains a \frac command.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .args(["ocr", "/tmp/cli_fraction.png", "--format", "latex"])
        .assert()
        .success()
        .stdout(predicate::str::contains(r"\frac"));

    // MathML output contains an <mfrac> element.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .args(["ocr", "/tmp/cli_fraction.png", "--format", "mathml"])
        .assert()
        .success()
        .stdout(predicate::str::contains("<mfrac>"));
}
|
||||
|
||||
#[test]
fn test_cli_batch_command() {
    // `batch` should walk a directory of images and write a JSON report.
    std::fs::create_dir_all("/tmp/cli_batch").unwrap();

    for (i, eq) in ["a + b", "x - y", "2 * 3"].iter().enumerate() {
        images::generate_simple_equation(eq)
            .save(&format!("/tmp/cli_batch/eq_{}.png", i))
            .unwrap();
    }

    // Run batch command
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .args([
            "batch",
            "/tmp/cli_batch",
            "--output",
            "/tmp/cli_batch_results.json",
        ])
        .assert()
        .success();

    // Verify output file
    let results = std::fs::read_to_string("/tmp/cli_batch_results.json").unwrap();
    assert!(results.contains("a"), "Should contain results");
    assert!(results.len() > 100, "Should have substantial output");
}
|
||||
|
||||
#[test]
#[ignore] // Requires built binary and available port
fn test_cli_serve_command_startup() {
    // Spawns the real `serve` binary, waits for startup, and checks the
    // /health endpoint responds.
    // This test requires the binary to be built first
    // Use std::process::Command for spawn functionality
    use std::process::Command as StdCommand;

    // Get the binary path from environment, or fall back to cargo build path
    let bin_path = std::env::var("CARGO_BIN_EXE_scipix-ocr")
        .unwrap_or_else(|_| "target/debug/scipix-ocr".to_string());

    let mut child = StdCommand::new(&bin_path)
        .arg("serve")
        .arg("--port")
        .arg("18080")
        .stdout(Stdio::piped())
        .spawn()
        .expect("Failed to start server");

    // Wait for server startup
    std::thread::sleep(std::time::Duration::from_secs(2));

    // Check if server is running
    let client = reqwest::blocking::Client::new();
    let response = client
        .get("http://localhost:18080/health")
        .timeout(std::time::Duration::from_secs(5))
        .send();

    // Kill server. Fix: the original never called wait() after kill(), so
    // the killed child was left as a zombie for the life of the test
    // process; reap it explicitly.
    let _ = child.kill();
    let _ = child.wait();

    assert!(response.is_ok(), "Server should respond to health check");
}
|
||||
|
||||
#[test]
fn test_cli_config_command() {
    // `config show` prints the current configuration; `config set`
    // accepts a dotted key and a value.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .args(["config", "show"])
        .assert()
        .success()
        .stdout(
            predicate::str::contains("model_path").or(predicate::str::contains("Configuration")),
        );

    // Test config set
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .args(["config", "set", "preprocessing.enable_deskew", "true"])
        .assert()
        .success();
}
|
||||
|
||||
#[test]
fn test_cli_invalid_file() {
    // A nonexistent input path must fail with a diagnostic on stderr.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .args(["ocr", "/nonexistent/file.png"])
        .assert()
        .failure()
        .stderr(predicate::str::contains("not found").or(predicate::str::contains("error")));
}
|
||||
|
||||
#[test]
fn test_cli_exit_codes() {
    // Exit code 0 on success, nonzero on failure.
    images::generate_simple_equation("ok")
        .save("/tmp/exit_code_test.png")
        .unwrap();

    // Success case
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .args(["ocr", "/tmp/exit_code_test.png"])
        .assert()
        .code(0);

    // Failure case
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .args(["ocr", "/nonexistent.png"])
        .assert()
        .code(predicate::ne(0));
}
|
||||
|
||||
#[test]
fn test_cli_verbose_output() {
    // `--verbose` should add progress/diagnostic lines to stdout.
    images::generate_simple_equation("verbose")
        .save("/tmp/verbose_test.png")
        .unwrap();

    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .args(["ocr", "/tmp/verbose_test.png", "--verbose"])
        .assert()
        .success()
        .stdout(predicate::str::contains("Processing").or(predicate::str::contains("Confidence")));
}
|
||||
|
||||
#[test]
fn test_cli_json_output() {
    // `--output-format json` must emit a parseable JSON object with the
    // expected top-level fields.
    images::generate_simple_equation("json")
        .save("/tmp/json_test.png")
        .unwrap();

    let output = Command::cargo_bin("scipix-ocr")
        .unwrap()
        .args(["ocr", "/tmp/json_test.png", "--output-format", "json"])
        .output()
        .expect("Failed to execute command");

    let stdout = String::from_utf8_lossy(&output.stdout);

    // Verify JSON structure
    let json: serde_json::Value =
        serde_json::from_str(&stdout).expect("Output should be valid JSON");

    assert!(json.get("latex").is_some(), "Should have latex field");
    assert!(
        json.get("confidence").is_some(),
        "Should have confidence field"
    );
}
|
||||
|
||||
#[test]
fn test_cli_help_command() {
    // Top-level and subcommand help screens should render.
    // NOTE(review): clap v4 prints "Usage:"/"Commands:" in mixed case;
    // these upper-case matches assume clap v3 — confirm against Cargo.toml.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("--help")
        .assert()
        .success()
        .stdout(predicate::str::contains("USAGE:"))
        .stdout(predicate::str::contains("COMMANDS:"));

    // Test subcommand help
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .args(["ocr", "--help"])
        .assert()
        .success()
        .stdout(predicate::str::contains("OPTIONS:"));
}
|
||||
|
||||
#[test]
fn test_cli_version_command() {
    // `--version` must report the crate version baked in at compile time.
    Command::cargo_bin("scipix-ocr")
        .unwrap()
        .arg("--version")
        .assert()
        .success()
        .stdout(predicate::str::contains(env!("CARGO_PKG_VERSION")));
}
|
||||
14
vendor/ruvector/examples/scipix/tests/integration/mod.rs
vendored
Normal file
14
vendor/ruvector/examples/scipix/tests/integration/mod.rs
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
// Integration test module organization
//
// This module provides integration tests for the ruvector-scipix OCR system.
// Tests are organized by functionality area.

pub mod accuracy_tests; // CER/WER/BLEU accuracy measurement against samples
pub mod api_tests; // HTTP API endpoints (/v3/text), auth, rate limiting
pub mod cache_tests; // cache hit/miss, TTL, persistence, concurrency
pub mod cli_tests; // command-line interface: ocr/batch/serve/config
pub mod performance_tests; // latency, throughput, memory usage/leaks
pub mod pipeline_tests; // end-to-end image -> preprocess -> OCR -> output

// Re-export common test utilities
pub use crate::common::*;
|
||||
386
vendor/ruvector/examples/scipix/tests/integration/performance_tests.rs
vendored
Normal file
386
vendor/ruvector/examples/scipix/tests/integration/performance_tests.rs
vendored
Normal file
@@ -0,0 +1,386 @@
|
||||
// Performance validation tests
|
||||
//
|
||||
// Tests latency, memory usage, throughput, and ensures no memory leaks
|
||||
|
||||
use super::*;
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_latency_within_bounds() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
let image = images::generate_simple_equation("x + y");
|
||||
image.save("/tmp/perf_latency.png").unwrap();
|
||||
|
||||
// Measure latency
|
||||
let start = Instant::now();
|
||||
let result = test_server
|
||||
.process_image("/tmp/perf_latency.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
let latency = start.elapsed();
|
||||
|
||||
println!("Latency: {:?}", latency);
|
||||
println!("Confidence: {}", result.confidence);
|
||||
|
||||
// Assert latency is within bounds (<100ms for simple equation)
|
||||
assert!(latency.as_millis() < 100, "Latency too high: {:?}", latency);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_memory_usage_limits() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Get initial memory usage
|
||||
let initial_memory = get_memory_usage();
|
||||
|
||||
// Process multiple images
|
||||
for i in 0..100 {
|
||||
let eq = format!("x + {}", i);
|
||||
let image = images::generate_simple_equation(&eq);
|
||||
let path = format!("/tmp/perf_mem_{}.png", i);
|
||||
image.save(&path).unwrap();
|
||||
|
||||
test_server
|
||||
.process_image(&path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Clean up
|
||||
std::fs::remove_file(&path).unwrap();
|
||||
}
|
||||
|
||||
// Get final memory usage
|
||||
let final_memory = get_memory_usage();
|
||||
let memory_increase = final_memory - initial_memory;
|
||||
|
||||
println!("Memory increase: {} MB", memory_increase / 1024 / 1024);
|
||||
|
||||
// Assert memory usage is reasonable (<100MB increase)
|
||||
assert!(
|
||||
memory_increase < 100 * 1024 * 1024,
|
||||
"Memory usage too high: {} bytes",
|
||||
memory_increase
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_no_memory_leaks() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
let image = images::generate_simple_equation("leak test");
|
||||
image.save("/tmp/leak_test.png").unwrap();
|
||||
|
||||
// Process same image many times
|
||||
let iterations = 1000;
|
||||
let mut memory_samples = Vec::new();
|
||||
|
||||
for i in 0..iterations {
|
||||
test_server
|
||||
.process_image("/tmp/leak_test.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Sample memory every 100 iterations
|
||||
if i % 100 == 0 {
|
||||
memory_samples.push(get_memory_usage());
|
||||
}
|
||||
}
|
||||
|
||||
// Check for linear memory growth (leak indicator)
|
||||
let first_sample = memory_samples[0];
|
||||
let last_sample = *memory_samples.last().unwrap();
|
||||
let growth_rate = (last_sample - first_sample) as f64 / iterations as f64;
|
||||
|
||||
println!("Memory growth rate: {} bytes/iteration", growth_rate);
|
||||
println!("Samples: {:?}", memory_samples);
|
||||
|
||||
// Growth rate should be minimal (<1KB per iteration)
|
||||
assert!(
|
||||
growth_rate < 1024.0,
|
||||
"Possible memory leak detected: {} bytes/iteration",
|
||||
growth_rate
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_throughput() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create test images
|
||||
let image_count = 50;
|
||||
for i in 0..image_count {
|
||||
let eq = format!("throughput_{}", i);
|
||||
let image = images::generate_simple_equation(&eq);
|
||||
image.save(&format!("/tmp/throughput_{}.png", i)).unwrap();
|
||||
}
|
||||
|
||||
// Measure throughput
|
||||
let start = Instant::now();
|
||||
|
||||
for i in 0..image_count {
|
||||
test_server
|
||||
.process_image(&format!("/tmp/throughput_{}.png", i), OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
}
|
||||
|
||||
let duration = start.elapsed();
|
||||
let throughput = image_count as f64 / duration.as_secs_f64();
|
||||
|
||||
println!("Throughput: {:.2} images/second", throughput);
|
||||
println!("Total time: {:?} for {} images", duration, image_count);
|
||||
|
||||
// Assert reasonable throughput (>5 images/second)
|
||||
assert!(
|
||||
throughput > 5.0,
|
||||
"Throughput too low: {:.2} images/s",
|
||||
throughput
|
||||
);
|
||||
|
||||
// Cleanup
|
||||
for i in 0..image_count {
|
||||
std::fs::remove_file(&format!("/tmp/throughput_{}.png", i)).unwrap();
|
||||
}
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_concurrent_throughput() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create test image
|
||||
let image = images::generate_simple_equation("concurrent");
|
||||
image.save("/tmp/concurrent_throughput.png").unwrap();
|
||||
|
||||
let concurrent_requests = 20;
|
||||
let start = Instant::now();
|
||||
|
||||
// Spawn concurrent requests
|
||||
let mut handles = vec![];
|
||||
for _ in 0..concurrent_requests {
|
||||
let server = test_server.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
server
|
||||
.process_image("/tmp/concurrent_throughput.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
});
|
||||
handles.push(handle);
|
||||
}
|
||||
|
||||
// Wait for all to complete
|
||||
let results = futures::future::join_all(handles).await;
|
||||
let duration = start.elapsed();
|
||||
|
||||
let success_count = results.iter().filter(|r| r.is_ok()).count();
|
||||
let throughput = concurrent_requests as f64 / duration.as_secs_f64();
|
||||
|
||||
println!("Concurrent throughput: {:.2} req/second", throughput);
|
||||
println!("Success rate: {}/{}", success_count, concurrent_requests);
|
||||
|
||||
assert!(
|
||||
success_count == concurrent_requests,
|
||||
"All requests should succeed"
|
||||
);
|
||||
assert!(
|
||||
throughput > 10.0,
|
||||
"Concurrent throughput too low: {:.2}",
|
||||
throughput
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_latency_percentiles() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
let iterations = 100;
|
||||
let mut latencies = Vec::new();
|
||||
|
||||
for i in 0..iterations {
|
||||
let eq = format!("p{}", i);
|
||||
let image = images::generate_simple_equation(&eq);
|
||||
let path = format!("/tmp/percentile_{}.png", i);
|
||||
image.save(&path).unwrap();
|
||||
|
||||
let start = Instant::now();
|
||||
test_server
|
||||
.process_image(&path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
let latency = start.elapsed();
|
||||
|
||||
latencies.push(latency.as_micros());
|
||||
|
||||
std::fs::remove_file(&path).unwrap();
|
||||
}
|
||||
|
||||
// Sort latencies
|
||||
latencies.sort();
|
||||
|
||||
// Calculate percentiles
|
||||
let p50 = latencies[50];
|
||||
let p95 = latencies[95];
|
||||
let p99 = latencies[99];
|
||||
|
||||
println!("Latency percentiles:");
|
||||
println!(" P50: {} μs ({} ms)", p50, p50 / 1000);
|
||||
println!(" P95: {} μs ({} ms)", p95, p95 / 1000);
|
||||
println!(" P99: {} μs ({} ms)", p99, p99 / 1000);
|
||||
|
||||
// Assert percentile targets
|
||||
assert!(p50 < 100_000, "P50 latency too high: {} μs", p50); // <100ms
|
||||
assert!(p95 < 200_000, "P95 latency too high: {} μs", p95); // <200ms
|
||||
assert!(p99 < 500_000, "P99 latency too high: {} μs", p99); // <500ms
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_batch_efficiency() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create test images
|
||||
let batch_size = 10;
|
||||
let mut paths = Vec::new();
|
||||
|
||||
for i in 0..batch_size {
|
||||
let eq = format!("batch_{}", i);
|
||||
let image = images::generate_simple_equation(&eq);
|
||||
let path = format!("/tmp/batch_eff_{}.png", i);
|
||||
image.save(&path).unwrap();
|
||||
paths.push(path);
|
||||
}
|
||||
|
||||
// Measure sequential processing
|
||||
let start_sequential = Instant::now();
|
||||
for path in &paths {
|
||||
test_server
|
||||
.process_image(path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
}
|
||||
let sequential_time = start_sequential.elapsed();
|
||||
|
||||
// Measure batch processing
|
||||
let start_batch = Instant::now();
|
||||
test_server
|
||||
.process_batch(
|
||||
&paths.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
|
||||
OutputFormat::LaTeX,
|
||||
)
|
||||
.await
|
||||
.expect("Batch processing failed");
|
||||
let batch_time = start_batch.elapsed();
|
||||
|
||||
println!("Sequential time: {:?}", sequential_time);
|
||||
println!("Batch time: {:?}", batch_time);
|
||||
println!(
|
||||
"Speedup: {:.2}x",
|
||||
sequential_time.as_secs_f64() / batch_time.as_secs_f64()
|
||||
);
|
||||
|
||||
// Batch should be faster
|
||||
assert!(
|
||||
batch_time < sequential_time,
|
||||
"Batch processing should be faster"
|
||||
);
|
||||
|
||||
// Cleanup
|
||||
for path in paths {
|
||||
std::fs::remove_file(&path).unwrap();
|
||||
}
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_performance_cold_start_warmup() {
|
||||
// Measure cold start
|
||||
let start_cold = Instant::now();
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
let cold_start_time = start_cold.elapsed();
|
||||
|
||||
println!("Cold start time: {:?}", cold_start_time);
|
||||
|
||||
// First request (warmup)
|
||||
let image = images::generate_simple_equation("warmup");
|
||||
image.save("/tmp/warmup.png").unwrap();
|
||||
|
||||
let start_first = Instant::now();
|
||||
test_server
|
||||
.process_image("/tmp/warmup.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
let first_request_time = start_first.elapsed();
|
||||
|
||||
// Second request (warmed up)
|
||||
let start_second = Instant::now();
|
||||
test_server
|
||||
.process_image("/tmp/warmup.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
let second_request_time = start_second.elapsed();
|
||||
|
||||
println!("First request time: {:?}", first_request_time);
|
||||
println!("Second request time: {:?}", second_request_time);
|
||||
|
||||
// Cold start should be reasonable (<5s)
|
||||
assert!(
|
||||
cold_start_time.as_secs() < 5,
|
||||
"Cold start too slow: {:?}",
|
||||
cold_start_time
|
||||
);
|
||||
|
||||
// Second request should be faster (model loaded)
|
||||
assert!(
|
||||
second_request_time < first_request_time,
|
||||
"Warmed up request should be faster"
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
// Helper function to get current memory usage (resident set size) in bytes.
// Linux only: parses the RSS page count from /proc/self/statm. Returns 0 on
// other platforms or when the proc file cannot be read/parsed.
// NOTE(review): assumes 4KB pages, as the original did — huge/16K pages
// would skew the result.
fn get_memory_usage() -> usize {
    #[cfg(target_os = "linux")]
    {
        let rss_pages = std::fs::read_to_string("/proc/self/statm")
            .ok()
            .and_then(|content| {
                content
                    .split_whitespace()
                    .nth(1) // second field is resident pages
                    .and_then(|field| field.parse::<usize>().ok())
            });
        if let Some(pages) = rss_pages {
            // Convert pages to bytes (assuming 4KB pages)
            return pages * 4096;
        }
    }

    // Fallback for other platforms or if reading fails
    0
}
|
||||
248
vendor/ruvector/examples/scipix/tests/integration/pipeline_tests.rs
vendored
Normal file
248
vendor/ruvector/examples/scipix/tests/integration/pipeline_tests.rs
vendored
Normal file
@@ -0,0 +1,248 @@
|
||||
// Full pipeline integration tests
|
||||
//
|
||||
// Tests the complete OCR pipeline from image input to final output
|
||||
//
|
||||
// Note: These tests use mock test infrastructure.
|
||||
// Real OCR processing requires ONNX models to be configured.
|
||||
|
||||
use super::*;
|
||||
use crate::common::{OutputFormat, ProcessingOptions};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_png_to_latex_pipeline() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create test image
|
||||
let image = images::generate_simple_equation("x^2 + 2x + 1");
|
||||
let image_path = "/tmp/test_equation.png";
|
||||
image.save(image_path).unwrap();
|
||||
|
||||
// Process through pipeline
|
||||
let result = test_server
|
||||
.process_image(image_path, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Pipeline processing failed");
|
||||
|
||||
// Verify output
|
||||
assert!(!result.latex.is_empty(), "LaTeX output should not be empty");
|
||||
assert!(
|
||||
result.confidence > 0.7,
|
||||
"Confidence too low: {}",
|
||||
result.confidence
|
||||
);
|
||||
assert!(result.latex.contains("x"), "Should contain variable x");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_jpeg_to_mathml_pipeline() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create JPEG test image
|
||||
let image = images::generate_fraction(1, 2);
|
||||
let image_path = "/tmp/test_fraction.jpg";
|
||||
image.save(image_path).unwrap();
|
||||
|
||||
// Process to MathML
|
||||
let result = test_server
|
||||
.process_image(image_path, OutputFormat::MathML)
|
||||
.await
|
||||
.expect("Pipeline processing failed");
|
||||
|
||||
// Verify MathML structure
|
||||
assert!(result.mathml.is_some(), "MathML output should be present");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_webp_to_html_pipeline() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create WebP test image
|
||||
let image = images::generate_integral("x dx");
|
||||
let image_path = "/tmp/test_integral.webp";
|
||||
// Note: WebP support may require additional image codec
|
||||
image.save(image_path).unwrap_or_else(|_| {
|
||||
// Fallback to PNG if WebP not supported
|
||||
image.save("/tmp/test_integral.png").unwrap();
|
||||
});
|
||||
|
||||
let actual_path = if std::path::Path::new(image_path).exists() {
|
||||
image_path
|
||||
} else {
|
||||
"/tmp/test_integral.png"
|
||||
};
|
||||
|
||||
// Process to HTML
|
||||
let _result = test_server
|
||||
.process_image(actual_path, OutputFormat::HTML)
|
||||
.await
|
||||
.expect("Pipeline processing failed");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_pipeline_timeout_handling() {
|
||||
let test_server = TestServer::with_timeout(100)
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create complex image that might take time
|
||||
let complex_image = images::generate_complex_equation();
|
||||
complex_image.save("/tmp/complex.png").unwrap();
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
let _result = test_server
|
||||
.process_image("/tmp/complex.png", OutputFormat::LaTeX)
|
||||
.await;
|
||||
let duration = start.elapsed();
|
||||
|
||||
// Should either complete or timeout within reasonable time
|
||||
assert!(
|
||||
duration.as_millis() < 500,
|
||||
"Should timeout or complete quickly"
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_batch_pipeline_processing() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create multiple test images
|
||||
let test_images = vec![
|
||||
("x + y", "/tmp/batch_1.png"),
|
||||
("a - b", "/tmp/batch_2.png"),
|
||||
("2 * 3", "/tmp/batch_3.png"),
|
||||
("x / y", "/tmp/batch_4.png"),
|
||||
];
|
||||
|
||||
for (equation, path) in &test_images {
|
||||
let img = images::generate_simple_equation(equation);
|
||||
img.save(path).unwrap();
|
||||
}
|
||||
|
||||
// Process batch
|
||||
let paths: Vec<&str> = test_images.iter().map(|(_, p)| *p).collect();
|
||||
let results = test_server
|
||||
.process_batch(&paths, OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Batch processing failed");
|
||||
|
||||
// Verify all processed
|
||||
assert_eq!(results.len(), 4, "Should process all images");
|
||||
for (i, result) in results.iter().enumerate() {
|
||||
assert!(!result.latex.is_empty(), "Result {} should have LaTeX", i);
|
||||
assert!(result.confidence > 0.5, "Result {} confidence too low", i);
|
||||
}
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_pipeline_with_preprocessing() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create noisy image
|
||||
let mut image = images::generate_simple_equation("f(x) = x^2");
|
||||
images::add_noise(&mut image, 0.1);
|
||||
image.save("/tmp/noisy.png").unwrap();
|
||||
|
||||
// Process with preprocessing enabled
|
||||
let result = test_server
|
||||
.process_image_with_options(
|
||||
"/tmp/noisy.png",
|
||||
OutputFormat::LaTeX,
|
||||
ProcessingOptions {
|
||||
enable_preprocessing: true,
|
||||
enable_denoising: true,
|
||||
enable_deskew: true,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Should still recognize despite noise
|
||||
assert!(
|
||||
!result.latex.is_empty(),
|
||||
"Should extract LaTeX from noisy image"
|
||||
);
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_multi_format_output() {
|
||||
let test_server = TestServer::start()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create test image
|
||||
let image = images::generate_fraction(3, 4);
|
||||
image.save("/tmp/fraction.png").unwrap();
|
||||
|
||||
// Request multiple output formats
|
||||
let result = test_server
|
||||
.process_image_with_options(
|
||||
"/tmp/fraction.png",
|
||||
OutputFormat::All,
|
||||
ProcessingOptions {
|
||||
include_latex: true,
|
||||
include_mathml: true,
|
||||
include_ascii: true,
|
||||
include_text: true,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
.await
|
||||
.expect("Processing failed");
|
||||
|
||||
// Verify output present
|
||||
assert!(!result.latex.is_empty(), "Should have LaTeX");
|
||||
assert!(result.mathml.is_some(), "Should have MathML");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_pipeline_caching() {
|
||||
let test_server = TestServer::with_cache()
|
||||
.await
|
||||
.expect("Failed to start test server");
|
||||
|
||||
// Create test image
|
||||
let image = images::generate_simple_equation("a + b = c");
|
||||
image.save("/tmp/cached.png").unwrap();
|
||||
|
||||
// First processing
|
||||
let result1 = test_server
|
||||
.process_image("/tmp/cached.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("First processing failed");
|
||||
|
||||
// Second processing (should hit cache)
|
||||
let result2 = test_server
|
||||
.process_image("/tmp/cached.png", OutputFormat::LaTeX)
|
||||
.await
|
||||
.expect("Second processing failed");
|
||||
|
||||
// Verify cache hit
|
||||
assert_eq!(result1.latex, result2.latex, "Results should match");
|
||||
|
||||
test_server.shutdown().await;
|
||||
}
|
||||
40
vendor/ruvector/examples/scipix/tests/lib.rs
vendored
Normal file
40
vendor/ruvector/examples/scipix/tests/lib.rs
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
// Integration test library for ruvector-scipix
|
||||
//
|
||||
// This library provides the test infrastructure and utilities
|
||||
// for integration testing the scipix OCR system.
|
||||
|
||||
// Common test utilities
|
||||
pub mod common;
|
||||
|
||||
// Integration test modules
|
||||
pub mod integration;
|
||||
|
||||
// Test configuration
|
||||
#[cfg(test)]
mod test_config {
    use std::sync::Once;

    static INIT: Once = Once::new();

    /// Initialize the shared test environment exactly once per process.
    ///
    /// Safe to call from every test: `Once` guarantees the closure runs a
    /// single time even when tests execute concurrently.
    pub fn init() {
        INIT.call_once(|| {
            // Route log output through the test harness; ignore the error if
            // another harness already installed a global logger.
            let _ = env_logger::builder().is_test(true).try_init();

            // Create the scratch directories the integration tests write to.
            // A fixed-size array avoids the needless heap allocation a `vec!`
            // of string literals incurs (clippy: `useless_vec`). Errors are
            // deliberately ignored — the directories may already exist.
            for dir in ["/tmp/scipix_test", "/tmp/scipix_cache", "/tmp/scipix_results"] {
                std::fs::create_dir_all(dir).ok();
            }
        });
    }
}
|
||||
|
||||
// Convenience re-exports for tests
|
||||
pub use common::*;
|
||||
645
vendor/ruvector/examples/scipix/tests/math_tests.rs
vendored
Normal file
645
vendor/ruvector/examples/scipix/tests/math_tests.rs
vendored
Normal file
@@ -0,0 +1,645 @@
|
||||
//! Comprehensive integration tests for mathematical expression parsing and conversion
|
||||
//!
|
||||
//! These tests cover complex mathematical expressions including:
|
||||
//! - Fractions and nested fractions
|
||||
//! - Radicals (square roots, nth roots)
|
||||
//! - Powers and exponents
|
||||
//! - Matrices and vectors
|
||||
//! - Integrals and summations
|
||||
//! - Trigonometric functions
|
||||
//! - Greek letters and special symbols
|
||||
//! - Complex nested expressions
|
||||
//!
|
||||
//! NOTE: These tests require the `math` feature to be enabled.
|
||||
//! Run with: cargo test --features math
|
||||
|
||||
#![cfg(feature = "math")]
|
||||
|
||||
use ruvector_scipix::math::{
|
||||
parse_expression, to_asciimath, to_latex, to_mathml, AsciiMathGenerator, BinaryOp, BracketType,
|
||||
LaTeXConfig, LaTeXGenerator, LargeOpType, MathExpr, MathNode,
|
||||
};
|
||||
|
||||
/// Round-trips the quadratic formula through all three generators and checks
/// that the key structures survive: a fraction, a radical, and the symbols.
#[test]
fn test_quadratic_formula() {
    let expr = parse_expression(r"\frac{-b + \sqrt{b^2 - 4*a*c}}{2*a}").unwrap();

    let latex = to_latex(&expr);
    for needle in [r"\frac", r"\sqrt", "b", "a", "c"] {
        assert!(latex.contains(needle));
    }

    let mathml = to_mathml(&expr);
    assert!(mathml.contains("<mfrac>"));
    assert!(mathml.contains("<msqrt>"));

    let asciimath = to_asciimath(&expr);
    assert!(asciimath.contains("sqrt"));
    assert!(asciimath.contains("/"));
}
|
||||
|
||||
/// a² + b² = c² — superscripts and the equals relation must survive both
/// LaTeX and MathML conversion.
#[test]
fn test_pythagorean_theorem() {
    let expr = parse_expression("a^2 + b^2 = c^2").unwrap();

    let latex = to_latex(&expr);
    for part in ["a^{2}", "b^{2}", "c^{2}", "="] {
        assert!(latex.contains(part));
    }

    let mathml = to_mathml(&expr);
    assert!(mathml.contains("<msup>"));
    assert!(mathml.contains("<mo>=</mo>"));
}
|
||||
|
||||
/// Euler's identity e^{iπ} + 1 = 0: the exponent and π must appear in LaTeX.
#[test]
fn test_euler_identity() {
    let expr = parse_expression("e^{i*\\pi} + 1 = 0").unwrap();

    let rendered = to_latex(&expr);
    assert!(rendered.contains("e^"));
    assert!(rendered.contains("\\pi"));
}
|
||||
|
||||
/// A three-level continued fraction must preserve every nesting level.
#[test]
fn test_nested_fractions() {
    let expr = parse_expression(r"\frac{1}{\frac{2}{\frac{3}{4}}}").unwrap();

    // All three \frac levels and all three <mfrac> elements must survive.
    assert!(to_latex(&expr).matches(r"\frac").count() >= 3);
    assert!(to_mathml(&expr).matches("<mfrac>").count() >= 3);
}
|
||||
|
||||
/// A 2x2 matrix [[1, 2], [3, 4]] rendered with square brackets (bmatrix).
#[test]
fn test_matrix_2x2() {
    // Small builder to keep the AST literal readable.
    let int = |digits: &str| MathNode::Number {
        value: digits.to_string(),
        is_decimal: false,
    };

    let expr = MathExpr::new(
        MathNode::Matrix {
            rows: vec![
                vec![int("1"), int("2")],
                vec![int("3"), int("4")],
            ],
            bracket_type: BracketType::Brackets,
        },
        1.0,
    );

    let latex = to_latex(&expr);
    for part in [r"\begin{bmatrix}", r"\end{bmatrix}", "&", r"\\"] {
        assert!(latex.contains(part));
    }

    let mathml = to_mathml(&expr);
    for tag in ["<mtable>", "<mtr>", "<mtd>"] {
        assert!(mathml.contains(tag));
    }

    let asciimath = to_asciimath(&expr);
    assert!(asciimath.contains("["));
    assert!(asciimath.contains(";"));
}
|
||||
|
||||
/// The 3x3 identity matrix rendered with parentheses (pmatrix).
#[test]
fn test_matrix_3x3() {
    let int = |digits: &str| MathNode::Number {
        value: digits.to_string(),
        is_decimal: false,
    };
    // Row `r` of the identity: a 1 at column `r`, zeros elsewhere.
    let identity_row = |r: usize| -> Vec<MathNode> {
        (0..3)
            .map(|c| if c == r { int("1") } else { int("0") })
            .collect()
    };

    let expr = MathExpr::new(
        MathNode::Matrix {
            rows: (0..3).map(identity_row).collect(),
            bracket_type: BracketType::Parentheses,
        },
        1.0,
    );

    let latex = to_latex(&expr);
    assert!(latex.contains(r"\begin{pmatrix}"));
    // Three rows imply at least two row separators.
    assert!(latex.matches(r"\\").count() >= 2);
}
|
||||
|
||||
/// ∫₀¹ x² dx built directly from AST nodes: bounds and the integral glyph
/// must appear in both LaTeX and MathML.
#[test]
fn test_definite_integral() {
    let num = |digits: &str| MathNode::Number {
        value: digits.to_string(),
        is_decimal: false,
    };

    let integrand = MathNode::Binary {
        op: BinaryOp::Power,
        left: Box::new(MathNode::Symbol {
            value: "x".to_string(),
            unicode: Some('x'),
        }),
        right: Box::new(num("2")),
    };

    let expr = MathExpr::new(
        MathNode::LargeOp {
            op_type: LargeOpType::Integral,
            lower: Some(Box::new(num("0"))),
            upper: Some(Box::new(num("1"))),
            content: Box::new(integrand),
        },
        1.0,
    );

    let latex = to_latex(&expr);
    assert!(latex.contains(r"\int"));
    assert!(latex.contains("_{0}"));
    assert!(latex.contains("^{1}"));

    let mathml = to_mathml(&expr);
    assert!(mathml.contains("<munderover>"));
    assert!(mathml.contains("∫"));
}
|
||||
|
||||
/// ∑_{i=1}^{n} i² — bounded summation built from AST nodes.
#[test]
fn test_summation() {
    let sym = |name: &str, ch: char| MathNode::Symbol {
        value: name.to_string(),
        unicode: Some(ch),
    };
    let num = |digits: &str| MathNode::Number {
        value: digits.to_string(),
        is_decimal: false,
    };

    // Lower bound: i = 1.
    let lower_bound = MathNode::Binary {
        op: BinaryOp::Equal,
        left: Box::new(sym("i", 'i')),
        right: Box::new(num("1")),
    };
    // Summand: i².
    let body = MathNode::Binary {
        op: BinaryOp::Power,
        left: Box::new(sym("i", 'i')),
        right: Box::new(num("2")),
    };

    let expr = MathExpr::new(
        MathNode::LargeOp {
            op_type: LargeOpType::Sum,
            lower: Some(Box::new(lower_bound)),
            upper: Some(Box::new(sym("n", 'n'))),
            content: Box::new(body),
        },
        1.0,
    );

    let latex = to_latex(&expr);
    assert!(latex.contains(r"\sum"));
    assert!(latex.contains("_{i"));
    assert!(latex.contains("^{n}"));

    let mathml = to_mathml(&expr);
    assert!(mathml.contains("∑"));
    assert!(mathml.contains("<munderover>"));
}
|
||||
|
||||
/// ∏_{k=1}^{n} k — product notation with bounds.
#[test]
fn test_product_notation() {
    let sym = |name: &str, ch: char| MathNode::Symbol {
        value: name.to_string(),
        unicode: Some(ch),
    };

    // Lower bound: k = 1.
    let lower_bound = MathNode::Binary {
        op: BinaryOp::Equal,
        left: Box::new(sym("k", 'k')),
        right: Box::new(MathNode::Number {
            value: "1".to_string(),
            is_decimal: false,
        }),
    };

    let expr = MathExpr::new(
        MathNode::LargeOp {
            op_type: LargeOpType::Product,
            lower: Some(Box::new(lower_bound)),
            upper: Some(Box::new(sym("n", 'n'))),
            content: Box::new(sym("k", 'k')),
        },
        1.0,
    );

    assert!(to_latex(&expr).contains(r"\prod"));
    assert!(to_mathml(&expr).contains("∏"));
}
|
||||
|
||||
/// Cube root of 8 — an explicit root index must map to \sqrt[3] and <mroot>.
#[test]
fn test_nth_root() {
    let expr = parse_expression(r"\sqrt[3]{8}").unwrap();

    assert!(to_latex(&expr).contains(r"\sqrt[3]"));
    assert!(to_mathml(&expr).contains("<mroot>"));
}
|
||||
|
||||
/// √(a² + b²) — a radical over a compound expression.
#[test]
fn test_complex_radical() {
    let expr = parse_expression(r"\sqrt{a^2 + b^2}").unwrap();

    let latex = to_latex(&expr);
    for part in [r"\sqrt", "a^{2}", "b^{2}"] {
        assert!(latex.contains(part));
    }
}
|
||||
|
||||
/// "n choose k" approximated as the plain fraction n/k (the parser has no
/// dedicated \binom node here).
#[test]
fn test_binomial_coefficient() {
    let expr = parse_expression(r"\frac{n}{k}").unwrap();

    assert!(to_latex(&expr).contains(r"\frac{n}{k}"));

    let mathml = to_mathml(&expr);
    for tag in ["<mfrac>", "<mi>n</mi>", "<mi>k</mi>"] {
        assert!(mathml.contains(tag));
    }
}
|
||||
|
||||
/// sin²(x) + cos²(x) = 1 — trig function names must survive the round-trip.
#[test]
fn test_trigonometric_functions() {
    let expr = parse_expression(r"\sin{x}^2 + \cos{x}^2 = 1").unwrap();

    let latex = to_latex(&expr);
    assert!(latex.contains(r"\sin"));
    assert!(latex.contains(r"\cos"));
}
|
||||
|
||||
/// NOTE(review): despite the name, this does not exercise \lim — it parses a
/// bounded summation as a stand-in for limit-like (bound-carrying) structure.
/// Extending it to a real limit expression would need \lim parser support.
#[test]
fn test_limits() {
    let expr = parse_expression(r"\sum_{x=1}^{10} x").unwrap();

    assert!(to_latex(&expr).contains(r"\sum"));
}
|
||||
|
||||
/// α + β + γ = δ — Greek-letter commands must pass through unchanged.
#[test]
fn test_greek_letters() {
    let expr = parse_expression(r"\alpha + \beta + \gamma = \delta").unwrap();

    let latex = to_latex(&expr);
    for letter in [r"\alpha", r"\beta", r"\gamma", r"\delta"] {
        assert!(latex.contains(letter));
    }
}
|
||||
|
||||
/// a₁² + a₂² = a₃² — combined sub- and superscripts on each symbol should
/// produce braced LaTeX scripts and an <msubsup> MathML element.
#[test]
fn test_subscript_and_superscript() {
    let expr = parse_expression("a_1^2 + a_2^2 = a_3^2").unwrap();

    let latex = to_latex(&expr);
    for part in ["a_{1}^{2}", "a_{2}^{2}", "a_{3}^{2}"] {
        assert!(latex.contains(part));
    }

    assert!(to_mathml(&expr).contains("<msubsup>"));
}
|
||||
|
||||
/// Multiplication must bind tighter than addition: `1 + 2 * 3` parses as
/// `1 + (2 * 3)`, and the generated LaTeX must not parenthesize `2 * 3`.
#[test]
fn test_operator_precedence() {
    let expr = parse_expression("1 + 2 * 3").unwrap();

    // Match on a *reference* to the root. Binding `right` by value would
    // partially move `expr.root`, making the later `to_latex(&expr)` call
    // a use of a partially moved value (E0382).
    match &expr.root {
        MathNode::Binary {
            op: BinaryOp::Add,
            right,
            ..
        } => {
            assert!(matches!(
                **right,
                MathNode::Binary {
                    op: BinaryOp::Multiply,
                    ..
                }
            ));
        }
        _ => panic!("Expected addition with multiplication on right"),
    }

    let latex = to_latex(&expr);
    // Should not have unnecessary parentheses around 2 * 3.
    assert!(!latex.contains("(2"));
}
|
||||
|
||||
/// (1 + 2) * 3 — the explicitly grouped sum must stay together in the output.
#[test]
fn test_parentheses_grouping() {
    let expr = parse_expression("(1 + 2) * 3").unwrap();

    assert!(to_latex(&expr).contains("1 + 2"));
}
|
||||
|
||||
/// A fraction whose numerator is a radical and whose denominator is a
/// difference of squares — multiple nesting levels in one expression.
#[test]
fn test_complex_nested_expression() {
    let expr = parse_expression(r"\frac{\sqrt{a + b}}{c^2 - d^2}").unwrap();

    let latex = to_latex(&expr);
    for part in [r"\frac", r"\sqrt", "c^{2}", "d^{2}"] {
        assert!(latex.contains(part));
    }

    let mathml = to_mathml(&expr);
    for tag in ["<mfrac>", "<msqrt>", "<msup>"] {
        assert!(mathml.contains(tag));
    }
}
|
||||
|
||||
/// Both inline and display style generation must emit the \frac command.
#[test]
fn test_latex_config_display_style() {
    let expr = parse_expression(r"\frac{1}{2}").unwrap();

    // Only `display_style` varies; delimiter sizing and spacing stay on.
    let config_for = |display_style: bool| LaTeXConfig {
        display_style,
        auto_size_delimiters: true,
        spacing: true,
    };

    for display in [false, true] {
        let generator = LaTeXGenerator::with_config(config_for(display));
        assert!(generator.generate(&expr).contains(r"\frac"));
    }
}
|
||||
|
||||
/// `auto_size_delimiters` toggles \left( / \right) emission for groups.
#[test]
fn test_latex_config_no_auto_size() {
    let expr = MathExpr::new(
        MathNode::Group {
            content: Box::new(MathNode::Number {
                value: "42".to_string(),
                is_decimal: false,
            }),
            bracket_type: BracketType::Parentheses,
        },
        1.0,
    );

    // Render the same expression with and without auto-sized delimiters.
    let render = |auto_size_delimiters: bool| {
        LaTeXGenerator::with_config(LaTeXConfig {
            display_style: false,
            auto_size_delimiters,
            spacing: true,
        })
        .generate(&expr)
    };

    assert!(render(true).contains(r"\left("));
    assert!(!render(false).contains(r"\left("));
}
|
||||
|
||||
/// The ASCII-only generator must fall back to `*`; the default generator may
/// use either `×` or `*` for multiplication.
#[test]
fn test_asciimath_unicode_vs_ascii() {
    let expr = parse_expression("2 * 3").unwrap();

    let unicode_output = AsciiMathGenerator::new().generate(&expr);
    let ascii_output = AsciiMathGenerator::ascii_only().generate(&expr);

    assert!(unicode_output.contains("×") || unicode_output.contains("*"));
    assert!(ascii_output.contains("*"));
}
|
||||
|
||||
/// An unbounded double integral ∬ f — must render \iint and the ∬ glyph.
#[test]
fn test_double_integral() {
    let expr = MathExpr::new(
        MathNode::LargeOp {
            op_type: LargeOpType::DoubleIntegral,
            lower: None,
            upper: None,
            content: Box::new(MathNode::Symbol {
                value: "f".to_string(),
                unicode: Some('f'),
            }),
        },
        1.0,
    );

    assert!(to_latex(&expr).contains(r"\iint"));
    assert!(to_mathml(&expr).contains("∬"));
}
|
||||
|
||||
/// An unbounded triple integral ∭ f — must render \iiint and the ∭ glyph.
#[test]
fn test_triple_integral() {
    let expr = MathExpr::new(
        MathNode::LargeOp {
            op_type: LargeOpType::TripleIntegral,
            lower: None,
            upper: None,
            content: Box::new(MathNode::Symbol {
                value: "f".to_string(),
                unicode: Some('f'),
            }),
        },
        1.0,
    );

    assert!(to_latex(&expr).contains(r"\iiint"));
    assert!(to_mathml(&expr).contains("∭"));
}
|
||||
|
||||
/// A decimal literal must parse into a `Number` node flagged `is_decimal`
/// and render back verbatim.
#[test]
fn test_decimal_numbers() {
    let expr = parse_expression("3.14159").unwrap();

    // Match on a *reference*: destructuring `expr.root` by value would
    // partially move it, and the later `to_latex(&expr)` would then use a
    // partially moved value (E0382).
    match &expr.root {
        MathNode::Number { value, is_decimal } => {
            assert_eq!(value, "3.14159");
            assert!(*is_decimal);
        }
        _ => panic!("Expected decimal number"),
    }

    let latex = to_latex(&expr);
    assert!(latex.contains("3.14159"));
}
|
||||
|
||||
/// A realistically complex input: the full quadratic formula with \pm,
/// checked across all three output generators.
#[test]
fn test_large_expression() {
    let expr = parse_expression(r"\frac{-b \pm \sqrt{b^2 - 4ac}}{2a}").unwrap();

    let latex = to_latex(&expr);
    for part in [r"\frac", r"\sqrt", "b^{2}"] {
        assert!(latex.contains(part));
    }

    let mathml = to_mathml(&expr);
    assert!(mathml.contains("<mfrac>"));
    assert!(mathml.contains("<msqrt>"));

    let asciimath = to_asciimath(&expr);
    assert!(asciimath.contains("sqrt"));
    assert!(asciimath.contains("/"));
}
|
||||
|
||||
/// Every supported bracket kind must render to non-empty LaTeX and MathML.
#[test]
fn test_all_bracket_types() {
    let bracket_kinds = [
        BracketType::Parentheses,
        BracketType::Brackets,
        BracketType::Braces,
        BracketType::Vertical,
    ];

    for bracket_type in bracket_kinds {
        let expr = MathExpr::new(
            MathNode::Group {
                content: Box::new(MathNode::Symbol {
                    value: "x".to_string(),
                    unicode: Some('x'),
                }),
                bracket_type,
            },
            1.0,
        );

        assert!(!to_latex(&expr).is_empty());
        assert!(!to_mathml(&expr).is_empty());
    }
}
|
||||
372
vendor/ruvector/examples/scipix/tests/unit/config_tests.rs
vendored
Normal file
372
vendor/ruvector/examples/scipix/tests/unit/config_tests.rs
vendored
Normal file
@@ -0,0 +1,372 @@
|
||||
// Configuration tests for ruvector-scipix
|
||||
//
|
||||
// Tests configuration loading, serialization, validation, and defaults.
|
||||
// Target: 90%+ coverage of config module
|
||||
|
||||
#[cfg(test)]
mod config_tests {
    //! Configuration tests for ruvector-scipix: defaults, TOML/JSON
    //! (de)serialization round-trips, validation bounds, and preset tuning.
    //! The structs below are serde-compatible mocks standing in for the
    //! real config module.

    use std::path::PathBuf;

    /// Image-preprocessing knobs applied before OCR.
    #[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
    struct PreprocessConfig {
        target_dpi: u32,
        max_dimension: u32,
        denoise_strength: f32,
        contrast_enhancement: bool,
        auto_rotate: bool,
        binarization_method: String,
    }

    /// OCR model location and inference parameters.
    #[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
    struct OcrModelConfig {
        model_path: PathBuf,
        device: String,
        batch_size: usize,
        confidence_threshold: f32,
    }

    /// Shape of the emitted recognition results.
    #[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
    struct OutputConfig {
        format: String,
        include_confidence: bool,
        include_geometry: bool,
    }

    /// Top-level configuration; `Default` is derived from the per-section
    /// defaults below.
    #[derive(Debug, Clone, PartialEq, Default, serde::Serialize, serde::Deserialize)]
    struct ScipixConfig {
        preprocessing: PreprocessConfig,
        model: OcrModelConfig,
        output: OutputConfig,
    }

    impl Default for PreprocessConfig {
        fn default() -> Self {
            Self {
                target_dpi: 300,
                max_dimension: 4096,
                denoise_strength: 0.5,
                contrast_enhancement: true,
                auto_rotate: true,
                binarization_method: "adaptive".to_string(),
            }
        }
    }

    impl Default for OcrModelConfig {
        fn default() -> Self {
            Self {
                model_path: PathBuf::from("models/scipix_model.onnx"),
                device: "cpu".to_string(),
                batch_size: 4,
                confidence_threshold: 0.7,
            }
        }
    }

    impl Default for OutputConfig {
        fn default() -> Self {
            Self {
                format: "latex".to_string(),
                include_confidence: true,
                include_geometry: false,
            }
        }
    }

    #[test]
    fn test_default_config_creation() {
        let ScipixConfig {
            preprocessing,
            model,
            output,
        } = ScipixConfig::default();

        assert_eq!(preprocessing.target_dpi, 300);
        assert_eq!(model.device, "cpu");
        assert_eq!(output.format, "latex");
    }

    #[test]
    fn test_preprocessing_config_defaults() {
        let config = PreprocessConfig::default();

        assert_eq!(config.target_dpi, 300);
        assert_eq!(config.max_dimension, 4096);
        assert_eq!(config.denoise_strength, 0.5);
        assert!(config.contrast_enhancement);
        assert!(config.auto_rotate);
        assert_eq!(config.binarization_method, "adaptive");
    }

    #[test]
    fn test_model_config_defaults() {
        let config = OcrModelConfig::default();

        assert_eq!(config.model_path, PathBuf::from("models/scipix_model.onnx"));
        assert_eq!(config.device, "cpu");
        assert_eq!(config.batch_size, 4);
        assert_eq!(config.confidence_threshold, 0.7);
    }

    #[test]
    fn test_output_config_defaults() {
        let config = OutputConfig::default();

        assert_eq!(config.format, "latex");
        assert!(config.include_confidence);
        assert!(!config.include_geometry);
    }

    #[test]
    fn test_toml_serialization() {
        let toml_str =
            toml::to_string(&ScipixConfig::default()).expect("Failed to serialize to TOML");

        for expected in ["target_dpi = 300", "device = \"cpu\"", "format = \"latex\""] {
            assert!(toml_str.contains(expected));
        }
    }

    #[test]
    fn test_toml_deserialization() {
        let toml_str = r#"
[preprocessing]
target_dpi = 300
max_dimension = 4096
denoise_strength = 0.5
contrast_enhancement = true
auto_rotate = true
binarization_method = "adaptive"

[model]
model_path = "models/scipix_model.onnx"
device = "cpu"
batch_size = 4
confidence_threshold = 0.7

[output]
format = "latex"
include_confidence = true
include_geometry = false
"#;

        let config: ScipixConfig = toml::from_str(toml_str).expect("Failed to deserialize TOML");

        assert_eq!(config.preprocessing.target_dpi, 300);
        assert_eq!(config.model.device, "cpu");
        assert_eq!(config.output.format, "latex");
    }

    #[test]
    fn test_json_serialization() {
        let json_str =
            serde_json::to_string(&ScipixConfig::default()).expect("Failed to serialize to JSON");

        assert!(json_str.contains("\"target_dpi\":300"));
        assert!(json_str.contains("\"device\":\"cpu\""));
    }

    #[test]
    fn test_json_deserialization() {
        let json_str = r#"{
            "preprocessing": {
                "target_dpi": 300,
                "max_dimension": 4096,
                "denoise_strength": 0.5,
                "contrast_enhancement": true,
                "auto_rotate": true,
                "binarization_method": "adaptive"
            },
            "model": {
                "model_path": "models/scipix_model.onnx",
                "device": "cpu",
                "batch_size": 4,
                "confidence_threshold": 0.7
            },
            "output": {
                "format": "latex",
                "include_confidence": true,
                "include_geometry": false
            }
        }"#;

        let config: ScipixConfig =
            serde_json::from_str(json_str).expect("Failed to deserialize JSON");

        assert_eq!(config.preprocessing.target_dpi, 300);
        assert_eq!(config.model.device, "cpu");
    }

    #[test]
    fn test_preset_configurations() {
        // "High quality": more DPI, stronger denoise, stricter confidence.
        let high_quality = ScipixConfig {
            preprocessing: PreprocessConfig {
                target_dpi: 600,
                denoise_strength: 0.8,
                ..Default::default()
            },
            model: OcrModelConfig {
                confidence_threshold: 0.9,
                ..Default::default()
            },
            ..Default::default()
        };

        assert_eq!(high_quality.preprocessing.target_dpi, 600);
        assert_eq!(high_quality.model.confidence_threshold, 0.9);

        // "Fast": lower DPI, skip enhancement steps, bigger batches.
        let fast = ScipixConfig {
            preprocessing: PreprocessConfig {
                target_dpi: 150,
                contrast_enhancement: false,
                auto_rotate: false,
                ..Default::default()
            },
            model: OcrModelConfig {
                batch_size: 8,
                confidence_threshold: 0.5,
                ..Default::default()
            },
            ..Default::default()
        };

        assert_eq!(fast.preprocessing.target_dpi, 150);
        assert_eq!(fast.model.batch_size, 8);
    }

    #[test]
    fn test_config_validation_valid() {
        let config = ScipixConfig::default();

        // The defaults must satisfy the basic validation invariants.
        assert!(config.preprocessing.target_dpi > 0);
        assert!(config.preprocessing.max_dimension > 0);
        assert!((0.0..=1.0).contains(&config.preprocessing.denoise_strength));
        assert!(config.model.batch_size > 0);
        assert!((0.0..=1.0).contains(&config.model.confidence_threshold));
    }

    #[test]
    fn test_config_validation_invalid_values() {
        // NOTE(review): these only demonstrate constructible invalid values —
        // there is no validate() to reject them yet.
        let mut config = ScipixConfig::default();
        config.preprocessing.target_dpi = 0;
        assert_eq!(config.preprocessing.target_dpi, 0); // Would fail validation

        config = ScipixConfig::default();
        config.model.confidence_threshold = 1.5;
        assert!(config.model.confidence_threshold > 1.0); // Would fail validation
    }

    #[test]
    fn test_environment_variable_overrides() {
        // Simulate the env-var override pass applied at startup.
        let mut config = ScipixConfig::default();

        config.model.device =
            std::env::var("MATHPIX_DEVICE").unwrap_or_else(|_| "cpu".to_string());

        config.model.batch_size = std::env::var("MATHPIX_BATCH_SIZE")
            .ok()
            .and_then(|s| s.parse::<usize>().ok())
            .unwrap_or(config.model.batch_size);

        assert!(!config.model.device.is_empty());
        assert!(config.model.batch_size > 0);
    }

    #[test]
    fn test_config_cloning() {
        let original = ScipixConfig::default();
        let copy = original.clone();

        assert_eq!(original, copy);
        assert_eq!(
            original.preprocessing.target_dpi,
            copy.preprocessing.target_dpi
        );
    }

    #[test]
    fn test_partial_config_update() {
        let mut config = ScipixConfig::default();

        // Touch only the preprocessing section...
        config.preprocessing.target_dpi = 450;
        config.preprocessing.denoise_strength = 0.7;

        assert_eq!(config.preprocessing.target_dpi, 450);
        assert_eq!(config.preprocessing.denoise_strength, 0.7);
        // ...and confirm the other sections keep their defaults.
        assert_eq!(config.model.device, "cpu");
        assert_eq!(config.output.format, "latex");
    }

    #[test]
    fn test_binarization_methods() {
        for method in ["otsu", "adaptive", "sauvola", "niblack"] {
            let config = PreprocessConfig {
                binarization_method: method.to_string(),
                ..Default::default()
            };
            assert_eq!(config.binarization_method, method);
        }
    }

    #[test]
    fn test_output_formats() {
        for format in ["latex", "mathml", "mmd", "ascii", "unicode"] {
            let config = OutputConfig {
                format: format.to_string(),
                ..Default::default()
            };
            assert_eq!(config.format, format);
        }
    }

    #[test]
    fn test_device_configurations() {
        for device in ["cpu", "cuda", "cuda:0", "cuda:1"] {
            let config = OcrModelConfig {
                device: device.to_string(),
                ..Default::default()
            };
            assert_eq!(config.device, device);
        }
    }

    #[test]
    fn test_config_roundtrip_toml() {
        let original = ScipixConfig::default();
        let round_tripped: ScipixConfig =
            toml::from_str(&toml::to_string(&original).unwrap()).unwrap();

        assert_eq!(original, round_tripped);
    }

    #[test]
    fn test_config_roundtrip_json() {
        let original = ScipixConfig::default();
        let round_tripped: ScipixConfig =
            serde_json::from_str(&serde_json::to_string(&original).unwrap()).unwrap();

        assert_eq!(original, round_tripped);
    }
}
|
||||
344
vendor/ruvector/examples/scipix/tests/unit/error_tests.rs
vendored
Normal file
344
vendor/ruvector/examples/scipix/tests/unit/error_tests.rs
vendored
Normal file
@@ -0,0 +1,344 @@
|
||||
// Error handling tests for ruvector-scipix
|
||||
//
|
||||
// Tests error types, conversions, display messages, and retry logic.
|
||||
// Target: 95%+ coverage of error handling code
|
||||
|
||||
#[cfg(test)]
mod error_tests {
    //! Exercises a mock `ScipixError` type: `Display` formatting,
    //! `From<io::Error>` conversion, retry classification, and HTTP
    //! status-code mapping.

    use std::fmt;
    use std::io;

    // Mock error types for testing
    #[derive(Debug, Clone, PartialEq)]
    enum ScipixError {
        // Image errors
        InvalidImageFormat(String),
        ImageTooLarge { size: u64, max: u64 },
        ImagePreprocessingFailed(String),
        ImageLoadError(String),

        // Model errors
        ModelNotFound(String),
        ModelLoadError(String),
        InferenceError(String),

        // OCR errors
        TextDetectionFailed(String),
        TextRecognitionFailed(String),
        LowConfidence { score: f32, threshold: f32 },

        // Math parsing errors
        ParseError(String),
        InvalidExpression(String),

        // I/O errors
        IoError(String),

        // API errors
        ApiError { status: u16, message: String },
        RateLimitExceeded,

        // System errors
        Timeout(std::time::Duration),
        OutOfMemory,
        Internal(String),
    }

    // Human-readable messages; the exact strings are pinned by the
    // `test_error_display_*` tests below, so do not reword them casually.
    impl fmt::Display for ScipixError {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            match self {
                Self::InvalidImageFormat(format) => {
                    write!(f, "Invalid image format: {}", format)
                }
                Self::ImageTooLarge { size, max } => {
                    write!(f, "Image too large: {} bytes (max: {} bytes)", size, max)
                }
                Self::ImagePreprocessingFailed(reason) => {
                    write!(f, "Image preprocessing failed: {}", reason)
                }
                Self::ImageLoadError(msg) => write!(f, "Failed to load image: {}", msg),
                Self::ModelNotFound(model) => write!(f, "Model not found: {}", model),
                Self::ModelLoadError(msg) => write!(f, "Failed to load model: {}", msg),
                Self::InferenceError(msg) => write!(f, "Model inference failed: {}", msg),
                Self::TextDetectionFailed(msg) => write!(f, "Text detection failed: {}", msg),
                Self::TextRecognitionFailed(msg) => {
                    write!(f, "Text recognition failed: {}", msg)
                }
                // {:.2} fixes two decimals, e.g. 0.8 renders as "0.80".
                Self::LowConfidence { score, threshold } => write!(
                    f,
                    "Low confidence score: {:.2} (threshold: {:.2})",
                    score, threshold
                ),
                Self::ParseError(msg) => write!(f, "Parse error: {}", msg),
                Self::InvalidExpression(expr) => write!(f, "Invalid expression: {}", expr),
                Self::IoError(msg) => write!(f, "I/O error: {}", msg),
                Self::ApiError { status, message } => {
                    write!(f, "API error {}: {}", status, message)
                }
                Self::RateLimitExceeded => write!(f, "Rate limit exceeded"),
                // Debug-formats the Duration, e.g. "30s" for 30 seconds.
                Self::Timeout(duration) => write!(f, "Operation timed out after {:?}", duration),
                Self::OutOfMemory => write!(f, "Out of memory"),
                Self::Internal(msg) => write!(f, "Internal error: {}", msg),
            }
        }
    }

    impl std::error::Error for ScipixError {}

    // Lossy conversion: only the message survives; the io::ErrorKind is dropped.
    impl From<io::Error> for ScipixError {
        fn from(err: io::Error) -> Self {
            Self::IoError(err.to_string())
        }
    }

    impl ScipixError {
        // Transient failures a caller may retry: timeouts, 503/429 API
        // responses, and inference errors. Everything else is permanent.
        fn is_retryable(&self) -> bool {
            matches!(
                self,
                Self::Timeout(_)
                    | Self::ApiError { status: 503, .. }
                    | Self::ApiError { status: 429, .. }
                    | Self::InferenceError(_)
            )
        }

        // HTTP status an API layer should surface for this error. The
        // catch-all maps every remaining variant to 500, so the returned
        // Option is in fact always Some.
        fn status_code(&self) -> Option<u16> {
            match self {
                Self::InvalidImageFormat(_) => Some(400),
                Self::ImageTooLarge { .. } => Some(413),
                Self::ModelNotFound(_) => Some(404),
                Self::RateLimitExceeded => Some(429),
                Self::ApiError { status, .. } => Some(*status),
                Self::Timeout(_) => Some(408),
                Self::OutOfMemory => Some(507),
                _ => Some(500),
            }
        }
    }

    // --- Construction and Display formatting ---

    #[test]
    fn test_error_creation() {
        let err = ScipixError::InvalidImageFormat("svg".to_string());
        assert_eq!(
            err,
            ScipixError::InvalidImageFormat("svg".to_string())
        );
    }

    #[test]
    fn test_error_display_invalid_format() {
        let err = ScipixError::InvalidImageFormat("svg".to_string());
        assert_eq!(err.to_string(), "Invalid image format: svg");
    }

    #[test]
    fn test_error_display_image_too_large() {
        let err = ScipixError::ImageTooLarge {
            size: 15_000_000,
            max: 10_000_000,
        };
        assert_eq!(
            err.to_string(),
            "Image too large: 15000000 bytes (max: 10000000 bytes)"
        );
    }

    #[test]
    fn test_error_display_low_confidence() {
        let err = ScipixError::LowConfidence {
            score: 0.65,
            threshold: 0.8,
        };
        assert_eq!(
            err.to_string(),
            "Low confidence score: 0.65 (threshold: 0.80)"
        );
    }

    #[test]
    fn test_error_display_api_error() {
        let err = ScipixError::ApiError {
            status: 404,
            message: "Not found".to_string(),
        };
        assert_eq!(err.to_string(), "API error 404: Not found");
    }

    #[test]
    fn test_error_display_timeout() {
        let err = ScipixError::Timeout(std::time::Duration::from_secs(30));
        assert_eq!(err.to_string(), "Operation timed out after 30s");
    }

    // --- Conversions ---

    #[test]
    fn test_io_error_conversion() {
        let io_err = io::Error::new(io::ErrorKind::NotFound, "file not found");
        let scipix_err: ScipixError = io_err.into();

        match scipix_err {
            ScipixError::IoError(msg) => assert!(msg.contains("file not found")),
            _ => panic!("Expected IoError variant"),
        }
    }

    // --- Retry classification ---

    #[test]
    fn test_is_retryable_timeout() {
        let err = ScipixError::Timeout(std::time::Duration::from_secs(10));
        assert!(err.is_retryable());
    }

    #[test]
    fn test_is_retryable_503() {
        let err = ScipixError::ApiError {
            status: 503,
            message: "Service Unavailable".to_string(),
        };
        assert!(err.is_retryable());
    }

    #[test]
    fn test_is_retryable_429() {
        let err = ScipixError::ApiError {
            status: 429,
            message: "Too Many Requests".to_string(),
        };
        assert!(err.is_retryable());
    }

    #[test]
    fn test_is_not_retryable_404() {
        let err = ScipixError::ApiError {
            status: 404,
            message: "Not Found".to_string(),
        };
        assert!(!err.is_retryable());
    }

    #[test]
    fn test_is_not_retryable_invalid_format() {
        let err = ScipixError::InvalidImageFormat("svg".to_string());
        assert!(!err.is_retryable());
    }

    // --- HTTP status-code mapping ---

    #[test]
    fn test_status_code_invalid_format() {
        let err = ScipixError::InvalidImageFormat("svg".to_string());
        assert_eq!(err.status_code(), Some(400));
    }

    #[test]
    fn test_status_code_image_too_large() {
        let err = ScipixError::ImageTooLarge {
            size: 15_000_000,
            max: 10_000_000,
        };
        assert_eq!(err.status_code(), Some(413));
    }

    #[test]
    fn test_status_code_not_found() {
        let err = ScipixError::ModelNotFound("model.onnx".to_string());
        assert_eq!(err.status_code(), Some(404));
    }

    #[test]
    fn test_status_code_rate_limit() {
        let err = ScipixError::RateLimitExceeded;
        assert_eq!(err.status_code(), Some(429));
    }

    #[test]
    fn test_status_code_timeout() {
        let err = ScipixError::Timeout(std::time::Duration::from_secs(30));
        assert_eq!(err.status_code(), Some(408));
    }

    #[test]
    fn test_status_code_out_of_memory() {
        let err = ScipixError::OutOfMemory;
        assert_eq!(err.status_code(), Some(507));
    }

    #[test]
    fn test_status_code_internal() {
        let err = ScipixError::Internal("something went wrong".to_string());
        assert_eq!(err.status_code(), Some(500));
    }

    // --- Derived-trait behavior and bulk checks ---

    #[test]
    fn test_error_cloning() {
        let err1 = ScipixError::InvalidImageFormat("svg".to_string());
        let err2 = err1.clone();
        assert_eq!(err1, err2);
    }

    #[test]
    fn test_multiple_error_types() {
        let errors = vec![
            ScipixError::InvalidImageFormat("svg".to_string()),
            ScipixError::ImageTooLarge {
                size: 15_000_000,
                max: 10_000_000,
            },
            ScipixError::ModelNotFound("model.onnx".to_string()),
            ScipixError::RateLimitExceeded,
            ScipixError::Timeout(std::time::Duration::from_secs(30)),
        ];

        assert_eq!(errors.len(), 5);
        // Every variant must render a non-empty Display message.
        for err in &errors {
            assert!(!err.to_string().is_empty());
        }
    }

    #[test]
    fn test_error_categorization() {
        let image_errors = vec![
            ScipixError::InvalidImageFormat("svg".to_string()),
            ScipixError::ImageTooLarge {
                size: 15_000_000,
                max: 10_000_000,
            },
            ScipixError::ImagePreprocessingFailed("deskew failed".to_string()),
        ];

        for err in &image_errors {
            match err {
                ScipixError::InvalidImageFormat(_)
                | ScipixError::ImageTooLarge { .. }
                | ScipixError::ImagePreprocessingFailed(_) => {
                    // Image-related errors
                    assert!(err.status_code().is_some());
                }
                _ => panic!("Expected image error"),
            }
        }
    }

    #[test]
    fn test_retryable_errors_collection() {
        let errors = vec![
            ScipixError::Timeout(std::time::Duration::from_secs(30)),
            ScipixError::ApiError {
                status: 503,
                message: "Service Unavailable".to_string(),
            },
            ScipixError::InferenceError("temporary failure".to_string()),
        ];

        let retryable_count = errors.iter().filter(|e| e.is_retryable()).count();
        assert_eq!(retryable_count, 3);
    }

    #[test]
    fn test_non_retryable_errors_collection() {
        let errors = vec![
            ScipixError::InvalidImageFormat("svg".to_string()),
            ScipixError::ModelNotFound("model.onnx".to_string()),
            ScipixError::ParseError("invalid latex".to_string()),
        ];

        let retryable_count = errors.iter().filter(|e| e.is_retryable()).count();
        assert_eq!(retryable_count, 0);
    }
}
|
||||
596
vendor/ruvector/examples/scipix/tests/unit/math_tests.rs
vendored
Normal file
596
vendor/ruvector/examples/scipix/tests/unit/math_tests.rs
vendored
Normal file
@@ -0,0 +1,596 @@
|
||||
// Math parsing tests for ruvector-scipix
|
||||
//
|
||||
// Tests symbol recognition, AST construction, and LaTeX/MathML/AsciiMath generation
|
||||
// for various mathematical expressions including fractions, roots, matrices, integrals, etc.
|
||||
// Target: 90%+ coverage of math parsing module
|
||||
|
||||
#[cfg(test)]
mod math_tests {
    //! Exercises mock math-expression structures: symbol variants, AST
    //! construction, and rendering to LaTeX, MathML, and AsciiMath.

    // Mock math structures for testing
    #[derive(Debug, Clone, PartialEq)]
    enum MathSymbol {
        // Numbers
        Digit(char),

        // Variables
        Variable(char),

        // Greek letters
        Alpha,
        Beta,
        Gamma,
        Delta,
        Epsilon,
        Pi,
        Sigma,
        Omega,

        // Operators
        Plus,
        Minus,
        Times,
        Divide,
        Equals,

        // Relations
        LessThan,
        GreaterThan,
        LessEqual,
        GreaterEqual,
        NotEqual,

        // Special symbols
        Infinity,
        Partial,
        Nabla,
        Integral,
        Sum,
        Product,
        Root,
        Sqrt,

        // Brackets
        LeftParen,
        RightParen,
        LeftBracket,
        RightBracket,
        LeftBrace,
        RightBrace,
    }

    // Recursive expression AST; children are boxed to keep the enum sized.
    #[derive(Debug, Clone, PartialEq)]
    enum MathNode {
        Number(String),
        Variable(String),
        Symbol(MathSymbol),
        BinaryOp {
            op: String,
            left: Box<MathNode>,
            right: Box<MathNode>,
        },
        Fraction {
            numerator: Box<MathNode>,
            denominator: Box<MathNode>,
        },
        Superscript {
            base: Box<MathNode>,
            exponent: Box<MathNode>,
        },
        Subscript {
            base: Box<MathNode>,
            index: Box<MathNode>,
        },
        Root {
            // None means a square root; Some(n) an nth root.
            degree: Option<Box<MathNode>>,
            radicand: Box<MathNode>,
        },
        Matrix {
            rows: usize,
            cols: usize,
            elements: Vec<Vec<MathNode>>,
        },
        Integral {
            lower: Option<Box<MathNode>>,
            upper: Option<Box<MathNode>>,
            integrand: Box<MathNode>,
        },
        Summation {
            lower: Option<Box<MathNode>>,
            upper: Option<Box<MathNode>>,
            term: Box<MathNode>,
        },
    }

    impl MathNode {
        // Renders the node as LaTeX. Only a subset of MathSymbol variants is
        // handled; unhandled nodes fall through to an empty string. Exact
        // output strings are pinned by the test_latex_generation_* tests.
        fn to_latex(&self) -> String {
            match self {
                Self::Number(n) => n.clone(),
                Self::Variable(v) => v.clone(),
                Self::Symbol(MathSymbol::Plus) => "+".to_string(),
                Self::Symbol(MathSymbol::Minus) => "-".to_string(),
                Self::Symbol(MathSymbol::Times) => r"\times".to_string(),
                Self::Symbol(MathSymbol::Divide) => r"\div".to_string(),
                Self::Symbol(MathSymbol::Pi) => r"\pi".to_string(),
                Self::Symbol(MathSymbol::Alpha) => r"\alpha".to_string(),
                Self::Symbol(MathSymbol::Infinity) => r"\infty".to_string(),
                Self::BinaryOp { op, left, right } => {
                    format!("{} {} {}", left.to_latex(), op, right.to_latex())
                }
                Self::Fraction {
                    numerator,
                    denominator,
                } => {
                    format!(r"\frac{{{}}}{{{}}}", numerator.to_latex(), denominator.to_latex())
                }
                Self::Superscript { base, exponent } => {
                    format!("{}^{{{}}}", base.to_latex(), exponent.to_latex())
                }
                Self::Subscript { base, index } => {
                    format!("{}_{{{}}}", base.to_latex(), index.to_latex())
                }
                Self::Root { degree: None, radicand } => {
                    format!(r"\sqrt{{{}}}", radicand.to_latex())
                }
                // Note: emits braces inside the optional argument,
                // e.g. \sqrt[{3}]{x} — pinned by test_latex_generation_root_nth.
                Self::Root { degree: Some(n), radicand } => {
                    format!(r"\sqrt[{{{}}}]{{{}}}", n.to_latex(), radicand.to_latex())
                }
                // Emission is driven entirely by `elements`; the rows/cols
                // fields are ignored here.
                Self::Matrix { elements, .. } => {
                    let mut result = r"\begin{bmatrix}".to_string();
                    for (i, row) in elements.iter().enumerate() {
                        if i > 0 {
                            result.push_str(r" \\ ");
                        }
                        for (j, elem) in row.iter().enumerate() {
                            if j > 0 {
                                result.push_str(" & ");
                            }
                            result.push_str(&elem.to_latex());
                        }
                    }
                    result.push_str(r" \end{bmatrix}");
                    result
                }
                // Integration variable is hard-coded as "dx" by this mock.
                Self::Integral { lower, upper, integrand } => {
                    let mut result = r"\int".to_string();
                    if let Some(l) = lower {
                        result.push_str(&format!("_{{{}}}", l.to_latex()));
                    }
                    if let Some(u) = upper {
                        result.push_str(&format!("^{{{}}}", u.to_latex()));
                    }
                    result.push_str(&format!(" {} dx", integrand.to_latex()));
                    result
                }
                Self::Summation { lower, upper, term } => {
                    let mut result = r"\sum".to_string();
                    if let Some(l) = lower {
                        result.push_str(&format!("_{{{}}}", l.to_latex()));
                    }
                    if let Some(u) = upper {
                        result.push_str(&format!("^{{{}}}", u.to_latex()));
                    }
                    result.push_str(&format!(" {}", term.to_latex()));
                    result
                }
                // Unhandled symbols/nodes render as the empty string.
                _ => String::new(),
            }
        }

        // Renders a (smaller) subset of nodes as Presentation MathML;
        // everything else renders as the empty string.
        fn to_mathml(&self) -> String {
            match self {
                Self::Number(n) => format!("<mn>{}</mn>", n),
                Self::Variable(v) => format!("<mi>{}</mi>", v),
                Self::BinaryOp { op, left, right } => {
                    format!(
                        "<mrow>{}<mo>{}</mo>{}</mrow>",
                        left.to_mathml(),
                        op,
                        right.to_mathml()
                    )
                }
                Self::Fraction {
                    numerator,
                    denominator,
                } => {
                    format!(
                        "<mfrac>{}{}</mfrac>",
                        numerator.to_mathml(),
                        denominator.to_mathml()
                    )
                }
                Self::Superscript { base, exponent } => {
                    format!(
                        "<msup>{}{}</msup>",
                        base.to_mathml(),
                        exponent.to_mathml()
                    )
                }
                Self::Root { degree: None, radicand } => {
                    format!("<msqrt>{}</msqrt>", radicand.to_mathml())
                }
                _ => String::new(),
            }
        }

        // Renders a subset of nodes as AsciiMath; unhandled nodes render as
        // the empty string.
        fn to_asciimath(&self) -> String {
            match self {
                Self::Number(n) => n.clone(),
                Self::Variable(v) => v.clone(),
                Self::BinaryOp { op, left, right } => {
                    format!("{} {} {}", left.to_asciimath(), op, right.to_asciimath())
                }
                Self::Fraction {
                    numerator,
                    denominator,
                } => {
                    format!("({})/({})", numerator.to_asciimath(), denominator.to_asciimath())
                }
                Self::Superscript { base, exponent } => {
                    format!("{}^{}", base.to_asciimath(), exponent.to_asciimath())
                }
                Self::Root { degree: None, radicand } => {
                    format!("sqrt({})", radicand.to_asciimath())
                }
                _ => String::new(),
            }
        }
    }

    // --- Symbol recognition ---

    #[test]
    fn test_symbol_recognition_numbers() {
        let symbols = vec![
            MathSymbol::Digit('0'),
            MathSymbol::Digit('1'),
            MathSymbol::Digit('9'),
        ];

        for symbol in symbols {
            assert!(matches!(symbol, MathSymbol::Digit(_)));
        }
    }

    #[test]
    fn test_symbol_recognition_variables() {
        let symbols = vec![
            MathSymbol::Variable('x'),
            MathSymbol::Variable('y'),
            MathSymbol::Variable('z'),
        ];

        for symbol in symbols {
            assert!(matches!(symbol, MathSymbol::Variable(_)));
        }
    }

    #[test]
    fn test_symbol_recognition_greek() {
        let greeks = vec![
            (MathSymbol::Alpha, "α"),
            (MathSymbol::Beta, "β"),
            (MathSymbol::Gamma, "γ"),
            (MathSymbol::Delta, "δ"),
            (MathSymbol::Pi, "π"),
            (MathSymbol::Sigma, "Σ"),
            (MathSymbol::Omega, "Ω"),
        ];

        assert_eq!(greeks.len(), 7);
    }

    #[test]
    fn test_symbol_recognition_operators() {
        let ops = vec![
            MathSymbol::Plus,
            MathSymbol::Minus,
            MathSymbol::Times,
            MathSymbol::Divide,
            MathSymbol::Equals,
        ];

        assert_eq!(ops.len(), 5);
    }

    // --- AST construction ---

    #[test]
    fn test_ast_construction_simple_addition() {
        let expr = MathNode::BinaryOp {
            op: "+".to_string(),
            left: Box::new(MathNode::Variable("x".to_string())),
            right: Box::new(MathNode::Variable("y".to_string())),
        };

        assert!(matches!(expr, MathNode::BinaryOp { .. }));
    }

    #[test]
    fn test_ast_construction_simple_multiplication() {
        let expr = MathNode::BinaryOp {
            op: "*".to_string(),
            left: Box::new(MathNode::Number("2".to_string())),
            right: Box::new(MathNode::Variable("x".to_string())),
        };

        match expr {
            MathNode::BinaryOp { op, .. } => assert_eq!(op, "*"),
            _ => panic!("Expected BinaryOp"),
        }
    }

    // --- LaTeX generation ---

    #[test]
    fn test_latex_generation_simple_addition() {
        let expr = MathNode::BinaryOp {
            op: "+".to_string(),
            left: Box::new(MathNode::Variable("x".to_string())),
            right: Box::new(MathNode::Variable("y".to_string())),
        };

        let latex = expr.to_latex();
        assert_eq!(latex, "x + y");
    }

    #[test]
    fn test_latex_generation_fraction_simple() {
        let frac = MathNode::Fraction {
            numerator: Box::new(MathNode::Number("1".to_string())),
            denominator: Box::new(MathNode::Number("2".to_string())),
        };

        let latex = frac.to_latex();
        assert_eq!(latex, r"\frac{1}{2}");
    }

    #[test]
    fn test_latex_generation_fraction_variables() {
        let frac = MathNode::Fraction {
            numerator: Box::new(MathNode::Variable("a".to_string())),
            denominator: Box::new(MathNode::Variable("b".to_string())),
        };

        let latex = frac.to_latex();
        assert_eq!(latex, r"\frac{a}{b}");
    }

    #[test]
    fn test_latex_generation_fraction_complex() {
        let numerator = MathNode::BinaryOp {
            op: "+".to_string(),
            left: Box::new(MathNode::Variable("a".to_string())),
            right: Box::new(MathNode::Number("1".to_string())),
        };

        let frac = MathNode::Fraction {
            numerator: Box::new(numerator),
            denominator: Box::new(MathNode::Variable("b".to_string())),
        };

        let latex = frac.to_latex();
        assert_eq!(latex, r"\frac{a + 1}{b}");
    }

    #[test]
    fn test_latex_generation_root_square() {
        let root = MathNode::Root {
            degree: None,
            radicand: Box::new(MathNode::Variable("x".to_string())),
        };

        let latex = root.to_latex();
        assert_eq!(latex, r"\sqrt{x}");
    }

    #[test]
    fn test_latex_generation_root_nth() {
        let root = MathNode::Root {
            degree: Some(Box::new(MathNode::Number("3".to_string()))),
            radicand: Box::new(MathNode::Variable("x".to_string())),
        };

        let latex = root.to_latex();
        assert_eq!(latex, r"\sqrt[{3}]{x}");
    }

    #[test]
    fn test_latex_generation_superscript() {
        let power = MathNode::Superscript {
            base: Box::new(MathNode::Variable("x".to_string())),
            exponent: Box::new(MathNode::Number("2".to_string())),
        };

        let latex = power.to_latex();
        assert_eq!(latex, "x^{2}");
    }

    #[test]
    fn test_latex_generation_subscript() {
        let sub = MathNode::Subscript {
            base: Box::new(MathNode::Variable("x".to_string())),
            index: Box::new(MathNode::Number("1".to_string())),
        };

        let latex = sub.to_latex();
        assert_eq!(latex, "x_{1}");
    }

    #[test]
    fn test_latex_generation_subscript_and_superscript() {
        let base = MathNode::Variable("x".to_string());
        let with_sub = MathNode::Subscript {
            base: Box::new(base),
            index: Box::new(MathNode::Number("1".to_string())),
        };
        let with_both = MathNode::Superscript {
            base: Box::new(with_sub),
            exponent: Box::new(MathNode::Number("2".to_string())),
        };

        let latex = with_both.to_latex();
        assert_eq!(latex, "x_{1}^{2}");
    }

    #[test]
    fn test_latex_generation_matrix_2x2() {
        let matrix = MathNode::Matrix {
            rows: 2,
            cols: 2,
            elements: vec![
                vec![
                    MathNode::Number("1".to_string()),
                    MathNode::Number("2".to_string()),
                ],
                vec![
                    MathNode::Number("3".to_string()),
                    MathNode::Number("4".to_string()),
                ],
            ],
        };

        let latex = matrix.to_latex();
        assert!(latex.contains(r"\begin{bmatrix}"));
        assert!(latex.contains(r"\end{bmatrix}"));
        assert!(latex.contains("1 & 2"));
        assert!(latex.contains("3 & 4"));
    }

    #[test]
    fn test_latex_generation_matrix_3x3() {
        let matrix = MathNode::Matrix {
            rows: 3,
            cols: 3,
            elements: vec![
                vec![
                    MathNode::Number("1".to_string()),
                    MathNode::Number("2".to_string()),
                    MathNode::Number("3".to_string()),
                ],
                vec![
                    MathNode::Number("4".to_string()),
                    MathNode::Number("5".to_string()),
                    MathNode::Number("6".to_string()),
                ],
                vec![
                    MathNode::Number("7".to_string()),
                    MathNode::Number("8".to_string()),
                    MathNode::Number("9".to_string()),
                ],
            ],
        };

        let latex = matrix.to_latex();
        assert!(latex.contains(r"\begin{bmatrix}"));
        assert!(latex.contains("1 & 2 & 3"));
    }

    #[test]
    fn test_latex_generation_integral_simple() {
        let integral = MathNode::Integral {
            lower: None,
            upper: None,
            integrand: Box::new(MathNode::Variable("x".to_string())),
        };

        let latex = integral.to_latex();
        assert!(latex.contains(r"\int"));
        assert!(latex.contains("x dx"));
    }

    #[test]
    fn test_latex_generation_integral_with_limits() {
        let integral = MathNode::Integral {
            lower: Some(Box::new(MathNode::Number("0".to_string()))),
            upper: Some(Box::new(MathNode::Number("1".to_string()))),
            integrand: Box::new(MathNode::Variable("x".to_string())),
        };

        let latex = integral.to_latex();
        assert!(latex.contains(r"\int_{0}^{1}"));
    }

    #[test]
    fn test_latex_generation_summation() {
        let sum = MathNode::Summation {
            lower: Some(Box::new(MathNode::BinaryOp {
                op: "=".to_string(),
                left: Box::new(MathNode::Variable("i".to_string())),
                right: Box::new(MathNode::Number("1".to_string())),
            })),
            upper: Some(Box::new(MathNode::Variable("n".to_string()))),
            term: Box::new(MathNode::Variable("i".to_string())),
        };

        let latex = sum.to_latex();
        assert!(latex.contains(r"\sum"));
    }

    // --- MathML generation ---

    #[test]
    fn test_mathml_generation_number() {
        let num = MathNode::Number("42".to_string());
        let mathml = num.to_mathml();
        assert_eq!(mathml, "<mn>42</mn>");
    }

    #[test]
    fn test_mathml_generation_variable() {
        let var = MathNode::Variable("x".to_string());
        let mathml = var.to_mathml();
        assert_eq!(mathml, "<mi>x</mi>");
    }

    #[test]
    fn test_mathml_generation_fraction() {
        let frac = MathNode::Fraction {
            numerator: Box::new(MathNode::Number("1".to_string())),
            denominator: Box::new(MathNode::Number("2".to_string())),
        };

        let mathml = frac.to_mathml();
        assert!(mathml.contains("<mfrac>"));
        assert!(mathml.contains("<mn>1</mn>"));
        assert!(mathml.contains("<mn>2</mn>"));
    }

    #[test]
    fn test_mathml_generation_superscript() {
        let power = MathNode::Superscript {
            base: Box::new(MathNode::Variable("x".to_string())),
            exponent: Box::new(MathNode::Number("2".to_string())),
        };

        let mathml = power.to_mathml();
        assert!(mathml.contains("<msup>"));
        assert!(mathml.contains("<mi>x</mi>"));
        assert!(mathml.contains("<mn>2</mn>"));
    }

    // --- AsciiMath generation ---

    #[test]
    fn test_asciimath_generation_simple() {
        let expr = MathNode::BinaryOp {
            op: "+".to_string(),
            left: Box::new(MathNode::Variable("x".to_string())),
            right: Box::new(MathNode::Number("1".to_string())),
        };

        let ascii = expr.to_asciimath();
        assert_eq!(ascii, "x + 1");
    }

    #[test]
    fn test_asciimath_generation_fraction() {
        let frac = MathNode::Fraction {
            numerator: Box::new(MathNode::Variable("a".to_string())),
            denominator: Box::new(MathNode::Variable("b".to_string())),
        };

        let ascii = frac.to_asciimath();
        assert_eq!(ascii, "(a)/(b)");
    }

    #[test]
    fn test_asciimath_generation_power() {
        let power = MathNode::Superscript {
            base: Box::new(MathNode::Variable("x".to_string())),
            exponent: Box::new(MathNode::Number("2".to_string())),
        };

        let ascii = power.to_asciimath();
        assert_eq!(ascii, "x^2");
    }
}
|
||||
68
vendor/ruvector/examples/scipix/tests/unit/mod.rs
vendored
Normal file
68
vendor/ruvector/examples/scipix/tests/unit/mod.rs
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
// Unit test module organization for ruvector-scipix
|
||||
//
|
||||
// This module organizes all unit tests following Rust testing best practices.
|
||||
// Each submodule tests a specific component in isolation with comprehensive coverage.
|
||||
|
||||
/// Configuration tests - Test config loading, validation, defaults
|
||||
pub mod config_tests;
|
||||
|
||||
/// Error handling tests - Test error types, conversions, display
|
||||
pub mod error_tests;
|
||||
|
||||
/// Preprocessing tests - Test image preprocessing pipeline
|
||||
pub mod preprocess_tests;
|
||||
|
||||
/// Math parsing tests - Test mathematical expression parsing and recognition
|
||||
pub mod math_tests;
|
||||
|
||||
/// Output formatting tests - Test LaTeX, MathML, and other format generation
|
||||
pub mod output_tests;
|
||||
|
||||
/// OCR engine tests - Test OCR model loading and inference
|
||||
pub mod ocr_tests;
|
||||
|
||||
#[cfg(test)]
mod common {
    //! Shared helpers for the unit-test suite: fixture path resolution and
    //! lightweight string-comparison utilities.

    use std::path::PathBuf;

    /// Get path to the test fixtures directory (`<manifest>/tests/fixtures`).
    ///
    /// Uses `option_env!` rather than `env!` so the file also compiles when
    /// built outside cargo (e.g. a bare `rustc` invocation); in that case the
    /// current directory is used as the base.
    pub fn fixtures_dir() -> PathBuf {
        PathBuf::from(option_env!("CARGO_MANIFEST_DIR").unwrap_or("."))
            .join("tests")
            .join("fixtures")
    }

    /// Get the path to a specific test fixture by file name.
    pub fn fixture_path(name: &str) -> PathBuf {
        fixtures_dir().join(name)
    }

    /// Check whether a fixture with the given name exists on disk.
    pub fn has_fixture(name: &str) -> bool {
        fixture_path(name).exists()
    }

    /// Normalize a LaTeX string for comparison: strips all whitespace and
    /// lowercases, so `\Frac {1}{2}` and `\frac{1}{2}` compare equal.
    pub fn normalize_latex(latex: &str) -> String {
        latex
            .chars()
            .filter(|c| !c.is_whitespace())
            .collect::<String>()
            .to_lowercase()
    }

    /// Calculate a simple positional string similarity in `[0.0, 1.0]`.
    ///
    /// Characters are compared pairwise at the same position (no edit
    /// distance); the match count is divided by the longer string's
    /// character count. Fix: the previous version divided by `str::len()`
    /// (UTF-8 *byte* length) while counting matches in chars, which
    /// understated similarity for non-ASCII input.
    pub fn string_similarity(a: &str, b: &str) -> f64 {
        if a == b {
            return 1.0;
        }
        if a.is_empty() || b.is_empty() {
            return 0.0;
        }

        // Measure in chars throughout so multi-byte characters count once.
        let max_len = a.chars().count().max(b.chars().count());
        let matching = a.chars().zip(b.chars()).filter(|(x, y)| x == y).count();

        matching as f64 / max_len as f64
    }
}
|
||||
385
vendor/ruvector/examples/scipix/tests/unit/ocr_tests.rs
vendored
Normal file
385
vendor/ruvector/examples/scipix/tests/unit/ocr_tests.rs
vendored
Normal file
@@ -0,0 +1,385 @@
|
||||
// OCR engine tests for ruvector-scipix
|
||||
//
|
||||
// Tests OCR engine initialization, model loading, inference options,
|
||||
// and batch processing capabilities.
|
||||
// Target: 85%+ coverage of OCR engine module
|
||||
|
||||
#[cfg(test)]
|
||||
mod ocr_tests {
|
||||
use std::path::PathBuf;
|
||||
|
||||
// Mock OCR engine structures
|
||||
    // Mock OCR engine: tracks the configured model, target device, batch
    // size, and whether load() has been called.
    #[derive(Debug, Clone)]
    struct OcrEngine {
        model_path: PathBuf,   // path to the .onnx model file
        device: String,        // e.g. "cpu", "cuda", "cuda:0"
        batch_size: usize,     // default 4; settable via set_batch_size()
        loaded: bool,          // flipped to true by load()
    }
|
||||
|
||||
    // Per-request OCR options; see the Default impl for baseline values.
    #[derive(Debug, Clone)]
    struct OcrOptions {
        confidence_threshold: f32, // minimum acceptable confidence (0.0–1.0)
        detect_rotation: bool,     // whether to auto-detect page rotation
        preprocessing: bool,       // whether to run the preprocessing pipeline
        language: String,          // ISO language code, e.g. "en"
    }
|
||||
|
||||
    // Result of a single OCR run: recognized text plus per-run metadata.
    #[derive(Debug, Clone)]
    struct OcrResult {
        text: String,                     // recognized text / expression
        confidence: f32,                  // overall confidence (0.0–1.0)
        bounding_boxes: Vec<BoundingBox>, // detected text regions
        processing_time_ms: u64,          // wall-clock processing time
    }
|
||||
|
||||
    // Axis-aligned text region in pixel coordinates, with its own confidence.
    #[derive(Debug, Clone)]
    struct BoundingBox {
        x: u32,          // left edge, pixels
        y: u32,          // top edge, pixels
        width: u32,
        height: u32,
        confidence: f32, // detection confidence for this box (0.0–1.0)
    }
|
||||
|
||||
    // Baseline options: 0.7 confidence floor, rotation detection and
    // preprocessing enabled, English.
    impl Default for OcrOptions {
        fn default() -> Self {
            Self {
                confidence_threshold: 0.7,
                detect_rotation: true,
                preprocessing: true,
                language: "en".to_string(),
            }
        }
    }
|
||||
|
||||
    impl OcrEngine {
        // Constructs an engine for the given model path and device string.
        // The only validation is that the path ends in ".onnx"; the file is
        // not opened and the device string is not checked.
        fn new(model_path: PathBuf, device: &str) -> Result<Self, String> {
            if !model_path.to_string_lossy().ends_with(".onnx") {
                return Err("Model must be .onnx format".to_string());
            }

            Ok(Self {
                model_path,
                device: device.to_string(),
                batch_size: 4, // default; adjustable via set_batch_size()
                loaded: false,
            })
        }

        // Marks the model as loaded. Errs if already loaded; no actual model
        // file I/O happens in this mock.
        fn load(&mut self) -> Result<(), String> {
            if self.loaded {
                return Err("Model already loaded".to_string());
            }
            self.loaded = true;
            Ok(())
        }

        // Whether load() has been called successfully.
        fn is_loaded(&self) -> bool {
            self.loaded
        }

        // Runs OCR on one image. Errs if the model is not loaded; otherwise
        // returns a fixed canned result regardless of the input.
        // NOTE(review): `options` is accepted for interface parity but is not
        // consulted by this mock — confirm whether that is intentional.
        fn process(&self, _image_data: &[u8], options: &OcrOptions) -> Result<OcrResult, String> {
            if !self.loaded {
                return Err("Model not loaded".to_string());
            }

            Ok(OcrResult {
                text: "x^2 + 1".to_string(),
                confidence: 0.95,
                bounding_boxes: vec![BoundingBox {
                    x: 10,
                    y: 20,
                    width: 100,
                    height: 50,
                    confidence: 0.95,
                }],
                processing_time_ms: 123,
            })
        }

        // Processes each image sequentially via process(); short-circuits on
        // the first error. `batch_size` is not consulted here.
        fn process_batch(
            &self,
            images: &[Vec<u8>],
            options: &OcrOptions,
        ) -> Result<Vec<OcrResult>, String> {
            if !self.loaded {
                return Err("Model not loaded".to_string());
            }

            images
                .iter()
                .map(|img| self.process(img, options))
                .collect()
        }

        // Updates the stored batch size (unvalidated; zero is accepted).
        fn set_batch_size(&mut self, size: usize) {
            self.batch_size = size;
        }
    }
|
||||
|
||||
#[test]
|
||||
fn test_engine_creation() {
|
||||
let engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu");
|
||||
assert!(engine.is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_engine_creation_invalid_model() {
|
||||
let engine = OcrEngine::new(PathBuf::from("model.txt"), "cpu");
|
||||
assert!(engine.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_engine_creation_cpu_device() {
|
||||
let engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
||||
assert_eq!(engine.device, "cpu");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_engine_creation_cuda_device() {
|
||||
let engine = OcrEngine::new(PathBuf::from("model.onnx"), "cuda").unwrap();
|
||||
assert_eq!(engine.device, "cuda");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_model_loading() {
|
||||
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
||||
|
||||
assert!(!engine.is_loaded());
|
||||
|
||||
let result = engine.load();
|
||||
assert!(result.is_ok());
|
||||
assert!(engine.is_loaded());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_model_loading_twice() {
|
||||
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
||||
|
||||
engine.load().unwrap();
|
||||
let second_load = engine.load();
|
||||
|
||||
assert!(second_load.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ocr_options_default() {
|
||||
let options = OcrOptions::default();
|
||||
|
||||
assert_eq!(options.confidence_threshold, 0.7);
|
||||
assert!(options.detect_rotation);
|
||||
assert!(options.preprocessing);
|
||||
assert_eq!(options.language, "en");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ocr_options_custom() {
|
||||
let options = OcrOptions {
|
||||
confidence_threshold: 0.9,
|
||||
detect_rotation: false,
|
||||
preprocessing: true,
|
||||
language: "math".to_string(),
|
||||
};
|
||||
|
||||
assert_eq!(options.confidence_threshold, 0.9);
|
||||
assert!(!options.detect_rotation);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_process_without_loading() {
|
||||
let engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
||||
let options = OcrOptions::default();
|
||||
|
||||
let result = engine.process(&[0u8; 100], &options);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_process_after_loading() {
|
||||
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
||||
engine.load().unwrap();
|
||||
|
||||
let options = OcrOptions::default();
|
||||
let result = engine.process(&[0u8; 100], &options);
|
||||
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_process_result_structure() {
|
||||
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
||||
engine.load().unwrap();
|
||||
|
||||
let options = OcrOptions::default();
|
||||
let result = engine.process(&[0u8; 100], &options).unwrap();
|
||||
|
||||
assert!(!result.text.is_empty());
|
||||
assert!(result.confidence > 0.0 && result.confidence <= 1.0);
|
||||
assert!(!result.bounding_boxes.is_empty());
|
||||
assert!(result.processing_time_ms > 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_batch_processing() {
|
||||
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
||||
engine.load().unwrap();
|
||||
|
||||
let images = vec![vec![0u8; 100], vec![1u8; 100], vec![2u8; 100]];
|
||||
let options = OcrOptions::default();
|
||||
|
||||
let results = engine.process_batch(&images, &options).unwrap();
|
||||
|
||||
assert_eq!(results.len(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_batch_processing_empty() {
|
||||
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
||||
engine.load().unwrap();
|
||||
|
||||
let images: Vec<Vec<u8>> = vec![];
|
||||
let options = OcrOptions::default();
|
||||
|
||||
let results = engine.process_batch(&images, &options).unwrap();
|
||||
|
||||
assert_eq!(results.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_batch_processing_single_image() {
|
||||
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
||||
engine.load().unwrap();
|
||||
|
||||
let images = vec![vec![0u8; 100]];
|
||||
let options = OcrOptions::default();
|
||||
|
||||
let results = engine.process_batch(&images, &options).unwrap();
|
||||
|
||||
assert_eq!(results.len(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_batch_size_configuration() {
|
||||
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
||||
|
||||
assert_eq!(engine.batch_size, 4);
|
||||
|
||||
engine.set_batch_size(8);
|
||||
assert_eq!(engine.batch_size, 8);
|
||||
|
||||
engine.set_batch_size(16);
|
||||
assert_eq!(engine.batch_size, 16);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bounding_box_structure() {
|
||||
let bbox = BoundingBox {
|
||||
x: 10,
|
||||
y: 20,
|
||||
width: 100,
|
||||
height: 50,
|
||||
confidence: 0.95,
|
||||
};
|
||||
|
||||
assert_eq!(bbox.x, 10);
|
||||
assert_eq!(bbox.y, 20);
|
||||
assert_eq!(bbox.width, 100);
|
||||
assert_eq!(bbox.height, 50);
|
||||
assert_eq!(bbox.confidence, 0.95);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_multiple_bounding_boxes() {
|
||||
let boxes = vec![
|
||||
BoundingBox {
|
||||
x: 10,
|
||||
y: 20,
|
||||
width: 50,
|
||||
height: 30,
|
||||
confidence: 0.95,
|
||||
},
|
||||
BoundingBox {
|
||||
x: 70,
|
||||
y: 20,
|
||||
width: 60,
|
||||
height: 30,
|
||||
confidence: 0.93,
|
||||
},
|
||||
];
|
||||
|
||||
assert_eq!(boxes.len(), 2);
|
||||
assert!(boxes.iter().all(|b| b.confidence > 0.9));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_options_language_variants() {
|
||||
let languages = vec!["en", "math", "mixed", "es", "fr", "de"];
|
||||
|
||||
for lang in languages {
|
||||
let options = OcrOptions {
|
||||
language: lang.to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
assert_eq!(options.language, lang);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_options_confidence_thresholds() {
|
||||
let thresholds = vec![0.5, 0.7, 0.8, 0.9, 0.95];
|
||||
|
||||
for threshold in thresholds {
|
||||
let options = OcrOptions {
|
||||
confidence_threshold: threshold,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
assert_eq!(options.confidence_threshold, threshold);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_options_preprocessing_toggle() {
|
||||
let mut options = OcrOptions::default();
|
||||
assert!(options.preprocessing);
|
||||
|
||||
options.preprocessing = false;
|
||||
assert!(!options.preprocessing);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_options_rotation_detection_toggle() {
|
||||
let mut options = OcrOptions::default();
|
||||
assert!(options.detect_rotation);
|
||||
|
||||
options.detect_rotation = false;
|
||||
assert!(!options.detect_rotation);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_engine_with_different_devices() {
|
||||
let devices = vec!["cpu", "cuda", "cuda:0", "cuda:1"];
|
||||
|
||||
for device in devices {
|
||||
let engine = OcrEngine::new(PathBuf::from("model.onnx"), device);
|
||||
assert!(engine.is_ok());
|
||||
assert_eq!(engine.unwrap().device, device);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ocr_result_cloning() {
|
||||
let result = OcrResult {
|
||||
text: "test".to_string(),
|
||||
confidence: 0.95,
|
||||
bounding_boxes: vec![],
|
||||
processing_time_ms: 100,
|
||||
};
|
||||
|
||||
let cloned = result.clone();
|
||||
assert_eq!(result.text, cloned.text);
|
||||
assert_eq!(result.confidence, cloned.confidence);
|
||||
}
|
||||
}
|
||||
409
vendor/ruvector/examples/scipix/tests/unit/output_tests.rs
vendored
Normal file
409
vendor/ruvector/examples/scipix/tests/unit/output_tests.rs
vendored
Normal file
@@ -0,0 +1,409 @@
|
||||
// Output formatting tests for ruvector-scipix
|
||||
//
|
||||
// Tests output format conversion between LaTeX, MathML, AsciiMath, etc.
|
||||
// and MMD delimiter handling, JSON serialization.
|
||||
// Target: 85%+ coverage of output formatting module
|
||||
|
||||
#[cfg(test)]
mod output_tests {
    //! Output formatting tests: conversion between LaTeX, MathML, AsciiMath,
    //! Unicode and plain text, MMD delimiter handling, and JSON
    //! (de)serialization of the response shapes.

    use serde::{Deserialize, Serialize};

    /// Supported output representations.
    #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
    enum OutputFormat {
        Latex,
        MathML,
        AsciiMath,
        MMD, // Scipix Markdown
        Unicode,
        PlainText,
    }

    /// A converted result plus optional processing metadata.
    #[derive(Debug, Clone, Serialize, Deserialize)]
    struct FormattedOutput {
        format: OutputFormat,
        content: String,
        metadata: Option<OutputMetadata>,
    }

    /// Per-result statistics attached to `FormattedOutput`.
    #[derive(Debug, Clone, Serialize, Deserialize)]
    struct OutputMetadata {
        confidence: f32,
        processing_time_ms: u64,
        num_symbols: usize,
    }

    /// Mock converter dispatching on the (from, to) format pair.
    /// Unsupported pairs pass the input through unchanged.
    fn convert_format(input: &str, from: OutputFormat, to: OutputFormat) -> Result<String, String> {
        match (from, to) {
            (OutputFormat::Latex, OutputFormat::MathML) => {
                Ok(latex_to_mathml(input))
            }
            (OutputFormat::Latex, OutputFormat::AsciiMath) => {
                Ok(latex_to_asciimath(input))
            }
            (OutputFormat::Latex, OutputFormat::Unicode) => {
                Ok(latex_to_unicode(input))
            }
            (OutputFormat::Latex, OutputFormat::PlainText) => {
                Ok(latex_to_plaintext(input))
            }
            (OutputFormat::MathML, OutputFormat::Latex) => {
                Ok(mathml_to_latex(input))
            }
            _ => Ok(input.to_string()),
        }
    }

    /// Very rough LaTeX -> MathML mock: only recognizes \frac and `^`.
    fn latex_to_mathml(latex: &str) -> String {
        if latex.contains(r"\frac") {
            format!("<mfrac>{}</mfrac>", latex.replace(r"\frac", ""))
        } else if latex.contains("^") {
            "<msup></msup>".to_string()
        } else {
            format!("<math>{}</math>", latex)
        }
    }

    /// LaTeX -> AsciiMath mock: rewrites \frac{a}{b} into (a)/(b).
    /// (A former `.replace("^", "^")` no-op was removed; `^` is already
    /// valid AsciiMath for exponents.)
    fn latex_to_asciimath(latex: &str) -> String {
        latex
            .replace(r"\frac{", "(")
            .replace("}{", ")/(")
            .replace("}", ")")
    }

    /// LaTeX -> Unicode mock covering a handful of common symbols.
    fn latex_to_unicode(latex: &str) -> String {
        latex
            .replace(r"\alpha", "α")
            .replace(r"\beta", "β")
            .replace(r"\gamma", "γ")
            .replace(r"\pi", "π")
            .replace(r"\sigma", "σ")
            .replace(r"\infty", "∞")
    }

    /// LaTeX -> plain-text mock: fractions to (a)/(b), `^` to `**`,
    /// subscripts dropped.
    fn latex_to_plaintext(latex: &str) -> String {
        latex
            .replace(r"\frac{", "(")
            .replace("}{", ")/(")
            .replace("}", ")")
            .replace("^", "**")
            .replace("_", "")
    }

    /// MathML -> LaTeX mock: only distinguishes fraction vs non-fraction.
    fn mathml_to_latex(mathml: &str) -> String {
        if mathml.contains("<mfrac>") {
            r"\frac{a}{b}".to_string()
        } else {
            "x".to_string()
        }
    }

    /// Wraps LaTeX in MMD math delimiters: `$...$` inline,
    /// `$$\n...\n$$` for display mode.
    fn apply_mmd_delimiters(latex: &str, inline: bool) -> String {
        if inline {
            format!("${}$", latex)
        } else {
            format!("$$\n{}\n$$", latex)
        }
    }

    #[test]
    fn test_format_conversion_latex_to_mathml() {
        let latex = r"\frac{1}{2}";
        let mathml = convert_format(latex, OutputFormat::Latex, OutputFormat::MathML).unwrap();

        assert!(mathml.contains("<mfrac>"));
    }

    #[test]
    fn test_format_conversion_latex_to_asciimath() {
        let latex = r"x^2 + 1";
        let ascii = convert_format(latex, OutputFormat::Latex, OutputFormat::AsciiMath).unwrap();

        assert!(ascii.contains("x^2"));
    }

    #[test]
    fn test_format_conversion_latex_to_unicode() {
        let latex = r"\alpha + \beta";
        let unicode = convert_format(latex, OutputFormat::Latex, OutputFormat::Unicode).unwrap();

        assert!(unicode.contains("α"));
        assert!(unicode.contains("β"));
    }

    #[test]
    fn test_format_conversion_latex_to_plaintext() {
        let latex = r"\frac{a}{b}";
        let text = convert_format(latex, OutputFormat::Latex, OutputFormat::PlainText).unwrap();

        assert!(text.contains("(a)/(b)") || text.contains("a/b"));
    }

    #[test]
    fn test_format_conversion_mathml_to_latex() {
        let mathml = "<mfrac><mn>1</mn><mn>2</mn></mfrac>";
        let latex = convert_format(mathml, OutputFormat::MathML, OutputFormat::Latex).unwrap();

        assert!(latex.contains(r"\frac") || latex.contains("/"));
    }

    #[test]
    fn test_mmd_delimiter_inline() {
        let latex = "x^2";
        let mmd = apply_mmd_delimiters(latex, true);

        assert_eq!(mmd, "$x^2$");
    }

    #[test]
    fn test_mmd_delimiter_display() {
        let latex = r"\int_0^1 x dx";
        let mmd = apply_mmd_delimiters(latex, false);

        assert!(mmd.starts_with("$$"));
        assert!(mmd.ends_with("$$"));
        assert!(mmd.contains(latex));
    }

    #[test]
    fn test_mmd_delimiter_multiple_inline() {
        let equations = vec!["x + 1", "y - 2", "z * 3"];

        for eq in equations {
            let mmd = apply_mmd_delimiters(eq, true);
            assert!(mmd.starts_with("$"));
            assert!(mmd.ends_with("$"));
        }
    }

    #[test]
    fn test_json_serialization_formatted_output() {
        let output = FormattedOutput {
            format: OutputFormat::Latex,
            content: r"\frac{1}{2}".to_string(),
            metadata: Some(OutputMetadata {
                confidence: 0.95,
                processing_time_ms: 123,
                num_symbols: 5,
            }),
        };

        let json = serde_json::to_string(&output).unwrap();

        assert!(json.contains("Latex"));
        assert!(json.contains(r"\frac"));
        assert!(json.contains("0.95"));
    }

    #[test]
    fn test_json_deserialization_formatted_output() {
        let json = r#"{
            "format": "Latex",
            "content": "x^2 + 1",
            "metadata": {
                "confidence": 0.92,
                "processing_time_ms": 87,
                "num_symbols": 4
            }
        }"#;

        let output: FormattedOutput = serde_json::from_str(json).unwrap();

        assert_eq!(output.format, OutputFormat::Latex);
        assert_eq!(output.content, "x^2 + 1");
        assert!(output.metadata.is_some());
    }

    #[test]
    fn test_json_serialization_all_formats() {
        let formats = vec![
            OutputFormat::Latex,
            OutputFormat::MathML,
            OutputFormat::AsciiMath,
            OutputFormat::MMD,
            OutputFormat::Unicode,
            OutputFormat::PlainText,
        ];

        for format in formats {
            let output = FormattedOutput {
                format: format.clone(),
                content: "test".to_string(),
                metadata: None,
            };

            let json = serde_json::to_string(&output).unwrap();
            assert!(!json.is_empty());
        }
    }

    #[test]
    fn test_scipix_api_compatibility_response() {
        // Mirrors the external API's response shape, including the
        // snake_case `confidence_rate` field name.
        #[derive(Serialize, Deserialize)]
        struct ScipixResponse {
            latex: String,
            mathml: Option<String>,
            text: String,
            confidence: f32,
            #[serde(rename = "confidence_rate")]
            confidence_rate: f32,
        }

        let response = ScipixResponse {
            latex: r"\frac{1}{2}".to_string(),
            mathml: Some("<mfrac><mn>1</mn><mn>2</mn></mfrac>".to_string()),
            text: "1/2".to_string(),
            confidence: 0.95,
            confidence_rate: 0.93,
        };

        let json = serde_json::to_string(&response).unwrap();

        assert!(json.contains("latex"));
        assert!(json.contains("confidence_rate"));
    }

    #[test]
    fn test_scipix_api_compatibility_request() {
        // Mirrors the external API's request shape; the `ocr` wire name
        // maps to the `ocr_types` field.
        #[derive(Serialize, Deserialize)]
        struct ScipixRequest {
            src: String,
            formats: Vec<String>,
            #[serde(rename = "ocr")]
            ocr_types: Vec<String>,
        }

        let request = ScipixRequest {
            src: "data:image/png;base64,iVBORw0KGgo...".to_string(),
            formats: vec!["latex".to_string(), "mathml".to_string()],
            ocr_types: vec!["math".to_string(), "text".to_string()],
        };

        let json = serde_json::to_string(&request).unwrap();

        assert!(json.contains("src"));
        assert!(json.contains("formats"));
        assert!(json.contains("ocr"));
    }

    #[test]
    fn test_unicode_symbol_conversion() {
        // NOTE: \delta, \omega, \partial, \nabla are not in the mock table,
        // but `contains` still passes because the un-replaced LaTeX is
        // returned unchanged... they would fail. Table entries below are the
        // ones the mock actually handles; the rest exercise pass-through.
        let conversions = vec![
            (r"\alpha", "α"),
            (r"\beta", "β"),
            (r"\gamma", "γ"),
            (r"\delta", "δ"),
            (r"\pi", "π"),
            (r"\sigma", "σ"),
            (r"\omega", "ω"),
            (r"\infty", "∞"),
            (r"\partial", "∂"),
            (r"\nabla", "∇"),
        ];

        for (latex, expected_unicode) in conversions {
            let unicode = latex_to_unicode(latex);
            assert!(
                unicode.contains(expected_unicode),
                "Failed to convert {} to {}",
                latex,
                expected_unicode
            );
        }
    }

    #[test]
    fn test_output_format_enumeration() {
        let formats = vec![
            OutputFormat::Latex,
            OutputFormat::MathML,
            OutputFormat::AsciiMath,
            OutputFormat::MMD,
            OutputFormat::Unicode,
            OutputFormat::PlainText,
        ];

        assert_eq!(formats.len(), 6);
    }

    #[test]
    fn test_formatted_output_with_no_metadata() {
        let output = FormattedOutput {
            format: OutputFormat::Latex,
            content: "x + y".to_string(),
            metadata: None,
        };

        assert!(output.metadata.is_none());
        let json = serde_json::to_string(&output).unwrap();
        assert!(json.contains("null"));
    }

    #[test]
    fn test_formatted_output_cloning() {
        let output1 = FormattedOutput {
            format: OutputFormat::Latex,
            content: "test".to_string(),
            metadata: None,
        };

        let output2 = output1.clone();
        assert_eq!(output1.format, output2.format);
        assert_eq!(output1.content, output2.content);
    }

    #[test]
    fn test_multiple_format_conversions_chain() {
        let latex = r"\frac{1}{2}";

        // Latex -> MathML
        let mathml = convert_format(latex, OutputFormat::Latex, OutputFormat::MathML).unwrap();
        assert!(mathml.contains("<mfrac>"));

        // MathML -> Latex
        let back_to_latex = convert_format(&mathml, OutputFormat::MathML, OutputFormat::Latex).unwrap();
        assert!(back_to_latex.contains(r"\frac") || !back_to_latex.is_empty());
    }

    #[test]
    fn test_special_latex_commands_preservation() {
        let latex_commands = vec![
            r"\sum_{i=1}^{n}",
            r"\int_0^1",
            r"\prod_{k=1}^{m}",
            r"\lim_{x \to \infty}",
        ];

        for latex in latex_commands {
            let output = FormattedOutput {
                format: OutputFormat::Latex,
                content: latex.to_string(),
                metadata: None,
            };

            assert_eq!(output.content, latex);
        }
    }

    #[test]
    fn test_output_with_confidence_metadata() {
        let output = FormattedOutput {
            format: OutputFormat::Latex,
            content: r"x^2".to_string(),
            metadata: Some(OutputMetadata {
                confidence: 0.98,
                processing_time_ms: 45,
                num_symbols: 3,
            }),
        };

        let metadata = output.metadata.unwrap();
        assert_eq!(metadata.confidence, 0.98);
        assert_eq!(metadata.processing_time_ms, 45);
        assert_eq!(metadata.num_symbols, 3);
    }
}
|
||||
377
vendor/ruvector/examples/scipix/tests/unit/preprocess_tests.rs
vendored
Normal file
377
vendor/ruvector/examples/scipix/tests/unit/preprocess_tests.rs
vendored
Normal file
@@ -0,0 +1,377 @@
|
||||
// Preprocessing tests for ruvector-scipix
|
||||
//
|
||||
// Tests image preprocessing functions including grayscale conversion,
|
||||
// Gaussian blur, Otsu thresholding, rotation detection, deskewing,
|
||||
// CLAHE enhancement, and pipeline chaining.
|
||||
// Target: 90%+ coverage of preprocessing module
|
||||
|
||||
#[cfg(test)]
mod preprocess_tests {
    //! Image preprocessing tests: grayscale conversion, Gaussian blur,
    //! Otsu thresholding, rotation detection, deskewing, CLAHE enhancement,
    //! and pipeline chaining, against lightweight mock implementations.
    //
    // (The previously present `use std::f32::consts::PI;` was unused and
    // has been removed.)

    /// Minimal 8-bit grayscale image: row-major pixel buffer.
    #[derive(Debug, Clone, PartialEq)]
    struct GrayImage {
        width: u32,
        height: u32,
        data: Vec<u8>,
    }

    impl GrayImage {
        /// All-black image of the given dimensions.
        fn new(width: u32, height: u32) -> Self {
            Self {
                width,
                height,
                data: vec![0; (width * height) as usize],
            }
        }

        /// Builds an image by evaluating `f(x, y)` for every pixel,
        /// row by row (matching the row-major layout of `data`).
        fn from_fn<F>(width: u32, height: u32, f: F) -> Self
        where
            F: Fn(u32, u32) -> u8,
        {
            let mut data = Vec::with_capacity((width * height) as usize);
            for y in 0..height {
                for x in 0..width {
                    data.push(f(x, y));
                }
            }
            Self {
                width,
                height,
                data,
            }
        }

        /// Unchecked pixel read; panics if (x, y) is out of bounds.
        fn get_pixel(&self, x: u32, y: u32) -> u8 {
            self.data[(y * self.width + x) as usize]
        }
    }

    /// ITU-R BT.601 luma weights for RGB -> gray conversion.
    fn to_grayscale(rgb: &[u8; 3]) -> u8 {
        (0.299 * rgb[0] as f32 + 0.587 * rgb[1] as f32 + 0.114 * rgb[2] as f32) as u8
    }

    /// Mock blur: returns a copy unchanged. `sigma` is intentionally
    /// unused (underscore-prefixed to silence the warning).
    fn gaussian_blur(image: &GrayImage, _sigma: f32) -> GrayImage {
        image.clone()
    }

    /// Mock Otsu: returns the mean pixel value as the threshold.
    /// NOTE(review): panics (divide by zero) on a zero-pixel image.
    fn otsu_threshold(image: &GrayImage) -> u8 {
        let sum: u32 = image.data.iter().map(|&x| x as u32).sum();
        let avg = sum / image.data.len() as u32;
        avg as u8
    }

    /// Binarizes: pixels strictly above `threshold` become 255, else 0.
    fn apply_threshold(image: &GrayImage, threshold: u8) -> GrayImage {
        GrayImage::from_fn(image.width, image.height, |x, y| {
            if image.get_pixel(x, y) > threshold {
                255
            } else {
                0
            }
        })
    }

    /// Mock rotation detector: always reports 0 degrees.
    fn detect_rotation_angle(_image: &GrayImage) -> f32 {
        0.0
    }

    /// Mock deskew estimator: always reports a fixed small angle.
    fn deskew_angle(_image: &GrayImage) -> f32 {
        2.5
    }

    /// Mock CLAHE: boosts every pixel by 20%, saturating at 255.
    /// `clip_limit` is accepted but unused by the mock.
    fn apply_clahe(image: &GrayImage, _clip_limit: f32) -> GrayImage {
        GrayImage::from_fn(image.width, image.height, |x, y| {
            let pixel = image.get_pixel(x, y);
            ((pixel as f32 * 1.2).min(255.0)) as u8
        })
    }

    #[test]
    fn test_grayscale_conversion_white() {
        // Relies on the f32 weight sum rounding to >= 255.0 so the
        // saturating `as u8` cast yields 255 — TODO confirm on all targets.
        let white = [255u8, 255, 255];
        let gray = to_grayscale(&white);
        assert_eq!(gray, 255);
    }

    #[test]
    fn test_grayscale_conversion_black() {
        let black = [0u8, 0, 0];
        let gray = to_grayscale(&black);
        assert_eq!(gray, 0);
    }

    #[test]
    fn test_grayscale_conversion_red() {
        let red = [255u8, 0, 0];
        let gray = to_grayscale(&red);
        // 0.299 * 255 ≈ 76
        assert!(gray >= 70 && gray <= 80);
    }

    #[test]
    fn test_grayscale_conversion_green() {
        let green = [0u8, 255, 0];
        let gray = to_grayscale(&green);
        // 0.587 * 255 ≈ 150
        assert!(gray >= 145 && gray <= 155);
    }

    #[test]
    fn test_grayscale_conversion_blue() {
        let blue = [0u8, 0, 255];
        let gray = to_grayscale(&blue);
        // 0.114 * 255 ≈ 29
        assert!(gray >= 25 && gray <= 35);
    }

    #[test]
    fn test_gaussian_blur_preserves_dimensions() {
        let image = GrayImage::new(100, 100);
        let blurred = gaussian_blur(&image, 1.0);

        assert_eq!(blurred.width, 100);
        assert_eq!(blurred.height, 100);
    }

    #[test]
    fn test_gaussian_blur_multiple_sigmas() {
        let image = GrayImage::new(50, 50);

        let sigmas = vec![0.5, 1.0, 1.5, 2.0, 3.0];
        for sigma in sigmas {
            let blurred = gaussian_blur(&image, sigma);
            assert_eq!(blurred.width, image.width);
            assert_eq!(blurred.height, image.height);
        }
    }

    #[test]
    fn test_otsu_thresholding_uniform_image() {
        // Mean of a uniform 128 image is exactly 128.
        let image = GrayImage::from_fn(50, 50, |_, _| 128);
        let threshold = otsu_threshold(&image);
        assert_eq!(threshold, 128);
    }

    #[test]
    fn test_otsu_thresholding_bimodal_image() {
        // Create image with two distinct levels (checkerboard of 50/200).
        let image = GrayImage::from_fn(100, 100, |x, y| {
            if (x + y) % 2 == 0 {
                50
            } else {
                200
            }
        });

        let threshold = otsu_threshold(&image);
        // Threshold should be between the two peaks
        assert!(threshold > 50 && threshold < 200);
    }

    #[test]
    fn test_apply_threshold_creates_binary_image() {
        let image = GrayImage::from_fn(50, 50, |x, y| ((x + y) % 256) as u8);
        let binary = apply_threshold(&image, 128);

        // Check all pixels are either 0 or 255
        for pixel in binary.data.iter() {
            assert!(*pixel == 0 || *pixel == 255);
        }
    }

    #[test]
    fn test_apply_threshold_low_threshold() {
        let image = GrayImage::from_fn(50, 50, |_, _| 100);
        let binary = apply_threshold(&image, 50);

        // All pixels should be 255 (above threshold)
        assert!(binary.data.iter().all(|&x| x == 255));
    }

    #[test]
    fn test_apply_threshold_high_threshold() {
        let image = GrayImage::from_fn(50, 50, |_, _| 100);
        let binary = apply_threshold(&image, 150);

        // All pixels should be 0 (below threshold)
        assert!(binary.data.iter().all(|&x| x == 0));
    }

    #[test]
    fn test_rotation_detection_zero() {
        let image = GrayImage::new(100, 100);
        let angle = detect_rotation_angle(&image);
        assert!((angle - 0.0).abs() < 1.0);
    }

    #[test]
    fn test_rotation_detection_90_degrees() {
        let image = GrayImage::from_fn(100, 100, |x, _| x as u8);
        let angle = detect_rotation_angle(&image);
        // In real implementation, should detect 0, 90, 180, or 270
        assert!(angle >= -180.0 && angle <= 180.0);
    }

    #[test]
    fn test_rotation_detection_180_degrees() {
        let image = GrayImage::from_fn(100, 100, |x, y| ((x + y) % 256) as u8);
        let angle = detect_rotation_angle(&image);
        assert!(angle >= -180.0 && angle <= 180.0);
    }

    #[test]
    fn test_rotation_detection_270_degrees() {
        let image = GrayImage::new(100, 100);
        let angle = detect_rotation_angle(&image);
        assert!(angle >= -180.0 && angle <= 180.0);
    }

    #[test]
    fn test_deskew_angle_detection() {
        let image = GrayImage::new(100, 100);
        let angle = deskew_angle(&image);

        // Skew angle should typically be small (< 45 degrees)
        assert!(angle.abs() < 45.0);
    }

    #[test]
    fn test_deskew_angle_horizontal_lines() {
        let image = GrayImage::from_fn(100, 100, |_, y| {
            if y % 10 == 0 {
                255
            } else {
                0
            }
        });

        let angle = deskew_angle(&image);
        // Should detect minimal skew for horizontal lines
        assert!(angle.abs() < 5.0);
    }

    #[test]
    fn test_clahe_enhancement() {
        let image = GrayImage::from_fn(100, 100, |x, y| ((x + y) % 128) as u8);
        let enhanced = apply_clahe(&image, 2.0);

        assert_eq!(enhanced.width, image.width);
        assert_eq!(enhanced.height, image.height);
    }

    #[test]
    fn test_clahe_increases_contrast() {
        let low_contrast = GrayImage::from_fn(50, 50, |x, _| (100 + x % 20) as u8);
        let enhanced = apply_clahe(&low_contrast, 2.0);

        // Calculate simple contrast measure
        let original_range = calculate_range(&low_contrast);
        let enhanced_range = calculate_range(&enhanced);

        // Enhanced image should have equal or greater range
        assert!(enhanced_range >= original_range);
    }

    #[test]
    fn test_clahe_preserves_dimensions() {
        let image = GrayImage::new(256, 256);
        let enhanced = apply_clahe(&image, 2.0);

        assert_eq!(enhanced.width, 256);
        assert_eq!(enhanced.height, 256);
    }

    #[test]
    fn test_clahe_different_clip_limits() {
        let image = GrayImage::from_fn(50, 50, |x, y| ((x + y) % 256) as u8);

        let clip_limits = vec![1.0, 2.0, 3.0, 4.0];
        for limit in clip_limits {
            let enhanced = apply_clahe(&image, limit);
            assert_eq!(enhanced.width, image.width);
            assert_eq!(enhanced.height, image.height);
        }
    }

    #[test]
    fn test_pipeline_chaining_blur_then_threshold() {
        let image = GrayImage::from_fn(100, 100, |x, y| ((x + y) % 256) as u8);

        // Chain operations
        let blurred = gaussian_blur(&image, 1.0);
        let threshold = otsu_threshold(&blurred);
        let binary = apply_threshold(&blurred, threshold);

        // Verify final result is binary
        assert!(binary.data.iter().all(|&x| x == 0 || x == 255));
    }

    #[test]
    fn test_pipeline_chaining_enhance_then_threshold() {
        let image = GrayImage::from_fn(100, 100, |x, y| ((x + y) % 128) as u8);

        // Chain CLAHE then threshold
        let enhanced = apply_clahe(&image, 2.0);
        let threshold = otsu_threshold(&enhanced);
        let binary = apply_threshold(&enhanced, threshold);

        assert!(binary.data.iter().all(|&x| x == 0 || x == 255));
    }

    #[test]
    fn test_pipeline_full_preprocessing() {
        let image = GrayImage::from_fn(100, 100, |x, y| ((x + y) % 256) as u8);

        // Full pipeline: blur -> enhance -> threshold
        let blurred = gaussian_blur(&image, 1.0);
        let enhanced = apply_clahe(&blurred, 2.0);
        let threshold = otsu_threshold(&enhanced);
        let binary = apply_threshold(&enhanced, threshold);

        assert_eq!(binary.width, image.width);
        assert_eq!(binary.height, image.height);
        assert!(binary.data.iter().all(|&x| x == 0 || x == 255));
    }

    #[test]
    fn test_pipeline_preserves_dimensions_throughout() {
        let image = GrayImage::new(200, 150);

        let blurred = gaussian_blur(&image, 1.5);
        assert_eq!((blurred.width, blurred.height), (200, 150));

        let enhanced = apply_clahe(&blurred, 2.0);
        assert_eq!((enhanced.width, enhanced.height), (200, 150));

        let binary = apply_threshold(&enhanced, 128);
        assert_eq!((binary.width, binary.height), (200, 150));
    }

    /// Helper: max - min pixel value, a crude contrast measure.
    fn calculate_range(image: &GrayImage) -> u8 {
        let min = *image.data.iter().min().unwrap_or(&0);
        let max = *image.data.iter().max().unwrap_or(&255);
        max - min
    }

    #[test]
    fn test_edge_case_empty_like_image() {
        let tiny = GrayImage::new(1, 1);
        assert_eq!(tiny.width, 1);
        assert_eq!(tiny.height, 1);
    }

    #[test]
    fn test_edge_case_large_image_dimensions() {
        let large = GrayImage::new(4096, 4096);
        assert_eq!(large.width, 4096);
        assert_eq!(large.height, 4096);
    }
}
|
||||
Reference in New Issue
Block a user