git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
386 lines
10 KiB
Rust
386 lines
10 KiB
Rust
// OCR engine tests for ruvector-scipix
|
|
//
|
|
// Tests OCR engine initialization, model loading, inference options,
|
|
// and batch processing capabilities.
|
|
// Target: 85%+ coverage of OCR engine module
|
|
|
|
#[cfg(test)]
|
|
mod ocr_tests {
|
|
use std::path::PathBuf;
|
|
|
|
// Mock OCR engine structures
|
|
#[derive(Debug, Clone)]
|
|
struct OcrEngine {
|
|
model_path: PathBuf,
|
|
device: String,
|
|
batch_size: usize,
|
|
loaded: bool,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
struct OcrOptions {
|
|
confidence_threshold: f32,
|
|
detect_rotation: bool,
|
|
preprocessing: bool,
|
|
language: String,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
struct OcrResult {
|
|
text: String,
|
|
confidence: f32,
|
|
bounding_boxes: Vec<BoundingBox>,
|
|
processing_time_ms: u64,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
struct BoundingBox {
|
|
x: u32,
|
|
y: u32,
|
|
width: u32,
|
|
height: u32,
|
|
confidence: f32,
|
|
}
|
|
|
|
impl Default for OcrOptions {
|
|
fn default() -> Self {
|
|
Self {
|
|
confidence_threshold: 0.7,
|
|
detect_rotation: true,
|
|
preprocessing: true,
|
|
language: "en".to_string(),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl OcrEngine {
|
|
fn new(model_path: PathBuf, device: &str) -> Result<Self, String> {
|
|
if !model_path.to_string_lossy().ends_with(".onnx") {
|
|
return Err("Model must be .onnx format".to_string());
|
|
}
|
|
|
|
Ok(Self {
|
|
model_path,
|
|
device: device.to_string(),
|
|
batch_size: 4,
|
|
loaded: false,
|
|
})
|
|
}
|
|
|
|
fn load(&mut self) -> Result<(), String> {
|
|
if self.loaded {
|
|
return Err("Model already loaded".to_string());
|
|
}
|
|
self.loaded = true;
|
|
Ok(())
|
|
}
|
|
|
|
fn is_loaded(&self) -> bool {
|
|
self.loaded
|
|
}
|
|
|
|
fn process(&self, _image_data: &[u8], options: &OcrOptions) -> Result<OcrResult, String> {
|
|
if !self.loaded {
|
|
return Err("Model not loaded".to_string());
|
|
}
|
|
|
|
Ok(OcrResult {
|
|
text: "x^2 + 1".to_string(),
|
|
confidence: 0.95,
|
|
bounding_boxes: vec![BoundingBox {
|
|
x: 10,
|
|
y: 20,
|
|
width: 100,
|
|
height: 50,
|
|
confidence: 0.95,
|
|
}],
|
|
processing_time_ms: 123,
|
|
})
|
|
}
|
|
|
|
fn process_batch(
|
|
&self,
|
|
images: &[Vec<u8>],
|
|
options: &OcrOptions,
|
|
) -> Result<Vec<OcrResult>, String> {
|
|
if !self.loaded {
|
|
return Err("Model not loaded".to_string());
|
|
}
|
|
|
|
images
|
|
.iter()
|
|
.map(|img| self.process(img, options))
|
|
.collect()
|
|
}
|
|
|
|
fn set_batch_size(&mut self, size: usize) {
|
|
self.batch_size = size;
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_engine_creation() {
|
|
let engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu");
|
|
assert!(engine.is_ok());
|
|
}
|
|
|
|
#[test]
|
|
fn test_engine_creation_invalid_model() {
|
|
let engine = OcrEngine::new(PathBuf::from("model.txt"), "cpu");
|
|
assert!(engine.is_err());
|
|
}
|
|
|
|
#[test]
|
|
fn test_engine_creation_cpu_device() {
|
|
let engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
|
assert_eq!(engine.device, "cpu");
|
|
}
|
|
|
|
#[test]
|
|
fn test_engine_creation_cuda_device() {
|
|
let engine = OcrEngine::new(PathBuf::from("model.onnx"), "cuda").unwrap();
|
|
assert_eq!(engine.device, "cuda");
|
|
}
|
|
|
|
#[test]
|
|
fn test_model_loading() {
|
|
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
|
|
|
assert!(!engine.is_loaded());
|
|
|
|
let result = engine.load();
|
|
assert!(result.is_ok());
|
|
assert!(engine.is_loaded());
|
|
}
|
|
|
|
#[test]
|
|
fn test_model_loading_twice() {
|
|
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
|
|
|
engine.load().unwrap();
|
|
let second_load = engine.load();
|
|
|
|
assert!(second_load.is_err());
|
|
}
|
|
|
|
#[test]
|
|
fn test_ocr_options_default() {
|
|
let options = OcrOptions::default();
|
|
|
|
assert_eq!(options.confidence_threshold, 0.7);
|
|
assert!(options.detect_rotation);
|
|
assert!(options.preprocessing);
|
|
assert_eq!(options.language, "en");
|
|
}
|
|
|
|
#[test]
|
|
fn test_ocr_options_custom() {
|
|
let options = OcrOptions {
|
|
confidence_threshold: 0.9,
|
|
detect_rotation: false,
|
|
preprocessing: true,
|
|
language: "math".to_string(),
|
|
};
|
|
|
|
assert_eq!(options.confidence_threshold, 0.9);
|
|
assert!(!options.detect_rotation);
|
|
}
|
|
|
|
#[test]
|
|
fn test_process_without_loading() {
|
|
let engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
|
let options = OcrOptions::default();
|
|
|
|
let result = engine.process(&[0u8; 100], &options);
|
|
assert!(result.is_err());
|
|
}
|
|
|
|
#[test]
|
|
fn test_process_after_loading() {
|
|
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
|
engine.load().unwrap();
|
|
|
|
let options = OcrOptions::default();
|
|
let result = engine.process(&[0u8; 100], &options);
|
|
|
|
assert!(result.is_ok());
|
|
}
|
|
|
|
#[test]
|
|
fn test_process_result_structure() {
|
|
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
|
engine.load().unwrap();
|
|
|
|
let options = OcrOptions::default();
|
|
let result = engine.process(&[0u8; 100], &options).unwrap();
|
|
|
|
assert!(!result.text.is_empty());
|
|
assert!(result.confidence > 0.0 && result.confidence <= 1.0);
|
|
assert!(!result.bounding_boxes.is_empty());
|
|
assert!(result.processing_time_ms > 0);
|
|
}
|
|
|
|
#[test]
|
|
fn test_batch_processing() {
|
|
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
|
engine.load().unwrap();
|
|
|
|
let images = vec![vec![0u8; 100], vec![1u8; 100], vec![2u8; 100]];
|
|
let options = OcrOptions::default();
|
|
|
|
let results = engine.process_batch(&images, &options).unwrap();
|
|
|
|
assert_eq!(results.len(), 3);
|
|
}
|
|
|
|
#[test]
|
|
fn test_batch_processing_empty() {
|
|
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
|
engine.load().unwrap();
|
|
|
|
let images: Vec<Vec<u8>> = vec![];
|
|
let options = OcrOptions::default();
|
|
|
|
let results = engine.process_batch(&images, &options).unwrap();
|
|
|
|
assert_eq!(results.len(), 0);
|
|
}
|
|
|
|
#[test]
|
|
fn test_batch_processing_single_image() {
|
|
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
|
engine.load().unwrap();
|
|
|
|
let images = vec![vec![0u8; 100]];
|
|
let options = OcrOptions::default();
|
|
|
|
let results = engine.process_batch(&images, &options).unwrap();
|
|
|
|
assert_eq!(results.len(), 1);
|
|
}
|
|
|
|
#[test]
|
|
fn test_batch_size_configuration() {
|
|
let mut engine = OcrEngine::new(PathBuf::from("model.onnx"), "cpu").unwrap();
|
|
|
|
assert_eq!(engine.batch_size, 4);
|
|
|
|
engine.set_batch_size(8);
|
|
assert_eq!(engine.batch_size, 8);
|
|
|
|
engine.set_batch_size(16);
|
|
assert_eq!(engine.batch_size, 16);
|
|
}
|
|
|
|
#[test]
|
|
fn test_bounding_box_structure() {
|
|
let bbox = BoundingBox {
|
|
x: 10,
|
|
y: 20,
|
|
width: 100,
|
|
height: 50,
|
|
confidence: 0.95,
|
|
};
|
|
|
|
assert_eq!(bbox.x, 10);
|
|
assert_eq!(bbox.y, 20);
|
|
assert_eq!(bbox.width, 100);
|
|
assert_eq!(bbox.height, 50);
|
|
assert_eq!(bbox.confidence, 0.95);
|
|
}
|
|
|
|
#[test]
|
|
fn test_multiple_bounding_boxes() {
|
|
let boxes = vec![
|
|
BoundingBox {
|
|
x: 10,
|
|
y: 20,
|
|
width: 50,
|
|
height: 30,
|
|
confidence: 0.95,
|
|
},
|
|
BoundingBox {
|
|
x: 70,
|
|
y: 20,
|
|
width: 60,
|
|
height: 30,
|
|
confidence: 0.93,
|
|
},
|
|
];
|
|
|
|
assert_eq!(boxes.len(), 2);
|
|
assert!(boxes.iter().all(|b| b.confidence > 0.9));
|
|
}
|
|
|
|
#[test]
|
|
fn test_options_language_variants() {
|
|
let languages = vec!["en", "math", "mixed", "es", "fr", "de"];
|
|
|
|
for lang in languages {
|
|
let options = OcrOptions {
|
|
language: lang.to_string(),
|
|
..Default::default()
|
|
};
|
|
|
|
assert_eq!(options.language, lang);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_options_confidence_thresholds() {
|
|
let thresholds = vec![0.5, 0.7, 0.8, 0.9, 0.95];
|
|
|
|
for threshold in thresholds {
|
|
let options = OcrOptions {
|
|
confidence_threshold: threshold,
|
|
..Default::default()
|
|
};
|
|
|
|
assert_eq!(options.confidence_threshold, threshold);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_options_preprocessing_toggle() {
|
|
let mut options = OcrOptions::default();
|
|
assert!(options.preprocessing);
|
|
|
|
options.preprocessing = false;
|
|
assert!(!options.preprocessing);
|
|
}
|
|
|
|
#[test]
|
|
fn test_options_rotation_detection_toggle() {
|
|
let mut options = OcrOptions::default();
|
|
assert!(options.detect_rotation);
|
|
|
|
options.detect_rotation = false;
|
|
assert!(!options.detect_rotation);
|
|
}
|
|
|
|
#[test]
|
|
fn test_engine_with_different_devices() {
|
|
let devices = vec!["cpu", "cuda", "cuda:0", "cuda:1"];
|
|
|
|
for device in devices {
|
|
let engine = OcrEngine::new(PathBuf::from("model.onnx"), device);
|
|
assert!(engine.is_ok());
|
|
assert_eq!(engine.unwrap().device, device);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_ocr_result_cloning() {
|
|
let result = OcrResult {
|
|
text: "test".to_string(),
|
|
confidence: 0.95,
|
|
bounding_boxes: vec![],
|
|
processing_time_ms: 100,
|
|
};
|
|
|
|
let cloned = result.clone();
|
|
assert_eq!(result.text, cloned.text);
|
|
assert_eq!(result.confidence, cloned.confidence);
|
|
}
|
|
}
|