Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,353 @@
//! Skew detection and correction using Hough transform
use super::{PreprocessError, Result};
use image::{GrayImage, Luma};
use imageproc::edges::canny;
use imageproc::geometric_transformations::{rotate_about_center, Interpolation};
use std::collections::BTreeMap;
use std::f32;
/// Detect skew angle using Hough transform
///
/// Applies edge detection and Hough transform to find dominant lines,
/// then calculates average skew angle.
///
/// # Arguments
/// * `image` - Input grayscale image
///
/// # Returns
/// Skew angle in degrees (positive = clockwise)
///
/// # Example
/// ```no_run
/// use ruvector_scipix::preprocess::deskew::detect_skew_angle;
/// # use image::GrayImage;
/// # let image = GrayImage::new(100, 100);
/// let angle = detect_skew_angle(&image).unwrap();
/// println!("Detected skew: {:.2}°", angle);
/// ```
pub fn detect_skew_angle(image: &GrayImage) -> Result<f32> {
    // Reject images too small to yield meaningful Hough statistics.
    let (img_w, img_h) = image.dimensions();
    if img_w < 20 || img_h < 20 {
        return Err(PreprocessError::InvalidParameters(
            "Image too small for skew detection".to_string(),
        ));
    }

    // Edge map feeds the Hough accumulator.
    let edge_map = canny(image, 50.0, 100.0);
    let votes = detect_lines_hough(&edge_map, img_w, img_h)?;

    // No confident lines found: treat the page as already straight.
    if votes.is_empty() {
        return Ok(0.0);
    }

    // One pass accumulating both the total vote mass and the weighted
    // angle sum. Map keys store angle * 10, so divide back out here.
    let (total, weighted) = votes
        .iter()
        .fold((0.0f32, 0.0f32), |(total, weighted), (key, weight)| {
            (total + weight, weighted + (*key as f32 / 10.0) * weight)
        });

    Ok(if total > 0.0 { weighted / total } else { 0.0 })
}
/// Detect lines using Hough transform
///
/// Accumulates votes over (theta, rho) space for every edge pixel, then
/// returns a map from quantized angle keys (angle * 10, stored as `i32` to
/// keep 0.1° precision in a `BTreeMap`) to accumulated vote weights. Only
/// near-horizontal angles (|angle| < 45°) are kept, since document skew is
/// assumed to be small.
fn detect_lines_hough(edges: &GrayImage, width: u32, height: u32) -> Result<BTreeMap<i32, f32>> {
    // Image diagonal bounds |rho|; hypot avoids the u32 overflow that
    // `width * width + height * height` hits for images wider than ~65k px.
    let diag = (width as f32).hypot(height as f32) as usize;
    // Bin count must cover the full signed rho range [-diag, +diag].
    // The previous accumulator had only `diag` bins with a `diag / 2`
    // offset, silently dropping lines with rho > diag/2 and piling all
    // lines with rho < -diag/2 into bin 0.
    let num_rho = 2 * diag + 1;
    let num_angles = 360;
    // Accumulator array for Hough space
    let mut accumulator = vec![vec![0u32; num_rho]; num_angles];
    // Populate accumulator
    for y in 0..height {
        for x in 0..width {
            if edges.get_pixel(x, y)[0] > 128 {
                // Edge pixel found: vote for every candidate line through it.
                for theta_idx in 0..num_angles {
                    let theta = (theta_idx as f32) * std::f32::consts::PI / 180.0;
                    let rho = (x as f32) * theta.cos() + (y as f32) * theta.sin();
                    // Shift signed rho into [0, num_rho).
                    let rho_idx = (rho + diag as f32).round() as i64;
                    if rho_idx >= 0 && (rho_idx as usize) < num_rho {
                        accumulator[theta_idx][rho_idx as usize] += 1;
                    }
                }
            }
        }
    }
    // Find peaks in accumulator
    let mut angle_votes: BTreeMap<i32, f32> = BTreeMap::new();
    let threshold = width.min(height) / 10; // Adaptive threshold
    for theta_idx in 0..num_angles {
        for rho_idx in 0..num_rho {
            let votes = accumulator[theta_idx][rho_idx];
            if votes > threshold {
                // NOTE(review): theta is the direction of the line's normal;
                // mapping it to a skew angle with `theta - 180` matches the
                // original code, but `theta - 90` may be the intended
                // convention for horizontal text baselines — confirm.
                let angle = (theta_idx as f32) - 180.0;
                let normalized_angle = normalize_angle(angle);
                // Only consider angles near horizontal (within ±45°)
                if normalized_angle.abs() < 45.0 {
                    // Use integer keys for BTreeMap (angle * 10 to preserve precision)
                    let key = (normalized_angle * 10.0) as i32;
                    *angle_votes.entry(key).or_insert(0.0) += votes as f32;
                }
            }
        }
    }
    Ok(angle_votes)
}
/// Normalize angle to -45 to +45 degree range
///
/// Reduces the angle modulo 180°, folds it into (-90°, 90°], then clamps
/// the result to the ±45° band used for skew estimates.
fn normalize_angle(angle: f32) -> f32 {
    // Rust's `%` keeps the dividend's sign, so `rem` lies in (-180, 180).
    let rem = angle % 180.0;
    let folded = if rem > 90.0 {
        rem - 180.0
    } else if rem < -90.0 {
        rem + 180.0
    } else {
        rem
    };
    folded.clamp(-45.0, 45.0)
}
/// Deskew image using detected skew angle
///
/// # Arguments
/// * `image` - Input grayscale image
/// * `angle` - Skew angle in degrees (from detect_skew_angle)
///
/// # Returns
/// Deskewed image with white background fill
///
/// # Example
/// ```no_run
/// use ruvector_scipix::preprocess::deskew::{detect_skew_angle, deskew_image};
/// # use image::GrayImage;
/// # let image = GrayImage::new(100, 100);
/// let angle = detect_skew_angle(&image).unwrap();
/// let deskewed = deskew_image(&image, angle).unwrap();
/// ```
pub fn deskew_image(image: &GrayImage, angle: f32) -> Result<GrayImage> {
    // Sub-0.1° skews are skipped entirely: resampling would only blur the
    // image without a visible straightening effect.
    if angle.abs() < 0.1 {
        return Ok(image.clone());
    }
    // Rotate opposite to the measured skew; corners exposed by the rotation
    // are filled with white to match a typical document background.
    let corrected = rotate_about_center(
        image,
        -angle.to_radians(),
        Interpolation::Bilinear,
        Luma([255]),
    );
    Ok(corrected)
}
/// Auto-deskew image with confidence threshold
///
/// # Arguments
/// * `image` - Input grayscale image
/// * `max_angle` - Maximum angle to correct (degrees)
///
/// # Returns
/// Tuple of (deskewed_image, angle_applied)
pub fn auto_deskew(image: &GrayImage, max_angle: f32) -> Result<(GrayImage, f32)> {
    let detected = detect_skew_angle(image)?;
    // A detected angle beyond the caller's limit is more likely detector
    // noise than real skew: leave the image alone and report 0.0 applied.
    if detected.abs() > max_angle {
        return Ok((image.clone(), 0.0));
    }
    let corrected = deskew_image(image, detected)?;
    Ok((corrected, detected))
}
/// Detect skew using projection profile method (alternative approach)
///
/// Evaluates the horizontal projection variance at a fixed set of candidate
/// angles and returns the one with the sharpest (highest-variance) profile.
/// Faster but coarser than the Hough-based detector.
pub fn detect_skew_projection(image: &GrayImage) -> Result<f32> {
    const CANDIDATES: [f32; 11] = [
        -45.0, -30.0, -15.0, -10.0, -5.0, 0.0, 5.0, 10.0, 15.0, 30.0, 45.0,
    ];
    // Track (angle, variance); strict `>` keeps the earliest candidate on ties.
    let mut best = (0.0f32, 0.0f32);
    for &candidate in CANDIDATES.iter() {
        let variance = calculate_projection_variance(image, candidate);
        if variance > best.1 {
            best = (candidate, variance);
        }
    }
    Ok(best.0)
}
/// Calculate projection variance for a given angle
///
/// Projects dark (ink) pixels onto rows of a virtual image rotated by
/// `angle` and returns the population variance of the row counts. A sharp,
/// well-aligned text profile yields high variance.
fn calculate_projection_variance(image: &GrayImage, angle: f32) -> f32 {
    let (width, height) = image.dimensions();
    let (sin_a, cos_a) = angle.to_radians().sin_cos();

    // Per-row counts of ink pixels after rotation.
    let mut row_counts = vec![0u32; height as usize];
    for y in 0..height {
        for x in 0..width {
            // Only pixels darker than mid-gray contribute to the profile.
            if image.get_pixel(x, y)[0] < 128 {
                let projected = ((y as f32) * cos_a - (x as f32) * sin_a) as i32;
                if (0..height as i32).contains(&projected) {
                    row_counts[projected as usize] += 1;
                }
            }
        }
    }

    // Zero-height image has no profile at all.
    if row_counts.is_empty() {
        return 0.0;
    }

    // Population variance of the profile.
    let n = row_counts.len() as f32;
    let mean = row_counts.iter().sum::<u32>() as f32 / n;
    let sum_sq: f32 = row_counts.iter().map(|&c| (c as f32 - mean).powi(2)).sum();
    sum_sq / n
}
#[cfg(test)]
mod tests {
    use super::*;
    /// Build a 200x100 white image with four horizontal black lines,
    /// simulating rows of text with zero skew.
    fn create_test_image() -> GrayImage {
        let mut img = GrayImage::new(200, 100);
        // Fill with white
        for pixel in img.pixels_mut() {
            *pixel = Luma([255]);
        }
        // Draw some horizontal lines (simulating text)
        for y in [20, 40, 60, 80] {
            for x in 10..190 {
                img.put_pixel(x, y, Luma([0]));
            }
        }
        img
    }
    // Horizontal lines should produce a near-zero skew estimate.
    #[test]
    fn test_detect_skew_straight() {
        let img = create_test_image();
        let angle = detect_skew_angle(&img);
        assert!(angle.is_ok());
        let a = angle.unwrap();
        // Should detect near-zero skew for straight lines
        assert!(a.abs() < 10.0);
    }
    // Rotation must preserve the original dimensions.
    #[test]
    fn test_deskew_image() {
        let img = create_test_image();
        // Deskew by 5 degrees
        let deskewed = deskew_image(&img, 5.0);
        assert!(deskewed.is_ok());
        let result = deskewed.unwrap();
        assert_eq!(result.dimensions(), img.dimensions());
    }
    // Angles below the 0.1° cutoff take the clone fast path.
    #[test]
    fn test_deskew_no_change() {
        let img = create_test_image();
        // Deskew by ~0 degrees
        let deskewed = deskew_image(&img, 0.05);
        assert!(deskewed.is_ok());
        let result = deskewed.unwrap();
        assert_eq!(result.dimensions(), img.dimensions());
    }
    // auto_deskew must never report a correction above its max_angle limit.
    #[test]
    fn test_auto_deskew() {
        let img = create_test_image();
        let result = auto_deskew(&img, 15.0);
        assert!(result.is_ok());
        let (deskewed, angle) = result.unwrap();
        assert_eq!(deskewed.dimensions(), img.dimensions());
        assert!(angle.abs() <= 15.0);
    }
    // Spot-check the fold-and-clamp behavior of normalize_angle.
    #[test]
    fn test_normalize_angle() {
        assert!((normalize_angle(0.0) - 0.0).abs() < 0.01);
        // Test normalization behavior
        let angle_100 = normalize_angle(100.0);
        assert!(angle_100.abs() <= 45.0); // Should be clamped to ±45°
        let angle_neg100 = normalize_angle(-100.0);
        assert!(angle_neg100.abs() <= 45.0); // Should be clamped to ±45°
        assert!((normalize_angle(50.0) - 45.0).abs() < 0.01); // Clamped to 45
        assert!((normalize_angle(-50.0) - -45.0).abs() < 0.01); // Clamped to -45
    }
    // The projection-profile detector should also see near-zero skew.
    #[test]
    fn test_detect_skew_projection() {
        let img = create_test_image();
        let angle = detect_skew_projection(&img);
        assert!(angle.is_ok());
        let a = angle.unwrap();
        assert!(a.abs() < 20.0);
    }
    // Images below the 20x20 minimum are rejected with an error.
    #[test]
    fn test_skew_small_image_error() {
        let small_img = GrayImage::new(10, 10);
        let result = detect_skew_angle(&small_img);
        assert!(result.is_err());
    }
    // Projection variance at 0° should be positive for horizontal lines.
    #[test]
    fn test_projection_variance() {
        let img = create_test_image();
        let var_0 = calculate_projection_variance(&img, 0.0);
        let var_30 = calculate_projection_variance(&img, 30.0);
        // Variance at 0° should be higher for horizontal lines
        assert!(var_0 > 0.0);
        println!("Variance at 0°: {}, at 30°: {}", var_0, var_30);
    }
}

View File

@@ -0,0 +1,420 @@
//! Image enhancement functions for improving OCR accuracy
use super::{PreprocessError, Result};
use image::{GrayImage, Luma};
use std::cmp;
/// Contrast Limited Adaptive Histogram Equalization (CLAHE)
///
/// Improves local contrast while avoiding over-amplification of noise.
/// Divides the image into `tile_size` x `tile_size` tiles, computes a
/// contrast-clipped CDF per tile, then maps each pixel through a bilinear
/// blend of neighboring tiles' CDFs so tile boundaries stay seamless.
///
/// # Arguments
/// * `image` - Input grayscale image
/// * `clip_limit` - Contrast clipping limit (typically 2.0-4.0); must be > 0
/// * `tile_size` - Size of contextual regions (typically 8x8 or 16x16); must be > 0
///
/// # Returns
/// Enhanced image with improved local contrast
///
/// # Errors
/// Returns `PreprocessError::InvalidParameters` when `tile_size` is 0 or
/// `clip_limit` is not positive.
///
/// # Example
/// ```no_run
/// use ruvector_scipix::preprocess::enhancement::clahe;
/// # use image::GrayImage;
/// # let image = GrayImage::new(100, 100);
/// let enhanced = clahe(&image, 2.0, 8).unwrap();
/// ```
pub fn clahe(image: &GrayImage, clip_limit: f32, tile_size: u32) -> Result<GrayImage> {
    if tile_size == 0 || clip_limit <= 0.0 {
        return Err(PreprocessError::InvalidParameters(
            "Invalid CLAHE parameters".to_string(),
        ));
    }
    let (width, height) = image.dimensions();
    let mut result = GrayImage::new(width, height);
    // Ceiling division: tiles at the right/bottom edge may be partial.
    let tiles_x = (width + tile_size - 1) / tile_size;
    let tiles_y = (height + tile_size - 1) / tile_size;
    // Compute histograms and CDFs for each tile
    let mut tile_cdfs = vec![vec![Vec::new(); tiles_x as usize]; tiles_y as usize];
    for ty in 0..tiles_y {
        for tx in 0..tiles_x {
            let x_start = tx * tile_size;
            let y_start = ty * tile_size;
            let x_end = cmp::min(x_start + tile_size, width);
            let y_end = cmp::min(y_start + tile_size, height);
            let cdf = compute_tile_cdf(image, x_start, y_start, x_end, y_end, clip_limit);
            tile_cdfs[ty as usize][tx as usize] = cdf;
        }
    }
    // Interpolate and apply transformation
    for y in 0..height {
        for x in 0..width {
            let pixel = image.get_pixel(x, y)[0];
            // Find tile coordinates
            let tx = (x as f32 / tile_size as f32).floor();
            let ty = (y as f32 / tile_size as f32).floor();
            // Calculate interpolation weights.
            // NOTE(review): ratios are measured from the tile origin rather
            // than the tile center, so the blend is shifted by half a tile
            // relative to textbook CLAHE — confirm this is intentional.
            let x_ratio = (x as f32 / tile_size as f32) - tx;
            let y_ratio = (y as f32 / tile_size as f32) - ty;
            let tx = tx as usize;
            let ty = ty as usize;
            // Bilinear interpolation between neighboring tiles; pixels in the
            // last tile row/column fall back to linear or direct lookup since
            // they have no neighbor to the right/below.
            let value = if tx < tiles_x as usize - 1 && ty < tiles_y as usize - 1 {
                let v00 = tile_cdfs[ty][tx][pixel as usize];
                let v10 = tile_cdfs[ty][tx + 1][pixel as usize];
                let v01 = tile_cdfs[ty + 1][tx][pixel as usize];
                let v11 = tile_cdfs[ty + 1][tx + 1][pixel as usize];
                let v0 = v00 * (1.0 - x_ratio) + v10 * x_ratio;
                let v1 = v01 * (1.0 - x_ratio) + v11 * x_ratio;
                v0 * (1.0 - y_ratio) + v1 * y_ratio
            } else if tx < tiles_x as usize - 1 {
                let v0 = tile_cdfs[ty][tx][pixel as usize];
                let v1 = tile_cdfs[ty][tx + 1][pixel as usize];
                v0 * (1.0 - x_ratio) + v1 * x_ratio
            } else if ty < tiles_y as usize - 1 {
                let v0 = tile_cdfs[ty][tx][pixel as usize];
                let v1 = tile_cdfs[ty + 1][tx][pixel as usize];
                v0 * (1.0 - y_ratio) + v1 * y_ratio
            } else {
                tile_cdfs[ty][tx][pixel as usize]
            };
            // CDF values lie in 0.0..=1.0; scale back to 8-bit intensity.
            result.put_pixel(x, y, Luma([(value * 255.0) as u8]));
        }
    }
    Ok(result)
}
/// Compute clipped histogram and CDF for a tile
///
/// Builds the tile's 256-bin histogram, clips each bin at
/// `clip_limit * pixels / 256`, redistributes the clipped mass uniformly
/// across all bins, and returns the normalized CDF (values in 0.0..=1.0).
/// An empty tile yields an all-zero CDF.
fn compute_tile_cdf(
    image: &GrayImage,
    x_start: u32,
    y_start: u32,
    x_end: u32,
    y_end: u32,
    clip_limit: f32,
) -> Vec<f32> {
    // Histogram over the tile's pixels.
    let mut bins = [0u32; 256];
    let mut total = 0u32;
    for row in y_start..y_end {
        for col in x_start..x_end {
            bins[image.get_pixel(col, row)[0] as usize] += 1;
            total += 1;
        }
    }
    if total == 0 {
        return vec![0.0; 256];
    }

    // Clip each bin at the limit and collect the excess mass.
    let ceiling = (clip_limit * total as f32 / 256.0) as u32;
    let mut excess = 0u32;
    for bin in bins.iter_mut() {
        if *bin > ceiling {
            excess += *bin - ceiling;
            *bin = ceiling;
        }
    }

    // Spread the excess evenly; the first `excess % 256` bins absorb one
    // extra count each so no mass is lost.
    let per_bin = excess / 256;
    let leftover = (excess % 256) as usize;
    for (i, bin) in bins.iter_mut().enumerate() {
        *bin += per_bin + u32::from(i < leftover);
    }

    // Running sum normalized by the tile's pixel count gives the CDF.
    let mut running = 0u32;
    bins.iter()
        .map(|&count| {
            running += count;
            running as f32 / total as f32
        })
        .collect()
}
/// Normalize brightness across the image
///
/// Shifts every pixel by a constant offset so the mean brightness becomes
/// 128, clamping results to the valid 0..=255 range.
///
/// # Arguments
/// * `image` - Input grayscale image
///
/// # Returns
/// Brightness-normalized image; a zero-sized input is returned as an
/// unchanged clone (there is no mean to normalize).
pub fn normalize_brightness(image: &GrayImage) -> GrayImage {
    let (width, height) = image.dimensions();
    let pixel_count = (width as u64 * height as u64) as f32;
    // Guard: a 0x0 image would make the mean 0/0 = NaN below.
    if pixel_count == 0.0 {
        return image.clone();
    }
    // Calculate mean brightness; sum in u64 so images larger than ~16.8 MP
    // of white pixels cannot overflow the accumulator.
    let sum: u64 = image.pixels().map(|p| p[0] as u64).sum();
    let mean = sum as f32 / pixel_count;
    let target_mean = 128.0;
    // Constant shift that moves the observed mean onto the target.
    let adjustment = target_mean - mean;
    // Apply adjustment
    let mut result = GrayImage::new(width, height);
    for (x, y, pixel) in image.enumerate_pixels() {
        let adjusted = (pixel[0] as f32 + adjustment).clamp(0.0, 255.0) as u8;
        result.put_pixel(x, y, Luma([adjusted]));
    }
    result
}
/// Remove shadows from document image
///
/// Estimates the illumination background with a max filter and divides it
/// out, flattening uneven lighting toward white.
///
/// # Arguments
/// * `image` - Input grayscale image
///
/// # Returns
/// Image with reduced shadows
pub fn remove_shadows(image: &GrayImage) -> Result<GrayImage> {
    let (width, height) = image.dimensions();
    // Window scaled to the page size, but at least 15 px so small images
    // still get a meaningful background estimate.
    let window = (width.min(height) / 20).max(15) as usize;
    let background = estimate_background(image, window);

    // Divide out the background: (foreground / background) * 255.
    let mut output = GrayImage::new(width, height);
    for (x, y, pixel) in image.enumerate_pixels() {
        let bg = background.get_pixel(x, y)[0] as i32;
        let fg = pixel[0] as i32;
        let flattened = if bg > 0 {
            ((fg as f32 / bg as f32) * 255.0).min(255.0) as u8
        } else {
            // Black background estimate: leave the pixel untouched.
            fg as u8
        };
        output.put_pixel(x, y, Luma([flattened]));
    }
    Ok(output)
}
/// Estimate background using max filter (dilation)
///
/// For each pixel, takes the brightest value within a square window of
/// `kernel_size` pixels centered on it; coordinates outside the image are
/// clamped to the nearest edge.
fn estimate_background(image: &GrayImage, kernel_size: usize) -> GrayImage {
    let (width, height) = image.dimensions();
    let reach = (kernel_size / 2) as i32;
    let mut out = GrayImage::new(width, height);

    for y in 0..height {
        for x in 0..width {
            // Brightest value inside the clamped window centered on (x, y).
            let mut brightest = 0u8;
            for dy in -reach..=reach {
                let sy = (y as i32 + dy).clamp(0, height as i32 - 1) as u32;
                for dx in -reach..=reach {
                    let sx = (x as i32 + dx).clamp(0, width as i32 - 1) as u32;
                    brightest = brightest.max(image.get_pixel(sx, sy)[0]);
                }
            }
            out.put_pixel(x, y, Luma([brightest]));
        }
    }
    out
}
/// Enhance contrast using simple linear stretch
///
/// Maps the image's observed min..max intensity range linearly onto 0..255.
/// A perfectly flat image is returned unchanged (stretching it would divide
/// by zero).
pub fn contrast_stretch(image: &GrayImage) -> GrayImage {
    // Single pass to find the intensity extremes.
    let (lo, hi) = image
        .pixels()
        .fold((255u8, 0u8), |(lo, hi), p| (lo.min(p[0]), hi.max(p[0])));
    if lo == hi {
        return image.clone();
    }

    // Rescale each pixel from [lo, hi] onto [0, 255].
    let (width, height) = image.dimensions();
    let span = (hi - lo) as f32;
    let mut out = GrayImage::new(width, height);
    for (x, y, p) in image.enumerate_pixels() {
        let scaled = ((p[0] - lo) as f32 / span * 255.0) as u8;
        out.put_pixel(x, y, Luma([scaled]));
    }
    out
}
#[cfg(test)]
mod tests {
    use super::*;
    /// Build a 100x100 diagonal gradient image (values 0..=99).
    fn create_test_image() -> GrayImage {
        let mut img = GrayImage::new(100, 100);
        for y in 0..100 {
            for x in 0..100 {
                let val = ((x + y) / 2) as u8;
                img.put_pixel(x, y, Luma([val]));
            }
        }
        img
    }
    // CLAHE with valid parameters succeeds and preserves dimensions.
    #[test]
    fn test_clahe() {
        let img = create_test_image();
        let enhanced = clahe(&img, 2.0, 8);
        assert!(enhanced.is_ok());
        let result = enhanced.unwrap();
        assert_eq!(result.dimensions(), img.dimensions());
    }
    // Zero tile size and non-positive clip limit are both rejected.
    #[test]
    fn test_clahe_invalid_params() {
        let img = create_test_image();
        // Invalid tile size
        let result = clahe(&img, 2.0, 0);
        assert!(result.is_err());
        // Invalid clip limit
        let result = clahe(&img, -1.0, 8);
        assert!(result.is_err());
    }
    // After normalization the mean should land near the 128 target.
    #[test]
    fn test_normalize_brightness() {
        let img = create_test_image();
        let normalized = normalize_brightness(&img);
        assert_eq!(normalized.dimensions(), img.dimensions());
        // Check that mean is closer to 128
        let sum: u32 = normalized.pixels().map(|p| p[0] as u32).sum();
        let mean = sum as f32 / (100.0 * 100.0);
        assert!((mean - 128.0).abs() < 5.0);
    }
    // Shadow removal succeeds and preserves dimensions.
    #[test]
    fn test_remove_shadows() {
        let img = create_test_image();
        let result = remove_shadows(&img);
        assert!(result.is_ok());
        let shadow_removed = result.unwrap();
        assert_eq!(shadow_removed.dimensions(), img.dimensions());
    }
    // A narrow 100-119 range must stretch to the full 0-255 range.
    #[test]
    fn test_contrast_stretch() {
        // Create low contrast image
        let mut img = GrayImage::new(100, 100);
        for y in 0..100 {
            for x in 0..100 {
                let val = 100 + ((x + y) / 10) as u8; // Range: 100-119
                img.put_pixel(x, y, Luma([val]));
            }
        }
        let stretched = contrast_stretch(&img);
        // Check that range is now 0-255
        let mut min_val = 255u8;
        let mut max_val = 0u8;
        for pixel in stretched.pixels() {
            let val = pixel[0];
            if val < min_val {
                min_val = val;
            }
            if val > max_val {
                max_val = val;
            }
        }
        assert_eq!(min_val, 0);
        assert_eq!(max_val, 255);
    }
    // A flat image has no range to stretch and must pass through unchanged.
    #[test]
    fn test_contrast_stretch_uniform() {
        // Uniform image should remain unchanged
        let mut img = GrayImage::new(50, 50);
        for pixel in img.pixels_mut() {
            *pixel = Luma([128]);
        }
        let stretched = contrast_stretch(&img);
        for pixel in stretched.pixels() {
            assert_eq!(pixel[0], 128);
        }
    }
    // The max filter can only brighten, never darken, any pixel.
    #[test]
    fn test_estimate_background() {
        let img = create_test_image();
        let background = estimate_background(&img, 5);
        assert_eq!(background.dimensions(), img.dimensions());
        // Background should have higher values (max filter)
        for (orig, bg) in img.pixels().zip(background.pixels()) {
            assert!(bg[0] >= orig[0]);
        }
    }
    // CLAHE should work across a range of tile sizes, including partial tiles.
    #[test]
    fn test_clahe_various_tile_sizes() {
        let img = create_test_image();
        for tile_size in [4, 8, 16, 32] {
            let result = clahe(&img, 2.0, tile_size);
            assert!(result.is_ok());
        }
    }
}

View File

@@ -0,0 +1,277 @@
//! Image preprocessing module for OCR pipeline
//!
//! This module provides comprehensive image preprocessing capabilities including:
//! - Image transformations (grayscale, blur, sharpen, threshold)
//! - Rotation detection and correction
//! - Skew correction (deskewing)
//! - Image enhancement (CLAHE, normalization)
//! - Text region segmentation
//! - Complete preprocessing pipeline with parallel processing
pub mod deskew;
pub mod enhancement;
pub mod pipeline;
pub mod rotation;
pub mod segmentation;
pub mod transforms;
use image::{DynamicImage, GrayImage};
use serde::{Deserialize, Serialize};
use thiserror::Error;
/// Preprocessing error types
///
/// Each variant carries a human-readable message describing the failure;
/// `thiserror` derives the `Display` and `Error` implementations.
#[derive(Error, Debug)]
pub enum PreprocessError {
    /// Failure while loading or decoding the input image.
    #[error("Image loading error: {0}")]
    ImageLoad(String),
    /// Caller-supplied options or image dimensions were invalid.
    #[error("Invalid parameters: {0}")]
    InvalidParameters(String),
    /// A pipeline step failed during processing.
    #[error("Processing error: {0}")]
    Processing(String),
    /// Text-region segmentation failed.
    #[error("Segmentation error: {0}")]
    Segmentation(String),
}
/// Result type for preprocessing operations, fixing the error to
/// [`PreprocessError`].
pub type Result<T> = std::result::Result<T, PreprocessError>;
/// Preprocessing options for configuring the pipeline
///
/// Flat, serializable configuration consumed by [`preprocess`]; see
/// `Default` for the recommended starting values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PreprocessOptions {
    /// Enable rotation detection and correction
    pub auto_rotate: bool,
    /// Enable skew detection and correction
    pub auto_deskew: bool,
    /// Enable contrast enhancement
    pub enhance_contrast: bool,
    /// Enable denoising
    pub denoise: bool,
    /// Binarization threshold (None for auto Otsu); ignored when
    /// `adaptive_threshold` is true
    pub threshold: Option<u8>,
    /// Enable adaptive thresholding (takes precedence over `threshold`)
    pub adaptive_threshold: bool,
    /// Adaptive threshold window size
    pub adaptive_window_size: u32,
    /// Target image width (None to keep original); resizing happens only
    /// when both width and height are set
    pub target_width: Option<u32>,
    /// Target image height (None to keep original)
    pub target_height: Option<u32>,
    /// Enable text region detection
    pub detect_regions: bool,
    /// Gaussian blur sigma for denoising
    pub blur_sigma: f32,
    /// CLAHE clip limit for contrast enhancement
    pub clahe_clip_limit: f32,
    /// CLAHE tile size
    pub clahe_tile_size: u32,
}
impl Default for PreprocessOptions {
    /// Conservative defaults: every correction step enabled, adaptive
    /// thresholding with a 15-px window, no resizing.
    fn default() -> Self {
        Self {
            // Geometry corrections
            auto_rotate: true,
            auto_deskew: true,
            // Enhancement and denoising
            enhance_contrast: true,
            denoise: true,
            blur_sigma: 1.0,
            clahe_clip_limit: 2.0,
            clahe_tile_size: 8,
            // Binarization
            threshold: None,
            adaptive_threshold: true,
            adaptive_window_size: 15,
            // Output shaping and analysis
            target_width: None,
            target_height: None,
            detect_regions: true,
        }
    }
}
/// Type of text region
///
/// Classifies a detected region so downstream OCR can route it to the
/// appropriate recognizer; used by [`TextRegion::region_type`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RegionType {
    /// Regular text
    Text,
    /// Mathematical equation
    Math,
    /// Table
    Table,
    /// Figure/Image
    Figure,
    /// Unknown/Other
    Unknown,
}
/// Detected text region with bounding box
///
/// Produced by [`detect_text_regions`]; serializable for downstream use.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextRegion {
    /// Region type
    pub region_type: RegionType,
    /// Bounding box (x, y, width, height)
    pub bbox: (u32, u32, u32, u32),
    /// Confidence score (0.0 to 1.0)
    pub confidence: f32,
    /// Average text height in pixels
    pub text_height: f32,
    /// Detected baseline angle in degrees
    pub baseline_angle: f32,
}
/// Main preprocessing function with configurable options
///
/// Translates the flat [`PreprocessOptions`] struct into a configured
/// `pipeline::PreprocessPipeline` and runs the image through it.
///
/// # Arguments
/// * `image` - Input image to preprocess
/// * `options` - Preprocessing configuration options
///
/// # Returns
/// Preprocessed grayscale image ready for OCR
///
/// # Example
/// ```no_run
/// use image::open;
/// use ruvector_scipix::preprocess::{preprocess, PreprocessOptions};
///
/// let img = open("document.jpg").unwrap();
/// let options = PreprocessOptions::default();
/// let processed = preprocess(&img, &options).unwrap();
/// ```
pub fn preprocess(image: &DynamicImage, options: &PreprocessOptions) -> Result<GrayImage> {
    // Map each option field onto its builder setter, then run the pipeline.
    let pipeline = pipeline::PreprocessPipeline::builder()
        .auto_rotate(options.auto_rotate)
        .auto_deskew(options.auto_deskew)
        .enhance_contrast(options.enhance_contrast)
        .denoise(options.denoise)
        .blur_sigma(options.blur_sigma)
        .clahe_clip_limit(options.clahe_clip_limit)
        .clahe_tile_size(options.clahe_tile_size)
        .threshold(options.threshold)
        .adaptive_threshold(options.adaptive_threshold)
        .adaptive_window_size(options.adaptive_window_size)
        .target_size(options.target_width, options.target_height)
        .build();
    pipeline.process(image)
}
/// Detect text regions in an image
///
/// Thin wrapper that delegates directly to
/// `segmentation::find_text_regions`.
///
/// # Arguments
/// * `image` - Input grayscale image
/// * `min_region_size` - Minimum region size in pixels
///
/// # Returns
/// Vector of detected text regions with metadata
///
/// # Example
/// ```no_run
/// use image::open;
/// use ruvector_scipix::preprocess::detect_text_regions;
///
/// let img = open("document.jpg").unwrap().to_luma8();
/// let regions = detect_text_regions(&img, 100).unwrap();
/// println!("Found {} text regions", regions.len());
/// ```
pub fn detect_text_regions(image: &GrayImage, min_region_size: u32) -> Result<Vec<TextRegion>> {
    segmentation::find_text_regions(image, min_region_size)
}
#[cfg(test)]
mod tests {
    use super::*;
    use image::{Rgb, RgbImage};
    /// Build an RGB gradient test image of the requested size.
    fn create_test_image(width: u32, height: u32) -> DynamicImage {
        let mut img = RgbImage::new(width, height);
        // Create a simple test pattern
        for y in 0..height {
            for x in 0..width {
                let val = ((x + y) % 256) as u8;
                img.put_pixel(x, y, Rgb([val, val, val]));
            }
        }
        DynamicImage::ImageRgb8(img)
    }
    // Default options run the full pipeline and keep the input dimensions.
    #[test]
    fn test_preprocess_default_options() {
        let img = create_test_image(100, 100);
        let options = PreprocessOptions::default();
        let result = preprocess(&img, &options);
        assert!(result.is_ok());
        let processed = result.unwrap();
        assert_eq!(processed.width(), 100);
        assert_eq!(processed.height(), 100);
    }
    // Setting both target dimensions must resize the output.
    #[test]
    fn test_preprocess_with_resize() {
        let img = create_test_image(200, 200);
        let mut options = PreprocessOptions::default();
        options.target_width = Some(100);
        options.target_height = Some(100);
        let result = preprocess(&img, &options);
        assert!(result.is_ok());
        let processed = result.unwrap();
        assert_eq!(processed.width(), 100);
        assert_eq!(processed.height(), 100);
    }
    // Struct-update syntax with Default should preserve explicit overrides.
    #[test]
    fn test_preprocess_options_builder() {
        let options = PreprocessOptions {
            auto_rotate: false,
            auto_deskew: false,
            enhance_contrast: true,
            denoise: true,
            threshold: Some(128),
            adaptive_threshold: false,
            ..Default::default()
        };
        assert!(!options.auto_rotate);
        assert!(!options.auto_deskew);
        assert!(options.enhance_contrast);
        assert_eq!(options.threshold, Some(128));
    }
    // TextRegion must round-trip through serde_json without loss.
    #[test]
    fn test_region_type_serialization() {
        let region = TextRegion {
            region_type: RegionType::Math,
            bbox: (10, 20, 100, 50),
            confidence: 0.95,
            text_height: 12.0,
            baseline_angle: 0.5,
        };
        let json = serde_json::to_string(&region).unwrap();
        let deserialized: TextRegion = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.region_type, RegionType::Math);
        assert_eq!(deserialized.bbox, (10, 20, 100, 50));
        assert!((deserialized.confidence - 0.95).abs() < 0.001);
    }
}

View File

@@ -0,0 +1,456 @@
//! Complete preprocessing pipeline with builder pattern and parallel processing
use super::Result;
use crate::preprocess::{deskew, enhancement, rotation, transforms};
use image::{DynamicImage, GrayImage};
use rayon::prelude::*;
use std::sync::Arc;
/// Progress callback type
///
/// Invoked with a step description and a progress fraction in 0.0..=1.0;
/// wrapped in `Arc` so the pipeline can be shared across threads.
pub type ProgressCallback = Arc<dyn Fn(&str, f32) + Send + Sync>;
/// Complete preprocessing pipeline with configurable steps
///
/// Construct via [`PreprocessPipeline::builder`]; the fields mirror
/// `PreprocessPipelineBuilder` one-to-one.
pub struct PreprocessPipeline {
    // Step toggles
    auto_rotate: bool,
    auto_deskew: bool,
    enhance_contrast: bool,
    denoise: bool,
    // Per-step tuning parameters
    blur_sigma: f32,
    clahe_clip_limit: f32,
    clahe_tile_size: u32,
    threshold: Option<u8>,
    adaptive_threshold: bool,
    adaptive_window_size: u32,
    // Output size; resize runs only when both are Some
    target_width: Option<u32>,
    target_height: Option<u32>,
    // Optional progress-reporting hook
    progress_callback: Option<ProgressCallback>,
}
/// Builder for preprocessing pipeline
///
/// Accumulates configuration through chained setters and hands the values
/// to [`PreprocessPipeline`] in `build`; field meanings match the pipeline.
pub struct PreprocessPipelineBuilder {
    auto_rotate: bool,
    auto_deskew: bool,
    enhance_contrast: bool,
    denoise: bool,
    blur_sigma: f32,
    clahe_clip_limit: f32,
    clahe_tile_size: u32,
    threshold: Option<u8>,
    adaptive_threshold: bool,
    adaptive_window_size: u32,
    target_width: Option<u32>,
    target_height: Option<u32>,
    progress_callback: Option<ProgressCallback>,
}
impl Default for PreprocessPipelineBuilder {
    /// Defaults: every correction step enabled, adaptive thresholding with
    /// a 15-px window, no resizing, and no progress callback.
    fn default() -> Self {
        Self {
            // Geometry corrections
            auto_rotate: true,
            auto_deskew: true,
            // Enhancement and denoising
            enhance_contrast: true,
            denoise: true,
            blur_sigma: 1.0,
            clahe_clip_limit: 2.0,
            clahe_tile_size: 8,
            // Binarization
            threshold: None,
            adaptive_threshold: true,
            adaptive_window_size: 15,
            // Output shaping and reporting
            target_width: None,
            target_height: None,
            progress_callback: None,
        }
    }
}
impl PreprocessPipelineBuilder {
    /// Start from the default configuration.
    pub fn new() -> Self {
        Self::default()
    }
    /// Toggle automatic rotation detection/correction.
    pub fn auto_rotate(mut self, on: bool) -> Self {
        self.auto_rotate = on;
        self
    }
    /// Toggle automatic skew detection/correction.
    pub fn auto_deskew(mut self, on: bool) -> Self {
        self.auto_deskew = on;
        self
    }
    /// Toggle CLAHE contrast enhancement.
    pub fn enhance_contrast(mut self, on: bool) -> Self {
        self.enhance_contrast = on;
        self
    }
    /// Toggle Gaussian-blur denoising.
    pub fn denoise(mut self, on: bool) -> Self {
        self.denoise = on;
        self
    }
    /// Set the Gaussian blur sigma used when denoising.
    pub fn blur_sigma(mut self, value: f32) -> Self {
        self.blur_sigma = value;
        self
    }
    /// Set the CLAHE contrast clip limit.
    pub fn clahe_clip_limit(mut self, value: f32) -> Self {
        self.clahe_clip_limit = value;
        self
    }
    /// Set the CLAHE tile size.
    pub fn clahe_tile_size(mut self, value: u32) -> Self {
        self.clahe_tile_size = value;
        self
    }
    /// Set a fixed binarization threshold (None selects auto Otsu).
    pub fn threshold(mut self, value: Option<u8>) -> Self {
        self.threshold = value;
        self
    }
    /// Toggle adaptive thresholding (takes precedence over a fixed threshold).
    pub fn adaptive_threshold(mut self, on: bool) -> Self {
        self.adaptive_threshold = on;
        self
    }
    /// Set the adaptive-threshold window size.
    pub fn adaptive_window_size(mut self, value: u32) -> Self {
        self.adaptive_window_size = value;
        self
    }
    /// Set output dimensions; resizing runs only when both are Some.
    pub fn target_size(mut self, width: Option<u32>, height: Option<u32>) -> Self {
        self.target_width = width;
        self.target_height = height;
        self
    }
    /// Install a progress callback invoked as (step_name, fraction).
    pub fn progress_callback<F>(mut self, callback: F) -> Self
    where
        F: Fn(&str, f32) + Send + Sync + 'static,
    {
        self.progress_callback = Some(Arc::new(callback));
        self
    }
    /// Consume the builder and produce the configured pipeline.
    pub fn build(self) -> PreprocessPipeline {
        // Destructure so a newly added builder field causes a compile error
        // here instead of being silently dropped.
        let Self {
            auto_rotate,
            auto_deskew,
            enhance_contrast,
            denoise,
            blur_sigma,
            clahe_clip_limit,
            clahe_tile_size,
            threshold,
            adaptive_threshold,
            adaptive_window_size,
            target_width,
            target_height,
            progress_callback,
        } = self;
        PreprocessPipeline {
            auto_rotate,
            auto_deskew,
            enhance_contrast,
            denoise,
            blur_sigma,
            clahe_clip_limit,
            clahe_tile_size,
            threshold,
            adaptive_threshold,
            adaptive_window_size,
            target_width,
            target_height,
            progress_callback,
        }
    }
}
impl PreprocessPipeline {
    /// Create a new pipeline builder
    pub fn builder() -> PreprocessPipelineBuilder {
        PreprocessPipelineBuilder::new()
    }
    /// Report progress if callback is set
    ///
    /// `progress` is a fraction in 0.0..=1.0, as passed by `process`.
    fn report_progress(&self, step: &str, progress: f32) {
        if let Some(callback) = &self.progress_callback {
            callback(step, progress);
        }
    }
    /// Process a single image through the complete pipeline
    ///
    /// # Pipeline steps:
    /// 1. Convert to grayscale
    /// 2. Detect and correct rotation (if enabled)
    /// 3. Detect and correct skew (if enabled)
    /// 4. Enhance contrast with CLAHE (if enabled)
    /// 5. Denoise with Gaussian blur (if enabled)
    /// 6. Apply thresholding (binary or adaptive)
    /// 7. Resize to target dimensions (if specified)
    ///
    /// Step order matters: geometry corrections run before enhancement
    /// and binarization, and resizing is applied last.
    pub fn process(&self, image: &DynamicImage) -> Result<GrayImage> {
        self.report_progress("Starting preprocessing", 0.0);
        // Step 1: Convert to grayscale
        self.report_progress("Converting to grayscale", 0.1);
        let mut gray = transforms::to_grayscale(image);
        // Step 2: Auto-rotate (corrections below 0.5° are skipped)
        if self.auto_rotate {
            self.report_progress("Detecting rotation", 0.2);
            let angle = rotation::detect_rotation(&gray)?;
            if angle.abs() > 0.5 {
                self.report_progress("Correcting rotation", 0.25);
                gray = rotation::rotate_image(&gray, -angle)?;
            }
        }
        // Step 3: Auto-deskew (corrections below 0.5° are skipped)
        if self.auto_deskew {
            self.report_progress("Detecting skew", 0.3);
            let angle = deskew::detect_skew_angle(&gray)?;
            if angle.abs() > 0.5 {
                self.report_progress("Correcting skew", 0.35);
                gray = deskew::deskew_image(&gray, angle)?;
            }
        }
        // Step 4: Enhance contrast
        if self.enhance_contrast {
            self.report_progress("Enhancing contrast", 0.5);
            gray = enhancement::clahe(&gray, self.clahe_clip_limit, self.clahe_tile_size)?;
        }
        // Step 5: Denoise
        if self.denoise {
            self.report_progress("Denoising", 0.6);
            gray = transforms::gaussian_blur(&gray, self.blur_sigma)?;
        }
        // Step 6: Thresholding — adaptive takes precedence over a fixed
        // threshold; with neither configured, Otsu picks the level.
        self.report_progress("Applying threshold", 0.7);
        gray = if self.adaptive_threshold {
            transforms::adaptive_threshold(&gray, self.adaptive_window_size)?
        } else if let Some(threshold_val) = self.threshold {
            transforms::threshold(&gray, threshold_val)
        } else {
            // Auto Otsu threshold
            let threshold_val = transforms::otsu_threshold(&gray)?;
            transforms::threshold(&gray, threshold_val)
        };
        // Step 7: Resize (only when BOTH target dimensions are set)
        if let (Some(width), Some(height)) = (self.target_width, self.target_height) {
            self.report_progress("Resizing", 0.9);
            gray = image::imageops::resize(
                &gray,
                width,
                height,
                image::imageops::FilterType::Lanczos3,
            );
        }
        self.report_progress("Preprocessing complete", 1.0);
        Ok(gray)
    }
    /// Process multiple images in parallel
    ///
    /// Runs `process` across a rayon parallel iterator; collecting into a
    /// `Result` preserves input order and yields an error if any image fails.
    ///
    /// # Arguments
    /// * `images` - Vector of images to process
    ///
    /// # Returns
    /// Vector of preprocessed images in the same order
    pub fn process_batch(&self, images: Vec<DynamicImage>) -> Result<Vec<GrayImage>> {
        images
            .into_par_iter()
            .map(|img| self.process(&img))
            .collect()
    }
    /// Process image and return intermediate results from each step
    ///
    /// Useful for debugging and visualization. Each entry is a
    /// (step_name, snapshot) pair; steps that are disabled or skipped
    /// (e.g. rotation below 0.5°) produce no entry.
    pub fn process_with_intermediates(
        &self,
        image: &DynamicImage,
    ) -> Result<Vec<(String, GrayImage)>> {
        let mut results = Vec::new();
        // Step 1: Grayscale
        let mut gray = transforms::to_grayscale(image);
        results.push(("01_grayscale".to_string(), gray.clone()));
        // Step 2: Rotation
        if self.auto_rotate {
            let angle = rotation::detect_rotation(&gray)?;
            if angle.abs() > 0.5 {
                gray = rotation::rotate_image(&gray, -angle)?;
                results.push(("02_rotated".to_string(), gray.clone()));
            }
        }
        // Step 3: Deskew
        if self.auto_deskew {
            let angle = deskew::detect_skew_angle(&gray)?;
            if angle.abs() > 0.5 {
                gray = deskew::deskew_image(&gray, angle)?;
                results.push(("03_deskewed".to_string(), gray.clone()));
            }
        }
        // Step 4: Enhancement
        if self.enhance_contrast {
            gray = enhancement::clahe(&gray, self.clahe_clip_limit, self.clahe_tile_size)?;
            results.push(("04_enhanced".to_string(), gray.clone()));
        }
        // Step 5: Denoise
        if self.denoise {
            gray = transforms::gaussian_blur(&gray, self.blur_sigma)?;
            results.push(("05_denoised".to_string(), gray.clone()));
        }
        // Step 6: Threshold (same precedence rules as `process`)
        gray = if self.adaptive_threshold {
            transforms::adaptive_threshold(&gray, self.adaptive_window_size)?
        } else if let Some(threshold_val) = self.threshold {
            transforms::threshold(&gray, threshold_val)
        } else {
            let threshold_val = transforms::otsu_threshold(&gray)?;
            transforms::threshold(&gray, threshold_val)
        };
        results.push(("06_thresholded".to_string(), gray.clone()));
        // Step 7: Resize
        if let (Some(width), Some(height)) = (self.target_width, self.target_height) {
            gray = image::imageops::resize(
                &gray,
                width,
                height,
                image::imageops::FilterType::Lanczos3,
            );
            results.push(("07_resized".to_string(), gray.clone()));
        }
        Ok(results)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use image::{Rgb, RgbImage};
    /// 100x100 RGB diagonal-gradient fixture shared by all pipeline tests.
    fn create_test_image() -> DynamicImage {
        let mut img = RgbImage::new(100, 100);
        for y in 0..100 {
            for x in 0..100 {
                let val = ((x + y) / 2) as u8;
                img.put_pixel(x, y, Rgb([val, val, val]));
            }
        }
        DynamicImage::ImageRgb8(img)
    }
    // Builder setters must be reflected on the built pipeline.
    #[test]
    fn test_pipeline_builder() {
        let pipeline = PreprocessPipeline::builder()
            .auto_rotate(false)
            .denoise(true)
            .blur_sigma(1.5)
            .build();
        assert!(!pipeline.auto_rotate);
        assert!(pipeline.denoise);
        assert!((pipeline.blur_sigma - 1.5).abs() < 0.001);
    }
    // Without resize, output keeps the input dimensions.
    #[test]
    fn test_pipeline_process() {
        let img = create_test_image();
        let pipeline = PreprocessPipeline::builder()
            .auto_rotate(false)
            .auto_deskew(false)
            .build();
        let result = pipeline.process(&img);
        assert!(result.is_ok());
        let processed = result.unwrap();
        assert_eq!(processed.width(), 100);
        assert_eq!(processed.height(), 100);
    }
    // target_size(Some, Some) must trigger the resize stage.
    #[test]
    fn test_pipeline_with_resize() {
        let img = create_test_image();
        let pipeline = PreprocessPipeline::builder()
            .target_size(Some(50), Some(50))
            .auto_rotate(false)
            .auto_deskew(false)
            .build();
        let result = pipeline.process(&img);
        assert!(result.is_ok());
        let processed = result.unwrap();
        assert_eq!(processed.width(), 50);
        assert_eq!(processed.height(), 50);
    }
    // Parallel batch returns one output per input, in order.
    #[test]
    fn test_pipeline_batch_processing() {
        let images = vec![
            create_test_image(),
            create_test_image(),
            create_test_image(),
        ];
        let pipeline = PreprocessPipeline::builder()
            .auto_rotate(false)
            .auto_deskew(false)
            .build();
        let results = pipeline.process_batch(images);
        assert!(results.is_ok());
        let processed = results.unwrap();
        assert_eq!(processed.len(), 3);
    }
    // Intermediates must include at least the grayscale and threshold stages.
    #[test]
    fn test_pipeline_intermediates() {
        let img = create_test_image();
        let pipeline = PreprocessPipeline::builder()
            .auto_rotate(false)
            .auto_deskew(false)
            .enhance_contrast(true)
            .denoise(true)
            .build();
        let result = pipeline.process_with_intermediates(&img);
        assert!(result.is_ok());
        let intermediates = result.unwrap();
        assert!(!intermediates.is_empty());
        assert!(intermediates
            .iter()
            .any(|(name, _)| name.contains("grayscale")));
        assert!(intermediates
            .iter()
            .any(|(name, _)| name.contains("thresholded")));
    }
    // The progress callback must fire for the first and last steps.
    #[test]
    fn test_progress_callback() {
        use std::sync::{Arc, Mutex};
        let progress_steps = Arc::new(Mutex::new(Vec::new()));
        let progress_clone = Arc::clone(&progress_steps);
        let pipeline = PreprocessPipeline::builder()
            .auto_rotate(false)
            .auto_deskew(false)
            .progress_callback(move |step, _progress| {
                progress_clone.lock().unwrap().push(step.to_string());
            })
            .build();
        let img = create_test_image();
        let _ = pipeline.process(&img);
        let steps = progress_steps.lock().unwrap();
        assert!(!steps.is_empty());
        assert!(steps.iter().any(|s| s.contains("Starting")));
        assert!(steps.iter().any(|s| s.contains("complete")));
    }
}

View File

@@ -0,0 +1,319 @@
//! Rotation detection and correction using projection profiles
use super::{PreprocessError, Result};
use image::{GrayImage, Luma};
use imageproc::geometric_transformations::{rotate_about_center, Interpolation};
use std::f32;
/// Detect rotation angle using projection profile analysis
///
/// Uses horizontal projection profiles to detect document rotation: the
/// angle whose projection histogram has the highest variance is taken as
/// the text-baseline orientation. A coarse 15-degree grid search is
/// followed by a 2-degree refinement around the best coarse candidate.
/// Returns angle in degrees (typically in range -45 to +45).
///
/// # Arguments
/// * `image` - Input grayscale image
///
/// # Returns
/// Rotation angle in degrees (positive = clockwise)
///
/// # Errors
/// Returns `PreprocessError::InvalidParameters` for images smaller than 10x10.
///
/// # Example
/// ```no_run
/// use ruvector_scipix::preprocess::rotation::detect_rotation;
/// # use image::GrayImage;
/// # let image = GrayImage::new(100, 100);
/// let angle = detect_rotation(&image).unwrap();
/// println!("Detected rotation: {:.2}°", angle);
/// ```
pub fn detect_rotation(image: &GrayImage) -> Result<f32> {
    let (width, height) = image.dimensions();
    if width < 10 || height < 10 {
        return Err(PreprocessError::InvalidParameters(
            "Image too small for rotation detection".to_string(),
        ));
    }
    // Coarse search on a 15-degree grid.
    let angles = [-45.0, -30.0, -15.0, 0.0, 15.0, 30.0, 45.0];
    let mut max_score = 0.0;
    let mut best_angle = 0.0;
    for &angle in &angles {
        let score = calculate_projection_score(image, angle);
        if score > max_score {
            max_score = score;
            best_angle = angle;
        }
    }
    // Refine around the coarse winner. The coarse grid step is 15°, so the
    // refinement window must reach at least ±7.5° to cover every true angle;
    // scan ±14° in 2° steps (a ±10° window would leave the (10°, 15°] band
    // around the winner unreachable).
    let fine_angles: Vec<f32> = (-7..=7).map(|i| best_angle + (i as f32) * 2.0).collect();
    max_score = 0.0;
    for angle in fine_angles {
        let score = calculate_projection_score(image, angle);
        if score > max_score {
            max_score = score;
            best_angle = angle;
        }
    }
    Ok(best_angle)
}
/// Calculate projection profile score for a given rotation angle
///
/// Higher scores indicate better alignment with text baselines: when text
/// rows line up with the projection axis, the per-row ink histogram has
/// sharp peaks and valleys, i.e. high variance.
fn calculate_projection_score(image: &GrayImage, angle: f32) -> f32 {
    let (width, height) = image.dimensions();
    // For 0 degrees, use direct projection (no trigonometry needed)
    if angle.abs() < 0.1 {
        return calculate_horizontal_projection_variance(image);
    }
    // For non-zero angles, calculate projection along rotated axis
    let rad = angle.to_radians();
    let cos_a = rad.cos();
    let sin_a = rad.sin();
    let mut projection = vec![0u32; height as usize];
    for y in 0..height {
        for x in 0..width {
            let pixel = image.get_pixel(x, y)[0];
            // Pixels darker than mid-gray count as foreground (ink).
            if pixel < 128 {
                // Project pixel onto rotated horizontal axis.
                // NOTE(review): y*cos - x*sin can fall outside [0, height)
                // for part of the image; those pixels are silently dropped
                // by the bounds check below, so the score is computed from
                // a truncated projection. Confirm this bias is acceptable
                // at large angles.
                let proj_y = ((y as f32) * cos_a - (x as f32) * sin_a) as i32;
                if proj_y >= 0 && proj_y < height as i32 {
                    projection[proj_y as usize] += 1;
                }
            }
        }
    }
    // Calculate variance of projection (higher = better alignment)
    calculate_variance(&projection)
}
/// Variance of the per-row foreground-pixel counts (the horizontal
/// projection profile). Pixels darker than 128 count as foreground.
fn calculate_horizontal_projection_variance(image: &GrayImage) -> f32 {
    let mut row_counts = vec![0u32; image.height() as usize];
    for (_x, y, pixel) in image.enumerate_pixels() {
        if pixel[0] < 128 {
            row_counts[y as usize] += 1;
        }
    }
    calculate_variance(&row_counts)
}
/// Population variance of a projection profile; 0.0 for an empty slice.
fn calculate_variance(projection: &[u32]) -> f32 {
    let n = projection.len();
    if n == 0 {
        return 0.0;
    }
    // Mean of the counts, then the average squared deviation from it.
    let mean = projection.iter().sum::<u32>() as f32 / n as f32;
    projection
        .iter()
        .map(|&count| (count as f32 - mean).powi(2))
        .sum::<f32>()
        / n as f32
}
/// Rotate image by specified angle
///
/// # Arguments
/// * `image` - Input grayscale image
/// * `angle` - Rotation angle in degrees (positive = clockwise)
///
/// # Returns
/// Rotated image (same dimensions) using bilinear interpolation; pixels
/// exposed by the rotation are filled with white.
///
/// # Example
/// ```no_run
/// use ruvector_scipix::preprocess::rotation::rotate_image;
/// # use image::GrayImage;
/// # let image = GrayImage::new(100, 100);
/// let rotated = rotate_image(&image, 15.0).unwrap();
/// ```
pub fn rotate_image(image: &GrayImage, angle: f32) -> Result<GrayImage> {
    // Angles below a hundredth of a degree are treated as "no rotation".
    if angle.abs() < 0.01 {
        return Ok(image.clone());
    }
    // Negated so that a positive input angle rotates clockwise
    // (this module's convention).
    let theta = -angle.to_radians();
    let rotated = rotate_about_center(
        image,
        theta,
        Interpolation::Bilinear,
        Luma([255]), // white background fill
    );
    Ok(rotated)
}
/// Detect rotation with confidence score
///
/// Confidence is the ratio of the projection score at the detected angle
/// to the score at 0 degrees, capped at 1.0. When the baseline score is
/// zero (e.g. a blank image), a moderate 0.5 is reported instead.
///
/// # Returns
/// Tuple of (angle, confidence) where confidence is 0.0-1.0
pub fn detect_rotation_with_confidence(image: &GrayImage) -> Result<(f32, f32)> {
    let angle = detect_rotation(image)?;
    let score_at_angle = calculate_projection_score(image, angle);
    let score_at_zero = calculate_projection_score(image, 0.0);
    let confidence = if score_at_zero > 0.0 {
        (score_at_angle / score_at_zero).min(1.0)
    } else {
        0.5 // no baseline signal: default to moderate confidence
    };
    Ok((angle, confidence))
}
/// Auto-rotate image only if confidence is above threshold
///
/// # Arguments
/// * `image` - Input grayscale image
/// * `confidence_threshold` - Minimum confidence (0.0-1.0) to apply rotation
///
/// # Returns
/// Tuple of (rotated_image, angle_applied, confidence); when no rotation is
/// applied the original image is returned with an angle of 0.0.
pub fn auto_rotate(image: &GrayImage, confidence_threshold: f32) -> Result<(GrayImage, f32, f32)> {
    let (angle, confidence) = detect_rotation_with_confidence(image)?;
    // Skip correction when we are not confident enough, or when the detected
    // angle is within the half-degree noise band.
    if confidence < confidence_threshold || angle.abs() <= 0.5 {
        return Ok((image.clone(), 0.0, confidence));
    }
    let corrected = rotate_image(image, -angle)?;
    Ok((corrected, angle, confidence))
}
#[cfg(test)]
mod tests {
    use super::*;
    /// 200x100 white canvas with four long horizontal black lines,
    /// simulating perfectly level text baselines.
    fn create_text_image() -> GrayImage {
        let mut img = GrayImage::new(200, 100);
        // Fill with white
        for pixel in img.pixels_mut() {
            *pixel = Luma([255]);
        }
        // Draw some horizontal lines (simulating text)
        for y in [20, 25, 50, 55] {
            for x in 10..190 {
                img.put_pixel(x, y, Luma([0]));
            }
        }
        img
    }
    // A level image should be detected as (near) unrotated.
    #[test]
    fn test_detect_rotation_straight() {
        let img = create_text_image();
        let angle = detect_rotation(&img);
        assert!(angle.is_ok());
        let a = angle.unwrap();
        // Should detect near-zero rotation
        assert!(a.abs() < 10.0);
    }
    // Rotation must succeed and preserve the canvas dimensions.
    #[test]
    fn test_rotate_image() {
        let img = create_text_image();
        // Rotate by 15 degrees
        let rotated = rotate_image(&img, 15.0);
        assert!(rotated.is_ok());
        let result = rotated.unwrap();
        assert_eq!(result.dimensions(), img.dimensions());
    }
    // Sub-hundredth-degree angles take the clone fast path.
    #[test]
    fn test_rotate_no_change() {
        let img = create_text_image();
        // Rotate by ~0 degrees
        let rotated = rotate_image(&img, 0.001);
        assert!(rotated.is_ok());
        let result = rotated.unwrap();
        assert_eq!(result.dimensions(), img.dimensions());
    }
    // Confidence must be a valid ratio in [0, 1].
    #[test]
    fn test_rotation_confidence() {
        let img = create_text_image();
        let result = detect_rotation_with_confidence(&img);
        assert!(result.is_ok());
        let (angle, confidence) = result.unwrap();
        assert!(confidence >= 0.0 && confidence <= 1.0);
        println!(
            "Detected angle: {:.2}°, confidence: {:.2}",
            angle, confidence
        );
    }
    // Whether or not rotation is applied, dimensions are preserved.
    #[test]
    fn test_auto_rotate_with_threshold() {
        let img = create_text_image();
        // High threshold - should not rotate if confidence is low
        let result = auto_rotate(&img, 0.95);
        assert!(result.is_ok());
        let (rotated, angle, confidence) = result.unwrap();
        assert_eq!(rotated.dimensions(), img.dimensions());
        println!(
            "Auto-rotate: angle={:.2}°, confidence={:.2}",
            angle, confidence
        );
    }
    // A peaked profile has strictly positive variance.
    #[test]
    fn test_projection_variance() {
        let projection = vec![10, 50, 100, 50, 10];
        let variance = calculate_variance(&projection);
        assert!(variance > 0.0);
    }
    // Images below the 10x10 minimum are rejected with an error.
    #[test]
    fn test_rotation_small_image_error() {
        let small_img = GrayImage::new(5, 5);
        let result = detect_rotation(&small_img);
        assert!(result.is_err());
    }
    // Rotating forward then back keeps the dimensions intact.
    #[test]
    fn test_rotation_roundtrip() {
        let img = create_text_image();
        // Rotate and unrotate
        let rotated = rotate_image(&img, 30.0).unwrap();
        let unrotated = rotate_image(&rotated, -30.0).unwrap();
        assert_eq!(unrotated.dimensions(), img.dimensions());
    }
}

View File

@@ -0,0 +1,483 @@
//! Text region detection and segmentation
use super::{RegionType, Result, TextRegion};
use image::GrayImage;
use std::collections::{HashMap, HashSet};
/// Find text regions in a binary or grayscale image
///
/// Uses connected component analysis and geometric heuristics to identify
/// text regions and classify them by type (text, math, table, etc.)
///
/// # Arguments
/// * `image` - Input grayscale or binary image
/// * `min_region_size` - Minimum region area in pixels
///
/// # Returns
/// Vector of detected text regions with bounding boxes
///
/// # Example
/// ```no_run
/// use ruvector_scipix::preprocess::segmentation::find_text_regions;
/// # use image::GrayImage;
/// # let image = GrayImage::new(100, 100);
/// let regions = find_text_regions(&image, 100).unwrap();
/// println!("Found {} regions", regions.len());
/// ```
pub fn find_text_regions(image: &GrayImage, min_region_size: u32) -> Result<Vec<TextRegion>> {
    // Label connected foreground components and box each one.
    let labels = connected_components(image);
    let boxes = extract_bounding_boxes(&labels);
    // Drop speckles, then merge boxes that overlap or nearly touch.
    let sized = filter_by_size(boxes, min_region_size);
    let merged = merge_overlapping_regions(sized, 10);
    // Group into text lines and attach a heuristic type per region.
    let lines = find_text_lines(image, &merged);
    Ok(classify_regions(image, lines))
}
/// Connected component labeling using flood-fill
///
/// Returns a `height x width` label grid where 0 means background and each
/// 4-connected foreground component carries a unique positive id.
fn connected_components(image: &GrayImage) -> Vec<Vec<u32>> {
    let (width, height) = image.dimensions();
    let mut labels = vec![vec![0u32; width as usize]; height as usize];
    let mut next_label = 1u32;
    for y in 0..height {
        for x in 0..width {
            let unlabeled = labels[y as usize][x as usize] == 0;
            // Foreground = darker than mid-gray.
            if unlabeled && image.get_pixel(x, y)[0] < 128 {
                flood_fill(image, &mut labels, x, y, next_label);
                next_label += 1;
            }
        }
    }
    labels
}
/// Iterative (stack-based) flood fill that stamps `label` on every
/// 4-connected foreground pixel reachable from the start position.
fn flood_fill(image: &GrayImage, labels: &mut [Vec<u32>], start_x: u32, start_y: u32, label: u32) {
    let (width, height) = image.dimensions();
    let mut pending = vec![(start_x, start_y)];
    while let Some((x, y)) = pending.pop() {
        // Bounds and visited/background checks happen on pop, so it is
        // safe to enqueue a coordinate more than once.
        if x >= width || y >= height {
            continue;
        }
        let already_labeled = labels[y as usize][x as usize] != 0;
        let background = image.get_pixel(x, y)[0] >= 128;
        if already_labeled || background {
            continue;
        }
        labels[y as usize][x as usize] = label;
        // Enqueue the 4-neighbours.
        if x > 0 {
            pending.push((x - 1, y));
        }
        if x + 1 < width {
            pending.push((x + 1, y));
        }
        if y > 0 {
            pending.push((x, y - 1));
        }
        if y + 1 < height {
            pending.push((x, y + 1));
        }
    }
}
/// Compute one bounding box per label, returned as (x, y, width, height).
fn extract_bounding_boxes(labels: &[Vec<u32>]) -> HashMap<u32, (u32, u32, u32, u32)> {
    // First accumulate (min_x, min_y, max_x, max_y) extents per label.
    let mut extents: HashMap<u32, (u32, u32, u32, u32)> = HashMap::new();
    for (y, row) in labels.iter().enumerate() {
        for (x, &label) in row.iter().enumerate() {
            if label == 0 {
                continue; // background
            }
            let (px, py) = (x as u32, y as u32);
            extents
                .entry(label)
                .and_modify(|e| {
                    e.0 = e.0.min(px);
                    e.1 = e.1.min(py);
                    e.2 = e.2.max(px);
                    e.3 = e.3.max(py);
                })
                .or_insert((px, py, px, py));
        }
    }
    // Then convert inclusive extents to (x, y, width, height).
    extents
        .into_iter()
        .map(|(label, (min_x, min_y, max_x, max_y))| {
            (label, (min_x, min_y, max_x - min_x + 1, max_y - min_y + 1))
        })
        .collect()
}
/// Keep only the boxes whose area (width * height) is at least `min_size`.
/// Labels are discarded; ordering follows `HashMap` iteration order.
fn filter_by_size(
    bboxes: HashMap<u32, (u32, u32, u32, u32)>,
    min_size: u32,
) -> Vec<(u32, u32, u32, u32)> {
    let mut kept = Vec::new();
    for (_label, bbox) in bboxes {
        let (_, _, w, h) = bbox;
        if w * h >= min_size {
            kept.push(bbox);
        }
    }
    kept
}
/// Merge overlapping or nearby regions
///
/// # Arguments
/// * `regions` - Vector of bounding boxes (x, y, width, height)
/// * `merge_distance` - Maximum distance to merge regions
pub fn merge_overlapping_regions(
regions: Vec<(u32, u32, u32, u32)>,
merge_distance: u32,
) -> Vec<(u32, u32, u32, u32)> {
if regions.is_empty() {
return regions;
}
let mut merged = Vec::new();
let mut used = HashSet::new();
for i in 0..regions.len() {
if used.contains(&i) {
continue;
}
let mut current = regions[i];
let mut changed = true;
while changed {
changed = false;
for j in (i + 1)..regions.len() {
if used.contains(&j) {
continue;
}
if boxes_overlap_or_close(&current, &regions[j], merge_distance) {
current = merge_boxes(&current, &regions[j]);
used.insert(j);
changed = true;
}
}
}
merged.push(current);
used.insert(i);
}
merged
}
/// Check if two bounding boxes overlap or lie within `distance` pixels.
///
/// Boxes are (x, y, width, height). All additions use saturating
/// arithmetic so that boxes (or tolerances) near `u32::MAX` cannot
/// overflow and panic in debug builds; saturation only makes the test
/// more permissive at the far edge of the coordinate space.
fn boxes_overlap_or_close(
    box1: &(u32, u32, u32, u32),
    box2: &(u32, u32, u32, u32),
    distance: u32,
) -> bool {
    let (x1, y1, w1, h1) = *box1;
    let (x2, y2, w2, h2) = *box2;
    let x1_end = x1.saturating_add(w1);
    let y1_end = y1.saturating_add(h1);
    let x2_end = x2.saturating_add(w2);
    let y2_end = y2.saturating_add(h2);
    // Boxes are close iff the intervals (padded by `distance`) intersect
    // on BOTH axes.
    let x_close = x1 <= x2_end.saturating_add(distance) && x2 <= x1_end.saturating_add(distance);
    let y_close = y1 <= y2_end.saturating_add(distance) && y2 <= y1_end.saturating_add(distance);
    x_close && y_close
}
/// Union of two (x, y, width, height) boxes: the smallest box covering both.
fn merge_boxes(box1: &(u32, u32, u32, u32), box2: &(u32, u32, u32, u32)) -> (u32, u32, u32, u32) {
    // Extreme corners of the two rectangles define the result.
    let left = box1.0.min(box2.0);
    let top = box1.1.min(box2.1);
    let right = (box1.0 + box1.2).max(box2.0 + box2.2);
    let bottom = (box1.1 + box1.3).max(box2.1 + box2.3);
    (left, top, right - left, bottom - top)
}
/// Find text lines by grouping vertically aligned regions
///
/// Regions are sorted top-to-bottom and a region joins the current line
/// when the gap between its top edge and the previous region's top edge is
/// less than half the taller of the two boxes. (`_image` is unused; kept
/// for interface stability.)
pub fn find_text_lines(
    _image: &GrayImage,
    regions: &[(u32, u32, u32, u32)],
) -> Vec<Vec<(u32, u32, u32, u32)>> {
    if regions.is_empty() {
        return Vec::new();
    }
    let mut ordered = regions.to_vec();
    ordered.sort_by_key(|r| r.1);
    let mut lines: Vec<Vec<(u32, u32, u32, u32)>> = Vec::new();
    let mut current_line = vec![ordered[0]];
    for &region in &ordered[1..] {
        let last = *current_line.last().unwrap();
        // Vertical distance between box tops vs. half the taller height.
        let gap = region.1.abs_diff(last.1);
        let tallest = last.3.max(region.3);
        if gap < tallest / 2 {
            current_line.push(region);
        } else {
            // Close out the current line and start a new one.
            lines.push(std::mem::replace(&mut current_line, vec![region]));
        }
    }
    lines.push(current_line);
    lines
}
/// Classify regions by type (text, math, table, etc.)
///
/// Flattens the detected lines and applies simple geometric heuristics
/// (aspect ratio, height, ink density) to each bounding box.
fn classify_regions(
    image: &GrayImage,
    text_lines: Vec<Vec<(u32, u32, u32, u32)>>,
) -> Vec<TextRegion> {
    text_lines
        .into_iter()
        .flatten()
        .map(|bbox| {
            let (x, y, width, height) = bbox;
            let aspect_ratio = width as f32 / height as f32;
            let density = calculate_density(image, bbox);
            // Heuristic classification; thresholds are ad hoc.
            let region_type = if aspect_ratio > 10.0 {
                // Very wide region might be a table or figure caption
                RegionType::Table
            } else if aspect_ratio < 0.5 && height > 50 {
                // Tall region might be a figure
                RegionType::Figure
            } else if density > 0.3 && height < 30 {
                // Dense, small region likely math
                RegionType::Math
            } else {
                RegionType::Text
            };
            TextRegion {
                region_type,
                bbox: (x, y, width, height),
                confidence: 0.8, // fixed default; no per-region scoring yet
                text_height: height as f32,
                baseline_angle: 0.0,
            }
        })
        .collect()
}
/// Fraction of foreground (dark, < 128) pixels inside `bbox`.
///
/// The sampling window is clamped to the image bounds before reading, so a
/// box that extends past the edge no longer panics inside `get_pixel`;
/// out-of-image area simply contributes no foreground pixels (the
/// denominator stays the nominal box area, unchanged for in-bounds boxes).
fn calculate_density(image: &GrayImage, bbox: (u32, u32, u32, u32)) -> f32 {
    let (x, y, width, height) = bbox;
    // f32 product avoids u32 overflow for degenerate, huge boxes.
    let total_pixels = (width as f32) * (height as f32);
    if total_pixels == 0.0 {
        return 0.0;
    }
    let (img_w, img_h) = image.dimensions();
    // Clamp the window to the image.
    let x_start = x.min(img_w);
    let y_start = y.min(img_h);
    let x_end = x.saturating_add(width).min(img_w);
    let y_end = y.saturating_add(height).min(img_h);
    let mut foreground_pixels = 0u32;
    for py in y_start..y_end {
        for px in x_start..x_end {
            if image.get_pixel(px, py)[0] < 128 {
                foreground_pixels += 1;
            }
        }
    }
    foreground_pixels as f32 / total_pixels
}
#[cfg(test)]
mod tests {
    use super::*;
    use image::Luma;
    /// 200x200 white canvas with three solid black rectangles that stand
    /// in for separate text blocks.
    fn create_test_image_with_rectangles() -> GrayImage {
        let mut img = GrayImage::new(200, 200);
        // Fill with white
        for pixel in img.pixels_mut() {
            *pixel = Luma([255]);
        }
        // Draw some black rectangles (simulating text regions)
        for y in 20..40 {
            for x in 20..100 {
                img.put_pixel(x, y, Luma([0]));
            }
        }
        for y in 60..80 {
            for x in 20..120 {
                img.put_pixel(x, y, Luma([0]));
            }
        }
        for y in 100..120 {
            for x in 20..80 {
                img.put_pixel(x, y, Luma([0]));
            }
        }
        img
    }
    // End-to-end: each drawn rectangle should come back as a region.
    #[test]
    fn test_find_text_regions() {
        let img = create_test_image_with_rectangles();
        let regions = find_text_regions(&img, 100);
        assert!(regions.is_ok());
        let r = regions.unwrap();
        // Should find at least 3 regions
        assert!(r.len() >= 3);
        for region in r {
            println!("Region: {:?} at {:?}", region.region_type, region.bbox);
        }
    }
    // Labeling must assign at least one non-zero component id.
    #[test]
    fn test_connected_components() {
        let img = create_test_image_with_rectangles();
        let components = connected_components(&img);
        // Check that we have non-zero labels
        let max_label = components
            .iter()
            .flat_map(|row| row.iter())
            .max()
            .unwrap_or(&0);
        assert!(*max_label > 0);
    }
    // Overlapping boxes collapse into one; the distant box stays separate.
    #[test]
    fn test_merge_overlapping_regions() {
        let regions = vec![(10, 10, 50, 20), (40, 10, 50, 20), (100, 100, 30, 30)];
        let merged = merge_overlapping_regions(regions, 10);
        // First two should merge, third stays separate
        assert_eq!(merged.len(), 2);
    }
    // The merged box must span both inputs.
    #[test]
    fn test_merge_boxes() {
        let box1 = (10, 10, 50, 20);
        let box2 = (40, 15, 30, 25);
        let merged = merge_boxes(&box1, &box2);
        assert_eq!(merged.0, 10); // min x
        assert_eq!(merged.1, 10); // min y
        assert!(merged.2 >= 50); // width
        assert!(merged.3 >= 25); // height
    }
    // Touching boxes count as close at any tolerance.
    #[test]
    fn test_boxes_overlap() {
        let box1 = (10, 10, 50, 20);
        let box2 = (40, 10, 50, 20);
        assert!(boxes_overlap_or_close(&box1, &box2, 0));
        assert!(boxes_overlap_or_close(&box1, &box2, 10));
    }
    // Distant boxes with zero tolerance do not count as close.
    #[test]
    fn test_boxes_dont_overlap() {
        let box1 = (10, 10, 20, 20);
        let box2 = (100, 100, 20, 20);
        assert!(!boxes_overlap_or_close(&box1, &box2, 0));
    }
    // Two rows of two boxes must group into two lines of two.
    #[test]
    fn test_find_text_lines() {
        let regions = vec![
            (10, 10, 50, 20),
            (70, 12, 50, 20),
            (10, 50, 50, 20),
            (70, 52, 50, 20),
        ];
        let img = GrayImage::new(200, 100);
        let lines = find_text_lines(&img, &regions);
        // Should find 2 lines
        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0].len(), 2);
        assert_eq!(lines[1].len(), 2);
    }
    // A checkerboard patch should measure ~50% ink density.
    #[test]
    fn test_calculate_density() {
        let mut img = GrayImage::new(100, 100);
        // Fill region with 50% black pixels
        for y in 10..30 {
            for x in 10..30 {
                let val = if (x + y) % 2 == 0 { 0 } else { 255 };
                img.put_pixel(x, y, Luma([val]));
            }
        }
        let density = calculate_density(&img, (10, 10, 20, 20));
        assert!((density - 0.5).abs() < 0.1);
    }
    // Only boxes at or above the area cutoff survive.
    #[test]
    fn test_filter_by_size() {
        let mut bboxes = HashMap::new();
        bboxes.insert(1, (10, 10, 50, 50)); // 2500 pixels
        bboxes.insert(2, (100, 100, 10, 10)); // 100 pixels
        bboxes.insert(3, (200, 200, 30, 30)); // 900 pixels
        let filtered = filter_by_size(bboxes, 500);
        // Should keep regions 1 and 3
        assert_eq!(filtered.len(), 2);
    }
}

View File

@@ -0,0 +1,400 @@
//! Image transformation functions for preprocessing
use super::{PreprocessError, Result};
use image::{DynamicImage, GrayImage, Luma};
use imageproc::filter::gaussian_blur_f32;
use std::f32;
/// Convert image to grayscale
///
/// Thin wrapper over the `image` crate's `to_luma8`, which applies the
/// crate's standard weighted RGB-to-luma conversion (see the `image`
/// crate docs for the exact coefficients).
///
/// # Arguments
/// * `image` - Input color or grayscale image
///
/// # Returns
/// 8-bit single-channel grayscale image
pub fn to_grayscale(image: &DynamicImage) -> GrayImage {
    image.to_luma8()
}
/// Apply Gaussian blur for noise reduction
///
/// # Arguments
/// * `image` - Input grayscale image
/// * `sigma` - Standard deviation of the Gaussian kernel; must be positive
///
/// # Returns
/// Blurred image
///
/// # Errors
/// `PreprocessError::InvalidParameters` when `sigma <= 0.0`.
///
/// # Example
/// ```no_run
/// use ruvector_scipix::preprocess::transforms::gaussian_blur;
/// # use image::GrayImage;
/// # let image = GrayImage::new(100, 100);
/// let blurred = gaussian_blur(&image, 1.5).unwrap();
/// ```
pub fn gaussian_blur(image: &GrayImage, sigma: f32) -> Result<GrayImage> {
    match sigma {
        s if s <= 0.0 => Err(PreprocessError::InvalidParameters(
            "Sigma must be positive".to_string(),
        )),
        s => Ok(gaussian_blur_f32(image, s)),
    }
}
/// Sharpen image using unsharp mask
///
/// Computes `original + amount * (original - blurred)` per pixel, clamped
/// to the u8 range.
///
/// # Arguments
/// * `image` - Input grayscale image
/// * `sigma` - Gaussian blur sigma (must be positive)
/// * `amount` - Sharpening strength (typically 0.5-2.0, must be non-negative)
///
/// # Returns
/// Sharpened image
///
/// # Errors
/// `PreprocessError::InvalidParameters` when `sigma <= 0.0` or `amount < 0.0`.
pub fn sharpen(image: &GrayImage, sigma: f32, amount: f32) -> Result<GrayImage> {
    if sigma <= 0.0 || amount < 0.0 {
        return Err(PreprocessError::InvalidParameters(
            "Invalid sharpening parameters".to_string(),
        ));
    }
    let blurred = gaussian_blur_f32(image, sigma);
    let (width, height) = image.dimensions();
    let result = GrayImage::from_fn(width, height, |x, y| {
        let base = image.get_pixel(x, y)[0] as f32;
        let soft = blurred.get_pixel(x, y)[0] as f32;
        // Unsharp mask: boost the difference between original and blur.
        let boosted = base + amount * (base - soft);
        Luma([boosted.clamp(0.0, 255.0) as u8])
    });
    Ok(result)
}
/// Calculate optimal threshold using Otsu's method
///
/// Implements full Otsu's algorithm for automatic threshold selection
/// based on maximizing inter-class variance.
///
/// # Arguments
/// * `image` - Input grayscale image
///
/// # Returns
/// Optimal threshold value (0-255)
///
/// # Example
/// ```no_run
/// use ruvector_scipix::preprocess::transforms::otsu_threshold;
/// # use image::GrayImage;
/// # let image = GrayImage::new(100, 100);
/// let threshold = otsu_threshold(&image).unwrap();
/// println!("Optimal threshold: {}", threshold);
/// ```
pub fn otsu_threshold(image: &GrayImage) -> Result<u8> {
    // Calculate histogram
    let mut histogram = [0u32; 256];
    for pixel in image.pixels() {
        histogram[pixel[0] as usize] += 1;
    }
    let total_pixels = (image.width() * image.height()) as f64;
    // sum_total = sum over intensities of (intensity * count); lets the
    // foreground mean be derived from the running background sum below.
    let mut sum_total = 0.0;
    for (i, &count) in histogram.iter().enumerate() {
        sum_total += (i as f64) * (count as f64);
    }
    let mut sum_background = 0.0;
    let mut weight_background = 0.0;
    let mut max_variance = 0.0;
    let mut threshold = 0u8;
    // Find threshold that maximizes inter-class variance
    for (t, &count) in histogram.iter().enumerate() {
        weight_background += count as f64;
        // No background pixels yet: split is undefined, skip this level.
        if weight_background == 0.0 {
            continue;
        }
        let weight_foreground = total_pixels - weight_background;
        // All pixels are background from here on; no further split exists.
        if weight_foreground == 0.0 {
            break;
        }
        sum_background += (t as f64) * (count as f64);
        let mean_background = sum_background / weight_background;
        let mean_foreground = (sum_total - sum_background) / weight_foreground;
        // Inter-class variance (unnormalized — sufficient for the argmax).
        // Strict comparison keeps the LOWEST threshold on ties.
        let variance =
            weight_background * weight_foreground * (mean_background - mean_foreground).powi(2);
        if variance > max_variance {
            max_variance = variance;
            threshold = t as u8;
        }
    }
    Ok(threshold)
}
/// Apply binary thresholding
///
/// Pixels at or above the threshold become 255 (white); all others 0.
///
/// # Arguments
/// * `image` - Input grayscale image
/// * `threshold_val` - Threshold value (0-255)
///
/// # Returns
/// Binary image (0 or 255)
pub fn threshold(image: &GrayImage, threshold_val: u8) -> GrayImage {
    let (width, height) = image.dimensions();
    GrayImage::from_fn(width, height, |x, y| {
        if image.get_pixel(x, y)[0] >= threshold_val {
            Luma([255])
        } else {
            Luma([0])
        }
    })
}
/// Apply adaptive thresholding using local window statistics
///
/// Uses a sliding window to calculate local mean and applies threshold
/// relative to local statistics. Better for images with varying illumination.
///
/// # Arguments
/// * `image` - Input grayscale image
/// * `window_size` - Size of local window (must be odd)
///
/// # Returns
/// Binary image with adaptive thresholding applied
///
/// # Example
/// ```no_run
/// use ruvector_scipix::preprocess::transforms::adaptive_threshold;
/// # use image::GrayImage;
/// # let image = GrayImage::new(100, 100);
/// let binary = adaptive_threshold(&image, 15).unwrap();
/// ```
pub fn adaptive_threshold(image: &GrayImage, window_size: u32) -> Result<GrayImage> {
    // An odd window has a well-defined center pixel.
    if window_size % 2 == 0 {
        return Err(PreprocessError::InvalidParameters(
            "Window size must be odd".to_string(),
        ));
    }
    let (width, height) = image.dimensions();
    let mut result = GrayImage::new(width, height);
    let half_window = (window_size / 2) as i32;
    // Use integral image for fast window sum calculation (O(1) per window)
    let integral = compute_integral_image(image);
    for y in 0..height as i32 {
        for x in 0..width as i32 {
            // Define window bounds, clipped to the image so border pixels
            // average over a smaller (but still correct) window.
            let x1 = (x - half_window).max(0);
            let y1 = (y - half_window).max(0);
            let x2 = (x + half_window + 1).min(width as i32);
            let y2 = (y + half_window + 1).min(height as i32);
            // Calculate mean using integral image
            let area = ((x2 - x1) * (y2 - y1)) as f64;
            let sum = get_integral_sum(&integral, x1, y1, x2, y2);
            let mean = (sum as f64 / area) as u8;
            // Compare against (mean - bias): pixels only slightly darker
            // than their neighborhood stay white, suppressing speckle noise.
            let pixel = image.get_pixel(x as u32, y as u32)[0];
            let bias = 5; // Small bias to reduce noise
            let value = if pixel >= mean.saturating_sub(bias) {
                255
            } else {
                0
            };
            result.put_pixel(x as u32, y as u32, Luma([value]));
        }
    }
    Ok(result)
}
/// Build a summed-area table with a zero border row/column, so rectangle
/// sums need no edge-case handling: `integral[y][x]` is the sum of all
/// pixels above and left of (x, y) exclusive.
fn compute_integral_image(image: &GrayImage) -> Vec<Vec<u64>> {
    let (width, height) = image.dimensions();
    let (w, h) = (width as usize, height as usize);
    let mut integral = vec![vec![0u64; w + 1]; h + 1];
    for y in 1..=h {
        for x in 1..=w {
            let value = image.get_pixel((x - 1) as u32, (y - 1) as u32)[0] as u64;
            // Standard recurrence: current pixel + above + left - diagonal.
            integral[y][x] =
                value + integral[y - 1][x] + integral[y][x - 1] - integral[y - 1][x - 1];
        }
    }
    integral
}
/// Sum of the half-open rectangle [x1, x2) x [y1, y2) via the
/// inclusion-exclusion identity on the integral image. Coordinates must be
/// non-negative and within the table (callers clamp them beforehand).
fn get_integral_sum(integral: &[Vec<u64>], x1: i32, y1: i32, x2: i32, y2: i32) -> u64 {
    let (x1, y1) = (x1 as usize, y1 as usize);
    let (x2, y2) = (x2 as usize, y2 as usize);
    let bottom_right = integral[y2][x2];
    let top_right = integral[y1][x2];
    let bottom_left = integral[y2][x1];
    let top_left = integral[y1][x1];
    // (BR + TL) >= (TR + BL) for a valid table, so this never underflows.
    (bottom_right + top_left) - (top_right + bottom_left)
}
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    /// Diagonal-gradient grayscale fixture: brightness grows with x + y.
    fn create_gradient_image(width: u32, height: u32) -> GrayImage {
        let mut img = GrayImage::new(width, height);
        for y in 0..height {
            for x in 0..width {
                let val = ((x + y) * 255 / (width + height)) as u8;
                img.put_pixel(x, y, Luma([val]));
            }
        }
        img
    }
    // Conversion preserves dimensions.
    #[test]
    fn test_to_grayscale() {
        let img = DynamicImage::new_rgb8(100, 100);
        let gray = to_grayscale(&img);
        assert_eq!(gray.dimensions(), (100, 100));
    }
    // Valid sigma blurs successfully and keeps dimensions.
    #[test]
    fn test_gaussian_blur() {
        let img = create_gradient_image(50, 50);
        let blurred = gaussian_blur(&img, 1.0);
        assert!(blurred.is_ok());
        let result = blurred.unwrap();
        assert_eq!(result.dimensions(), img.dimensions());
    }
    // Non-positive sigma is rejected.
    #[test]
    fn test_gaussian_blur_invalid_sigma() {
        let img = create_gradient_image(50, 50);
        let result = gaussian_blur(&img, -1.0);
        assert!(result.is_err());
    }
    // Unsharp mask succeeds with valid parameters.
    #[test]
    fn test_sharpen() {
        let img = create_gradient_image(50, 50);
        let sharpened = sharpen(&img, 1.0, 1.5);
        assert!(sharpened.is_ok());
        let result = sharpened.unwrap();
        assert_eq!(result.dimensions(), img.dimensions());
    }
    // Otsu on a bimodal image must land between the two modes.
    #[test]
    fn test_otsu_threshold() {
        // Create bimodal image (good for Otsu)
        let mut img = GrayImage::new(100, 100);
        for y in 0..100 {
            for x in 0..100 {
                let val = if x < 50 { 50 } else { 200 };
                img.put_pixel(x, y, Luma([val]));
            }
        }
        let threshold = otsu_threshold(&img);
        assert!(threshold.is_ok());
        let t = threshold.unwrap();
        // Should be somewhere between the two values (not necessarily strictly between)
        // Otsu finds optimal threshold which could be at boundary
        assert!(
            t >= 50 && t <= 200,
            "threshold {} should be between 50 and 200",
            t
        );
    }
    // Fixed threshold must emit only 0 or 255 pixels.
    #[test]
    fn test_threshold() {
        let img = create_gradient_image(100, 100);
        let binary = threshold(&img, 128);
        assert_eq!(binary.dimensions(), img.dimensions());
        // Check that output is binary
        for pixel in binary.pixels() {
            let val = pixel[0];
            assert!(val == 0 || val == 255);
        }
    }
    // Adaptive thresholding also yields a strictly binary image.
    #[test]
    fn test_adaptive_threshold() {
        let img = create_gradient_image(100, 100);
        let binary = adaptive_threshold(&img, 15);
        assert!(binary.is_ok());
        let result = binary.unwrap();
        assert_eq!(result.dimensions(), img.dimensions());
        // Check binary output
        for pixel in result.pixels() {
            let val = pixel[0];
            assert!(val == 0 || val == 255);
        }
    }
    // Even window sizes are rejected.
    #[test]
    fn test_adaptive_threshold_invalid_window() {
        let img = create_gradient_image(50, 50);
        let result = adaptive_threshold(&img, 16); // Even number
        assert!(result.is_err());
    }
    // Whole-image sum over an all-ones image equals the pixel count.
    #[test]
    fn test_integral_image() {
        let mut img = GrayImage::new(3, 3);
        for y in 0..3 {
            for x in 0..3 {
                img.put_pixel(x, y, Luma([1]));
            }
        }
        let integral = compute_integral_image(&img);
        // Check 3x3 sum
        let sum = get_integral_sum(&integral, 0, 0, 3, 3);
        assert_eq!(sum, 9); // 3x3 image with all 1s
    }
    // Threshold 0 whitens everything (>= 0); threshold 255 blackens all
    // gradient pixels (the fixture never reaches 255).
    #[test]
    fn test_threshold_extremes() {
        let img = create_gradient_image(100, 100);
        // Threshold at 0 should make everything white
        let binary = threshold(&img, 0);
        assert!(binary.pixels().all(|p| p[0] == 255));
        // Threshold at 255 should make everything black
        let binary = threshold(&img, 255);
        assert!(binary.pixels().all(|p| p[0] == 0));
    }
}