Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
353
vendor/ruvector/examples/scipix/src/preprocess/deskew.rs
vendored
Normal file
353
vendor/ruvector/examples/scipix/src/preprocess/deskew.rs
vendored
Normal file
@@ -0,0 +1,353 @@
|
||||
//! Skew detection and correction using Hough transform
|
||||
|
||||
use super::{PreprocessError, Result};
|
||||
use image::{GrayImage, Luma};
|
||||
use imageproc::edges::canny;
|
||||
use imageproc::geometric_transformations::{rotate_about_center, Interpolation};
|
||||
use std::collections::BTreeMap;
|
||||
use std::f32;
|
||||
|
||||
/// Detect skew angle using Hough transform
|
||||
///
|
||||
/// Applies edge detection and Hough transform to find dominant lines,
|
||||
/// then calculates average skew angle.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `image` - Input grayscale image
|
||||
///
|
||||
/// # Returns
|
||||
/// Skew angle in degrees (positive = clockwise)
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use ruvector_scipix::preprocess::deskew::detect_skew_angle;
|
||||
/// # use image::GrayImage;
|
||||
/// # let image = GrayImage::new(100, 100);
|
||||
/// let angle = detect_skew_angle(&image).unwrap();
|
||||
/// println!("Detected skew: {:.2}°", angle);
|
||||
/// ```
|
||||
pub fn detect_skew_angle(image: &GrayImage) -> Result<f32> {
|
||||
let (width, height) = image.dimensions();
|
||||
|
||||
if width < 20 || height < 20 {
|
||||
return Err(PreprocessError::InvalidParameters(
|
||||
"Image too small for skew detection".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
// Apply Canny edge detection
|
||||
let edges = canny(image, 50.0, 100.0);
|
||||
|
||||
// Perform Hough transform to detect lines
|
||||
let angles = detect_lines_hough(&edges, width, height)?;
|
||||
|
||||
if angles.is_empty() {
|
||||
return Ok(0.0);
|
||||
}
|
||||
|
||||
// Calculate weighted average angle
|
||||
let total_weight: f32 = angles.values().sum();
|
||||
let weighted_sum: f32 = angles
|
||||
.iter()
|
||||
.map(|(angle_key, weight)| (*angle_key as f32 / 10.0) * weight)
|
||||
.sum();
|
||||
|
||||
let average_angle = if total_weight > 0.0 {
|
||||
weighted_sum / total_weight
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
Ok(average_angle)
|
||||
}
|
||||
|
||||
/// Detect lines using Hough transform
///
/// Returns map of angles to their confidence weights.
///
/// Keys of the returned `BTreeMap` are `angle * 10` (tenths of a degree),
/// so fractional angles can use integer keys; values are the summed Hough
/// votes for that angle.
///
/// NOTE(review): `num_angles` is 360 while `theta` is `theta_idx` in
/// degrees, so theta sweeps 0..2π — each line is effectively visited twice
/// (once with negated rho). The `theta_idx - 180.0` shift plus
/// `normalize_angle` folds everything into ±45°, so the duplicates appear
/// harmless for the weighted average, but confirm before changing the
/// angular resolution.
fn detect_lines_hough(edges: &GrayImage, width: u32, height: u32) -> Result<BTreeMap<i32, f32>> {
    // The image diagonal bounds |rho|.
    let max_rho = ((width * width + height * height) as f32).sqrt() as usize;
    let num_angles = 360;

    // Accumulator array for Hough space: accumulator[theta_idx][rho_idx] = votes.
    let mut accumulator = vec![vec![0u32; max_rho]; num_angles];

    // Populate accumulator
    for y in 0..height {
        for x in 0..width {
            if edges.get_pixel(x, y)[0] > 128 {
                // Edge pixel found: vote for every candidate line through it.
                for theta_idx in 0..num_angles {
                    let theta = (theta_idx as f32) * std::f32::consts::PI / 180.0;
                    let rho = (x as f32) * theta.cos() + (y as f32) * theta.sin();
                    // Shift rho by max_rho/2 so negative rho maps to the lower
                    // half of the accumulator.
                    // NOTE(review): a still-negative shifted rho saturates to 0
                    // on the `as usize` cast, lumping those votes into bucket 0;
                    // confirm this is acceptable before tuning the transform.
                    let rho_idx = (rho + max_rho as f32 / 2.0) as usize;

                    if rho_idx < max_rho {
                        accumulator[theta_idx][rho_idx] += 1;
                    }
                }
            }
        }
    }

    // Find peaks in accumulator
    let mut angle_votes: BTreeMap<i32, f32> = BTreeMap::new();
    // Threshold scales with image size so small images still produce peaks.
    let threshold = (width.min(height) / 10) as u32; // Adaptive threshold

    for theta_idx in 0..num_angles {
        for rho_idx in 0..max_rho {
            let votes = accumulator[theta_idx][rho_idx];
            if votes > threshold {
                let angle = (theta_idx as f32) - 180.0; // Convert to -180 to 180
                let normalized_angle = normalize_angle(angle);

                // Only consider angles near horizontal (within ±45°)
                if normalized_angle.abs() < 45.0 {
                    // Use integer keys for BTreeMap (angle * 10 to preserve precision)
                    let key = (normalized_angle * 10.0) as i32;
                    *angle_votes.entry(key).or_insert(0.0) += votes as f32;
                }
            }
        }
    }

    Ok(angle_votes)
}
|
||||
|
||||
/// Normalize angle to -45 to +45 degree range
fn normalize_angle(angle: f32) -> f32 {
    // Fold into (-90, 90] by removing whole half-turns.
    let wrapped = angle % 180.0;
    let folded = if wrapped > 90.0 {
        wrapped - 180.0
    } else if wrapped < -90.0 {
        wrapped + 180.0
    } else {
        wrapped
    };

    // Clamp to the ±45° band used for skew estimation.
    folded.clamp(-45.0, 45.0)
}
|
||||
|
||||
/// Deskew image using detected skew angle
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `image` - Input grayscale image
|
||||
/// * `angle` - Skew angle in degrees (from detect_skew_angle)
|
||||
///
|
||||
/// # Returns
|
||||
/// Deskewed image with white background fill
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use ruvector_scipix::preprocess::deskew::{detect_skew_angle, deskew_image};
|
||||
/// # use image::GrayImage;
|
||||
/// # let image = GrayImage::new(100, 100);
|
||||
/// let angle = detect_skew_angle(&image).unwrap();
|
||||
/// let deskewed = deskew_image(&image, angle).unwrap();
|
||||
/// ```
|
||||
pub fn deskew_image(image: &GrayImage, angle: f32) -> Result<GrayImage> {
|
||||
if angle.abs() < 0.1 {
|
||||
// No deskewing needed
|
||||
return Ok(image.clone());
|
||||
}
|
||||
|
||||
let radians = -angle.to_radians(); // Negate for correct direction
|
||||
let deskewed = rotate_about_center(
|
||||
image,
|
||||
radians,
|
||||
Interpolation::Bilinear,
|
||||
Luma([255]), // White background
|
||||
);
|
||||
|
||||
Ok(deskewed)
|
||||
}
|
||||
|
||||
/// Auto-deskew image with confidence threshold
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `image` - Input grayscale image
|
||||
/// * `max_angle` - Maximum angle to correct (degrees)
|
||||
///
|
||||
/// # Returns
|
||||
/// Tuple of (deskewed_image, angle_applied)
|
||||
pub fn auto_deskew(image: &GrayImage, max_angle: f32) -> Result<(GrayImage, f32)> {
|
||||
let angle = detect_skew_angle(image)?;
|
||||
|
||||
if angle.abs() <= max_angle {
|
||||
let deskewed = deskew_image(image, angle)?;
|
||||
Ok((deskewed, angle))
|
||||
} else {
|
||||
// Angle too large, don't correct
|
||||
Ok((image.clone(), 0.0))
|
||||
}
|
||||
}
|
||||
|
||||
/// Detect skew using projection profile method (alternative approach)
|
||||
///
|
||||
/// This is a faster but less accurate method compared to Hough transform
|
||||
pub fn detect_skew_projection(image: &GrayImage) -> Result<f32> {
|
||||
let angles = [
|
||||
-45.0, -30.0, -15.0, -10.0, -5.0, 0.0, 5.0, 10.0, 15.0, 30.0, 45.0,
|
||||
];
|
||||
let mut max_variance = 0.0;
|
||||
let mut best_angle = 0.0;
|
||||
|
||||
for &angle in &angles {
|
||||
let variance = calculate_projection_variance(image, angle);
|
||||
if variance > max_variance {
|
||||
max_variance = variance;
|
||||
best_angle = angle;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(best_angle)
|
||||
}
|
||||
|
||||
/// Calculate projection variance for a given angle
|
||||
fn calculate_projection_variance(image: &GrayImage, angle: f32) -> f32 {
|
||||
let (width, height) = image.dimensions();
|
||||
let rad = angle.to_radians();
|
||||
let cos_a = rad.cos();
|
||||
let sin_a = rad.sin();
|
||||
|
||||
let mut projection = vec![0u32; height as usize];
|
||||
|
||||
for y in 0..height {
|
||||
for x in 0..width {
|
||||
let pixel = image.get_pixel(x, y)[0];
|
||||
if pixel < 128 {
|
||||
let proj_y = ((y as f32) * cos_a - (x as f32) * sin_a) as i32;
|
||||
if proj_y >= 0 && proj_y < height as i32 {
|
||||
projection[proj_y as usize] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate variance
|
||||
if projection.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let mean = projection.iter().sum::<u32>() as f32 / projection.len() as f32;
|
||||
projection
|
||||
.iter()
|
||||
.map(|&x| {
|
||||
let diff = x as f32 - mean;
|
||||
diff * diff
|
||||
})
|
||||
.sum::<f32>()
|
||||
/ projection.len() as f32
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a 200x100 white image with four full-width horizontal black
    /// lines — a stand-in for perfectly level lines of text.
    fn create_test_image() -> GrayImage {
        let mut img = GrayImage::new(200, 100);

        // Fill with white
        for pixel in img.pixels_mut() {
            *pixel = Luma([255]);
        }

        // Draw some horizontal lines (simulating text)
        for y in [20, 40, 60, 80] {
            for x in 10..190 {
                img.put_pixel(x, y, Luma([0]));
            }
        }

        img
    }

    // Straight synthetic lines should be detected as (near) zero skew.
    #[test]
    fn test_detect_skew_straight() {
        let img = create_test_image();
        let angle = detect_skew_angle(&img);

        assert!(angle.is_ok());
        let a = angle.unwrap();
        // Should detect near-zero skew for straight lines
        assert!(a.abs() < 10.0);
    }

    // Rotation must preserve the image dimensions.
    #[test]
    fn test_deskew_image() {
        let img = create_test_image();

        // Deskew by 5 degrees
        let deskewed = deskew_image(&img, 5.0);
        assert!(deskewed.is_ok());

        let result = deskewed.unwrap();
        assert_eq!(result.dimensions(), img.dimensions());
    }

    // Angles under the 0.1° threshold take the clone-only fast path.
    #[test]
    fn test_deskew_no_change() {
        let img = create_test_image();

        // Deskew by ~0 degrees
        let deskewed = deskew_image(&img, 0.05);
        assert!(deskewed.is_ok());

        let result = deskewed.unwrap();
        assert_eq!(result.dimensions(), img.dimensions());
    }

    // auto_deskew must never apply a correction beyond max_angle.
    #[test]
    fn test_auto_deskew() {
        let img = create_test_image();
        let result = auto_deskew(&img, 15.0);

        assert!(result.is_ok());
        let (deskewed, angle) = result.unwrap();

        assert_eq!(deskewed.dimensions(), img.dimensions());
        assert!(angle.abs() <= 15.0);
    }

    // Spot-checks of the fold-then-clamp normalization.
    #[test]
    fn test_normalize_angle() {
        assert!((normalize_angle(0.0) - 0.0).abs() < 0.01);

        // Test normalization behavior
        let angle_100 = normalize_angle(100.0);
        assert!(angle_100.abs() <= 45.0); // Should be clamped to ±45°

        let angle_neg100 = normalize_angle(-100.0);
        assert!(angle_neg100.abs() <= 45.0); // Should be clamped to ±45°

        assert!((normalize_angle(50.0) - 45.0).abs() < 0.01); // Clamped to 45
        assert!((normalize_angle(-50.0) - -45.0).abs() < 0.01); // Clamped to -45
    }

    // The projection-profile fallback should also see roughly level text.
    #[test]
    fn test_detect_skew_projection() {
        let img = create_test_image();
        let angle = detect_skew_projection(&img);

        assert!(angle.is_ok());
        let a = angle.unwrap();
        assert!(a.abs() < 20.0);
    }

    // Images under 20x20 are rejected with InvalidParameters.
    #[test]
    fn test_skew_small_image_error() {
        let small_img = GrayImage::new(10, 10);
        let result = detect_skew_angle(&small_img);
        assert!(result.is_err());
    }

    // The 0° profile of horizontal lines must have non-zero variance.
    #[test]
    fn test_projection_variance() {
        let img = create_test_image();

        let var_0 = calculate_projection_variance(&img, 0.0);
        let var_30 = calculate_projection_variance(&img, 30.0);

        // Variance at 0° should be higher for horizontal lines
        assert!(var_0 > 0.0);
        println!("Variance at 0°: {}, at 30°: {}", var_0, var_30);
    }
}
|
||||
420
vendor/ruvector/examples/scipix/src/preprocess/enhancement.rs
vendored
Normal file
420
vendor/ruvector/examples/scipix/src/preprocess/enhancement.rs
vendored
Normal file
@@ -0,0 +1,420 @@
|
||||
//! Image enhancement functions for improving OCR accuracy
|
||||
|
||||
use super::{PreprocessError, Result};
|
||||
use image::{GrayImage, Luma};
|
||||
use std::cmp;
|
||||
|
||||
/// Contrast Limited Adaptive Histogram Equalization (CLAHE)
///
/// Improves local contrast while avoiding over-amplification of noise.
/// Divides image into tiles and applies histogram equalization with clipping.
///
/// # Arguments
/// * `image` - Input grayscale image
/// * `clip_limit` - Contrast clipping limit (typically 2.0-4.0)
/// * `tile_size` - Size of contextual regions (typically 8x8 or 16x16)
///
/// # Returns
/// Enhanced image with improved local contrast
///
/// # Example
/// ```no_run
/// use ruvector_scipix::preprocess::enhancement::clahe;
/// # use image::GrayImage;
/// # let image = GrayImage::new(100, 100);
/// let enhanced = clahe(&image, 2.0, 8).unwrap();
/// ```
pub fn clahe(image: &GrayImage, clip_limit: f32, tile_size: u32) -> Result<GrayImage> {
    // Guard the two parameters that would cause division-by-zero or a
    // meaningless clip below.
    if tile_size == 0 || clip_limit <= 0.0 {
        return Err(PreprocessError::InvalidParameters(
            "Invalid CLAHE parameters".to_string(),
        ));
    }

    let (width, height) = image.dimensions();
    let mut result = GrayImage::new(width, height);

    // Ceiling division so partial edge tiles are included.
    let tiles_x = (width + tile_size - 1) / tile_size;
    let tiles_y = (height + tile_size - 1) / tile_size;

    // Compute histograms and CDFs for each tile
    let mut tile_cdfs = vec![vec![Vec::new(); tiles_x as usize]; tiles_y as usize];

    for ty in 0..tiles_y {
        for tx in 0..tiles_x {
            let x_start = tx * tile_size;
            let y_start = ty * tile_size;
            let x_end = cmp::min(x_start + tile_size, width);
            let y_end = cmp::min(y_start + tile_size, height);

            // Each CDF is a 256-entry lookup mapping pixel value -> [0, 1].
            let cdf = compute_tile_cdf(image, x_start, y_start, x_end, y_end, clip_limit);
            tile_cdfs[ty as usize][tx as usize] = cdf;
        }
    }

    // Interpolate and apply transformation
    // NOTE(review): interpolation is anchored at tile origins (ratios are the
    // fractional position within the pixel's own tile), not at tile centers
    // as in textbook CLAHE — confirm whether the half-tile offset is intended.
    for y in 0..height {
        for x in 0..width {
            let pixel = image.get_pixel(x, y)[0];

            // Find tile coordinates
            let tx = (x as f32 / tile_size as f32).floor();
            let ty = (y as f32 / tile_size as f32).floor();

            // Calculate interpolation weights
            let x_ratio = (x as f32 / tile_size as f32) - tx;
            let y_ratio = (y as f32 / tile_size as f32) - ty;

            let tx = tx as usize;
            let ty = ty as usize;

            // Bilinear interpolation between neighboring tiles; pixels in the
            // last tile row/column fall back to edge or single-tile lookups.
            let value = if tx < tiles_x as usize - 1 && ty < tiles_y as usize - 1 {
                let v00 = tile_cdfs[ty][tx][pixel as usize];
                let v10 = tile_cdfs[ty][tx + 1][pixel as usize];
                let v01 = tile_cdfs[ty + 1][tx][pixel as usize];
                let v11 = tile_cdfs[ty + 1][tx + 1][pixel as usize];

                let v0 = v00 * (1.0 - x_ratio) + v10 * x_ratio;
                let v1 = v01 * (1.0 - x_ratio) + v11 * x_ratio;

                v0 * (1.0 - y_ratio) + v1 * y_ratio
            } else if tx < tiles_x as usize - 1 {
                // Bottom tile row: interpolate horizontally only.
                let v0 = tile_cdfs[ty][tx][pixel as usize];
                let v1 = tile_cdfs[ty][tx + 1][pixel as usize];
                v0 * (1.0 - x_ratio) + v1 * x_ratio
            } else if ty < tiles_y as usize - 1 {
                // Right tile column: interpolate vertically only.
                let v0 = tile_cdfs[ty][tx][pixel as usize];
                let v1 = tile_cdfs[ty + 1][tx][pixel as usize];
                v0 * (1.0 - y_ratio) + v1 * y_ratio
            } else {
                // Bottom-right corner tile: no neighbors to blend with.
                tile_cdfs[ty][tx][pixel as usize]
            };

            // CDF values are in [0, 1]; scale back to the 0-255 range.
            result.put_pixel(x, y, Luma([(value * 255.0) as u8]));
        }
    }

    Ok(result)
}
|
||||
|
||||
/// Compute clipped histogram and CDF for a tile
|
||||
fn compute_tile_cdf(
|
||||
image: &GrayImage,
|
||||
x_start: u32,
|
||||
y_start: u32,
|
||||
x_end: u32,
|
||||
y_end: u32,
|
||||
clip_limit: f32,
|
||||
) -> Vec<f32> {
|
||||
// Calculate histogram
|
||||
let mut histogram = [0u32; 256];
|
||||
let mut pixel_count = 0;
|
||||
|
||||
for y in y_start..y_end {
|
||||
for x in x_start..x_end {
|
||||
let pixel = image.get_pixel(x, y)[0];
|
||||
histogram[pixel as usize] += 1;
|
||||
pixel_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if pixel_count == 0 {
|
||||
return vec![0.0; 256];
|
||||
}
|
||||
|
||||
// Apply contrast limiting
|
||||
let clip_limit_actual = (clip_limit * pixel_count as f32 / 256.0) as u32;
|
||||
let mut clipped_total = 0u32;
|
||||
|
||||
for h in histogram.iter_mut() {
|
||||
if *h > clip_limit_actual {
|
||||
clipped_total += *h - clip_limit_actual;
|
||||
*h = clip_limit_actual;
|
||||
}
|
||||
}
|
||||
|
||||
// Redistribute clipped pixels
|
||||
let redistribute = clipped_total / 256;
|
||||
let remainder = clipped_total % 256;
|
||||
|
||||
for (i, h) in histogram.iter_mut().enumerate() {
|
||||
*h += redistribute;
|
||||
if i < remainder as usize {
|
||||
*h += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute cumulative distribution function (CDF)
|
||||
let mut cdf = vec![0.0; 256];
|
||||
let mut cumsum = 0u32;
|
||||
|
||||
for (i, &h) in histogram.iter().enumerate() {
|
||||
cumsum += h;
|
||||
cdf[i] = cumsum as f32 / pixel_count as f32;
|
||||
}
|
||||
|
||||
cdf
|
||||
}
|
||||
|
||||
/// Normalize brightness across the image
|
||||
///
|
||||
/// Adjusts image to have mean brightness of 128
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `image` - Input grayscale image
|
||||
///
|
||||
/// # Returns
|
||||
/// Brightness-normalized image
|
||||
pub fn normalize_brightness(image: &GrayImage) -> GrayImage {
|
||||
let (width, height) = image.dimensions();
|
||||
let pixel_count = (width * height) as f32;
|
||||
|
||||
// Calculate mean brightness
|
||||
let sum: u32 = image.pixels().map(|p| p[0] as u32).sum();
|
||||
let mean = sum as f32 / pixel_count;
|
||||
|
||||
let target_mean = 128.0;
|
||||
let adjustment = target_mean - mean;
|
||||
|
||||
// Apply adjustment
|
||||
let mut result = GrayImage::new(width, height);
|
||||
for (x, y, pixel) in image.enumerate_pixels() {
|
||||
let adjusted = (pixel[0] as f32 + adjustment).clamp(0.0, 255.0) as u8;
|
||||
result.put_pixel(x, y, Luma([adjusted]));
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Remove shadows from document image
|
||||
///
|
||||
/// Uses morphological operations to estimate and subtract background
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `image` - Input grayscale image
|
||||
///
|
||||
/// # Returns
|
||||
/// Image with reduced shadows
|
||||
pub fn remove_shadows(image: &GrayImage) -> Result<GrayImage> {
|
||||
let (width, height) = image.dimensions();
|
||||
|
||||
// Estimate background using dilation (morphological closing)
|
||||
let kernel_size = (width.min(height) / 20).max(15) as usize;
|
||||
let background = estimate_background(image, kernel_size);
|
||||
|
||||
// Subtract background
|
||||
let mut result = GrayImage::new(width, height);
|
||||
for (x, y, pixel) in image.enumerate_pixels() {
|
||||
let bg = background.get_pixel(x, y)[0] as i32;
|
||||
let fg = pixel[0] as i32;
|
||||
|
||||
// Normalize: (foreground / background) * 255
|
||||
let normalized = if bg > 0 {
|
||||
((fg as f32 / bg as f32) * 255.0).min(255.0) as u8
|
||||
} else {
|
||||
fg as u8
|
||||
};
|
||||
|
||||
result.put_pixel(x, y, Luma([normalized]));
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Estimate background using max filter (dilation)
|
||||
fn estimate_background(image: &GrayImage, kernel_size: usize) -> GrayImage {
|
||||
let (width, height) = image.dimensions();
|
||||
let mut background = GrayImage::new(width, height);
|
||||
let half_kernel = (kernel_size / 2) as i32;
|
||||
|
||||
for y in 0..height {
|
||||
for x in 0..width {
|
||||
let mut max_val = 0u8;
|
||||
|
||||
// Find maximum in kernel window
|
||||
for ky in -(half_kernel)..=half_kernel {
|
||||
for kx in -(half_kernel)..=half_kernel {
|
||||
let px = (x as i32 + kx).clamp(0, width as i32 - 1) as u32;
|
||||
let py = (y as i32 + ky).clamp(0, height as i32 - 1) as u32;
|
||||
|
||||
let val = image.get_pixel(px, py)[0];
|
||||
if val > max_val {
|
||||
max_val = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
background.put_pixel(x, y, Luma([max_val]));
|
||||
}
|
||||
}
|
||||
|
||||
background
|
||||
}
|
||||
|
||||
/// Enhance contrast using simple linear stretch
|
||||
///
|
||||
/// Maps min-max range to 0-255
|
||||
pub fn contrast_stretch(image: &GrayImage) -> GrayImage {
|
||||
// Find min and max values
|
||||
let mut min_val = 255u8;
|
||||
let mut max_val = 0u8;
|
||||
|
||||
for pixel in image.pixels() {
|
||||
let val = pixel[0];
|
||||
if val < min_val {
|
||||
min_val = val;
|
||||
}
|
||||
if val > max_val {
|
||||
max_val = val;
|
||||
}
|
||||
}
|
||||
|
||||
if min_val == max_val {
|
||||
return image.clone();
|
||||
}
|
||||
|
||||
// Stretch contrast
|
||||
let (width, height) = image.dimensions();
|
||||
let mut result = GrayImage::new(width, height);
|
||||
let range = (max_val - min_val) as f32;
|
||||
|
||||
for (x, y, pixel) in image.enumerate_pixels() {
|
||||
let val = pixel[0];
|
||||
let stretched = ((val - min_val) as f32 / range * 255.0) as u8;
|
||||
result.put_pixel(x, y, Luma([stretched]));
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a 100x100 diagonal gradient image (values 0..=99).
    fn create_test_image() -> GrayImage {
        let mut img = GrayImage::new(100, 100);
        for y in 0..100 {
            for x in 0..100 {
                let val = ((x + y) / 2) as u8;
                img.put_pixel(x, y, Luma([val]));
            }
        }
        img
    }

    // CLAHE with typical parameters succeeds and preserves dimensions.
    #[test]
    fn test_clahe() {
        let img = create_test_image();
        let enhanced = clahe(&img, 2.0, 8);

        assert!(enhanced.is_ok());
        let result = enhanced.unwrap();
        assert_eq!(result.dimensions(), img.dimensions());
    }

    // Zero tile size and non-positive clip limit are both rejected.
    #[test]
    fn test_clahe_invalid_params() {
        let img = create_test_image();

        // Invalid tile size
        let result = clahe(&img, 2.0, 0);
        assert!(result.is_err());

        // Invalid clip limit
        let result = clahe(&img, -1.0, 8);
        assert!(result.is_err());
    }

    // After normalization the mean brightness should sit near 128.
    #[test]
    fn test_normalize_brightness() {
        let img = create_test_image();
        let normalized = normalize_brightness(&img);

        assert_eq!(normalized.dimensions(), img.dimensions());

        // Check that mean is closer to 128
        let sum: u32 = normalized.pixels().map(|p| p[0] as u32).sum();
        let mean = sum as f32 / (100.0 * 100.0);

        assert!((mean - 128.0).abs() < 5.0);
    }

    // Shadow removal succeeds and preserves dimensions.
    #[test]
    fn test_remove_shadows() {
        let img = create_test_image();
        let result = remove_shadows(&img);

        assert!(result.is_ok());
        let shadow_removed = result.unwrap();
        assert_eq!(shadow_removed.dimensions(), img.dimensions());
    }

    // A low-contrast image must stretch to the full 0-255 range.
    #[test]
    fn test_contrast_stretch() {
        // Create low contrast image
        let mut img = GrayImage::new(100, 100);
        for y in 0..100 {
            for x in 0..100 {
                let val = 100 + ((x + y) / 10) as u8; // Range: 100-119
                img.put_pixel(x, y, Luma([val]));
            }
        }

        let stretched = contrast_stretch(&img);

        // Check that range is now 0-255
        let mut min_val = 255u8;
        let mut max_val = 0u8;
        for pixel in stretched.pixels() {
            let val = pixel[0];
            if val < min_val {
                min_val = val;
            }
            if val > max_val {
                max_val = val;
            }
        }

        assert_eq!(min_val, 0);
        assert_eq!(max_val, 255);
    }

    // The min == max fast path returns the image unchanged.
    #[test]
    fn test_contrast_stretch_uniform() {
        // Uniform image should remain unchanged
        let mut img = GrayImage::new(50, 50);
        for pixel in img.pixels_mut() {
            *pixel = Luma([128]);
        }

        let stretched = contrast_stretch(&img);

        for pixel in stretched.pixels() {
            assert_eq!(pixel[0], 128);
        }
    }

    // A max filter can never produce a value below the input pixel.
    #[test]
    fn test_estimate_background() {
        let img = create_test_image();
        let background = estimate_background(&img, 5);

        assert_eq!(background.dimensions(), img.dimensions());

        // Background should have higher values (max filter)
        for (orig, bg) in img.pixels().zip(background.pixels()) {
            assert!(bg[0] >= orig[0]);
        }
    }

    // CLAHE should be robust across a range of tile sizes.
    #[test]
    fn test_clahe_various_tile_sizes() {
        let img = create_test_image();

        for tile_size in [4, 8, 16, 32] {
            let result = clahe(&img, 2.0, tile_size);
            assert!(result.is_ok());
        }
    }
}
|
||||
277
vendor/ruvector/examples/scipix/src/preprocess/mod.rs
vendored
Normal file
277
vendor/ruvector/examples/scipix/src/preprocess/mod.rs
vendored
Normal file
@@ -0,0 +1,277 @@
|
||||
//! Image preprocessing module for OCR pipeline
|
||||
//!
|
||||
//! This module provides comprehensive image preprocessing capabilities including:
|
||||
//! - Image transformations (grayscale, blur, sharpen, threshold)
|
||||
//! - Rotation detection and correction
|
||||
//! - Skew correction (deskewing)
|
||||
//! - Image enhancement (CLAHE, normalization)
|
||||
//! - Text region segmentation
|
||||
//! - Complete preprocessing pipeline with parallel processing
|
||||
|
||||
pub mod deskew;
|
||||
pub mod enhancement;
|
||||
pub mod pipeline;
|
||||
pub mod rotation;
|
||||
pub mod segmentation;
|
||||
pub mod transforms;
|
||||
|
||||
use image::{DynamicImage, GrayImage};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
|
||||
/// Preprocessing error types
///
/// Each variant carries a human-readable message describing the failure.
#[derive(Error, Debug)]
pub enum PreprocessError {
    /// Failed to load or decode the input image.
    #[error("Image loading error: {0}")]
    ImageLoad(String),

    /// Caller supplied out-of-range or inconsistent parameters.
    #[error("Invalid parameters: {0}")]
    InvalidParameters(String),

    /// A processing step failed mid-pipeline.
    #[error("Processing error: {0}")]
    Processing(String),

    /// Text-region segmentation failed.
    #[error("Segmentation error: {0}")]
    Segmentation(String),
}

/// Result type for preprocessing operations
pub type Result<T> = std::result::Result<T, PreprocessError>;
|
||||
|
||||
/// Preprocessing options for configuring the pipeline
///
/// See [`PreprocessOptions::default`] for the values used when a field is
/// not set explicitly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PreprocessOptions {
    /// Enable rotation detection and correction
    pub auto_rotate: bool,

    /// Enable skew detection and correction
    pub auto_deskew: bool,

    /// Enable contrast enhancement
    pub enhance_contrast: bool,

    /// Enable denoising
    pub denoise: bool,

    /// Binarization threshold (None for auto Otsu)
    pub threshold: Option<u8>,

    /// Enable adaptive thresholding
    pub adaptive_threshold: bool,

    /// Adaptive threshold window size (in pixels — presumably odd; confirm
    /// against the thresholding implementation)
    pub adaptive_window_size: u32,

    /// Target image width (None to keep original)
    pub target_width: Option<u32>,

    /// Target image height (None to keep original)
    pub target_height: Option<u32>,

    /// Enable text region detection
    pub detect_regions: bool,

    /// Gaussian blur sigma for denoising
    pub blur_sigma: f32,

    /// CLAHE clip limit for contrast enhancement (typically 2.0-4.0)
    pub clahe_clip_limit: f32,

    /// CLAHE tile size (typically 8 or 16)
    pub clahe_tile_size: u32,
}
|
||||
|
||||
impl Default for PreprocessOptions {
    /// Defaults enable the full pipeline (rotate, deskew, contrast
    /// enhancement, denoise, adaptive threshold, region detection) and
    /// keep the original image size.
    fn default() -> Self {
        Self {
            auto_rotate: true,
            auto_deskew: true,
            enhance_contrast: true,
            denoise: true,
            threshold: None, // None -> automatic (Otsu) threshold
            adaptive_threshold: true,
            adaptive_window_size: 15,
            target_width: None,  // keep original width
            target_height: None, // keep original height
            detect_regions: true,
            blur_sigma: 1.0,
            clahe_clip_limit: 2.0,
            clahe_tile_size: 8,
        }
    }
}
|
||||
|
||||
/// Type of text region
///
/// Classification label attached to each detected [`TextRegion`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RegionType {
    /// Regular text
    Text,
    /// Mathematical equation
    Math,
    /// Table
    Table,
    /// Figure/Image
    Figure,
    /// Unknown/Other
    Unknown,
}
|
||||
|
||||
/// Detected text region with bounding box
///
/// Produced by segmentation; serializable so detection results can be
/// exported as JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextRegion {
    /// Region type
    pub region_type: RegionType,

    /// Bounding box (x, y, width, height)
    pub bbox: (u32, u32, u32, u32),

    /// Confidence score (0.0 to 1.0)
    pub confidence: f32,

    /// Average text height in pixels
    pub text_height: f32,

    /// Detected baseline angle in degrees
    pub baseline_angle: f32,
}
|
||||
|
||||
/// Main preprocessing function with configurable options
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `image` - Input image to preprocess
|
||||
/// * `options` - Preprocessing configuration options
|
||||
///
|
||||
/// # Returns
|
||||
/// Preprocessed grayscale image ready for OCR
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use image::open;
|
||||
/// use ruvector_scipix::preprocess::{preprocess, PreprocessOptions};
|
||||
///
|
||||
/// let img = open("document.jpg").unwrap();
|
||||
/// let options = PreprocessOptions::default();
|
||||
/// let processed = preprocess(&img, &options).unwrap();
|
||||
/// ```
|
||||
pub fn preprocess(image: &DynamicImage, options: &PreprocessOptions) -> Result<GrayImage> {
|
||||
pipeline::PreprocessPipeline::builder()
|
||||
.auto_rotate(options.auto_rotate)
|
||||
.auto_deskew(options.auto_deskew)
|
||||
.enhance_contrast(options.enhance_contrast)
|
||||
.denoise(options.denoise)
|
||||
.blur_sigma(options.blur_sigma)
|
||||
.clahe_clip_limit(options.clahe_clip_limit)
|
||||
.clahe_tile_size(options.clahe_tile_size)
|
||||
.threshold(options.threshold)
|
||||
.adaptive_threshold(options.adaptive_threshold)
|
||||
.adaptive_window_size(options.adaptive_window_size)
|
||||
.target_size(options.target_width, options.target_height)
|
||||
.build()
|
||||
.process(image)
|
||||
}
|
||||
|
||||
/// Detect text regions in an image
///
/// Thin public wrapper that delegates to [`segmentation::find_text_regions`].
///
/// # Arguments
/// * `image` - Input grayscale image
/// * `min_region_size` - Minimum region size in pixels
///
/// # Returns
/// Vector of detected text regions with metadata
///
/// # Example
/// ```no_run
/// use image::open;
/// use ruvector_scipix::preprocess::detect_text_regions;
///
/// let img = open("document.jpg").unwrap().to_luma8();
/// let regions = detect_text_regions(&img, 100).unwrap();
/// println!("Found {} text regions", regions.len());
/// ```
pub fn detect_text_regions(image: &GrayImage, min_region_size: u32) -> Result<Vec<TextRegion>> {
    segmentation::find_text_regions(image, min_region_size)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use image::{Rgb, RgbImage};

    /// Builds an RGB gradient test image of the requested size.
    fn create_test_image(width: u32, height: u32) -> DynamicImage {
        let mut img = RgbImage::new(width, height);

        // Create a simple test pattern
        for y in 0..height {
            for x in 0..width {
                let val = ((x + y) % 256) as u8;
                img.put_pixel(x, y, Rgb([val, val, val]));
            }
        }

        DynamicImage::ImageRgb8(img)
    }

    // The default pipeline should succeed and keep the image size.
    #[test]
    fn test_preprocess_default_options() {
        let img = create_test_image(100, 100);
        let options = PreprocessOptions::default();

        let result = preprocess(&img, &options);
        assert!(result.is_ok());

        let processed = result.unwrap();
        assert_eq!(processed.width(), 100);
        assert_eq!(processed.height(), 100);
    }

    // Setting target dimensions must resize the output accordingly.
    #[test]
    fn test_preprocess_with_resize() {
        let img = create_test_image(200, 200);
        let mut options = PreprocessOptions::default();
        options.target_width = Some(100);
        options.target_height = Some(100);

        let result = preprocess(&img, &options);
        assert!(result.is_ok());

        let processed = result.unwrap();
        assert_eq!(processed.width(), 100);
        assert_eq!(processed.height(), 100);
    }

    // Struct-update syntax with Default should preserve explicit overrides.
    #[test]
    fn test_preprocess_options_builder() {
        let options = PreprocessOptions {
            auto_rotate: false,
            auto_deskew: false,
            enhance_contrast: true,
            denoise: true,
            threshold: Some(128),
            adaptive_threshold: false,
            ..Default::default()
        };

        assert!(!options.auto_rotate);
        assert!(!options.auto_deskew);
        assert!(options.enhance_contrast);
        assert_eq!(options.threshold, Some(128));
    }

    // TextRegion must round-trip through serde_json without loss.
    #[test]
    fn test_region_type_serialization() {
        let region = TextRegion {
            region_type: RegionType::Math,
            bbox: (10, 20, 100, 50),
            confidence: 0.95,
            text_height: 12.0,
            baseline_angle: 0.5,
        };

        let json = serde_json::to_string(&region).unwrap();
        let deserialized: TextRegion = serde_json::from_str(&json).unwrap();

        assert_eq!(deserialized.region_type, RegionType::Math);
        assert_eq!(deserialized.bbox, (10, 20, 100, 50));
        assert!((deserialized.confidence - 0.95).abs() < 0.001);
    }
}
|
||||
456
vendor/ruvector/examples/scipix/src/preprocess/pipeline.rs
vendored
Normal file
456
vendor/ruvector/examples/scipix/src/preprocess/pipeline.rs
vendored
Normal file
@@ -0,0 +1,456 @@
|
||||
//! Complete preprocessing pipeline with builder pattern and parallel processing
|
||||
|
||||
use super::Result;
|
||||
use crate::preprocess::{deskew, enhancement, rotation, transforms};
|
||||
use image::{DynamicImage, GrayImage};
|
||||
use rayon::prelude::*;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Progress callback type
///
/// Invoked with a human-readable step name and a completion fraction in
/// `0.0..=1.0`. Wrapped in `Arc` and bounded `Send + Sync` so the same
/// callback can be shared across rayon worker threads during batch runs.
pub type ProgressCallback = Arc<dyn Fn(&str, f32) + Send + Sync>;
|
||||
|
||||
/// Complete preprocessing pipeline with configurable steps
///
/// Constructed via [`PreprocessPipeline::builder`]; fields mirror
/// `PreprocessPipelineBuilder` one-to-one.
pub struct PreprocessPipeline {
    // Step toggles (see `process` for the step order).
    auto_rotate: bool,
    auto_deskew: bool,
    enhance_contrast: bool,
    denoise: bool,
    // Gaussian blur strength, used only when `denoise` is on.
    blur_sigma: f32,
    // CLAHE contrast-enhancement parameters.
    clahe_clip_limit: f32,
    clahe_tile_size: u32,
    // Fixed binary threshold; `None` falls back to Otsu when
    // `adaptive_threshold` is also off.
    threshold: Option<u8>,
    // When true, adaptive thresholding takes precedence over `threshold`.
    adaptive_threshold: bool,
    adaptive_window_size: u32,
    // Final resize; applied only when BOTH dimensions are `Some`.
    target_width: Option<u32>,
    target_height: Option<u32>,
    // Optional progress-reporting hook.
    progress_callback: Option<ProgressCallback>,
}
|
||||
|
||||
/// Builder for preprocessing pipeline
///
/// Field-for-field mirror of `PreprocessPipeline`; see the `Default`
/// impl for initial values and the setter methods for per-field meaning.
pub struct PreprocessPipelineBuilder {
    auto_rotate: bool,
    auto_deskew: bool,
    enhance_contrast: bool,
    denoise: bool,
    blur_sigma: f32,
    clahe_clip_limit: f32,
    clahe_tile_size: u32,
    threshold: Option<u8>,
    adaptive_threshold: bool,
    adaptive_window_size: u32,
    target_width: Option<u32>,
    target_height: Option<u32>,
    progress_callback: Option<ProgressCallback>,
}
|
||||
|
||||
impl Default for PreprocessPipelineBuilder {
    /// Defaults: every correction/enhancement step enabled, adaptive
    /// thresholding on, no resizing, no progress callback.
    fn default() -> Self {
        Self {
            auto_rotate: true,
            auto_deskew: true,
            enhance_contrast: true,
            denoise: true,
            blur_sigma: 1.0,
            clahe_clip_limit: 2.0,
            clahe_tile_size: 8,
            threshold: None,
            adaptive_threshold: true,
            adaptive_window_size: 15,
            target_width: None,
            target_height: None,
            progress_callback: None,
        }
    }
}
|
||||
|
||||
impl PreprocessPipelineBuilder {
    /// Create a builder preloaded with the defaults from `Default`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Enable/disable automatic rotation detection and correction.
    pub fn auto_rotate(mut self, enable: bool) -> Self {
        self.auto_rotate = enable;
        self
    }

    /// Enable/disable automatic skew detection and correction.
    pub fn auto_deskew(mut self, enable: bool) -> Self {
        self.auto_deskew = enable;
        self
    }

    /// Enable/disable CLAHE contrast enhancement.
    pub fn enhance_contrast(mut self, enable: bool) -> Self {
        self.enhance_contrast = enable;
        self
    }

    /// Enable/disable Gaussian-blur denoising.
    pub fn denoise(mut self, enable: bool) -> Self {
        self.denoise = enable;
        self
    }

    /// Sigma of the Gaussian blur used by the denoise step.
    pub fn blur_sigma(mut self, sigma: f32) -> Self {
        self.blur_sigma = sigma;
        self
    }

    /// CLAHE clip limit.
    pub fn clahe_clip_limit(mut self, limit: f32) -> Self {
        self.clahe_clip_limit = limit;
        self
    }

    /// CLAHE tile size.
    pub fn clahe_tile_size(mut self, size: u32) -> Self {
        self.clahe_tile_size = size;
        self
    }

    /// Fixed binary threshold. Ignored while `adaptive_threshold` is on;
    /// `None` selects Otsu's method when adaptive is also off.
    pub fn threshold(mut self, threshold: Option<u8>) -> Self {
        self.threshold = threshold;
        self
    }

    /// Enable/disable adaptive (windowed) thresholding. Takes precedence
    /// over the fixed `threshold` value in `process`.
    pub fn adaptive_threshold(mut self, enable: bool) -> Self {
        self.adaptive_threshold = enable;
        self
    }

    /// Window size for adaptive thresholding.
    pub fn adaptive_window_size(mut self, size: u32) -> Self {
        self.adaptive_window_size = size;
        self
    }

    /// Final output size; resizing runs only when BOTH dimensions are `Some`.
    pub fn target_size(mut self, width: Option<u32>, height: Option<u32>) -> Self {
        self.target_width = width;
        self.target_height = height;
        self
    }

    /// Install a progress callback receiving `(step name, fraction 0.0-1.0)`.
    pub fn progress_callback<F>(mut self, callback: F) -> Self
    where
        F: Fn(&str, f32) + Send + Sync + 'static,
    {
        self.progress_callback = Some(Arc::new(callback));
        self
    }

    /// Consume the builder and produce the configured pipeline.
    pub fn build(self) -> PreprocessPipeline {
        PreprocessPipeline {
            auto_rotate: self.auto_rotate,
            auto_deskew: self.auto_deskew,
            enhance_contrast: self.enhance_contrast,
            denoise: self.denoise,
            blur_sigma: self.blur_sigma,
            clahe_clip_limit: self.clahe_clip_limit,
            clahe_tile_size: self.clahe_tile_size,
            threshold: self.threshold,
            adaptive_threshold: self.adaptive_threshold,
            adaptive_window_size: self.adaptive_window_size,
            target_width: self.target_width,
            target_height: self.target_height,
            progress_callback: self.progress_callback,
        }
    }
}
|
||||
|
||||
impl PreprocessPipeline {
    /// Create a new pipeline builder
    pub fn builder() -> PreprocessPipelineBuilder {
        PreprocessPipelineBuilder::new()
    }

    /// Report progress if callback is set (no-op otherwise).
    fn report_progress(&self, step: &str, progress: f32) {
        if let Some(callback) = &self.progress_callback {
            callback(step, progress);
        }
    }

    /// Process a single image through the complete pipeline
    ///
    /// # Pipeline steps:
    /// 1. Convert to grayscale
    /// 2. Detect and correct rotation (if enabled)
    /// 3. Detect and correct skew (if enabled)
    /// 4. Enhance contrast with CLAHE (if enabled)
    /// 5. Denoise with Gaussian blur (if enabled)
    /// 6. Apply thresholding (binary or adaptive)
    /// 7. Resize to target dimensions (if specified)
    pub fn process(&self, image: &DynamicImage) -> Result<GrayImage> {
        self.report_progress("Starting preprocessing", 0.0);

        // Step 1: Convert to grayscale
        self.report_progress("Converting to grayscale", 0.1);
        let mut gray = transforms::to_grayscale(image);

        // Step 2: Auto-rotate. Angles of 0.5 degrees or less are treated
        // as detection noise and skipped.
        if self.auto_rotate {
            self.report_progress("Detecting rotation", 0.2);
            let angle = rotation::detect_rotation(&gray)?;

            if angle.abs() > 0.5 {
                self.report_progress("Correcting rotation", 0.25);
                gray = rotation::rotate_image(&gray, -angle)?;
            }
        }

        // Step 3: Auto-deskew (same 0.5-degree noise floor as rotation).
        if self.auto_deskew {
            self.report_progress("Detecting skew", 0.3);
            let angle = deskew::detect_skew_angle(&gray)?;

            if angle.abs() > 0.5 {
                self.report_progress("Correcting skew", 0.35);
                gray = deskew::deskew_image(&gray, angle)?;
            }
        }

        // Step 4: Enhance contrast
        if self.enhance_contrast {
            self.report_progress("Enhancing contrast", 0.5);
            gray = enhancement::clahe(&gray, self.clahe_clip_limit, self.clahe_tile_size)?;
        }

        // Step 5: Denoise
        if self.denoise {
            self.report_progress("Denoising", 0.6);
            gray = transforms::gaussian_blur(&gray, self.blur_sigma)?;
        }

        // Step 6: Thresholding. Precedence: adaptive beats the fixed
        // `threshold`; with neither configured, fall back to Otsu.
        self.report_progress("Applying threshold", 0.7);
        gray = if self.adaptive_threshold {
            transforms::adaptive_threshold(&gray, self.adaptive_window_size)?
        } else if let Some(threshold_val) = self.threshold {
            transforms::threshold(&gray, threshold_val)
        } else {
            // Auto Otsu threshold
            let threshold_val = transforms::otsu_threshold(&gray)?;
            transforms::threshold(&gray, threshold_val)
        };

        // Step 7: Resize — runs only when BOTH target dimensions are set.
        if let (Some(width), Some(height)) = (self.target_width, self.target_height) {
            self.report_progress("Resizing", 0.9);
            gray = image::imageops::resize(
                &gray,
                width,
                height,
                image::imageops::FilterType::Lanczos3,
            );
        }

        self.report_progress("Preprocessing complete", 1.0);
        Ok(gray)
    }

    /// Process multiple images in parallel
    ///
    /// # Arguments
    /// * `images` - Vector of images to process
    ///
    /// # Returns
    /// Vector of preprocessed images in the same order
    ///
    /// The first failing image (if any) aborts the whole batch via the
    /// `Result` collect.
    pub fn process_batch(&self, images: Vec<DynamicImage>) -> Result<Vec<GrayImage>> {
        images
            .into_par_iter()
            .map(|img| self.process(&img))
            .collect()
    }

    /// Process image and return intermediate results from each step
    ///
    /// Useful for debugging and visualization
    ///
    /// Unlike `process`, this variant emits no progress callbacks, and
    /// steps that are disabled or skipped contribute no snapshot entry.
    pub fn process_with_intermediates(
        &self,
        image: &DynamicImage,
    ) -> Result<Vec<(String, GrayImage)>> {
        let mut results = Vec::new();

        // Step 1: Grayscale
        let mut gray = transforms::to_grayscale(image);
        results.push(("01_grayscale".to_string(), gray.clone()));

        // Step 2: Rotation
        if self.auto_rotate {
            let angle = rotation::detect_rotation(&gray)?;
            if angle.abs() > 0.5 {
                gray = rotation::rotate_image(&gray, -angle)?;
                results.push(("02_rotated".to_string(), gray.clone()));
            }
        }

        // Step 3: Deskew
        if self.auto_deskew {
            let angle = deskew::detect_skew_angle(&gray)?;
            if angle.abs() > 0.5 {
                gray = deskew::deskew_image(&gray, angle)?;
                results.push(("03_deskewed".to_string(), gray.clone()));
            }
        }

        // Step 4: Enhancement
        if self.enhance_contrast {
            gray = enhancement::clahe(&gray, self.clahe_clip_limit, self.clahe_tile_size)?;
            results.push(("04_enhanced".to_string(), gray.clone()));
        }

        // Step 5: Denoise
        if self.denoise {
            gray = transforms::gaussian_blur(&gray, self.blur_sigma)?;
            results.push(("05_denoised".to_string(), gray.clone()));
        }

        // Step 6: Threshold (same precedence rules as `process`)
        gray = if self.adaptive_threshold {
            transforms::adaptive_threshold(&gray, self.adaptive_window_size)?
        } else if let Some(threshold_val) = self.threshold {
            transforms::threshold(&gray, threshold_val)
        } else {
            let threshold_val = transforms::otsu_threshold(&gray)?;
            transforms::threshold(&gray, threshold_val)
        };
        results.push(("06_thresholded".to_string(), gray.clone()));

        // Step 7: Resize
        if let (Some(width), Some(height)) = (self.target_width, self.target_height) {
            gray = image::imageops::resize(
                &gray,
                width,
                height,
                image::imageops::FilterType::Lanczos3,
            );
            results.push(("07_resized".to_string(), gray.clone()));
        }

        Ok(results)
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use image::{Rgb, RgbImage};

    /// 100x100 RGB image with a gentle diagonal gradient.
    fn create_test_image() -> DynamicImage {
        let img = RgbImage::from_fn(100, 100, |x, y| {
            let val = ((x + y) / 2) as u8;
            Rgb([val, val, val])
        });
        DynamicImage::ImageRgb8(img)
    }

    #[test]
    fn test_pipeline_builder() {
        let pipeline = PreprocessPipeline::builder()
            .auto_rotate(false)
            .denoise(true)
            .blur_sigma(1.5)
            .build();

        // Builder settings must land on the pipeline unchanged.
        assert!(!pipeline.auto_rotate);
        assert!(pipeline.denoise);
        assert!((pipeline.blur_sigma - 1.5).abs() < 0.001);
    }

    #[test]
    fn test_pipeline_process() {
        let pipeline = PreprocessPipeline::builder()
            .auto_rotate(false)
            .auto_deskew(false)
            .build();

        let result = pipeline.process(&create_test_image());
        assert!(result.is_ok());

        // Without resizing, dimensions are preserved.
        let processed = result.unwrap();
        assert_eq!(processed.width(), 100);
        assert_eq!(processed.height(), 100);
    }

    #[test]
    fn test_pipeline_with_resize() {
        let pipeline = PreprocessPipeline::builder()
            .target_size(Some(50), Some(50))
            .auto_rotate(false)
            .auto_deskew(false)
            .build();

        let result = pipeline.process(&create_test_image());
        assert!(result.is_ok());

        let processed = result.unwrap();
        assert_eq!(processed.width(), 50);
        assert_eq!(processed.height(), 50);
    }

    #[test]
    fn test_pipeline_batch_processing() {
        let batch: Vec<_> = (0..3).map(|_| create_test_image()).collect();

        let pipeline = PreprocessPipeline::builder()
            .auto_rotate(false)
            .auto_deskew(false)
            .build();

        let results = pipeline.process_batch(batch);
        assert!(results.is_ok());

        // One output per input, order preserved by rayon's collect.
        assert_eq!(results.unwrap().len(), 3);
    }

    #[test]
    fn test_pipeline_intermediates() {
        let pipeline = PreprocessPipeline::builder()
            .auto_rotate(false)
            .auto_deskew(false)
            .enhance_contrast(true)
            .denoise(true)
            .build();

        let result = pipeline.process_with_intermediates(&create_test_image());
        assert!(result.is_ok());

        let intermediates = result.unwrap();
        assert!(!intermediates.is_empty());

        // Snapshots from the always-on steps must be present.
        let has_step =
            |needle: &str| intermediates.iter().any(|(name, _)| name.contains(needle));
        assert!(has_step("grayscale"));
        assert!(has_step("thresholded"));
    }

    #[test]
    fn test_progress_callback() {
        use std::sync::{Arc, Mutex};

        let recorded = Arc::new(Mutex::new(Vec::new()));
        let sink = Arc::clone(&recorded);

        let pipeline = PreprocessPipeline::builder()
            .auto_rotate(false)
            .auto_deskew(false)
            .progress_callback(move |step, _progress| {
                sink.lock().unwrap().push(step.to_string());
            })
            .build();

        let _ = pipeline.process(&create_test_image());

        // The callback must have seen at least the first and last steps.
        let steps = recorded.lock().unwrap();
        assert!(!steps.is_empty());
        assert!(steps.iter().any(|s| s.contains("Starting")));
        assert!(steps.iter().any(|s| s.contains("complete")));
    }
}
|
||||
319
vendor/ruvector/examples/scipix/src/preprocess/rotation.rs
vendored
Normal file
319
vendor/ruvector/examples/scipix/src/preprocess/rotation.rs
vendored
Normal file
@@ -0,0 +1,319 @@
|
||||
//! Rotation detection and correction using projection profiles
|
||||
|
||||
use super::{PreprocessError, Result};
|
||||
use image::{GrayImage, Luma};
|
||||
use imageproc::geometric_transformations::{rotate_about_center, Interpolation};
|
||||
use std::f32;
|
||||
|
||||
/// Detect rotation angle using projection profile analysis
|
||||
///
|
||||
/// Uses horizontal and vertical projection profiles to detect document rotation.
|
||||
/// Returns angle in degrees (typically in range -45 to +45).
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `image` - Input grayscale image
|
||||
///
|
||||
/// # Returns
|
||||
/// Rotation angle in degrees (positive = clockwise)
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use ruvector_scipix::preprocess::rotation::detect_rotation;
|
||||
/// # use image::GrayImage;
|
||||
/// # let image = GrayImage::new(100, 100);
|
||||
/// let angle = detect_rotation(&image).unwrap();
|
||||
/// println!("Detected rotation: {:.2}°", angle);
|
||||
/// ```
|
||||
pub fn detect_rotation(image: &GrayImage) -> Result<f32> {
|
||||
let (width, height) = image.dimensions();
|
||||
|
||||
if width < 10 || height < 10 {
|
||||
return Err(PreprocessError::InvalidParameters(
|
||||
"Image too small for rotation detection".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
// Calculate projection profiles for different angles
|
||||
let angles = [-45.0, -30.0, -15.0, 0.0, 15.0, 30.0, 45.0];
|
||||
let mut max_score = 0.0;
|
||||
let mut best_angle = 0.0;
|
||||
|
||||
for &angle in &angles {
|
||||
let score = calculate_projection_score(image, angle);
|
||||
if score > max_score {
|
||||
max_score = score;
|
||||
best_angle = angle;
|
||||
}
|
||||
}
|
||||
|
||||
// Refine angle with finer search around best candidate
|
||||
let fine_angles: Vec<f32> = (-5..=5).map(|i| best_angle + (i as f32) * 2.0).collect();
|
||||
|
||||
max_score = 0.0;
|
||||
for angle in fine_angles {
|
||||
let score = calculate_projection_score(image, angle);
|
||||
if score > max_score {
|
||||
max_score = score;
|
||||
best_angle = angle;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(best_angle)
|
||||
}
|
||||
|
||||
/// Calculate projection profile score for a given rotation angle
|
||||
///
|
||||
/// Higher scores indicate better alignment with text baselines
|
||||
fn calculate_projection_score(image: &GrayImage, angle: f32) -> f32 {
|
||||
let (width, height) = image.dimensions();
|
||||
|
||||
// For 0 degrees, use direct projection
|
||||
if angle.abs() < 0.1 {
|
||||
return calculate_horizontal_projection_variance(image);
|
||||
}
|
||||
|
||||
// For non-zero angles, calculate projection along rotated axis
|
||||
let rad = angle.to_radians();
|
||||
let cos_a = rad.cos();
|
||||
let sin_a = rad.sin();
|
||||
|
||||
let mut projection = vec![0u32; height as usize];
|
||||
|
||||
for y in 0..height {
|
||||
for x in 0..width {
|
||||
let pixel = image.get_pixel(x, y)[0];
|
||||
if pixel < 128 {
|
||||
// Project pixel onto rotated horizontal axis
|
||||
let proj_y = ((y as f32) * cos_a - (x as f32) * sin_a) as i32;
|
||||
if proj_y >= 0 && proj_y < height as i32 {
|
||||
projection[proj_y as usize] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate variance of projection (higher = better alignment)
|
||||
calculate_variance(&projection)
|
||||
}
|
||||
|
||||
/// Calculate horizontal projection variance
|
||||
fn calculate_horizontal_projection_variance(image: &GrayImage) -> f32 {
|
||||
let (width, height) = image.dimensions();
|
||||
let mut projection = vec![0u32; height as usize];
|
||||
|
||||
for y in 0..height {
|
||||
for x in 0..width {
|
||||
let pixel = image.get_pixel(x, y)[0];
|
||||
if pixel < 128 {
|
||||
projection[y as usize] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
calculate_variance(&projection)
|
||||
}
|
||||
|
||||
/// Population variance of a projection profile.
///
/// Returns 0.0 for an empty profile.
fn calculate_variance(projection: &[u32]) -> f32 {
    if projection.is_empty() {
        return 0.0;
    }

    let len = projection.len() as f32;
    let mean = projection.iter().sum::<u32>() as f32 / len;

    let sum_sq: f32 = projection
        .iter()
        .map(|&count| {
            let delta = count as f32 - mean;
            delta * delta
        })
        .sum();

    sum_sq / len
}
|
||||
|
||||
/// Rotate image by specified angle
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `image` - Input grayscale image
|
||||
/// * `angle` - Rotation angle in degrees (positive = clockwise)
|
||||
///
|
||||
/// # Returns
|
||||
/// Rotated image with bilinear interpolation
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use ruvector_scipix::preprocess::rotation::rotate_image;
|
||||
/// # use image::GrayImage;
|
||||
/// # let image = GrayImage::new(100, 100);
|
||||
/// let rotated = rotate_image(&image, 15.0).unwrap();
|
||||
/// ```
|
||||
pub fn rotate_image(image: &GrayImage, angle: f32) -> Result<GrayImage> {
|
||||
if angle.abs() < 0.01 {
|
||||
// No rotation needed
|
||||
return Ok(image.clone());
|
||||
}
|
||||
|
||||
let radians = -angle.to_radians(); // Negate for correct direction
|
||||
let rotated = rotate_about_center(
|
||||
image,
|
||||
radians,
|
||||
Interpolation::Bilinear,
|
||||
Luma([255]), // White background
|
||||
);
|
||||
|
||||
Ok(rotated)
|
||||
}
|
||||
|
||||
/// Detect rotation with confidence score
///
/// Returns tuple of (angle, confidence) where confidence is 0.0-1.0
///
/// NOTE(review): `detect_rotation` picks the angle that maximizes the
/// projection score, so `current_score / baseline_score` is >= 1.0
/// whenever any non-zero angle wins; combined with the `.min(1.0)` cap,
/// the confidence appears to saturate at 1.0 in most cases — confirm
/// this is the intended metric.
pub fn detect_rotation_with_confidence(image: &GrayImage) -> Result<(f32, f32)> {
    let angle = detect_rotation(image)?;

    // Calculate confidence based on projection profile variance difference
    let current_score = calculate_projection_score(image, angle);
    let baseline_score = calculate_projection_score(image, 0.0);

    // Confidence is relative improvement over baseline
    let confidence = if baseline_score > 0.0 {
        (current_score / baseline_score).min(1.0)
    } else {
        0.5 // Default moderate confidence
    };

    Ok((angle, confidence))
}
|
||||
|
||||
/// Auto-rotate image only if confidence is above threshold
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `image` - Input grayscale image
|
||||
/// * `confidence_threshold` - Minimum confidence (0.0-1.0) to apply rotation
|
||||
///
|
||||
/// # Returns
|
||||
/// Tuple of (rotated_image, angle_applied, confidence)
|
||||
pub fn auto_rotate(image: &GrayImage, confidence_threshold: f32) -> Result<(GrayImage, f32, f32)> {
|
||||
let (angle, confidence) = detect_rotation_with_confidence(image)?;
|
||||
|
||||
if confidence >= confidence_threshold && angle.abs() > 0.5 {
|
||||
let rotated = rotate_image(image, -angle)?;
|
||||
Ok((rotated, angle, confidence))
|
||||
} else {
|
||||
Ok((image.clone(), 0.0, confidence))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// 200x100 white page with four horizontal black strokes that mimic
    /// text baselines (rows 20/25/50/55, columns 10..190).
    fn create_text_image() -> GrayImage {
        GrayImage::from_fn(200, 100, |x, y| {
            let on_text_row = matches!(y, 20 | 25 | 50 | 55);
            if on_text_row && (10..190).contains(&x) {
                Luma([0])
            } else {
                Luma([255])
            }
        })
    }

    #[test]
    fn test_detect_rotation_straight() {
        let detected = detect_rotation(&create_text_image());
        assert!(detected.is_ok());
        // Horizontal strokes should produce a near-zero angle.
        assert!(detected.unwrap().abs() < 10.0);
    }

    #[test]
    fn test_rotate_image() {
        let img = create_text_image();
        let rotated = rotate_image(&img, 15.0);
        assert!(rotated.is_ok());
        // Rotation about the center keeps the canvas size.
        assert_eq!(rotated.unwrap().dimensions(), img.dimensions());
    }

    #[test]
    fn test_rotate_no_change() {
        let img = create_text_image();
        // Sub-threshold angle takes the clone fast path.
        let rotated = rotate_image(&img, 0.001);
        assert!(rotated.is_ok());
        assert_eq!(rotated.unwrap().dimensions(), img.dimensions());
    }

    #[test]
    fn test_rotation_confidence() {
        let result = detect_rotation_with_confidence(&create_text_image());
        assert!(result.is_ok());

        let (angle, confidence) = result.unwrap();
        assert!((0.0..=1.0).contains(&confidence));
        println!(
            "Detected angle: {:.2}°, confidence: {:.2}",
            angle, confidence
        );
    }

    #[test]
    fn test_auto_rotate_with_threshold() {
        let img = create_text_image();

        // High threshold — rotation may be skipped; either way the
        // dimensions are preserved.
        let result = auto_rotate(&img, 0.95);
        assert!(result.is_ok());

        let (rotated, angle, confidence) = result.unwrap();
        assert_eq!(rotated.dimensions(), img.dimensions());
        println!(
            "Auto-rotate: angle={:.2}°, confidence={:.2}",
            angle, confidence
        );
    }

    #[test]
    fn test_projection_variance() {
        assert!(calculate_variance(&[10, 50, 100, 50, 10]) > 0.0);
    }

    #[test]
    fn test_rotation_small_image_error() {
        // Below the 10x10 minimum the detector must refuse.
        assert!(detect_rotation(&GrayImage::new(5, 5)).is_err());
    }

    #[test]
    fn test_rotation_roundtrip() {
        let img = create_text_image();
        let there = rotate_image(&img, 30.0).unwrap();
        let back = rotate_image(&there, -30.0).unwrap();
        assert_eq!(back.dimensions(), img.dimensions());
    }
}
|
||||
483
vendor/ruvector/examples/scipix/src/preprocess/segmentation.rs
vendored
Normal file
483
vendor/ruvector/examples/scipix/src/preprocess/segmentation.rs
vendored
Normal file
@@ -0,0 +1,483 @@
|
||||
//! Text region detection and segmentation
|
||||
|
||||
use super::{RegionType, Result, TextRegion};
|
||||
use image::GrayImage;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
/// Find text regions in a binary or grayscale image
|
||||
///
|
||||
/// Uses connected component analysis and geometric heuristics to identify
|
||||
/// text regions and classify them by type (text, math, table, etc.)
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `image` - Input grayscale or binary image
|
||||
/// * `min_region_size` - Minimum region area in pixels
|
||||
///
|
||||
/// # Returns
|
||||
/// Vector of detected text regions with bounding boxes
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use ruvector_scipix::preprocess::segmentation::find_text_regions;
|
||||
/// # use image::GrayImage;
|
||||
/// # let image = GrayImage::new(100, 100);
|
||||
/// let regions = find_text_regions(&image, 100).unwrap();
|
||||
/// println!("Found {} regions", regions.len());
|
||||
/// ```
|
||||
pub fn find_text_regions(image: &GrayImage, min_region_size: u32) -> Result<Vec<TextRegion>> {
|
||||
// Find connected components
|
||||
let components = connected_components(image);
|
||||
|
||||
// Extract bounding boxes for each component
|
||||
let bboxes = extract_bounding_boxes(&components);
|
||||
|
||||
// Filter by size and merge overlapping regions
|
||||
let filtered = filter_by_size(bboxes, min_region_size);
|
||||
let merged = merge_overlapping_regions(filtered, 10);
|
||||
|
||||
// Find text lines and group components
|
||||
let text_lines = find_text_lines(image, &merged);
|
||||
|
||||
// Classify regions and create TextRegion objects
|
||||
let regions = classify_regions(image, text_lines);
|
||||
|
||||
Ok(regions)
|
||||
}
|
||||
|
||||
/// Connected component labeling using flood-fill algorithm
|
||||
///
|
||||
/// Returns labeled image where each connected component has a unique ID
|
||||
fn connected_components(image: &GrayImage) -> Vec<Vec<u32>> {
|
||||
let (width, height) = image.dimensions();
|
||||
let mut labels = vec![vec![0u32; width as usize]; height as usize];
|
||||
let mut current_label = 1u32;
|
||||
|
||||
for y in 0..height {
|
||||
for x in 0..width {
|
||||
if labels[y as usize][x as usize] == 0 && image.get_pixel(x, y)[0] < 128 {
|
||||
// Found unlabeled foreground pixel, start flood fill
|
||||
flood_fill(image, &mut labels, x, y, current_label);
|
||||
current_label += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
labels
|
||||
}
|
||||
|
||||
/// Flood fill algorithm for connected component labeling
|
||||
fn flood_fill(image: &GrayImage, labels: &mut [Vec<u32>], start_x: u32, start_y: u32, label: u32) {
|
||||
let (width, height) = image.dimensions();
|
||||
let mut stack = vec![(start_x, start_y)];
|
||||
|
||||
while let Some((x, y)) = stack.pop() {
|
||||
if x >= width || y >= height {
|
||||
continue;
|
||||
}
|
||||
|
||||
if labels[y as usize][x as usize] != 0 || image.get_pixel(x, y)[0] >= 128 {
|
||||
continue;
|
||||
}
|
||||
|
||||
labels[y as usize][x as usize] = label;
|
||||
|
||||
// Add 4-connected neighbors
|
||||
if x > 0 {
|
||||
stack.push((x - 1, y));
|
||||
}
|
||||
if x < width - 1 {
|
||||
stack.push((x + 1, y));
|
||||
}
|
||||
if y > 0 {
|
||||
stack.push((x, y - 1));
|
||||
}
|
||||
if y < height - 1 {
|
||||
stack.push((x, y + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute, for every nonzero label, its bounding box as
/// `(x, y, width, height)`.
fn extract_bounding_boxes(labels: &[Vec<u32>]) -> HashMap<u32, (u32, u32, u32, u32)> {
    // First pass: track (min_x, min_y, max_x, max_y) per label.
    let mut extents: HashMap<u32, (u32, u32, u32, u32)> = HashMap::new();

    for (row_idx, row) in labels.iter().enumerate() {
        let y = row_idx as u32;
        for (col_idx, &label) in row.iter().enumerate() {
            if label == 0 {
                continue;
            }
            let x = col_idx as u32;

            extents
                .entry(label)
                .and_modify(|e| {
                    e.0 = e.0.min(x);
                    e.1 = e.1.min(y);
                    e.2 = e.2.max(x);
                    e.3 = e.3.max(y);
                })
                .or_insert((x, y, x, y));
        }
    }

    // Second pass: corner form -> (x, y, width, height); extents are
    // inclusive, hence the +1.
    extents
        .into_iter()
        .map(|(label, (min_x, min_y, max_x, max_y))| {
            (label, (min_x, min_y, max_x - min_x + 1, max_y - min_y + 1))
        })
        .collect()
}
|
||||
|
||||
/// Keep only boxes whose area (`w * h`) is at least `min_size` pixels.
///
/// The labels are discarded; only the `(x, y, w, h)` boxes survive.
fn filter_by_size(
    bboxes: HashMap<u32, (u32, u32, u32, u32)>,
    min_size: u32,
) -> Vec<(u32, u32, u32, u32)> {
    let mut kept = Vec::with_capacity(bboxes.len());
    for bbox in bboxes.into_values() {
        let (_, _, w, h) = bbox;
        if w * h >= min_size {
            kept.push(bbox);
        }
    }
    kept
}
|
||||
|
||||
/// Merge overlapping or nearby regions
///
/// Scans boxes left to right; each unconsumed box repeatedly absorbs
/// every later box that overlaps (or lies within `merge_distance`
/// pixels of) its current extent, rescanning until it stops growing.
///
/// # Arguments
/// * `regions` - Vector of bounding boxes (x, y, width, height)
/// * `merge_distance` - Maximum distance to merge regions
pub fn merge_overlapping_regions(
    regions: Vec<(u32, u32, u32, u32)>,
    merge_distance: u32,
) -> Vec<(u32, u32, u32, u32)> {
    if regions.is_empty() {
        return regions;
    }

    // Per-axis overlap-or-proximity test with `merge_distance` slack.
    let near = |a: &(u32, u32, u32, u32), b: &(u32, u32, u32, u32)| -> bool {
        let x_close = a.0 <= b.0 + b.2 + merge_distance && b.0 <= a.0 + a.2 + merge_distance;
        let y_close = a.1 <= b.1 + b.3 + merge_distance && b.1 <= a.1 + a.3 + merge_distance;
        x_close && y_close
    };

    // Smallest box covering both inputs.
    let union = |a: &(u32, u32, u32, u32), b: &(u32, u32, u32, u32)| -> (u32, u32, u32, u32) {
        let min_x = a.0.min(b.0);
        let min_y = a.1.min(b.1);
        let max_x = (a.0 + a.2).max(b.0 + b.2);
        let max_y = (a.1 + a.3).max(b.1 + b.3);
        (min_x, min_y, max_x - min_x, max_y - min_y)
    };

    let mut merged = Vec::new();
    let mut consumed = HashSet::new();

    for i in 0..regions.len() {
        if consumed.contains(&i) {
            continue;
        }

        let mut acc = regions[i];

        // Rescan until the accumulated box stops growing.
        let mut grew = true;
        while grew {
            grew = false;
            for j in (i + 1)..regions.len() {
                if !consumed.contains(&j) && near(&acc, &regions[j]) {
                    acc = union(&acc, &regions[j]);
                    consumed.insert(j);
                    grew = true;
                }
            }
        }

        merged.push(acc);
        consumed.insert(i);
    }

    merged
}
|
||||
|
||||
/// True when the two `(x, y, w, h)` boxes overlap, or their gaps along
/// BOTH axes are at most `distance` pixels.
fn boxes_overlap_or_close(
    box1: &(u32, u32, u32, u32),
    box2: &(u32, u32, u32, u32),
    distance: u32,
) -> bool {
    // Interval test along one axis, with `distance` of slack per side.
    let axis_close = |start_a: u32, len_a: u32, start_b: u32, len_b: u32| {
        start_a <= start_b + len_b + distance && start_b <= start_a + len_a + distance
    };

    let (x1, y1, w1, h1) = *box1;
    let (x2, y2, w2, h2) = *box2;

    axis_close(x1, w1, x2, w2) && axis_close(y1, h1, y2, h2)
}
|
||||
|
||||
/// Merge two bounding boxes
///
/// Returns the smallest axis-aligned box that contains both inputs.
fn merge_boxes(box1: &(u32, u32, u32, u32), box2: &(u32, u32, u32, u32)) -> (u32, u32, u32, u32) {
    let &(ax, ay, aw, ah) = box1;
    let &(bx, by, bw, bh) = box2;

    // Extremes of the union rectangle.
    let left = ax.min(bx);
    let top = ay.min(by);
    let right = (ax + aw).max(bx + bw);
    let bottom = (ay + ah).max(by + bh);

    (left, top, right - left, bottom - top)
}
|
||||
|
||||
/// Find text lines using projection profiles
|
||||
///
|
||||
/// Groups regions into lines based on vertical alignment
|
||||
pub fn find_text_lines(
|
||||
_image: &GrayImage,
|
||||
regions: &[(u32, u32, u32, u32)],
|
||||
) -> Vec<Vec<(u32, u32, u32, u32)>> {
|
||||
if regions.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
// Sort regions by y-coordinate
|
||||
let mut sorted_regions = regions.to_vec();
|
||||
sorted_regions.sort_by_key(|r| r.1);
|
||||
|
||||
let mut lines = Vec::new();
|
||||
let mut current_line = vec![sorted_regions[0]];
|
||||
|
||||
for region in sorted_regions.iter().skip(1) {
|
||||
let (_, y, _, h) = region;
|
||||
let (_, prev_y, _, prev_h) = current_line.last().unwrap();
|
||||
|
||||
// Check if region is on the same line (vertical overlap)
|
||||
let line_height = (*prev_h).max(*h);
|
||||
let distance = if y > prev_y { y - prev_y } else { prev_y - y };
|
||||
|
||||
if distance < line_height / 2 {
|
||||
current_line.push(*region);
|
||||
} else {
|
||||
lines.push(current_line.clone());
|
||||
current_line = vec![*region];
|
||||
}
|
||||
}
|
||||
|
||||
if !current_line.is_empty() {
|
||||
lines.push(current_line);
|
||||
}
|
||||
|
||||
lines
|
||||
}
|
||||
|
||||
/// Classify regions by type (text, math, table, etc.)
|
||||
fn classify_regions(
|
||||
image: &GrayImage,
|
||||
text_lines: Vec<Vec<(u32, u32, u32, u32)>>,
|
||||
) -> Vec<TextRegion> {
|
||||
let mut regions = Vec::new();
|
||||
|
||||
for line in text_lines {
|
||||
for bbox in line {
|
||||
let (x, y, width, height) = bbox;
|
||||
|
||||
// Calculate features for classification
|
||||
let aspect_ratio = width as f32 / height as f32;
|
||||
let density = calculate_density(image, bbox);
|
||||
|
||||
// Simple heuristic classification
|
||||
let region_type = if aspect_ratio > 10.0 {
|
||||
// Very wide region might be a table or figure caption
|
||||
RegionType::Table
|
||||
} else if aspect_ratio < 0.5 && height > 50 {
|
||||
// Tall region might be a figure
|
||||
RegionType::Figure
|
||||
} else if density > 0.3 && height < 30 {
|
||||
// Dense, small region likely math
|
||||
RegionType::Math
|
||||
} else {
|
||||
// Default to text
|
||||
RegionType::Text
|
||||
};
|
||||
|
||||
regions.push(TextRegion {
|
||||
region_type,
|
||||
bbox: (x, y, width, height),
|
||||
confidence: 0.8, // Default confidence
|
||||
text_height: height as f32,
|
||||
baseline_angle: 0.0,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
regions
|
||||
}
|
||||
|
||||
/// Calculate pixel density in a region
|
||||
fn calculate_density(image: &GrayImage, bbox: (u32, u32, u32, u32)) -> f32 {
|
||||
let (x, y, width, height) = bbox;
|
||||
let total_pixels = (width * height) as f32;
|
||||
|
||||
if total_pixels == 0.0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let mut foreground_pixels = 0;
|
||||
|
||||
for py in y..(y + height) {
|
||||
for px in x..(x + width) {
|
||||
if image.get_pixel(px, py)[0] < 128 {
|
||||
foreground_pixels += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
foreground_pixels as f32 / total_pixels
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use image::Luma;

    /// Builds a white 200x200 image with three black rectangles that act as
    /// synthetic "text" regions for the detection tests.
    fn create_test_image_with_rectangles() -> GrayImage {
        let mut img = GrayImage::new(200, 200);

        // Fill with white
        for pixel in img.pixels_mut() {
            *pixel = Luma([255]);
        }

        // Draw some black rectangles (simulating text regions)
        for y in 20..40 {
            for x in 20..100 {
                img.put_pixel(x, y, Luma([0]));
            }
        }

        for y in 60..80 {
            for x in 20..120 {
                img.put_pixel(x, y, Luma([0]));
            }
        }

        for y in 100..120 {
            for x in 20..80 {
                img.put_pixel(x, y, Luma([0]));
            }
        }

        img
    }

    #[test]
    fn test_find_text_regions() {
        let img = create_test_image_with_rectangles();
        let regions = find_text_regions(&img, 100);

        assert!(regions.is_ok());
        let r = regions.unwrap();

        // Should find at least 3 regions
        assert!(r.len() >= 3);

        for region in r {
            println!("Region: {:?} at {:?}", region.region_type, region.bbox);
        }
    }

    #[test]
    fn test_connected_components() {
        let img = create_test_image_with_rectangles();
        let components = connected_components(&img);

        // Check that we have non-zero labels
        let max_label = components
            .iter()
            .flat_map(|row| row.iter())
            .max()
            .unwrap_or(&0);

        assert!(*max_label > 0);
    }

    #[test]
    fn test_merge_overlapping_regions() {
        let regions = vec![(10, 10, 50, 20), (40, 10, 50, 20), (100, 100, 30, 30)];

        let merged = merge_overlapping_regions(regions, 10);

        // First two should merge, third stays separate
        assert_eq!(merged.len(), 2);
    }

    #[test]
    fn test_merge_boxes() {
        let box1 = (10, 10, 50, 20);
        let box2 = (40, 15, 30, 25);

        let merged = merge_boxes(&box1, &box2);

        assert_eq!(merged.0, 10); // min x
        assert_eq!(merged.1, 10); // min y
        assert!(merged.2 >= 50); // width
        assert!(merged.3 >= 25); // height
    }

    #[test]
    fn test_boxes_overlap() {
        let box1 = (10, 10, 50, 20);
        let box2 = (40, 10, 50, 20);

        assert!(boxes_overlap_or_close(&box1, &box2, 0));
        assert!(boxes_overlap_or_close(&box1, &box2, 10));
    }

    #[test]
    fn test_boxes_dont_overlap() {
        let box1 = (10, 10, 20, 20);
        let box2 = (100, 100, 20, 20);

        assert!(!boxes_overlap_or_close(&box1, &box2, 0));
    }

    #[test]
    fn test_find_text_lines() {
        let regions = vec![
            (10, 10, 50, 20),
            (70, 12, 50, 20),
            (10, 50, 50, 20),
            (70, 52, 50, 20),
        ];

        let img = GrayImage::new(200, 100);
        // NOTE(review): the `&` here was mojibake (`®ions`) in the source
        // view; restored to a plain slice borrow.
        let lines = find_text_lines(&img, &regions);

        // Should find 2 lines
        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0].len(), 2);
        assert_eq!(lines[1].len(), 2);
    }

    #[test]
    fn test_calculate_density() {
        let mut img = GrayImage::new(100, 100);

        // Fill region with 50% black pixels (checkerboard)
        for y in 10..30 {
            for x in 10..30 {
                let val = if (x + y) % 2 == 0 { 0 } else { 255 };
                img.put_pixel(x, y, Luma([val]));
            }
        }

        let density = calculate_density(&img, (10, 10, 20, 20));
        assert!((density - 0.5).abs() < 0.1);
    }

    #[test]
    fn test_filter_by_size() {
        let mut bboxes = HashMap::new();
        bboxes.insert(1, (10, 10, 50, 50)); // 2500 pixels
        bboxes.insert(2, (100, 100, 10, 10)); // 100 pixels
        bboxes.insert(3, (200, 200, 30, 30)); // 900 pixels

        let filtered = filter_by_size(bboxes, 500);

        // Should keep regions 1 and 3
        assert_eq!(filtered.len(), 2);
    }
}
|
||||
400
vendor/ruvector/examples/scipix/src/preprocess/transforms.rs
vendored
Normal file
400
vendor/ruvector/examples/scipix/src/preprocess/transforms.rs
vendored
Normal file
@@ -0,0 +1,400 @@
|
||||
//! Image transformation functions for preprocessing
|
||||
|
||||
use super::{PreprocessError, Result};
|
||||
use image::{DynamicImage, GrayImage, Luma};
|
||||
use imageproc::filter::gaussian_blur_f32;
|
||||
use std::f32;
|
||||
|
||||
/// Convert image to grayscale
///
/// Thin wrapper over the `image` crate's luma conversion; always produces a
/// new owned 8-bit buffer.
///
/// # Arguments
/// * `image` - Input color or grayscale image
///
/// # Returns
/// Grayscale image
pub fn to_grayscale(image: &DynamicImage) -> GrayImage {
    image.to_luma8()
}
|
||||
|
||||
/// Apply Gaussian blur for noise reduction
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `image` - Input grayscale image
|
||||
/// * `sigma` - Standard deviation of Gaussian kernel
|
||||
///
|
||||
/// # Returns
|
||||
/// Blurred image
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use ruvector_scipix::preprocess::transforms::gaussian_blur;
|
||||
/// # use image::GrayImage;
|
||||
/// # let image = GrayImage::new(100, 100);
|
||||
/// let blurred = gaussian_blur(&image, 1.5).unwrap();
|
||||
/// ```
|
||||
pub fn gaussian_blur(image: &GrayImage, sigma: f32) -> Result<GrayImage> {
|
||||
if sigma <= 0.0 {
|
||||
return Err(PreprocessError::InvalidParameters(
|
||||
"Sigma must be positive".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(gaussian_blur_f32(image, sigma))
|
||||
}
|
||||
|
||||
/// Sharpen image using unsharp mask
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `image` - Input grayscale image
|
||||
/// * `sigma` - Gaussian blur sigma
|
||||
/// * `amount` - Sharpening strength (typically 0.5-2.0)
|
||||
///
|
||||
/// # Returns
|
||||
/// Sharpened image
|
||||
pub fn sharpen(image: &GrayImage, sigma: f32, amount: f32) -> Result<GrayImage> {
|
||||
if sigma <= 0.0 || amount < 0.0 {
|
||||
return Err(PreprocessError::InvalidParameters(
|
||||
"Invalid sharpening parameters".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let blurred = gaussian_blur_f32(image, sigma);
|
||||
let (width, height) = image.dimensions();
|
||||
let mut result = GrayImage::new(width, height);
|
||||
|
||||
for y in 0..height {
|
||||
for x in 0..width {
|
||||
let original = image.get_pixel(x, y)[0] as f32;
|
||||
let blur = blurred.get_pixel(x, y)[0] as f32;
|
||||
|
||||
// Unsharp mask: original + amount * (original - blurred)
|
||||
let sharpened = original + amount * (original - blur);
|
||||
let clamped = sharpened.clamp(0.0, 255.0) as u8;
|
||||
|
||||
result.put_pixel(x, y, Luma([clamped]));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Calculate optimal threshold using Otsu's method
///
/// Implements full Otsu's algorithm for automatic threshold selection
/// based on maximizing inter-class variance.
///
/// # Arguments
/// * `image` - Input grayscale image
///
/// # Returns
/// Optimal threshold value (0-255)
///
/// # Example
/// ```no_run
/// use ruvector_scipix::preprocess::transforms::otsu_threshold;
/// # use image::GrayImage;
/// # let image = GrayImage::new(100, 100);
/// let threshold = otsu_threshold(&image).unwrap();
/// println!("Optimal threshold: {}", threshold);
/// ```
pub fn otsu_threshold(image: &GrayImage) -> Result<u8> {
    // Calculate histogram (256 intensity bins).
    let mut histogram = [0u32; 256];
    for pixel in image.pixels() {
        histogram[pixel[0] as usize] += 1;
    }

    let total_pixels = (image.width() * image.height()) as f64;

    // Calculate cumulative sums: total intensity mass over the whole image.
    let mut sum_total = 0.0;
    for (i, &count) in histogram.iter().enumerate() {
        sum_total += (i as f64) * (count as f64);
    }

    // Running statistics for the "background" class (bins 0..=t).
    let mut sum_background = 0.0;
    let mut weight_background = 0.0;
    let mut max_variance = 0.0;
    let mut threshold = 0u8;

    // Find threshold that maximizes inter-class variance.
    // Note the update order: the background weight is accumulated before the
    // empty-class checks, while the background intensity sum is accumulated
    // only after the foreground is known to be non-empty.
    for (t, &count) in histogram.iter().enumerate() {
        weight_background += count as f64;
        if weight_background == 0.0 {
            // No pixels at or below t yet — no valid split.
            continue;
        }

        let weight_foreground = total_pixels - weight_background;
        if weight_foreground == 0.0 {
            // All pixels are background; no higher t can produce a split.
            break;
        }

        sum_background += (t as f64) * (count as f64);

        let mean_background = sum_background / weight_background;
        let mean_foreground = (sum_total - sum_background) / weight_foreground;

        // Inter-class variance (unnormalized by total² — ordering is the
        // same, so the argmax is unchanged).
        let variance =
            weight_background * weight_foreground * (mean_background - mean_foreground).powi(2);

        if variance > max_variance {
            max_variance = variance;
            threshold = t as u8;
        }
    }

    Ok(threshold)
}
|
||||
|
||||
/// Apply binary thresholding
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `image` - Input grayscale image
|
||||
/// * `threshold` - Threshold value (0-255)
|
||||
///
|
||||
/// # Returns
|
||||
/// Binary image (0 or 255)
|
||||
pub fn threshold(image: &GrayImage, threshold_val: u8) -> GrayImage {
|
||||
let (width, height) = image.dimensions();
|
||||
let mut result = GrayImage::new(width, height);
|
||||
|
||||
for y in 0..height {
|
||||
for x in 0..width {
|
||||
let pixel = image.get_pixel(x, y)[0];
|
||||
let value = if pixel >= threshold_val { 255 } else { 0 };
|
||||
result.put_pixel(x, y, Luma([value]));
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Apply adaptive thresholding using local window statistics
|
||||
///
|
||||
/// Uses a sliding window to calculate local mean and applies threshold
|
||||
/// relative to local statistics. Better for images with varying illumination.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `image` - Input grayscale image
|
||||
/// * `window_size` - Size of local window (must be odd)
|
||||
///
|
||||
/// # Returns
|
||||
/// Binary image with adaptive thresholding applied
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use ruvector_scipix::preprocess::transforms::adaptive_threshold;
|
||||
/// # use image::GrayImage;
|
||||
/// # let image = GrayImage::new(100, 100);
|
||||
/// let binary = adaptive_threshold(&image, 15).unwrap();
|
||||
/// ```
|
||||
pub fn adaptive_threshold(image: &GrayImage, window_size: u32) -> Result<GrayImage> {
|
||||
if window_size % 2 == 0 {
|
||||
return Err(PreprocessError::InvalidParameters(
|
||||
"Window size must be odd".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let (width, height) = image.dimensions();
|
||||
let mut result = GrayImage::new(width, height);
|
||||
let half_window = (window_size / 2) as i32;
|
||||
|
||||
// Use integral image for fast window sum calculation
|
||||
let integral = compute_integral_image(image);
|
||||
|
||||
for y in 0..height as i32 {
|
||||
for x in 0..width as i32 {
|
||||
// Define window bounds
|
||||
let x1 = (x - half_window).max(0);
|
||||
let y1 = (y - half_window).max(0);
|
||||
let x2 = (x + half_window + 1).min(width as i32);
|
||||
let y2 = (y + half_window + 1).min(height as i32);
|
||||
|
||||
// Calculate mean using integral image
|
||||
let area = ((x2 - x1) * (y2 - y1)) as f64;
|
||||
let sum = get_integral_sum(&integral, x1, y1, x2, y2);
|
||||
let mean = (sum as f64 / area) as u8;
|
||||
|
||||
// Apply threshold with small bias
|
||||
let pixel = image.get_pixel(x as u32, y as u32)[0];
|
||||
let bias = 5; // Small bias to reduce noise
|
||||
let value = if pixel >= mean.saturating_sub(bias) {
|
||||
255
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
result.put_pixel(x as u32, y as u32, Luma([value]));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Compute integral image for fast rectangle sum queries
|
||||
fn compute_integral_image(image: &GrayImage) -> Vec<Vec<u64>> {
|
||||
let (width, height) = image.dimensions();
|
||||
let mut integral = vec![vec![0u64; width as usize + 1]; height as usize + 1];
|
||||
|
||||
for y in 1..=height as usize {
|
||||
for x in 1..=width as usize {
|
||||
let pixel = image.get_pixel(x as u32 - 1, y as u32 - 1)[0] as u64;
|
||||
integral[y][x] =
|
||||
pixel + integral[y - 1][x] + integral[y][x - 1] - integral[y - 1][x - 1];
|
||||
}
|
||||
}
|
||||
|
||||
integral
|
||||
}
|
||||
|
||||
/// Get sum of rectangle in integral image
///
/// Returns the pixel sum over the half-open rectangle `[x1, x2) × [y1, y2)`.
/// Coordinates must be non-negative and within the table bounds; callers
/// clamp them before the query.
fn get_integral_sum(integral: &[Vec<u64>], x1: i32, y1: i32, x2: i32, y2: i32) -> u64 {
    let (left, top) = (x1 as usize, y1 as usize);
    let (right, bottom) = (x2 as usize, y2 as usize);

    // Inclusion–exclusion; the two added terms always dominate the two
    // subtracted ones, so the unsigned arithmetic cannot underflow.
    integral[bottom][right] + integral[top][left] - integral[top][right] - integral[bottom][left]
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    // NOTE(review): removed unused `use approx::assert_relative_eq;` — no
    // test in this module used it, and it pulled in a third-party dev
    // dependency for nothing.

    /// Diagonal gradient: intensity grows with x + y, topping out below 255.
    fn create_gradient_image(width: u32, height: u32) -> GrayImage {
        let mut img = GrayImage::new(width, height);
        for y in 0..height {
            for x in 0..width {
                let val = ((x + y) * 255 / (width + height)) as u8;
                img.put_pixel(x, y, Luma([val]));
            }
        }
        img
    }

    #[test]
    fn test_to_grayscale() {
        let img = DynamicImage::new_rgb8(100, 100);
        let gray = to_grayscale(&img);
        assert_eq!(gray.dimensions(), (100, 100));
    }

    #[test]
    fn test_gaussian_blur() {
        let img = create_gradient_image(50, 50);
        let blurred = gaussian_blur(&img, 1.0);
        assert!(blurred.is_ok());

        let result = blurred.unwrap();
        assert_eq!(result.dimensions(), img.dimensions());
    }

    #[test]
    fn test_gaussian_blur_invalid_sigma() {
        let img = create_gradient_image(50, 50);
        let result = gaussian_blur(&img, -1.0);
        assert!(result.is_err());
    }

    #[test]
    fn test_sharpen() {
        let img = create_gradient_image(50, 50);
        let sharpened = sharpen(&img, 1.0, 1.5);
        assert!(sharpened.is_ok());

        let result = sharpened.unwrap();
        assert_eq!(result.dimensions(), img.dimensions());
    }

    #[test]
    fn test_otsu_threshold() {
        // Create bimodal image (good for Otsu)
        let mut img = GrayImage::new(100, 100);
        for y in 0..100 {
            for x in 0..100 {
                let val = if x < 50 { 50 } else { 200 };
                img.put_pixel(x, y, Luma([val]));
            }
        }

        let threshold = otsu_threshold(&img);
        assert!(threshold.is_ok());

        let t = threshold.unwrap();
        // Should be somewhere between the two values (not necessarily strictly between)
        // Otsu finds optimal threshold which could be at boundary
        assert!(
            t >= 50 && t <= 200,
            "threshold {} should be between 50 and 200",
            t
        );
    }

    #[test]
    fn test_threshold() {
        let img = create_gradient_image(100, 100);
        let binary = threshold(&img, 128);

        assert_eq!(binary.dimensions(), img.dimensions());

        // Check that output is binary
        for pixel in binary.pixels() {
            let val = pixel[0];
            assert!(val == 0 || val == 255);
        }
    }

    #[test]
    fn test_adaptive_threshold() {
        let img = create_gradient_image(100, 100);
        let binary = adaptive_threshold(&img, 15);
        assert!(binary.is_ok());

        let result = binary.unwrap();
        assert_eq!(result.dimensions(), img.dimensions());

        // Check binary output
        for pixel in result.pixels() {
            let val = pixel[0];
            assert!(val == 0 || val == 255);
        }
    }

    #[test]
    fn test_adaptive_threshold_invalid_window() {
        let img = create_gradient_image(50, 50);
        let result = adaptive_threshold(&img, 16); // Even number
        assert!(result.is_err());
    }

    #[test]
    fn test_integral_image() {
        let mut img = GrayImage::new(3, 3);
        for y in 0..3 {
            for x in 0..3 {
                img.put_pixel(x, y, Luma([1]));
            }
        }

        let integral = compute_integral_image(&img);

        // Check 3x3 sum
        let sum = get_integral_sum(&integral, 0, 0, 3, 3);
        assert_eq!(sum, 9); // 3x3 image with all 1s
    }

    #[test]
    fn test_threshold_extremes() {
        let img = create_gradient_image(100, 100);

        // Threshold at 0 should make everything white
        let binary = threshold(&img, 0);
        assert!(binary.pixels().all(|p| p[0] == 255));

        // Threshold at 255 should make everything black (gradient peaks at 252)
        let binary = threshold(&img, 255);
        assert!(binary.pixels().all(|p| p[0] == 0));
    }
}
|
||||
Reference in New Issue
Block a user