170 lines
4.3 KiB
Rust
170 lines
4.3 KiB
Rust
//! Performance optimization utilities for scipix OCR.
//!
//! This module provides runtime CPU feature detection and optimized code paths
//! for different CPU architectures and capabilities.
|
|
|
|
pub mod batch;
|
|
pub mod memory;
|
|
pub mod parallel;
|
|
pub mod quantize;
|
|
pub mod simd;
|
|
|
|
use std::sync::OnceLock;
|
|
|
|
/// CPU features detected at runtime
///
/// Each flag is `true` only when the corresponding instruction-set extension
/// was reported as available on the running CPU. The `Default` value has every
/// flag cleared, i.e. "no SIMD extensions available".
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct CpuFeatures {
    /// AVX2 is available (x86 family only).
    pub avx2: bool,
    /// AVX-512 Foundation is available (x86 family only).
    pub avx512f: bool,
    /// NEON is available (aarch64 only).
    pub neon: bool,
    /// SSE4.2 is available (x86 family only).
    pub sse4_2: bool,
}
|
|
|
|
static CPU_FEATURES: OnceLock<CpuFeatures> = OnceLock::new();
|
|
|
|
/// Detect CPU features at runtime
|
|
pub fn detect_features() -> CpuFeatures {
|
|
*CPU_FEATURES.get_or_init(|| {
|
|
#[cfg(target_arch = "x86_64")]
|
|
{
|
|
CpuFeatures {
|
|
avx2: is_x86_feature_detected!("avx2"),
|
|
avx512f: is_x86_feature_detected!("avx512f"),
|
|
neon: false,
|
|
sse4_2: is_x86_feature_detected!("sse4.2"),
|
|
}
|
|
}
|
|
#[cfg(target_arch = "aarch64")]
|
|
{
|
|
CpuFeatures {
|
|
avx2: false,
|
|
avx512f: false,
|
|
neon: std::arch::is_aarch64_feature_detected!("neon"),
|
|
sse4_2: false,
|
|
}
|
|
}
|
|
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
|
|
{
|
|
CpuFeatures {
|
|
avx2: false,
|
|
avx512f: false,
|
|
neon: false,
|
|
sse4_2: false,
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Get the detected CPU features
|
|
pub fn get_features() -> CpuFeatures {
|
|
detect_features()
|
|
}
|
|
|
|
/// Runtime dispatch to optimized implementation
|
|
pub trait OptimizedOp<T> {
|
|
/// Execute the operation with the best available implementation
|
|
fn execute(&self, input: T) -> T;
|
|
|
|
/// Execute with SIMD if available, fallback to scalar
|
|
fn execute_auto(&self, input: T) -> T {
|
|
let features = get_features();
|
|
if features.avx2 || features.avx512f || features.neon {
|
|
self.execute_simd(input)
|
|
} else {
|
|
self.execute_scalar(input)
|
|
}
|
|
}
|
|
|
|
/// SIMD implementation
|
|
fn execute_simd(&self, input: T) -> T;
|
|
|
|
/// Scalar fallback implementation
|
|
fn execute_scalar(&self, input: T) -> T;
|
|
}
|
|
|
|
/// Optimization level configuration
///
/// The default level is [`OptLevel::Full`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OptLevel {
    /// No optimizations, scalar code only
    None,
    /// Use SIMD when available
    Simd,
    /// Use SIMD + parallel processing
    Parallel,
    /// All optimizations including memory optimizations
    #[default]
    Full,
}
|
|
|
|
/// Global optimization configuration
|
|
static OPT_LEVEL: OnceLock<OptLevel> = OnceLock::new();
|
|
|
|
/// Set the optimization level
|
|
pub fn set_opt_level(level: OptLevel) {
|
|
OPT_LEVEL.set(level).ok();
|
|
}
|
|
|
|
/// Get the current optimization level
|
|
pub fn get_opt_level() -> OptLevel {
|
|
*OPT_LEVEL.get_or_init(OptLevel::default)
|
|
}
|
|
|
|
/// Check if SIMD optimizations are enabled
|
|
pub fn simd_enabled() -> bool {
|
|
matches!(
|
|
get_opt_level(),
|
|
OptLevel::Simd | OptLevel::Parallel | OptLevel::Full
|
|
)
|
|
}
|
|
|
|
/// Check if parallel optimizations are enabled
|
|
pub fn parallel_enabled() -> bool {
|
|
matches!(get_opt_level(), OptLevel::Parallel | OptLevel::Full)
|
|
}
|
|
|
|
/// Check if memory optimizations are enabled
|
|
pub fn memory_opt_enabled() -> bool {
|
|
matches!(get_opt_level(), OptLevel::Full)
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Detection must be stable across calls and must never report features
    /// that belong to a different architecture family.
    #[test]
    fn test_feature_detection() {
        let features = detect_features();
        println!("Detected features: {:?}", features);

        // The result is cached, so both entry points must agree with it.
        // Compared field by field so the test does not need `PartialEq`.
        for again in [detect_features(), get_features()] {
            assert_eq!(features.avx2, again.avx2);
            assert_eq!(features.avx512f, again.avx512f);
            assert_eq!(features.neon, again.neon);
            assert_eq!(features.sse4_2, again.sse4_2);
        }

        // x86-family CPUs never report NEON.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            assert!(!features.neon);
        }

        // aarch64 CPUs never report x86 extensions.
        #[cfg(target_arch = "aarch64")]
        {
            assert!(!features.avx2);
            assert!(!features.avx512f);
            assert!(!features.sse4_2);
        }

        // Architectures without a probe report nothing at all.
        #[cfg(not(any(
            target_arch = "x86",
            target_arch = "x86_64",
            target_arch = "aarch64"
        )))]
        {
            assert!(!features.avx2);
            assert!(!features.avx512f);
            assert!(!features.neon);
            assert!(!features.sse4_2);
        }
    }

    /// Reading the level stores the default, after which setting is a no-op.
    #[test]
    fn test_opt_level() {
        assert_eq!(get_opt_level(), OptLevel::Full);

        set_opt_level(OptLevel::Simd);
        // Can't change after first init, should still be Full
        assert_eq!(get_opt_level(), OptLevel::Full);
    }

    /// With the default level (`Full`) every optimization class is enabled.
    #[test]
    fn test_optimization_checks() {
        assert!(simd_enabled());
        assert!(parallel_enabled());
        assert!(memory_opt_enabled());
    }
}
|