Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
169
vendor/ruvector/examples/scipix/src/optimize/mod.rs
vendored
Normal file
169
vendor/ruvector/examples/scipix/src/optimize/mod.rs
vendored
Normal file
@@ -0,0 +1,169 @@
|
||||
//! Performance optimization utilities for scipix OCR
|
||||
//!
|
||||
//! This module provides runtime feature detection and optimized code paths
|
||||
//! for different CPU architectures and capabilities.
|
||||
|
||||
pub mod batch;
|
||||
pub mod memory;
|
||||
pub mod parallel;
|
||||
pub mod quantize;
|
||||
pub mod simd;
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
/// CPU features detected at runtime
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct CpuFeatures {
|
||||
pub avx2: bool,
|
||||
pub avx512f: bool,
|
||||
pub neon: bool,
|
||||
pub sse4_2: bool,
|
||||
}
|
||||
|
||||
static CPU_FEATURES: OnceLock<CpuFeatures> = OnceLock::new();
|
||||
|
||||
/// Detect CPU features at runtime
|
||||
pub fn detect_features() -> CpuFeatures {
|
||||
*CPU_FEATURES.get_or_init(|| {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
CpuFeatures {
|
||||
avx2: is_x86_feature_detected!("avx2"),
|
||||
avx512f: is_x86_feature_detected!("avx512f"),
|
||||
neon: false,
|
||||
sse4_2: is_x86_feature_detected!("sse4.2"),
|
||||
}
|
||||
}
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
{
|
||||
CpuFeatures {
|
||||
avx2: false,
|
||||
avx512f: false,
|
||||
neon: std::arch::is_aarch64_feature_detected!("neon"),
|
||||
sse4_2: false,
|
||||
}
|
||||
}
|
||||
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
|
||||
{
|
||||
CpuFeatures {
|
||||
avx2: false,
|
||||
avx512f: false,
|
||||
neon: false,
|
||||
sse4_2: false,
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Get the detected CPU features
|
||||
pub fn get_features() -> CpuFeatures {
|
||||
detect_features()
|
||||
}
|
||||
|
||||
/// Runtime dispatch to optimized implementation
|
||||
pub trait OptimizedOp<T> {
|
||||
/// Execute the operation with the best available implementation
|
||||
fn execute(&self, input: T) -> T;
|
||||
|
||||
/// Execute with SIMD if available, fallback to scalar
|
||||
fn execute_auto(&self, input: T) -> T {
|
||||
let features = get_features();
|
||||
if features.avx2 || features.avx512f || features.neon {
|
||||
self.execute_simd(input)
|
||||
} else {
|
||||
self.execute_scalar(input)
|
||||
}
|
||||
}
|
||||
|
||||
/// SIMD implementation
|
||||
fn execute_simd(&self, input: T) -> T;
|
||||
|
||||
/// Scalar fallback implementation
|
||||
fn execute_scalar(&self, input: T) -> T;
|
||||
}
|
||||
|
||||
/// Optimization level configuration
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum OptLevel {
|
||||
/// No optimizations, scalar code only
|
||||
None,
|
||||
/// Use SIMD when available
|
||||
Simd,
|
||||
/// Use SIMD + parallel processing
|
||||
Parallel,
|
||||
/// All optimizations including memory optimizations
|
||||
Full,
|
||||
}
|
||||
|
||||
impl Default for OptLevel {
|
||||
fn default() -> Self {
|
||||
OptLevel::Full
|
||||
}
|
||||
}
|
||||
|
||||
/// Global optimization configuration
|
||||
static OPT_LEVEL: OnceLock<OptLevel> = OnceLock::new();
|
||||
|
||||
/// Set the optimization level
|
||||
pub fn set_opt_level(level: OptLevel) {
|
||||
OPT_LEVEL.set(level).ok();
|
||||
}
|
||||
|
||||
/// Get the current optimization level
|
||||
pub fn get_opt_level() -> OptLevel {
|
||||
*OPT_LEVEL.get_or_init(OptLevel::default)
|
||||
}
|
||||
|
||||
/// Check if SIMD optimizations are enabled
|
||||
pub fn simd_enabled() -> bool {
|
||||
matches!(
|
||||
get_opt_level(),
|
||||
OptLevel::Simd | OptLevel::Parallel | OptLevel::Full
|
||||
)
|
||||
}
|
||||
|
||||
/// Check if parallel optimizations are enabled
|
||||
pub fn parallel_enabled() -> bool {
|
||||
matches!(get_opt_level(), OptLevel::Parallel | OptLevel::Full)
|
||||
}
|
||||
|
||||
/// Check if memory optimizations are enabled
|
||||
pub fn memory_opt_enabled() -> bool {
|
||||
matches!(get_opt_level(), OptLevel::Full)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_feature_detection() {
|
||||
let features = detect_features();
|
||||
println!("Detected features: {:?}", features);
|
||||
|
||||
// Should always succeed on any platform
|
||||
assert!(
|
||||
features.avx2
|
||||
|| features.avx512f
|
||||
|| features.neon
|
||||
|| features.sse4_2
|
||||
|| (!features.avx2 && !features.avx512f && !features.neon && !features.sse4_2)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_opt_level() {
|
||||
assert_eq!(get_opt_level(), OptLevel::Full);
|
||||
|
||||
set_opt_level(OptLevel::Simd);
|
||||
// Can't change after first init, should still be Full
|
||||
assert_eq!(get_opt_level(), OptLevel::Full);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_optimization_checks() {
|
||||
assert!(simd_enabled());
|
||||
assert!(parallel_enabled());
|
||||
assert!(memory_opt_enabled());
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user