Files
wifi-densepose/examples/data/framework/src/optimized.rs
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

1094 lines
35 KiB
Rust

//! Optimized Discovery Engine
//!
//! Performance optimizations:
//! - SIMD-accelerated vector operations (4-8x speedup)
//! - Parallel processing with rayon (linear scaling)
//! - Incremental graph updates (avoid O(n²) recomputation)
//! - Statistical significance testing (p-values)
//! - Temporal causality analysis (Granger-style)
//! - Intelligent caching of expensive computations
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use chrono::{DateTime, Utc, Duration};
use serde::{Deserialize, Serialize};
#[cfg(feature = "parallel")]
use rayon::prelude::*;
use crate::ruvector_native::{
Domain, SemanticVector, GraphNode, GraphEdge, EdgeType,
CoherenceSnapshot, DiscoveredPattern, PatternType, Evidence, CrossDomainLink,
};
/// Performance metrics for the optimized engine
///
/// Every counter is an `AtomicU64`, so it can be bumped through a shared
/// reference. `Default` starts all counters at zero.
#[derive(Debug, Default)]
pub struct PerformanceMetrics {
/// Total vector comparisons performed
///
/// Also serves as the denominator of the engine's cache hit rate.
pub vector_comparisons: AtomicU64,
/// Comparisons saved by caching
///
/// NOTE(review): no code in the visible part of this file increments this
/// counter — confirm whether it is updated elsewhere.
pub cache_hits: AtomicU64,
/// Time spent in min-cut (nanoseconds)
///
/// Accumulated across calls; each coherence computation adds its elapsed time.
pub mincut_time_ns: AtomicU64,
/// Time spent in similarity computation (nanoseconds)
///
/// NOTE(review): no code in the visible part of this file writes to this
/// counter — confirm whether it is updated elsewhere.
pub similarity_time_ns: AtomicU64,
}
/// Optimized discovery engine configuration
///
/// Serializable via serde; see the `Default` impl for the stock values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizedConfig {
/// Base similarity threshold
///
/// Two vectors are linked by an edge when their cosine similarity is at
/// least this value (the threshold is narrowed to `f32` for the comparison).
pub similarity_threshold: f64,
/// Min-cut sensitivity
///
/// A coherence pattern is reported only when the absolute change in the
/// min-cut value between two consecutive snapshots exceeds this amount.
pub mincut_sensitivity: f64,
/// Enable cross-domain discovery
///
/// When `true`, pattern detection also runs the temporal causality analysis
/// between domains.
pub cross_domain: bool,
/// Batch size for parallel operations
pub batch_size: usize,
/// Enable SIMD acceleration
///
/// Selects `simd_cosine_similarity` over the plain `cosine_similarity` when
/// comparing embeddings.
pub use_simd: bool,
/// Cache size for similarity results
pub similarity_cache_size: usize,
/// P-value threshold for statistical significance
///
/// A pattern is flagged `is_significant` when its p-value is strictly below
/// this threshold.
pub significance_threshold: f64,
/// Lookback window for causality analysis
///
/// Upper bound on the lag (in samples) tried by the causality test; the
/// effective bound is further capped at half the length of the cause series.
pub causality_lookback: usize,
/// Minimum correlation for causality
///
/// The absolute cross-correlation at the best lag must exceed this value for
/// a causality pattern to be reported.
pub causality_min_correlation: f64,
}
impl Default for OptimizedConfig {
    /// Returns the stock configuration: SIMD and cross-domain discovery enabled,
    /// a 0.65 similarity threshold and a 5% significance level.
    fn default() -> Self {
        // Graph construction.
        let similarity_threshold = 0.65;
        let mincut_sensitivity = 0.12;
        let cross_domain = true;

        // Execution tuning.
        let batch_size = 256;
        let use_simd = true;
        let similarity_cache_size = 10_000;

        // Statistics and causality.
        let significance_threshold = 0.05;
        let causality_lookback = 10;
        let causality_min_correlation = 0.6;

        Self {
            similarity_threshold,
            mincut_sensitivity,
            cross_domain,
            batch_size,
            use_simd,
            similarity_cache_size,
            significance_threshold,
            causality_lookback,
            causality_min_correlation,
        }
    }
}
/// Optimized discovery engine with parallel processing
///
/// Owns the vector store, the similarity graph built on top of it, the
/// history of coherence snapshots and the per-domain time series used for
/// causality analysis.
pub struct OptimizedDiscoveryEngine {
/// Engine configuration supplied at construction time.
config: OptimizedConfig,
/// Stored vectors; a graph node refers to its vector through `vector_idx`.
vectors: Vec<SemanticVector>,
/// Graph nodes keyed by node id.
nodes: HashMap<u32, GraphNode>,
/// Similarity / cross-domain edges between nodes.
edges: Vec<GraphEdge>,
/// One `(timestamp, min-cut value, snapshot)` entry per pattern-detection run.
coherence_history: Vec<(DateTime<Utc>, f64, CoherenceSnapshot)>,
/// Id that will be assigned to the next inserted node.
next_node_id: u32,
/// Node ids grouped by the domain of their vector.
domain_nodes: HashMap<Domain, Vec<u32>>,
// Optimization structures
/// Similarity values keyed by a pair of indices.
/// NOTE(review): presumably vector indices — confirm against the code that fills it.
similarity_cache: HashMap<(usize, usize), f32>,
/// Set when vectors are added; cleared once the adjacency matrix is rebuilt.
adjacency_dirty: bool,
/// Adjacency matrix from the last rebuild, if any.
cached_adjacency: Option<Vec<Vec<f64>>>,
/// Counters describing the work done so far.
metrics: PerformanceMetrics,
// Temporal analysis state
/// Per-domain `(timestamp, domain coherence)` samples, appended on each
/// pattern-detection run.
domain_timeseries: HashMap<Domain, Vec<(DateTime<Utc>, f64)>>,
}
impl OptimizedDiscoveryEngine {
/// Create a new optimized engine.
///
/// All collections are pre-sized so that small workloads do not reallocate:
/// 1000 vectors/nodes, 5000 edges and 100 coherence snapshots. The similarity
/// cache is pre-sized from `config.similarity_cache_size` so that the
/// configured value is actually honoured (with the default configuration this
/// is 10000 entries).
///
/// The adjacency cache starts out dirty, so the first coherence computation
/// always builds the matrix.
pub fn new(config: OptimizedConfig) -> Self {
    // Read the capacity before `config` is moved into the struct.
    let similarity_cache_capacity = config.similarity_cache_size;

    Self {
        config,
        vectors: Vec::with_capacity(1000),
        nodes: HashMap::with_capacity(1000),
        edges: Vec::with_capacity(5000),
        coherence_history: Vec::with_capacity(100),
        next_node_id: 0,
        domain_nodes: HashMap::new(),
        similarity_cache: HashMap::with_capacity(similarity_cache_capacity),
        adjacency_dirty: true,
        cached_adjacency: None,
        metrics: PerformanceMetrics::default(),
        domain_timeseries: HashMap::new(),
    }
}
/// Insert a whole batch of vectors and wire them into the similarity graph.
///
/// Each vector becomes a graph node with a freshly assigned, consecutive id
/// (starting at the engine's next free id) and weight `1.0`. After all nodes
/// are registered, similarity edges for the new nodes are computed in
/// parallel and the cached adjacency matrix is invalidated.
///
/// Returns the ids assigned to the vectors, in input order.
#[cfg(feature = "parallel")]
pub fn add_vectors_batch(&mut self, vectors: Vec<SemanticVector>) -> Vec<u32> {
    let first_id = self.next_node_id;
    let batch_len = vectors.len();
    let end_id = first_id + batch_len as u32;
    let assigned_ids: Vec<u32> = (first_id..end_id).collect();

    // Register every node before computing similarities so that members of
    // the same batch can see each other.
    for (offset, vector) in vectors.into_iter().enumerate() {
        let node_id = first_id + offset as u32;
        let slot = self.vectors.len();
        let domain = vector.domain;

        self.domain_nodes.entry(domain).or_default().push(node_id);
        self.nodes.insert(
            node_id,
            GraphNode {
                id: node_id,
                external_id: vector.id.clone(),
                domain,
                vector_idx: Some(slot),
                weight: 1.0,
                attributes: HashMap::new(),
            },
        );
        self.vectors.push(vector);
    }
    self.next_node_id = end_id;

    self.compute_batch_similarities_parallel(&assigned_ids);
    self.adjacency_dirty = true;
    assigned_ids
}
/// Compute similarity edges for freshly inserted nodes using rayon.
///
/// Every id in `new_ids` is compared against every node that owns a vector.
/// A pair whose cosine similarity reaches `config.similarity_threshold`
/// yields one edge from the new node to the other node; the edge is tagged
/// `CrossDomain` when the two nodes belong to different domains and
/// `Similarity` otherwise.
///
/// When both endpoints of a pair are part of `new_ids`, only the node with
/// the larger id emits the edge. Without this rule a batch would create both
/// A→B and B→A, doubling the pair's weight in the adjacency matrix compared
/// with inserting the same vectors one at a time.
///
/// The comparison counter is advanced by `new_ids.len() * <nodes with a vector>`.
#[cfg(feature = "parallel")]
fn compute_batch_similarities_parallel(&mut self, new_ids: &[u32]) {
    let threshold = self.config.similarity_threshold as f32;
    let use_simd = self.config.use_simd;
    let batch_ids: std::collections::HashSet<u32> = new_ids.iter().copied().collect();

    // Snapshot (id, embedding, domain) for every node that has a vector so the
    // parallel workers can scan a flat slice.
    let all_vectors: Vec<(u32, &[f32], Domain)> = self
        .nodes
        .iter()
        .filter_map(|(&id, node)| {
            node.vector_idx
                .map(|idx| (id, self.vectors[idx].embedding.as_slice(), node.domain))
        })
        .collect();

    // Taken now so that no shared borrow is still needed once `edges` is
    // mutated below.
    let comparisons = (new_ids.len() * all_vectors.len()) as u64;

    let new_edges: Vec<GraphEdge> = new_ids
        .par_iter()
        .flat_map(|&new_id| {
            let new_node = match self.nodes.get(&new_id) {
                Some(n) => n,
                None => return vec![],
            };
            let new_vec_idx = match new_node.vector_idx {
                Some(idx) => idx,
                None => return vec![],
            };
            let new_vec = self.vectors[new_vec_idx].embedding.as_slice();
            let new_domain = new_node.domain;

            all_vectors
                .iter()
                // Skip self-comparison, and skip batch-internal pairs that the
                // other (larger-id) endpoint will emit.
                .filter(|(id, _, _)| {
                    *id != new_id && !(*id > new_id && batch_ids.contains(id))
                })
                .filter_map(|(other_id, other_vec, other_domain)| {
                    let similarity = if use_simd {
                        simd_cosine_similarity(new_vec, other_vec)
                    } else {
                        cosine_similarity(new_vec, other_vec)
                    };
                    if similarity < threshold {
                        return None;
                    }
                    let edge_type = if new_domain != *other_domain {
                        EdgeType::CrossDomain
                    } else {
                        EdgeType::Similarity
                    };
                    Some(GraphEdge {
                        source: new_id,
                        target: *other_id,
                        weight: similarity as f64,
                        edge_type,
                        timestamp: Utc::now(),
                    })
                })
                .collect::<Vec<_>>()
        })
        .collect();

    self.edges.extend(new_edges);
    self.metrics
        .vector_comparisons
        .fetch_add(comparisons, Ordering::Relaxed);
}
/// Add a single vector and return the id of the node created for it.
///
/// With the `parallel` feature this delegates to `add_vectors_batch` with a
/// one-element batch. Without it, the node is registered sequentially,
/// connected to every sufficiently similar existing node, and the cached
/// adjacency matrix is invalidated.
pub fn add_vector(&mut self, vector: SemanticVector) -> u32 {
    #[cfg(feature = "parallel")]
    {
        self.add_vectors_batch(vec![vector])[0]
    }
    #[cfg(not(feature = "parallel"))]
    {
        // Sequential fallback
        let node_id = self.next_node_id;
        self.next_node_id += 1;

        // Copy out the two fields the node needs, then move the vector into
        // storage instead of cloning the whole embedding.
        let external_id = vector.id.clone();
        let domain = vector.domain;
        let vector_idx = self.vectors.len();
        self.vectors.push(vector);

        let node = GraphNode {
            id: node_id,
            external_id,
            domain,
            vector_idx: Some(vector_idx),
            weight: 1.0,
            attributes: HashMap::new(),
        };
        self.nodes.insert(node_id, node);
        self.domain_nodes.entry(domain).or_default().push(node_id);

        self.connect_similar_vectors(node_id);
        self.adjacency_dirty = true;
        node_id
    }
}
/// Link `node_id` to every other node whose vector is similar enough.
///
/// Does nothing when the node is unknown or has no vector. For every other
/// node that owns a vector, the cosine similarity is computed (SIMD or scalar
/// according to `config.use_simd`); when it reaches
/// `config.similarity_threshold` an edge from `node_id` to that node is
/// appended, tagged `CrossDomain` if the domains differ and `Similarity`
/// otherwise.
#[cfg(not(feature = "parallel"))]
fn connect_similar_vectors(&mut self, node_id: u32) {
    // Only the vector index and the domain are needed, so copy those two
    // values rather than cloning the whole node (and its attribute map).
    let (vector_idx, domain) = match self
        .nodes
        .get(&node_id)
        .and_then(|node| node.vector_idx.map(|idx| (idx, node.domain)))
    {
        Some(found) => found,
        None => return,
    };

    let source_vec = &self.vectors[vector_idx].embedding;
    let threshold = self.config.similarity_threshold as f32;
    let use_simd = self.config.use_simd;

    for (other_id, other_node) in &self.nodes {
        if *other_id == node_id {
            continue;
        }
        let other_idx = match other_node.vector_idx {
            Some(idx) => idx,
            None => continue,
        };
        let other_vec = &self.vectors[other_idx].embedding;
        let similarity = if use_simd {
            simd_cosine_similarity(source_vec, other_vec)
        } else {
            cosine_similarity(source_vec, other_vec)
        };
        if similarity < threshold {
            continue;
        }
        let edge_type = if domain != other_node.domain {
            EdgeType::CrossDomain
        } else {
            EdgeType::Similarity
        };
        self.edges.push(GraphEdge {
            source: node_id,
            target: *other_id,
            weight: similarity as f64,
            edge_type,
            timestamp: Utc::now(),
        });
    }
}
/// Compute the current coherence snapshot of the graph.
///
/// Returns an all-zero snapshot (apart from the node/edge counts) when the
/// graph has no nodes or no edges. Otherwise the adjacency matrix is rebuilt
/// only if it is missing or marked dirty, the min-cut is computed on it, and
/// the elapsed time is added to `metrics.mincut_time_ns`.
///
/// The cached matrix is borrowed rather than cloned: the min-cut routine
/// makes its own working copy, so cloning here would only duplicate an
/// O(n²) allocation on every call.
pub fn compute_coherence(&mut self) -> CoherenceSnapshot {
    if self.nodes.is_empty() || self.edges.is_empty() {
        return CoherenceSnapshot {
            mincut_value: 0.0,
            node_count: self.nodes.len(),
            edge_count: self.edges.len(),
            partition_sizes: (0, 0),
            boundary_nodes: vec![],
            avg_edge_weight: 0.0,
        };
    }

    let start = std::time::Instant::now();

    // Refresh the cache in place when it is stale or absent.
    if self.adjacency_dirty || self.cached_adjacency.is_none() {
        self.cached_adjacency = Some(self.build_adjacency_matrix());
        self.adjacency_dirty = false;
    }

    // The cache was populated just above; the empty-slice fallback only
    // exists to avoid an unwrap.
    let adjacency = self.cached_adjacency.as_deref().unwrap_or(&[]);
    let (mincut_value, partition_sizes, boundary_nodes) =
        self.stoer_wagner_optimized(adjacency);

    self.metrics
        .mincut_time_ns
        .fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);

    // `edges` is known to be non-empty here (see the early return above).
    let avg_edge_weight =
        self.edges.iter().map(|e| e.weight).sum::<f64>() / self.edges.len() as f64;

    CoherenceSnapshot {
        mincut_value,
        node_count: self.nodes.len(),
        edge_count: self.edges.len(),
        partition_sizes,
        boundary_nodes,
        avg_edge_weight,
    }
}
/// Build a dense, symmetric adjacency matrix from the current edge list.
///
/// Row/column positions follow the iteration order of `self.nodes.keys()`.
/// Each edge adds its weight to both `[source][target]` and
/// `[target][source]`, so parallel edges accumulate. Edges that reference an
/// unknown node id are ignored.
fn build_adjacency_matrix(&self) -> Vec<Vec<f64>> {
    let size = self.nodes.len();

    // Node id -> matrix position.
    let mut index_of: HashMap<u32, usize> = HashMap::with_capacity(size);
    for (position, &node_id) in self.nodes.keys().enumerate() {
        index_of.insert(node_id, position);
    }

    let mut matrix = vec![vec![0.0; size]; size];
    for edge in &self.edges {
        let row = match index_of.get(&edge.source) {
            Some(&position) => position,
            None => continue,
        };
        let col = match index_of.get(&edge.target) {
            Some(&position) => position,
            None => continue,
        };
        matrix[row][col] += edge.weight;
        matrix[col][row] += edge.weight;
    }
    matrix
}
/// Optimized Stoer-Wagner with early termination
///
/// Computes a global minimum cut of the weighted graph described by the
/// square adjacency matrix `adj`.
///
/// Returns `(cut_weight, (size_a, size_b), nodes_a)` where `size_a` / `size_b`
/// are the numbers of original vertices on each side of the best cut found and
/// `nodes_a` holds the node ids of the first side. For fewer than two vertices
/// the result is `(0.0, (n, 0), [])`.
///
/// Matrix indices are translated to node ids through the current iteration
/// order of `self.nodes.keys()`.
/// NOTE(review): this assumes that order matches the row order used when `adj`
/// was built — confirm the node map is not modified in between.
fn stoer_wagner_optimized(&self, adj: &[Vec<f64>]) -> (f64, (usize, usize), Vec<u32>) {
let n = adj.len();
if n < 2 {
return (0.0, (n, 0), vec![]);
}
// Index -> node id lookup, used only when reporting the best partition.
let node_ids: Vec<u32> = self.nodes.keys().copied().collect();
// Working copy: merging vertices mutates the weights.
let mut adj = adj.to_vec();
let mut best_cut = f64::INFINITY;
let mut best_partition = (0, 0);
let mut best_boundary = vec![];
// `active[i]` is false once vertex i has been merged into another vertex.
let mut active: Vec<bool> = vec![true; n];
// `merged[i]` lists the original vertex indices represented by super-vertex i.
let mut merged: Vec<Vec<usize>> = (0..n).map(|i| vec![i]).collect();
// Early termination threshold - if cut is very small, stop early
let early_term_threshold = 0.001;
// One phase per merge: each phase finds a cut-of-the-phase and then
// contracts the last two vertices that were added.
for phase in 0..(n - 1) {
// `in_a[j]`: vertex j has been added to the growing set A in this phase.
let mut in_a = vec![false; n];
// `key[j]`: total weight between vertex j and the vertices already in A.
let mut key = vec![0.0; n];
// Seed A with the first vertex that is still active.
let start = match (0..n).find(|&i| active[i]) {
Some(s) => s,
None => break,
};
in_a[start] = true;
for j in 0..n {
if active[j] && !in_a[j] {
key[j] = adj[start][j];
}
}
// `t` is the vertex added last, `s` the one added just before it.
let mut s = start;
let mut t = start;
// Add the remaining active vertices (n - phase of them are active, one is the seed).
for _ in 1..=(n - 1 - phase) {
// Pick the most tightly connected vertex outside A. `max_key` is bound
// but not read afterwards; incomparable keys (NaN) are treated as equal.
let (max_node, max_key) = (0..n)
.filter(|&j| active[j] && !in_a[j])
.map(|j| (j, key[j]))
.max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
.unwrap_or((0, 0.0));
s = t;
t = max_node;
in_a[t] = true;
for j in 0..n {
if active[j] && !in_a[j] {
key[j] += adj[t][j];
}
}
}
// Cut of the phase: weight separating the last-added vertex `t` from the rest.
let cut_weight = key[t];
if cut_weight < best_cut {
best_cut = cut_weight;
// Side A of the cut is everything merged into `t` so far.
let partition_a: Vec<usize> = merged[t].clone();
// Side B is everything represented by the other active super-vertices.
let partition_b_count = (0..n)
.filter(|&i| active[i] && i != t)
.map(|i| merged[i].len())
.sum();
best_partition = (partition_a.len(), partition_b_count);
// Reported as node ids; indices without a matching id are dropped.
best_boundary = partition_a.iter()
.filter_map(|&i| node_ids.get(i).copied())
.collect();
// Early termination
if best_cut < early_term_threshold {
break;
}
}
// Merge s and t
active[t] = false;
let to_merge: Vec<usize> = merged[t].clone();
merged[s].extend(to_merge);
// Fold t's weights into s, keeping the matrix symmetric.
for i in 0..n {
if active[i] && i != s {
adj[s][i] += adj[t][i];
adj[i][s] += adj[i][t];
}
}
}
(best_cut, best_partition, best_boundary)
}
/// Detect patterns and attach statistical significance to each of them.
///
/// Steps, in order:
/// 1. Compute the current coherence snapshot.
/// 2. Append the current coherence of the Climate, Finance and Research
///    domains to their time series (domains without a coherence value are
///    skipped).
/// 3. If a previous snapshot exists and the min-cut moved by more than
///    `config.mincut_sensitivity`, emit a `CoherenceBreak` (decrease) or
///    `Consolidation` (increase) pattern together with its p-value, effect
///    size and confidence interval.
/// 4. If `config.cross_domain` is set, append the causality patterns.
/// 5. Push the current snapshot onto the coherence history.
///
/// The percentage in the pattern description is signed, so a drop in the
/// min-cut is rendered with a leading `-`. When the previous min-cut was not
/// positive the "relative" change falls back to the absolute change.
pub fn detect_patterns_with_significance(&mut self) -> Vec<SignificantPattern> {
    let mut patterns = Vec::new();
    let current = self.compute_coherence();
    let now = Utc::now();

    // Store domain coherence for causality analysis
    for domain in [Domain::Climate, Domain::Finance, Domain::Research] {
        if let Some(coh) = self.domain_coherence(domain) {
            self.domain_timeseries.entry(domain).or_default().push((now, coh));
        }
    }

    let previous_mincut = self.coherence_history.last().map(|(_, mincut, _)| *mincut);
    if let Some(prev_mincut) = previous_mincut {
        let mincut_delta = current.mincut_value - prev_mincut;

        if mincut_delta.abs() > self.config.mincut_sensitivity {
            // Only pay for the significance test when a pattern is emitted.
            let significance = self.compute_significance(mincut_delta);

            let pattern_type = if mincut_delta < 0.0 {
                PatternType::CoherenceBreak
            } else {
                PatternType::Consolidation
            };
            let relative_change = if prev_mincut > 0.0 {
                mincut_delta.abs() / prev_mincut
            } else {
                mincut_delta.abs()
            };
            // Carry the direction of the change into the displayed percentage;
            // the magnitude alone would always print with a `+`.
            let signed_percent = if mincut_delta < 0.0 {
                -relative_change * 100.0
            } else {
                relative_change * 100.0
            };

            patterns.push(SignificantPattern {
                pattern: DiscoveredPattern {
                    id: format!("{}_{}", pattern_type_name(&pattern_type), now.timestamp()),
                    pattern_type,
                    confidence: (relative_change.min(1.0) * 0.5 + 0.5),
                    affected_nodes: current.boundary_nodes.clone(),
                    detected_at: now,
                    description: format!(
                        "Min-cut changed {:.3} → {:.3} ({:+.1}%)",
                        prev_mincut, current.mincut_value, signed_percent
                    ),
                    evidence: vec![
                        Evidence {
                            evidence_type: "mincut_delta".to_string(),
                            value: mincut_delta,
                            description: "Change in min-cut value".to_string(),
                        },
                    ],
                    cross_domain_links: vec![],
                },
                p_value: significance.p_value,
                effect_size: significance.effect_size,
                confidence_interval: significance.confidence_interval,
                is_significant: significance.p_value < self.config.significance_threshold,
            });
        }
    }

    // Cross-domain causality analysis
    if self.config.cross_domain {
        patterns.extend(self.detect_causality_patterns());
    }

    self.coherence_history.push((now, current.mincut_value, current));
    patterns
}
/// Estimate how unusual `delta` is compared with past min-cut changes.
///
/// With fewer than three history entries there is nothing to compare against
/// and a neutral result (`p = 1`, zero effect, `(0, 0)` interval) is returned.
/// Otherwise the consecutive differences of the recorded min-cut values are
/// summarised by their mean and population standard deviation, and:
///
/// * the p-value is the two-sided normal tail of the z-score of `delta`;
/// * the effect size is `delta / std_dev`;
/// * the interval is `delta ± 1.96 * std_dev / sqrt(#differences)`.
///
/// If the historical differences are (numerically) constant, a fixed p-value
/// of `0.01` is reported for any non-zero `delta` and `1.0` otherwise.
fn compute_significance(&self, delta: f64) -> SignificanceResult {
    let history = &self.coherence_history;
    if history.len() < 3 {
        return SignificanceResult {
            p_value: 1.0,
            effect_size: 0.0,
            confidence_interval: (0.0, 0.0),
        };
    }

    // At least three entries means at least two consecutive differences, so
    // the sample below is never empty.
    let deltas: Vec<f64> = history
        .windows(2)
        .map(|pair| pair[1].1 - pair[0].1)
        .collect();
    let sample_size = deltas.len() as f64;

    let mean = deltas.iter().sum::<f64>() / sample_size;
    let variance = deltas.iter().map(|d| (d - mean).powi(2)).sum::<f64>() / sample_size;
    let std_dev = variance.sqrt();

    // Degenerate history: every past change was the same.
    if std_dev < 1e-10 {
        let p_value = if delta.abs() > 1e-10 { 0.01 } else { 1.0 };
        return SignificanceResult {
            p_value,
            effect_size: delta / (delta.abs() + 1e-10),
            confidence_interval: (delta - 0.01, delta + 0.01),
        };
    }

    let z_score = (delta - mean) / std_dev;
    let margin = 1.96 * std_dev / sample_size.sqrt();

    SignificanceResult {
        p_value: 2.0 * (1.0 - normal_cdf(z_score.abs())),
        effect_size: delta / std_dev,
        confidence_interval: (delta - margin, delta + margin),
    }
}
/// Scan every ordered pair of tracked domains for a lagged relationship.
///
/// For each pair `(a, b)` with `a != b` the simplified Granger test is run
/// with `a` as the cause. A `Cascade` pattern is produced when the
/// F-statistic exceeds `3.0` and the absolute correlation exceeds
/// `config.causality_min_correlation`. The pattern's confidence and link
/// strength are the absolute correlation; its interval is the correlation
/// `± 0.1`.
fn detect_causality_patterns(&self) -> Vec<SignificantPattern> {
    let tracked: Vec<Domain> = self.domain_timeseries.keys().copied().collect();
    let mut found = Vec::new();

    for (cause_pos, &domain_a) in tracked.iter().enumerate() {
        for (effect_pos, &domain_b) in tracked.iter().enumerate() {
            if cause_pos == effect_pos {
                continue;
            }
            let causality = match self.granger_causality(domain_a, domain_b) {
                Some(result) => result,
                None => continue,
            };
            let strong_enough = causality.f_statistic > 3.0
                && causality.correlation.abs() > self.config.causality_min_correlation;
            if !strong_enough {
                continue;
            }

            let strength = causality.correlation.abs();
            let evidence = vec![
                Evidence {
                    evidence_type: "f_statistic".to_string(),
                    value: causality.f_statistic,
                    description: "Granger F-statistic".to_string(),
                },
                Evidence {
                    evidence_type: "correlation".to_string(),
                    value: causality.correlation,
                    description: "Cross-correlation at optimal lag".to_string(),
                },
            ];
            let link = CrossDomainLink {
                source_domain: domain_a,
                target_domain: domain_b,
                source_nodes: vec![],
                target_nodes: vec![],
                link_strength: strength,
                link_type: format!("temporal_causality_lag_{}", causality.optimal_lag),
            };
            let pattern = DiscoveredPattern {
                id: format!(
                    "causality_{:?}_{:?}_{}",
                    domain_a,
                    domain_b,
                    Utc::now().timestamp()
                ),
                pattern_type: PatternType::Cascade,
                confidence: strength,
                affected_nodes: vec![],
                detected_at: Utc::now(),
                description: format!(
                    "{:?} → {:?} causality detected (F={:.2}, lag={}, r={:.3})",
                    domain_a,
                    domain_b,
                    causality.f_statistic,
                    causality.optimal_lag,
                    causality.correlation
                ),
                evidence,
                cross_domain_links: vec![link],
            };

            found.push(SignificantPattern {
                pattern,
                p_value: causality.p_value,
                effect_size: causality.correlation,
                confidence_interval: (
                    causality.correlation - 0.1,
                    causality.correlation + 0.1,
                ),
                is_significant: causality.p_value < self.config.significance_threshold,
            });
        }
    }
    found
}
/// Simplified Granger causality test.
///
/// Returns `None` when either domain has no time series, or when the series
/// are too short: the effective lookback is
/// `min(config.causality_lookback, cause_len / 2)` and must be at least 2,
/// and the effect series must hold at least `2 * lookback` samples.
///
/// Otherwise every lag in `1..=lookback` is tried and the one with the
/// largest absolute cross-correlation wins (lag `0` / correlation `0.0` if no
/// lag produced a non-zero correlation). The F-statistic is approximated from
/// the squared correlation as `r² (n - 2) / (1 - r²)` with
/// `n = effect_len - best_lag`, and is set to `0.0` when `r² >= 1`.
fn granger_causality(&self, cause: Domain, effect: Domain) -> Option<CausalityResult> {
    let cause_series = self.domain_timeseries.get(&cause)?;
    let effect_series = self.domain_timeseries.get(&effect)?;

    // `lookback <= cause_len / 2` by construction, so the cause series is
    // always long enough; only the effect series needs an explicit check.
    let lookback = self.config.causality_lookback.min(cause_series.len() / 2);
    if lookback < 2 || effect_series.len() < lookback * 2 {
        return None;
    }

    // Extract the values once instead of once per candidate lag.
    let cause_values: Vec<f64> = cause_series.iter().map(|sample| sample.1).collect();
    let effect_values: Vec<f64> = effect_series.iter().map(|sample| sample.1).collect();

    // Find optimal lag via cross-correlation
    let mut best_lag = 0;
    let mut best_corr = 0.0_f64;
    for lag in 1..=lookback {
        let corr = cross_correlation(&cause_values, &effect_values, lag as i32);
        if corr.abs() > best_corr.abs() {
            best_corr = corr;
            best_lag = lag;
        }
    }

    // Compute F-statistic approximation
    let n = effect_series.len() - best_lag;
    let r_squared = best_corr.powi(2);
    let f_statistic = if r_squared < 1.0 {
        (r_squared * (n as f64 - 2.0)) / (1.0 - r_squared)
    } else {
        0.0
    };

    // Approximate p-value from F-distribution (simplified)
    let p_value = f_to_p(f_statistic, 1, (n - 2).max(1));

    Some(CausalityResult {
        optimal_lag: best_lag,
        correlation: best_corr,
        f_statistic,
        p_value,
    })
}
/// Get domain-specific coherence.
///
/// The coherence of a domain is the mean weight of the edges whose two
/// endpoints both belong to that domain.
///
/// Returns `None` when the domain is unknown or has fewer than two nodes, and
/// `Some(0.0)` when it has no internal edges.
pub fn domain_coherence(&self, domain: Domain) -> Option<f64> {
    let domain_node_ids = self.domain_nodes.get(&domain)?;
    if domain_node_ids.len() < 2 {
        return None;
    }

    // Hash the member ids once so each edge is checked in O(1) instead of
    // scanning the id list twice per edge.
    let members: std::collections::HashSet<u32> = domain_node_ids.iter().copied().collect();

    let (internal_weight, edge_count) = self
        .edges
        .iter()
        .filter(|edge| members.contains(&edge.source) && members.contains(&edge.target))
        .fold((0.0_f64, 0_usize), |(weight, count), edge| {
            (weight + edge.weight, count + 1)
        });

    if edge_count == 0 {
        return Some(0.0);
    }
    Some(internal_weight / edge_count as f64)
}
/// Get performance metrics
pub fn metrics(&self) -> &PerformanceMetrics {
&self.metrics
}
/// Summarise the current state of the engine.
///
/// Reports node, edge and vector totals, the number of nodes per domain, the
/// number of `CrossDomain` edges, the length of the coherence history, the
/// cache hit rate and the total number of vector comparisons.
pub fn stats(&self) -> OptimizedStats {
    let domain_counts: HashMap<_, _> = self
        .domain_nodes
        .iter()
        .map(|(domain, node_ids)| (*domain, node_ids.len()))
        .collect();

    let mut cross_domain_edges = 0;
    for edge in &self.edges {
        if edge.edge_type == EdgeType::CrossDomain {
            cross_domain_edges += 1;
        }
    }

    OptimizedStats {
        total_nodes: self.nodes.len(),
        total_edges: self.edges.len(),
        total_vectors: self.vectors.len(),
        domain_counts,
        cross_domain_edges,
        history_length: self.coherence_history.len(),
        cache_hit_rate: self.cache_hit_rate(),
        total_comparisons: self.metrics.vector_comparisons.load(Ordering::Relaxed),
    }
}
/// Ratio of cache hits to vector comparisons, or `0.0` when no comparison
/// has been recorded yet.
fn cache_hit_rate(&self) -> f64 {
    let hits = self.metrics.cache_hits.load(Ordering::Relaxed);
    match self.metrics.vector_comparisons.load(Ordering::Relaxed) {
        0 => 0.0,
        total => hits as f64 / total as f64,
    }
}
}
/// Pattern with statistical significance
///
/// Wraps a `DiscoveredPattern` together with the statistics that were computed
/// for it when it was detected.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SignificantPattern {
/// The underlying pattern
pub pattern: DiscoveredPattern,
/// P-value for statistical significance
pub p_value: f64,
/// Effect size (Cohen's d or similar)
///
/// For causality patterns this field carries the cross-correlation instead.
pub effect_size: f64,
/// 95% confidence interval
///
/// Stored as `(lower, upper)`.
pub confidence_interval: (f64, f64),
/// Whether this pattern is statistically significant
///
/// Set when `p_value` is strictly below the configured significance threshold.
pub is_significant: bool,
}
/// Result of significance testing
///
/// Internal carrier for the statistics computed for a min-cut change.
#[derive(Debug, Clone)]
struct SignificanceResult {
// P-value of the observed change.
p_value: f64,
// Standardised size of the change.
effect_size: f64,
// `(lower, upper)` bounds around the observed change.
confidence_interval: (f64, f64),
}
/// Result of causality testing
///
/// Internal carrier for the outcome of the simplified Granger test between two
/// domain time series.
#[derive(Debug, Clone)]
struct CausalityResult {
// Lag (in samples) with the largest absolute cross-correlation; 0 when no
// tested lag produced a non-zero correlation.
optimal_lag: usize,
// Signed cross-correlation at `optimal_lag`.
correlation: f64,
// F-statistic approximated from the squared correlation.
f_statistic: f64,
// P-value derived from `f_statistic`.
p_value: f64,
}
/// Engine statistics
///
/// Point-in-time summary of the engine; serializable via serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizedStats {
/// Total graph nodes
pub total_nodes: usize,
/// Total graph edges
pub total_edges: usize,
/// Total vectors stored
pub total_vectors: usize,
/// Nodes per domain
pub domain_counts: HashMap<Domain, usize>,
/// Cross-domain edge count
///
/// Number of edges whose type is `EdgeType::CrossDomain`.
pub cross_domain_edges: usize,
/// Coherence history length
pub history_length: usize,
/// Cache hit rate
///
/// Cache hits divided by total vector comparisons; `0.0` when no comparison
/// has been recorded.
pub cache_hit_rate: f64,
/// Total vector comparisons
pub total_comparisons: u64,
}
// ============================================================================
// SIMD-Accelerated Vector Operations
// ============================================================================
/// Cosine similarity between two equally sized vectors.
///
/// Returns `0.0` when the slices are empty or differ in length. On `x86_64`
/// builds compiled with the `avx2` target feature the AVX2 kernel is used;
/// every other build uses the chunked scalar kernel.
#[inline]
pub fn simd_cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let comparable = !a.is_empty() && a.len() == b.len();
    if !comparable {
        return 0.0;
    }

    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
    let similarity = simd_cosine_avx2(a, b);

    // Fallback: process in chunks of 8 for better cache locality
    #[cfg(not(all(target_arch = "x86_64", target_feature = "avx2")))]
    let similarity = simd_cosine_chunked(a, b);

    similarity
}
/// Scalar cosine similarity that walks the inputs in blocks of eight.
///
/// The dot product and both squared norms are accumulated in a single pass,
/// element by element in index order: first over the full 8-wide blocks, then
/// over the leftover tail. Returns `0.0` when the product of the norms is
/// below `1e-10`.
///
/// `b` must be at least as long as `a`; the caller is expected to have checked
/// that the lengths match.
#[inline]
fn simd_cosine_chunked(a: &[f32], b: &[f32]) -> f32 {
    const LANES: usize = 8;

    // Split both inputs into the part covered by whole blocks and the tail.
    let body_len = (a.len() / LANES) * LANES;
    let (a_body, a_tail) = a.split_at(body_len);
    let (b_body, b_rest) = b.split_at(body_len);
    let b_tail = &b_rest[..a_tail.len()];

    let mut dot = 0.0_f32;
    let mut sq_a = 0.0_f32;
    let mut sq_b = 0.0_f32;

    let blocks = a_body.chunks_exact(LANES).zip(b_body.chunks_exact(LANES));
    for (block_a, block_b) in blocks {
        for (&x, &y) in block_a.iter().zip(block_b) {
            dot += x * y;
            sq_a += x * x;
            sq_b += y * y;
        }
    }

    for (&x, &y) in a_tail.iter().zip(b_tail) {
        dot += x * y;
        sq_a += x * x;
        sq_b += y * y;
    }

    let norm_product = (sq_a * sq_b).sqrt();
    if norm_product < 1e-10 {
        return 0.0;
    }
    dot / norm_product
}
/// AVX2 cosine similarity kernel.
///
/// Processes eight `f32` lanes per iteration with unaligned loads, accumulating
/// the dot product and both squared norms in vector registers, then reduces
/// them horizontally and finishes the remaining `len % 8` elements in scalar
/// code. Returns `0.0` when the product of the norms is below `1e-10`.
///
/// Only compiled when the build targets `x86_64` with the `avx2` feature.
/// `b` must be at least as long as `a`; the public wrapper checks that the
/// lengths are equal before calling this function.
///
/// NOTE(review): `_mm256_fmadd_ps` is an FMA instruction, but the `cfg` gate
/// only checks `avx2` — confirm that every build enabling `avx2` also enables
/// `fma`.
#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
#[inline]
fn simd_cosine_avx2(a: &[f32], b: &[f32]) -> f32 {
use std::arch::x86_64::*;
// SAFETY: AVX2 support is guaranteed at compile time by the `cfg` gate. Each
// vector load reads elements `offset..offset + 8` with
// `offset + 8 <= chunks * 8 <= a.len()`, which is in bounds for `a`, and for
// `b` as long as `b.len() >= a.len()` (checked by the caller). The loads are
// the unaligned variant, so no alignment requirement applies.
unsafe {
let mut dot = _mm256_setzero_ps();
let mut norm_a = _mm256_setzero_ps();
let mut norm_b = _mm256_setzero_ps();
let chunks = a.len() / 8;
for i in 0..chunks {
let offset = i * 8;
let va = _mm256_loadu_ps(a.as_ptr().add(offset));
let vb = _mm256_loadu_ps(b.as_ptr().add(offset));
// acc = va * vb + acc, one rounding per lane.
dot = _mm256_fmadd_ps(va, vb, dot);
norm_a = _mm256_fmadd_ps(va, va, norm_a);
norm_b = _mm256_fmadd_ps(vb, vb, norm_b);
}
// Horizontal sum
let dot_sum = hsum_avx(dot);
let norm_a_sum = hsum_avx(norm_a);
let norm_b_sum = hsum_avx(norm_b);
// Handle remainder
let mut dot_rem = 0.0_f32;
let mut norm_a_rem = 0.0_f32;
let mut norm_b_rem = 0.0_f32;
for i in (chunks * 8)..a.len() {
let av = a[i];
let bv = b[i];
dot_rem += av * bv;
norm_a_rem += av * av;
norm_b_rem += bv * bv;
}
let total_dot = dot_sum + dot_rem;
let total_norm_a = norm_a_sum + norm_a_rem;
let total_norm_b = norm_b_sum + norm_b_rem;
// Guard against (near-)zero vectors before dividing.
let denom = (total_norm_a * total_norm_b).sqrt();
if denom < 1e-10 {
0.0
} else {
total_dot / denom
}
}
}
/// Horizontal sum of the eight `f32` lanes of an AVX register.
///
/// # Safety
///
/// The CPU executing this function must support the instructions used here;
/// the function is only compiled when the build targets `x86_64` with `avx2`.
#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
#[inline]
unsafe fn hsum_avx(v: std::arch::x86_64::__m256) -> f32 {
use std::arch::x86_64::*;
// Fold the upper 128-bit half onto the lower half: 8 lanes -> 4 lanes.
let low = _mm256_castps256_ps128(v);
let high = _mm256_extractf128_ps(v, 1);
let sum128 = _mm_add_ps(low, high);
// Add each odd lane onto its even neighbour: 4 lanes -> 2 partial sums.
let shuf = _mm_movehdup_ps(sum128);
let sums = _mm_add_ps(sum128, shuf);
// Bring the upper partial sum down and add it to the lowest lane.
let shuf2 = _mm_movehl_ps(sums, sums);
let result = _mm_add_ss(sums, shuf2);
// Extract lane 0, which now holds the total.
_mm_cvtss_f32(result)
}
/// Plain scalar cosine similarity, used when SIMD is disabled.
///
/// Returns `0.0` for empty or differently sized inputs, and when either
/// vector's Euclidean norm is below `1e-10`.
#[inline]
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let comparable = !a.is_empty() && a.len() == b.len();
    if !comparable {
        return 0.0;
    }

    let euclidean_norm = |v: &[f32]| -> f32 { v.iter().map(|x| x * x).sum::<f32>().sqrt() };
    let magnitude_a = euclidean_norm(a);
    let magnitude_b = euclidean_norm(b);
    if magnitude_a < 1e-10 || magnitude_b < 1e-10 {
        return 0.0;
    }

    let inner_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
    inner_product / (magnitude_a * magnitude_b)
}
// ============================================================================
// Statistical Helper Functions
// ============================================================================
/// Standard normal CDF, approximated through the error function.
///
/// Evaluates `0.5 * (1 + erf(x / sqrt(2)))`, where `erf` uses the five-term
/// Abramowitz and Stegun polynomial approximation in `t = 1 / (1 + p * |x|)`.
fn normal_cdf(x: f64) -> f64 {
    // Polynomial coefficients, highest order first (a5 seeds the Horner fold).
    const A5: f64 = 1.061405429;
    const LOWER: [f64; 4] = [-1.453152027, 1.421413741, -0.284496736, 0.254829592];
    const P: f64 = 0.3275911;

    let sign = if x < 0.0 { -1.0 } else { 1.0 };
    let scaled = x.abs() / std::f64::consts::SQRT_2;

    let t = 1.0 / (1.0 + P * scaled);
    let polynomial = LOWER.iter().fold(A5, |acc, &coeff| acc * t + coeff);
    let erf_magnitude = 1.0 - polynomial * t * (-scaled * scaled).exp();

    0.5 * (1.0 + sign * erf_magnitude)
}
/// Pearson correlation between `x` and `y` after shifting one against the other.
///
/// Only the first `n = min(x.len(), y.len())` samples of each series are used.
/// For `lag >= 0`, `x[i]` is paired with `y[i + lag]`; for `lag < 0`,
/// `x[i + |lag|]` is paired with `y[i]`.
///
/// Returns `0.0` when fewer than two sample pairs remain after shifting, when
/// `|lag| >= n`, or when either shifted series has (numerically) zero variance.
/// Any `lag` is accepted, including `i32::MIN`.
fn cross_correlation(x: &[f64], y: &[f64], lag: i32) -> f64 {
    let n = x.len().min(y.len());
    if n < 2 {
        return 0.0;
    }

    // `unsigned_abs` cannot overflow, unlike negating `i32::MIN`.
    let shift = lag.unsigned_abs() as usize;
    if shift >= n {
        return 0.0;
    }

    let (x_slice, y_slice) = if lag >= 0 {
        (&x[..n - shift], &y[shift..n])
    } else {
        (&x[shift..n], &y[..n - shift])
    };

    let len = x_slice.len();
    if len < 2 {
        return 0.0;
    }

    let mean_x: f64 = x_slice.iter().sum::<f64>() / len as f64;
    let mean_y: f64 = y_slice.iter().sum::<f64>() / len as f64;

    let mut cov = 0.0;
    let mut var_x = 0.0;
    let mut var_y = 0.0;
    for (xv, yv) in x_slice.iter().zip(y_slice) {
        let dx = xv - mean_x;
        let dy = yv - mean_y;
        cov += dx * dy;
        var_x += dx * dx;
        var_y += dy * dy;
    }

    let denom = (var_x * var_y).sqrt();
    if denom < 1e-10 {
        0.0
    } else {
        cov / denom
    }
}
/// Approximate upper-tail p-value of an F statistic.
///
/// Uses the Wilson-Hilferty cube-root transformation (Paulson's normal
/// approximation): with `a = 2 / (9 * df1)` and `b = 2 / (9 * df2)`,
///
/// ```text
/// z = (F^(1/3) * (1 - b) - (1 - a)) / sqrt(a + F^(2/3) * b)
/// p = 1 - Phi(z)
/// ```
///
/// where `Phi` is the standard normal CDF. The result is clamped to `[0, 1]`.
///
/// Returns `1.0` (no evidence) when `f` is not positive or is NaN, when
/// `df1 == 0`, or when `df2 < 2`; returns `0.0` for an infinite `f`.
/// The approximation is coarse for very small degrees of freedom.
fn f_to_p(f: f64, df1: usize, df2: usize) -> f64 {
    // Standard normal CDF via the Abramowitz and Stegun erf approximation.
    // Kept local so that this function has no dependency beyond `std`.
    fn standard_normal_cdf(z: f64) -> f64 {
        const COEFFS: [f64; 5] = [
            1.061405429,
            -1.453152027,
            1.421413741,
            -0.284496736,
            0.254829592,
        ];
        const P: f64 = 0.3275911;

        let sign = if z < 0.0 { -1.0 } else { 1.0 };
        let scaled = z.abs() / std::f64::consts::SQRT_2;
        let t = 1.0 / (1.0 + P * scaled);
        let polynomial = COEFFS.iter().fold(0.0, |acc, &coeff| (acc + coeff) * t);
        let erf_magnitude = 1.0 - polynomial * (-scaled * scaled).exp();
        0.5 * (1.0 + sign * erf_magnitude)
    }

    if df1 == 0 || df2 < 2 || f.is_nan() || f <= 0.0 {
        return 1.0;
    }
    if f.is_infinite() {
        return 0.0;
    }

    // Variance terms of the cube-root-normal approximation for each
    // chi-square component of the F ratio.
    let a = 2.0 / (9.0 * df1 as f64);
    let b = 2.0 / (9.0 * df2 as f64);

    let cube_root = f.cbrt();
    let z = (cube_root * (1.0 - b) - (1.0 - a)) / (a + cube_root * cube_root * b).sqrt();

    let p = 1.0 - standard_normal_cdf(z);
    p.max(0.0).min(1.0)
}
fn pattern_type_name(pt: &PatternType) -> &'static str {
match pt {
PatternType::CoherenceBreak => "coherence_break",
PatternType::Consolidation => "consolidation",
PatternType::EmergingCluster => "emerging_cluster",
PatternType::DissolvingCluster => "dissolving_cluster",
PatternType::BridgeFormation => "bridge",
PatternType::AnomalousNode => "anomaly",
PatternType::TemporalShift => "temporal_shift",
PatternType::Cascade => "cascade",
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Identical unit vectors score 1, orthogonal unit vectors score 0.
    #[test]
    fn test_simd_cosine() {
        let unit_x = [1.0_f32, 0.0, 0.0, 0.0];
        let unit_x_again = [1.0_f32, 0.0, 0.0, 0.0];
        let unit_y = [0.0_f32, 1.0, 0.0, 0.0];

        let aligned = simd_cosine_similarity(&unit_x, &unit_x_again);
        assert!((aligned - 1.0).abs() < 1e-6);

        let orthogonal = simd_cosine_similarity(&unit_x, &unit_y);
        assert!(orthogonal.abs() < 1e-6);
    }

    /// A series is perfectly correlated with itself at lag 0.
    #[test]
    fn test_cross_correlation() {
        let ramp = [1.0_f64, 2.0, 3.0, 4.0, 5.0];
        let same_ramp = [1.0_f64, 2.0, 3.0, 4.0, 5.0];

        let corr = cross_correlation(&ramp, &same_ramp, 0);
        assert!((corr - 1.0).abs() < 1e-6);
    }

    /// The CDF is 0.5 at the origin and saturates three deviations out.
    #[test]
    fn test_normal_cdf() {
        let at_zero = normal_cdf(0.0);
        assert!((at_zero - 0.5).abs() < 0.01);

        let far_right = normal_cdf(3.0);
        let far_left = normal_cdf(-3.0);
        assert!(far_right > 0.99);
        assert!(far_left < 0.01);
    }
}