Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,629 @@
//! Regime shift detection using RuVector's min-cut algorithms
use std::collections::HashMap;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::{ClimateObservation, SensorNetwork, SensorEdge, WeatherVariable};
/// A detected regime shift.
///
/// `RegimeShiftDetector::detect` stores one record for each evaluated window
/// whose min-cut estimate differs from the previously recorded estimate by a
/// relative amount larger than `RegimeDetectorConfig::detection_threshold`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegimeShift {
/// Shift identifier (the detector generates `shift_<n>`)
pub id: String,
/// Start time of the window in which the shift was detected
pub timestamp: DateTime<Utc>,
/// Shift type
pub shift_type: ShiftType,
/// Shift severity, derived from `magnitude`
pub severity: ShiftSeverity,
/// Min-cut estimate of the previously recorded window
pub mincut_before: f64,
/// Min-cut estimate of the window that triggered the detection
pub mincut_after: f64,
/// Absolute value of the relative min-cut change
/// (`|after - before| / max(before, 0.01)`)
pub magnitude: f64,
/// IDs of stations whose observation variance in the window exceeds
/// 1.5x the average station variance
pub affected_sensors: Vec<String>,
/// Geographic center of shift (lat, lon): the mean location of the affected
/// sensors found in the network, or `None` when none could be resolved
pub center: Option<(f64, f64)>,
/// Radius of effect (km); the detector currently fills in a fixed
/// placeholder of 100.0
pub radius_km: Option<f64>,
/// Primary variable affected; taken from the first observation of the
/// window, falling back to `WeatherVariable::Temperature`
pub primary_variable: WeatherVariable,
/// Confidence score (0-1)
pub confidence: f64,
/// Evidence supporting the detection
pub evidence: Vec<ShiftEvidence>,
/// Human-readable one-line summary of the shift
pub interpretation: String,
}
/// Type of regime shift.
///
/// The detector code in this file only ever assigns the first four variants;
/// `SeasonalTransition` and `Unknown` are not produced by it.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum ShiftType {
/// Network fragmentation (min-cut decreased significantly; assigned when the
/// relative change is below -0.3)
Fragmentation,
/// Network consolidation (min-cut increased; assigned when the relative
/// change is above 0.3)
Consolidation,
/// Localized disruption (subset of sensors); assigned for smaller changes
/// when the window network has fewer than 10 nodes
LocalizedDisruption,
/// Global pattern change; assigned for smaller changes in networks with
/// 10 or more nodes
GlobalPatternChange,
/// Seasonal transition
SeasonalTransition,
/// Unknown type
Unknown,
}
/// Severity of regime shift.
///
/// Variants are declared from least to most severe, so the derived ordering
/// gives `Minor < Moderate < Major < Extreme`; `shifts_by_severity` relies on
/// that ordering when filtering.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Ord, PartialOrd)]
pub enum ShiftSeverity {
/// Minor shift, might be noise (magnitude below 0.1)
Minor,
/// Moderate shift, notable (magnitude from 0.1 up to, not including, 0.3)
Moderate,
/// Major shift, significant (magnitude from 0.3 up to, not including, 0.5)
Major,
/// Extreme shift, exceptional (magnitude of 0.5 or more)
Extreme,
}
impl ShiftSeverity {
/// Convert from magnitude
pub fn from_magnitude(magnitude: f64) -> Self {
if magnitude < 0.1 {
ShiftSeverity::Minor
} else if magnitude < 0.3 {
ShiftSeverity::Moderate
} else if magnitude < 0.5 {
ShiftSeverity::Major
} else {
ShiftSeverity::Extreme
}
}
}
/// One piece of evidence attached to a [`RegimeShift`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShiftEvidence {
/// Evidence label; the detector emits `mincut_change`, `affected_sensors`
/// and `network_size`
pub evidence_type: String,
/// Numeric value backing the evidence (signed relative change, sensor
/// count, or node count, depending on `evidence_type`)
pub value: f64,
/// Human-readable explanation of `value`
pub explanation: String,
}
/// Regime shift detector using RuVector's min-cut.
///
/// The detector is stateful: every call to `detect` appends to the histories
/// and to the list of detected shifts; nothing in this file clears them.
pub struct RegimeShiftDetector {
/// Configuration
config: RegimeDetectorConfig,
/// Historical min-cut values as `(window start, min-cut estimate)`, one
/// entry per evaluated window
mincut_history: Vec<(DateTime<Utc>, f64)>,
/// Historical partition info as `(window start, side A node IDs, side B
/// node IDs)`; only recorded for windows that produced a partition
partition_history: Vec<(DateTime<Utc>, Vec<String>, Vec<String>)>,
/// Detected shifts, in detection order
detected_shifts: Vec<RegimeShift>,
}
/// Configuration for regime detection.
///
/// NOTE(review): in the detector code visible in this file only
/// `window_hours`, `slide_hours`, `detection_threshold` and `min_sensors` are
/// read; `approximate`, `epsilon` and `lookback_windows` are stored but not
/// consulted here — confirm whether other code uses them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegimeDetectorConfig {
/// Window size (hours)
pub window_hours: u32,
/// Slide step (hours): how far the window start advances each iteration
pub slide_hours: u32,
/// Minimum change threshold for detection, compared against the absolute
/// relative change of the min-cut estimate between consecutive windows
pub detection_threshold: f64,
/// Use approximate min-cut
pub approximate: bool,
/// Approximation epsilon
pub epsilon: f64,
/// Minimum sensors for valid detection; a window is also skipped unless it
/// holds at least ten observations per required sensor
pub min_sensors: usize,
/// Lookback windows for trend analysis
pub lookback_windows: usize,
}
impl Default for RegimeDetectorConfig {
    /// Defaults: a one-week window that slides one day at a time, a 15%
    /// detection threshold, approximate min-cut with epsilon 0.1, at least
    /// five sensors, and ten lookback windows.
    fn default() -> Self {
        const HOURS_PER_DAY: u32 = 24;
        const DAYS_PER_WEEK: u32 = 7;

        let window_hours = DAYS_PER_WEEK * HOURS_PER_DAY; // 168 hours
        let slide_hours = HOURS_PER_DAY;

        Self {
            window_hours,
            slide_hours,
            detection_threshold: 0.15,
            approximate: true,
            epsilon: 0.1,
            min_sensors: 5,
            lookback_windows: 10,
        }
    }
}
impl RegimeShiftDetector {
/// Build a detector that starts with empty histories and no recorded shifts.
///
/// `config` is stored unchanged and read by later calls to `detect`.
pub fn new(config: RegimeDetectorConfig) -> Self {
    let mincut_history = Vec::new();
    let partition_history = Vec::new();
    let detected_shifts = Vec::new();

    Self {
        config,
        mincut_history,
        partition_history,
        detected_shifts,
    }
}
/// Detect regime shifts in a sensor network over time.
///
/// Observations are sorted by timestamp and scanned with a sliding window of
/// `config.window_hours`, advanced by `config.slide_hours` (a step of zero is
/// treated as one hour so the scan always terminates). For every window that
/// holds at least `config.min_sensors * 10` observations the method:
/// 1. rebuilds the correlation graph from that window's observations,
/// 2. computes the window's min-cut estimate and appends it to the history,
/// 3. compares it with the previously recorded estimate and stores a
///    [`RegimeShift`] when the absolute relative change exceeds
///    `config.detection_threshold`.
///
/// Histories and detected shifts accumulate across calls, so the first window
/// of a later call is compared against the last window of the previous call.
///
/// # Returns
/// A copy of every shift recorded so far (not only those found by this call).
/// An empty vector is returned immediately when there are no observations or
/// when the base network has fewer than `config.min_sensors` nodes.
pub fn detect(
    &mut self,
    base_network: &SensorNetwork,
    observations: &[ClimateObservation],
) -> Vec<RegimeShift> {
    if observations.is_empty() || base_network.nodes.len() < self.config.min_sensors {
        return vec![];
    }

    // Stable sort: observations sharing a timestamp keep their input order.
    let mut sorted_obs = observations.to_vec();
    sorted_obs.sort_by_key(|o| o.timestamp);

    let (start_time, end_time) = match (sorted_obs.first(), sorted_obs.last()) {
        (Some(first), Some(last)) => (first.timestamp, last.timestamp),
        _ => return vec![],
    };

    let window_duration = chrono::Duration::hours(i64::from(self.config.window_hours));
    // A zero slide step would never advance the window and spin forever, so
    // fall back to the smallest step the configuration can express.
    let slide_duration = chrono::Duration::hours(i64::from(self.config.slide_hours.max(1)));
    let min_window_obs = self.config.min_sensors.saturating_mul(10);

    let mut current_start = start_time;
    while current_start + window_duration <= end_time {
        let window_end = current_start + window_duration;

        // The data is sorted by timestamp, so the half-open window
        // [current_start, window_end) is a contiguous slice that can be located
        // by binary search instead of re-scanning and cloning every observation.
        let lo = sorted_obs.partition_point(|o| o.timestamp < current_start);
        let hi = sorted_obs.partition_point(|o| o.timestamp < window_end);
        let window_obs = &sorted_obs[lo..hi];

        if window_obs.len() >= min_window_obs {
            let window_network = self.build_window_network(base_network, window_obs);
            let (mincut_value, partition) = self.compute_mincut(&window_network);

            // Read the previous estimate before recording the new one.
            let prev_mincut = self.mincut_history.last().map(|entry| entry.1);
            self.mincut_history.push((current_start, mincut_value));
            if let Some((side_a, side_b)) = partition {
                self.partition_history.push((current_start, side_a, side_b));
            }

            if let Some(prev_mincut) = prev_mincut {
                // Relative change; the 0.01 floor keeps a near-zero baseline
                // from blowing up the ratio.
                let delta = (mincut_value - prev_mincut) / prev_mincut.max(0.01);
                if delta.abs() > self.config.detection_threshold {
                    // Number from the stored shifts so ids stay unique even
                    // when `detect` is called more than once.
                    let shift_id = format!("shift_{}", self.detected_shifts.len());
                    let shift = self.create_shift_record(
                        &shift_id,
                        current_start,
                        prev_mincut,
                        mincut_value,
                        delta,
                        &window_network,
                        window_obs,
                    );
                    self.detected_shifts.push(shift);
                }
            }
        }

        current_start = current_start + slide_duration;
    }

    self.detected_shifts.clone()
}
/// Build the correlation network for one window of observations.
///
/// Starts from a clone of `base_network`, discards its edges, and adds one
/// edge for every pair of stations present in `observations` whose absolute
/// correlation (see `compute_correlation`) is above 0.3. The edge weight is
/// the absolute correlation; `distance_km` is left at 0.0 and `variables`
/// empty, and `overlap_count` is the smaller of the two stations' observation
/// counts (an upper bound, not the exact number of overlapping days).
///
/// Station pairs are visited in sorted ID order so that the resulting edge
/// list is the same on every run for the same input.
fn build_window_network(
    &self,
    base_network: &SensorNetwork,
    observations: &[ClimateObservation],
) -> SensorNetwork {
    // Pairs at or below this absolute correlation are not connected.
    const MIN_EDGE_CORRELATION: f64 = 0.3;

    let mut network = base_network.clone();

    // Group the window's readings per station.
    let mut station_values: HashMap<&str, Vec<(DateTime<Utc>, f64)>> = HashMap::new();
    for obs in observations {
        station_values
            .entry(&obs.station_id)
            .or_default()
            .push((obs.timestamp, obs.value));
    }

    // Edges are recomputed from scratch for this window.
    network.edges.clear();

    // HashMap iteration order is arbitrary; sorting the keys makes the order
    // in which edges are added (and anything summed over them) reproducible.
    let mut station_ids: Vec<&str> = station_values.keys().copied().collect();
    station_ids.sort_unstable();

    for (i, &id_i) in station_ids.iter().enumerate() {
        let vals_i = &station_values[id_i];
        for &id_j in &station_ids[i + 1..] {
            let vals_j = &station_values[id_j];
            let correlation = self.compute_correlation(vals_i, vals_j);
            if correlation.abs() > MIN_EDGE_CORRELATION {
                network.add_edge(SensorEdge {
                    source: id_i.to_string(),
                    target: id_j.to_string(),
                    correlation,
                    distance_km: 0.0, // Would compute from locations
                    weight: correlation.abs(),
                    variables: vec![],
                    overlap_count: vals_i.len().min(vals_j.len()),
                });
            }
        }
    }

    network
}
/// Compute the Pearson correlation between two time series at daily resolution.
///
/// Each series is bucketed by UTC day number (seconds since the Unix epoch,
/// floor-divided by 86 400); when a series has several readings on one day the
/// last one in slice order is used. Only days present in both series
/// contribute.
///
/// # Returns
/// The correlation coefficient clamped to `[-1.0, 1.0]`, or `0.0` when fewer
/// than three days overlap or when either series has no variance over the
/// overlapping days.
fn compute_correlation(&self, a: &[(DateTime<Utc>, f64)], b: &[(DateTime<Utc>, f64)]) -> f64 {
    const SECONDS_PER_DAY: i64 = 86_400;

    // Floor division is required here: plain `/` truncates toward zero and
    // would merge the two days on either side of the epoch into one bucket
    // for timestamps before 1970.
    let daily = |series: &[(DateTime<Utc>, f64)]| -> HashMap<i64, f64> {
        let mut by_day = HashMap::new();
        for (ts, val) in series {
            by_day.insert(ts.timestamp().div_euclid(SECONDS_PER_DAY), *val);
        }
        by_day
    };
    let map_a = daily(a);
    let map_b = daily(b);

    // Pair up the values of the days both series cover.
    let mut paired: Vec<(i64, f64, f64)> = map_a
        .iter()
        .filter_map(|(day, &val_a)| map_b.get(day).map(|&val_b| (*day, val_a, val_b)))
        .collect();
    if paired.len() < 3 {
        return 0.0;
    }
    // Sum in chronological order so the floating-point result does not depend
    // on HashMap iteration order.
    paired.sort_unstable_by_key(|&(day, _, _)| day);

    let n = paired.len() as f64;
    let mean_a = paired.iter().map(|&(_, x, _)| x).sum::<f64>() / n;
    let mean_b = paired.iter().map(|&(_, _, y)| y).sum::<f64>() / n;

    let mut cov = 0.0_f64;
    let mut var_a = 0.0_f64;
    let mut var_b = 0.0_f64;
    for &(_, x, y) in &paired {
        let da = x - mean_a;
        let db = y - mean_b;
        cov += da * db;
        var_a += da * da;
        var_b += db * db;
    }

    // Test the variances separately: their product can underflow to zero even
    // when both are positive.
    if var_a > 0.0 && var_b > 0.0 {
        // Rounding can push the ratio marginally outside the valid range.
        (cov / (var_a.sqrt() * var_b.sqrt())).clamp(-1.0, 1.0)
    } else {
        0.0
    }
}
/// Compute a min-cut estimate and a two-way partition for `network`.
///
/// This is a simplified stand-in, not a real min-cut: the estimate is the
/// total edge weight divided by the average node degree (floored at 1.0), and
/// the partition simply splits the node list in half in the order returned by
/// `node_id_mapping().values()`.
/// In production, use `ruvector_mincut::MinCutBuilder`.
///
/// # Returns
/// `(0.0, None)` when the network has no edges; otherwise the estimate and
/// `Some((side_a, side_b))`, where `side_a` holds the first half of the nodes
/// (rounded down) and `side_b` the rest.
fn compute_mincut(&self, network: &SensorNetwork) -> (f64, Option<(Vec<String>, Vec<String>)>) {
    // Convert to min-cut format
    let edges = network.to_mincut_edges();
    if edges.is_empty() {
        return (0.0, None);
    }
    let node_mapping = network.node_id_mapping();

    // `edges` is known to be non-empty from here on, so no second emptiness
    // check is needed before dividing.
    let total_weight: f64 = edges.iter().map(|(_, _, w)| w).sum();
    let avg_degree = (2.0 * edges.len() as f64) / node_mapping.len() as f64;
    let approx_mincut = total_weight / avg_degree.max(1.0);

    // Simple partition (would use actual min-cut partition): split the node
    // list in place instead of cloning each half a second time.
    let mut side_a: Vec<String> = node_mapping.values().cloned().collect();
    let mid = side_a.len() / 2;
    let side_b = side_a.split_off(mid);

    (approx_mincut, Some((side_a, side_b)))
}
/// Assemble the [`RegimeShift`] record for one detected change.
///
/// `delta` is the signed relative min-cut change; its absolute value becomes
/// the magnitude and determines the severity. The shift type is
/// `Fragmentation` for `delta < -0.3`, `Consolidation` for `delta > 0.3`, and
/// otherwise `LocalizedDisruption` or `GlobalPatternChange` depending on
/// whether the network has fewer than 10 nodes. The primary variable is that
/// of the first observation (temperature when there are none), and the radius
/// is a fixed 100 km placeholder.
fn create_shift_record(
    &self,
    id: &str,
    timestamp: DateTime<Utc>,
    mincut_before: f64,
    mincut_after: f64,
    delta: f64,
    network: &SensorNetwork,
    observations: &[ClimateObservation],
) -> RegimeShift {
    let node_count = network.nodes.len();
    let magnitude = delta.abs();

    let shift_type = match delta {
        d if d < -0.3 => ShiftType::Fragmentation,
        d if d > 0.3 => ShiftType::Consolidation,
        _ if node_count < 10 => ShiftType::LocalizedDisruption,
        _ => ShiftType::GlobalPatternChange,
    };
    let severity = ShiftSeverity::from_magnitude(magnitude);

    // Stations whose readings vary unusually much, and where they sit.
    let affected_sensors = self.find_affected_sensors(network, observations);
    let center = self.compute_geographic_center(&affected_sensors, network);
    let affected_count = affected_sensors.len();

    let primary_variable = match observations.first() {
        Some(first_obs) => first_obs.variable,
        None => WeatherVariable::Temperature,
    };

    let confidence = self.compute_confidence(magnitude, node_count, observations.len());

    let direction = if delta > 0.0 { "increased" } else { "decreased" };
    let mut evidence = Vec::with_capacity(3);
    evidence.push(ShiftEvidence {
        evidence_type: "mincut_change".to_string(),
        value: delta,
        explanation: format!("Min-cut {} by {:.1}%", direction, magnitude * 100.0),
    });
    evidence.push(ShiftEvidence {
        evidence_type: "affected_sensors".to_string(),
        value: affected_count as f64,
        explanation: format!("{} sensors significantly affected", affected_count),
    });
    evidence.push(ShiftEvidence {
        evidence_type: "network_size".to_string(),
        value: node_count as f64,
        explanation: format!("Network has {} sensors", node_count),
    });

    let interpretation = self.interpret_shift(shift_type, severity, &affected_sensors);

    RegimeShift {
        id: id.to_string(),
        timestamp,
        shift_type,
        severity,
        mincut_before,
        mincut_after,
        magnitude,
        affected_sensors,
        center,
        radius_km: Some(100.0), // Would compute from sensor positions
        primary_variable,
        confidence,
        evidence,
        interpretation,
    }
}
/// Find the stations whose readings vary unusually much within the window.
///
/// The population variance of `value` is computed per station (stations with
/// fewer than three observations are ignored). A station is reported when its
/// variance is more than 1.5 times the mean variance of the considered
/// stations.
///
/// `network` is currently not consulted; the result is derived from
/// `observations` alone.
///
/// # Returns
/// The IDs of the affected stations, sorted ascending so the output is the
/// same on every run for the same input.
fn find_affected_sensors(
    &self,
    network: &SensorNetwork,
    observations: &[ClimateObservation],
) -> Vec<String> {
    // Kept in the signature for callers; not needed by the current heuristic.
    let _ = network;

    // Per station: (sum, sum of squares, count).
    let mut station_stats: HashMap<&str, (f64, f64, usize)> = HashMap::new();
    for obs in observations {
        let (sum, sum_sq, count) = station_stats
            .entry(&obs.station_id)
            .or_insert((0.0, 0.0, 0));
        *sum += obs.value;
        *sum_sq += obs.value * obs.value;
        *count += 1;
    }

    // Compute variance for each station with enough data.
    let mut variances: Vec<(&str, f64)> = station_stats
        .iter()
        .filter(|(_, (_, _, count))| *count >= 3)
        .map(|(id, (sum, sum_sq, count))| {
            let n = *count as f64;
            let mean = sum / n;
            let variance = sum_sq / n - mean * mean;
            // E[x^2] - mean^2 can round slightly below zero for near-constant
            // series; a variance is never negative.
            (*id, if variance < 0.0 { 0.0 } else { variance })
        })
        .collect();
    // HashMap iteration order is arbitrary; fix the order before summing and
    // before building the result.
    variances.sort_unstable_by(|left, right| left.0.cmp(right.0));

    let avg_variance: f64 =
        variances.iter().map(|(_, v)| v).sum::<f64>() / variances.len().max(1) as f64;
    let threshold = avg_variance * 1.5;

    variances
        .iter()
        .filter(|(_, v)| *v > threshold)
        .map(|(id, _)| id.to_string())
        .collect()
}
/// Average the locations of the given sensors.
///
/// IDs that `network.get_node` cannot resolve are skipped. The result is the
/// plain arithmetic mean of the two `location` components.
/// NOTE(review): if these are latitude/longitude in degrees, a plain mean is
/// misleading for sensor sets that straddle the antimeridian — confirm.
///
/// # Returns
/// `None` when `sensor_ids` is empty or none of the IDs resolve to a node.
fn compute_geographic_center(
    &self,
    sensor_ids: &[String],
    network: &SensorNetwork,
) -> Option<(f64, f64)> {
    let (lat_total, lon_total, matched) = sensor_ids
        .iter()
        .filter_map(|id| network.get_node(id))
        .fold((0.0_f64, 0.0_f64, 0_i32), |(lat, lon, seen), node| {
            (lat + node.location.0, lon + node.location.1, seen + 1)
        });

    if matched == 0 {
        return None;
    }

    let divisor = matched as f64;
    Some((lat_total / divisor, lon_total / divisor))
}
/// Combine three saturating scores into a confidence value.
///
/// The magnitude (limited to 0..=1) contributes 40%, the sensor count
/// (saturating at 50 sensors) 30%, and the observation count (saturating at
/// 1000 observations) 30%. The total is capped at 1.0.
fn compute_confidence(&self, magnitude: f64, sensor_count: usize, obs_count: usize) -> f64 {
    let from_magnitude = magnitude.min(1.0).max(0.0) * 0.4;
    let from_sensors = (sensor_count as f64 / 50.0).min(1.0) * 0.3;
    let from_observations = (obs_count as f64 / 1000.0).min(1.0) * 0.3;

    let total = from_magnitude + from_sensors + from_observations;
    total.min(1.0)
}
/// Render a one-line, human-readable summary of a shift.
///
/// The sentence has the form
/// `"<Severity> <type description> detected affecting <n> sensors"`, where
/// `<n>` is the number of entries in `affected_sensors`.
fn interpret_shift(
    &self,
    shift_type: ShiftType,
    severity: ShiftSeverity,
    affected_sensors: &[String],
) -> String {
    let description = match shift_type {
        ShiftType::Fragmentation => "network fragmentation (decreased correlation)",
        ShiftType::Consolidation => "network consolidation (increased correlation)",
        ShiftType::LocalizedDisruption => "localized weather pattern disruption",
        ShiftType::GlobalPatternChange => "large-scale pattern change",
        ShiftType::SeasonalTransition => "seasonal transition",
        ShiftType::Unknown => "undetermined regime change",
    };

    let level = match severity {
        ShiftSeverity::Minor => "Minor",
        ShiftSeverity::Moderate => "Moderate",
        ShiftSeverity::Major => "Major",
        ShiftSeverity::Extreme => "Extreme",
    };

    let sensor_total = affected_sensors.len();
    format!(
        "{} {} detected affecting {} sensors",
        level, description, sensor_total
    )
}
/// Borrow the recorded `(window start, min-cut estimate)` pairs, oldest first.
pub fn mincut_history(&self) -> &[(DateTime<Utc>, f64)] {
    self.mincut_history.as_slice()
}
/// Borrow every shift recorded so far, in detection order.
pub fn detected_shifts(&self) -> &[RegimeShift] {
    self.detected_shifts.as_slice()
}
/// Collect references to the recorded shifts whose severity is at least
/// `min_severity`, preserving detection order.
pub fn shifts_by_severity(&self, min_severity: ShiftSeverity) -> Vec<&RegimeShift> {
    let mut matching = Vec::new();
    for shift in &self.detected_shifts {
        if shift.severity >= min_severity {
            matching.push(shift);
        }
    }
    matching
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// One representative magnitude from each severity band.
    #[test]
    fn test_shift_severity() {
        let cases = [
            (0.05, ShiftSeverity::Minor),
            (0.2, ShiftSeverity::Moderate),
            (0.4, ShiftSeverity::Major),
            (0.6, ShiftSeverity::Extreme),
        ];
        for &(magnitude, expected) in &cases {
            assert_eq!(ShiftSeverity::from_magnitude(magnitude), expected);
        }
    }

    /// A freshly built detector has no recorded shifts.
    #[test]
    fn test_detector_creation() {
        let detector = RegimeShiftDetector::new(RegimeDetectorConfig::default());
        assert!(detector.detected_shifts().is_empty());
    }
}