feat: Implement ADR-014 SOTA signal processing (6 algorithms, 83 tests)
Add six research-grade signal processing algorithms to wifi-densepose-signal: - Conjugate Multiplication: CFO/SFO cancellation via antenna ratio (SpotFi) - Hampel Filter: Robust median/MAD outlier detection (50% contamination resistant) - Fresnel Zone Model: Physics-based breathing detection from chest displacement - CSI Spectrogram: STFT time-frequency generation with 4 window functions - Subcarrier Selection: Variance-ratio ranking for top-K motion-sensitive subcarriers - Body Velocity Profile: Domain-independent Doppler velocity mapping (Widar 3.0) All 313 workspace tests pass, 0 failures. Updated README with new capabilities. https://claude.ai/code/session_01Ki7pvEZtJDvqJkmyn6B714
This commit is contained in:
@@ -0,0 +1,240 @@
|
||||
//! Hampel Filter for robust outlier detection and removal.
|
||||
//!
|
||||
//! Uses running median and MAD (Median Absolute Deviation) instead of
|
||||
//! mean/std, making it resistant to up to 50% contamination — unlike
|
||||
//! Z-score methods where outliers corrupt the mean and mask themselves.
|
||||
//!
|
||||
//! # References
|
||||
//! - Hampel (1974), "The Influence Curve and its Role in Robust Estimation"
|
||||
//! - Used in WiGest (SenSys 2015), WiDance (MobiCom 2017)
|
||||
|
||||
/// Configuration for the Hampel filter.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct HampelConfig {
|
||||
/// Half-window size (total window = 2*half_window + 1)
|
||||
pub half_window: usize,
|
||||
/// Threshold in units of estimated σ (typically 3.0)
|
||||
pub threshold: f64,
|
||||
}
|
||||
|
||||
impl Default for HampelConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
half_window: 3,
|
||||
threshold: 3.0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of Hampel filtering.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct HampelResult {
|
||||
/// Filtered signal (outliers replaced with local median)
|
||||
pub filtered: Vec<f64>,
|
||||
/// Indices where outliers were detected
|
||||
pub outlier_indices: Vec<usize>,
|
||||
/// Local median values at each sample
|
||||
pub medians: Vec<f64>,
|
||||
/// Estimated local σ at each sample
|
||||
pub sigma_estimates: Vec<f64>,
|
||||
}
|
||||
|
||||
/// Scale factor converting MAD to σ for Gaussian distributions.
|
||||
/// MAD = 0.6745 * σ → σ = MAD / 0.6745 = 1.4826 * MAD
|
||||
const MAD_SCALE: f64 = 1.4826;
|
||||
|
||||
/// Apply Hampel filter to a 1D signal.
|
||||
///
|
||||
/// For each sample, computes the median and MAD of the surrounding window.
|
||||
/// If the sample deviates from the median by more than `threshold * σ_est`,
|
||||
/// it is replaced with the median.
|
||||
pub fn hampel_filter(signal: &[f64], config: &HampelConfig) -> Result<HampelResult, HampelError> {
|
||||
if signal.is_empty() {
|
||||
return Err(HampelError::EmptySignal);
|
||||
}
|
||||
if config.half_window == 0 {
|
||||
return Err(HampelError::InvalidWindow);
|
||||
}
|
||||
|
||||
let n = signal.len();
|
||||
let mut filtered = signal.to_vec();
|
||||
let mut outlier_indices = Vec::new();
|
||||
let mut medians = Vec::with_capacity(n);
|
||||
let mut sigma_estimates = Vec::with_capacity(n);
|
||||
|
||||
for i in 0..n {
|
||||
let start = i.saturating_sub(config.half_window);
|
||||
let end = (i + config.half_window + 1).min(n);
|
||||
let window: Vec<f64> = signal[start..end].to_vec();
|
||||
|
||||
let med = median(&window);
|
||||
let mad = median_absolute_deviation(&window, med);
|
||||
let sigma = MAD_SCALE * mad;
|
||||
|
||||
medians.push(med);
|
||||
sigma_estimates.push(sigma);
|
||||
|
||||
let deviation = (signal[i] - med).abs();
|
||||
let is_outlier = if sigma > 1e-15 {
|
||||
// Normal case: compare deviation to threshold * sigma
|
||||
deviation > config.threshold * sigma
|
||||
} else {
|
||||
// Zero-MAD case: all window values identical except possibly this sample.
|
||||
// Any non-zero deviation from the median is an outlier.
|
||||
deviation > 1e-15
|
||||
};
|
||||
|
||||
if is_outlier {
|
||||
filtered[i] = med;
|
||||
outlier_indices.push(i);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(HampelResult {
|
||||
filtered,
|
||||
outlier_indices,
|
||||
medians,
|
||||
sigma_estimates,
|
||||
})
|
||||
}
|
||||
|
||||
/// Apply Hampel filter to each row of a 2D array (e.g., per-antenna CSI).
|
||||
pub fn hampel_filter_2d(
|
||||
data: &[Vec<f64>],
|
||||
config: &HampelConfig,
|
||||
) -> Result<Vec<HampelResult>, HampelError> {
|
||||
data.iter().map(|row| hampel_filter(row, config)).collect()
|
||||
}
|
||||
|
||||
/// Compute median of a slice (sorts a copy).
|
||||
fn median(data: &[f64]) -> f64 {
|
||||
if data.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
let mut sorted = data.to_vec();
|
||||
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
|
||||
let mid = sorted.len() / 2;
|
||||
if sorted.len() % 2 == 0 {
|
||||
(sorted[mid - 1] + sorted[mid]) / 2.0
|
||||
} else {
|
||||
sorted[mid]
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute MAD (Median Absolute Deviation) given precomputed median.
|
||||
fn median_absolute_deviation(data: &[f64], med: f64) -> f64 {
|
||||
let deviations: Vec<f64> = data.iter().map(|x| (x - med).abs()).collect();
|
||||
median(&deviations)
|
||||
}
|
||||
|
||||
/// Errors from Hampel filtering.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum HampelError {
|
||||
#[error("Signal is empty")]
|
||||
EmptySignal,
|
||||
#[error("Half-window must be > 0")]
|
||||
InvalidWindow,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_clean_signal_unchanged() {
|
||||
// A smooth sinusoid should have zero outliers
|
||||
let signal: Vec<f64> = (0..100)
|
||||
.map(|i| (i as f64 * 0.1).sin())
|
||||
.collect();
|
||||
|
||||
let result = hampel_filter(&signal, &HampelConfig::default()).unwrap();
|
||||
assert!(result.outlier_indices.is_empty());
|
||||
|
||||
for i in 0..signal.len() {
|
||||
assert!(
|
||||
(result.filtered[i] - signal[i]).abs() < 1e-10,
|
||||
"Clean signal modified at index {}",
|
||||
i
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_spike_detected() {
|
||||
let mut signal: Vec<f64> = vec![1.0; 50];
|
||||
signal[25] = 100.0; // Huge spike
|
||||
|
||||
let result = hampel_filter(&signal, &HampelConfig::default()).unwrap();
|
||||
assert!(result.outlier_indices.contains(&25));
|
||||
assert!((result.filtered[25] - 1.0).abs() < 1e-10); // Replaced with median
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_multiple_spikes() {
|
||||
let mut signal: Vec<f64> = (0..200)
|
||||
.map(|i| (i as f64 * 0.05).sin())
|
||||
.collect();
|
||||
|
||||
// Insert spikes
|
||||
signal[30] = 50.0;
|
||||
signal[100] = -50.0;
|
||||
signal[170] = 80.0;
|
||||
|
||||
let config = HampelConfig {
|
||||
half_window: 5,
|
||||
threshold: 3.0,
|
||||
};
|
||||
let result = hampel_filter(&signal, &config).unwrap();
|
||||
|
||||
assert!(result.outlier_indices.contains(&30));
|
||||
assert!(result.outlier_indices.contains(&100));
|
||||
assert!(result.outlier_indices.contains(&170));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_z_score_masking_resistance() {
|
||||
// 50 clean samples + many outliers: Z-score would fail, Hampel should work
|
||||
let mut signal: Vec<f64> = vec![0.0; 100];
|
||||
// Insert 30% contamination (Z-score would be confused)
|
||||
for i in (0..100).step_by(3) {
|
||||
signal[i] = 50.0;
|
||||
}
|
||||
|
||||
let config = HampelConfig {
|
||||
half_window: 5,
|
||||
threshold: 3.0,
|
||||
};
|
||||
let result = hampel_filter(&signal, &config).unwrap();
|
||||
|
||||
// The contaminated samples should be detected as outliers
|
||||
assert!(!result.outlier_indices.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_2d_filtering() {
|
||||
let rows = vec![
|
||||
vec![1.0, 1.0, 100.0, 1.0, 1.0, 1.0, 1.0],
|
||||
vec![2.0, 2.0, 2.0, 2.0, -80.0, 2.0, 2.0],
|
||||
];
|
||||
|
||||
let results = hampel_filter_2d(&rows, &HampelConfig::default()).unwrap();
|
||||
assert_eq!(results.len(), 2);
|
||||
assert!(results[0].outlier_indices.contains(&2));
|
||||
assert!(results[1].outlier_indices.contains(&4));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_median_computation() {
|
||||
assert!((median(&[1.0, 3.0, 2.0]) - 2.0).abs() < 1e-10);
|
||||
assert!((median(&[1.0, 2.0, 3.0, 4.0]) - 2.5).abs() < 1e-10);
|
||||
assert!((median(&[5.0]) - 5.0).abs() < 1e-10);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_signal_error() {
|
||||
assert!(matches!(
|
||||
hampel_filter(&[], &HampelConfig::default()),
|
||||
Err(HampelError::EmptySignal)
|
||||
));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user