//! Epigenomics analysis module //! //! Provides methylation profiling and epigenetic age prediction //! using the Horvath clock model. use serde::{Deserialize, Serialize}; /// A CpG site with methylation data #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CpGSite { /// Chromosome number pub chromosome: u8, /// Genomic position pub position: u64, /// Methylation level (beta value, 0.0 to 1.0) pub methylation_level: f32, } /// Methylation profile containing CpG site measurements #[derive(Debug, Clone, Serialize, Deserialize)] pub struct MethylationProfile { /// CpG sites with measured methylation levels pub sites: Vec, } impl MethylationProfile { /// Create a methylation profile from position and beta value arrays pub fn from_beta_values(positions: Vec<(u8, u64)>, betas: Vec) -> Self { let sites = positions .into_iter() .zip(betas.into_iter()) .map(|((chr, pos), beta)| CpGSite { chromosome: chr, position: pos, methylation_level: beta.clamp(0.0, 1.0), }) .collect(); Self { sites } } /// Calculate mean methylation across all sites pub fn mean_methylation(&self) -> f32 { if self.sites.is_empty() { return 0.0; } let sum: f32 = self.sites.iter().map(|s| s.methylation_level).sum(); sum / self.sites.len() as f32 } /// Calculate methylation entropy (Shannon entropy of beta values) /// /// High entropy indicates heterogeneous methylation (potential tumor heterogeneity) pub fn methylation_entropy(&self) -> f64 { if self.sites.is_empty() { return 0.0; } // Bin methylation into 10 bins [0, 0.1), [0.1, 0.2), ..., [0.9, 1.0] let mut bins = [0u32; 10]; for site in &self.sites { let bin = ((site.methylation_level * 10.0) as usize).min(9); bins[bin] += 1; } let n = self.sites.len() as f64; let mut entropy = 0.0; for &count in &bins { if count > 0 { let p = count as f64 / n; entropy -= p * p.ln(); } } entropy } /// Calculate extreme methylation ratio /// /// Fraction of sites with beta < 0.1 (hypomethylated) or > 0.9 (hypermethylated). /// High ratio indicates global methylation disruption (cancer hallmark). pub fn extreme_methylation_ratio(&self) -> f32 { if self.sites.is_empty() { return 0.0; } let extreme_count = self .sites .iter() .filter(|s| s.methylation_level < 0.1 || s.methylation_level > 0.9) .count(); extreme_count as f32 / self.sites.len() as f32 } } /// Horvath epigenetic clock for biological age prediction /// /// Uses a simplified linear model based on CpG site methylation levels /// to predict biological age. pub struct HorvathClock { /// Intercept term intercept: f64, /// Coefficient per CpG site bin coefficients: Vec, /// Number of bins to partition sites into num_bins: usize, } impl HorvathClock { /// Create the default Horvath clock model /// /// Uses a simplified model with binned methylation values. /// Real implementation would use 353 specific CpG sites. pub fn default_clock() -> Self { Self { intercept: 30.0, coefficients: vec![ -15.0, // Low methylation bin (young) 10.0, // High methylation bin (age-associated) 0.5, // Neutral bin ], num_bins: 3, } } /// Predict biological age from a methylation profile pub fn predict_age(&self, profile: &MethylationProfile) -> f64 { if profile.sites.is_empty() { return self.intercept; } // Partition sites into bins and compute mean methylation per bin let bin_size = profile.sites.len() / self.num_bins.max(1); let mut age = self.intercept; for (bin_idx, coefficient) in self.coefficients.iter().enumerate() { let start = bin_idx * bin_size; let end = ((bin_idx + 1) * bin_size).min(profile.sites.len()); if start >= profile.sites.len() { break; } let bin_sites = &profile.sites[start..end]; if !bin_sites.is_empty() { let mean_meth: f64 = bin_sites .iter() .map(|s| s.methylation_level as f64) .sum::() / bin_sites.len() as f64; age += coefficient * mean_meth; } } age.max(0.0) } /// Calculate age acceleration (difference between biological and chronological age) /// /// Positive values indicate accelerated aging (associated with mortality risk). /// Negative values indicate decelerated aging. pub fn age_acceleration(predicted_age: f64, chronological_age: f64) -> f64 { predicted_age - chronological_age } } /// Cancer signal detector using methylation patterns /// /// Combines methylation entropy and extreme methylation ratio /// to produce a cancer risk score (0.0 to 1.0). pub struct CancerSignalDetector { /// Entropy weight in the combined score entropy_weight: f64, /// Extreme ratio weight extreme_weight: f64, /// Threshold for elevated cancer risk risk_threshold: f64, } impl CancerSignalDetector { /// Create with default parameters pub fn new() -> Self { Self { entropy_weight: 0.4, extreme_weight: 0.6, risk_threshold: 0.3, } } /// Detect cancer signal from methylation profile /// /// Returns (risk_score, is_elevated) where risk_score is 0.0-1.0 /// and is_elevated indicates whether the score exceeds the threshold. pub fn detect(&self, profile: &MethylationProfile) -> CancerSignalResult { if profile.sites.is_empty() { return CancerSignalResult { risk_score: 0.0, is_elevated: false, entropy: 0.0, extreme_ratio: 0.0, }; } let entropy = profile.methylation_entropy(); let extreme_ratio = profile.extreme_methylation_ratio() as f64; // Normalize entropy to 0-1 range (max entropy for 10 bins = ln(10) ≈ 2.302) let normalized_entropy = (entropy / 2.302).min(1.0); let risk_score = (self.entropy_weight * normalized_entropy + self.extreme_weight * extreme_ratio) .min(1.0); CancerSignalResult { risk_score, is_elevated: risk_score >= self.risk_threshold, entropy, extreme_ratio, } } } impl Default for CancerSignalDetector { fn default() -> Self { Self::new() } } /// Result from cancer signal detection #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CancerSignalResult { /// Combined risk score (0.0 to 1.0) pub risk_score: f64, /// Whether the risk score exceeds the threshold pub is_elevated: bool, /// Raw methylation entropy pub entropy: f64, /// Fraction of extreme methylation sites pub extreme_ratio: f64, } #[cfg(test)] mod tests { use super::*; #[test] fn test_methylation_profile() { let positions = vec![(1, 1000), (1, 2000)]; let betas = vec![0.3, 0.7]; let profile = MethylationProfile::from_beta_values(positions, betas); assert_eq!(profile.sites.len(), 2); assert!((profile.mean_methylation() - 0.5).abs() < 0.001); } #[test] fn test_horvath_clock() { let clock = HorvathClock::default_clock(); let positions = vec![(1, 1000), (1, 2000), (1, 3000)]; let betas = vec![0.5, 0.5, 0.5]; let profile = MethylationProfile::from_beta_values(positions, betas); let age = clock.predict_age(&profile); assert!(age > 0.0); } #[test] fn test_age_acceleration() { let accel = HorvathClock::age_acceleration(55.0, 50.0); assert!((accel - 5.0).abs() < 0.001); let decel = HorvathClock::age_acceleration(40.0, 50.0); assert!((decel - (-10.0)).abs() < 0.001); } #[test] fn test_methylation_entropy() { // Uniform methylation = low entropy let positions: Vec<(u8, u64)> = (0..100).map(|i| (1u8, i as u64)).collect(); let betas = vec![0.5; 100]; let profile = MethylationProfile::from_beta_values(positions, betas); let entropy = profile.methylation_entropy(); assert!( entropy < 0.1, "Uniform should have low entropy: {}", entropy ); // Spread methylation = high entropy let positions2: Vec<(u8, u64)> = (0..100).map(|i| (1u8, i as u64)).collect(); let betas2: Vec = (0..100).map(|i| i as f32 / 100.0).collect(); let profile2 = MethylationProfile::from_beta_values(positions2, betas2); let entropy2 = profile2.methylation_entropy(); assert!( entropy2 > 1.0, "Spread should have high entropy: {}", entropy2 ); } #[test] fn test_cancer_signal_detector() { let detector = CancerSignalDetector::new(); // Normal profile (moderate methylation) let positions: Vec<(u8, u64)> = (0..100).map(|i| (1u8, i as u64)).collect(); let betas = vec![0.5; 100]; let profile = MethylationProfile::from_beta_values(positions, betas); let result = detector.detect(&profile); assert!(!result.is_elevated, "Normal profile should not be elevated"); assert!(result.risk_score < 0.3); // Cancerous profile (extreme methylation) let positions2: Vec<(u8, u64)> = (0..100).map(|i| (1u8, i as u64)).collect(); let betas2: Vec = (0..100) .map(|i| if i % 2 == 0 { 0.95 } else { 0.05 }) .collect(); let profile2 = MethylationProfile::from_beta_values(positions2, betas2); let result2 = detector.detect(&profile2); assert!(result2.is_elevated, "Cancer profile should be elevated"); assert!(result2.extreme_ratio > 0.8); } }