Files
wifi-densepose/vendor/ruvector/examples/dna/src/epigenomics.rs

323 lines
10 KiB
Rust

//! Epigenomics analysis module
//!
//! Provides methylation profiling and epigenetic age prediction
//! using the Horvath clock model.
use serde::{Deserialize, Serialize};
/// A CpG site with methylation data
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CpGSite {
/// Chromosome number
pub chromosome: u8,
/// Genomic position
pub position: u64,
/// Methylation level (beta value, 0.0 to 1.0)
pub methylation_level: f32,
}
/// Methylation profile containing CpG site measurements
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MethylationProfile {
/// CpG sites with measured methylation levels
pub sites: Vec<CpGSite>,
}
impl MethylationProfile {
/// Create a methylation profile from position and beta value arrays
pub fn from_beta_values(positions: Vec<(u8, u64)>, betas: Vec<f32>) -> Self {
let sites = positions
.into_iter()
.zip(betas.into_iter())
.map(|((chr, pos), beta)| CpGSite {
chromosome: chr,
position: pos,
methylation_level: beta.clamp(0.0, 1.0),
})
.collect();
Self { sites }
}
/// Calculate mean methylation across all sites
pub fn mean_methylation(&self) -> f32 {
if self.sites.is_empty() {
return 0.0;
}
let sum: f32 = self.sites.iter().map(|s| s.methylation_level).sum();
sum / self.sites.len() as f32
}
/// Calculate methylation entropy (Shannon entropy of beta values)
///
/// High entropy indicates heterogeneous methylation (potential tumor heterogeneity)
pub fn methylation_entropy(&self) -> f64 {
if self.sites.is_empty() {
return 0.0;
}
// Bin methylation into 10 bins [0, 0.1), [0.1, 0.2), ..., [0.9, 1.0]
let mut bins = [0u32; 10];
for site in &self.sites {
let bin = ((site.methylation_level * 10.0) as usize).min(9);
bins[bin] += 1;
}
let n = self.sites.len() as f64;
let mut entropy = 0.0;
for &count in &bins {
if count > 0 {
let p = count as f64 / n;
entropy -= p * p.ln();
}
}
entropy
}
/// Calculate extreme methylation ratio
///
/// Fraction of sites with beta < 0.1 (hypomethylated) or > 0.9 (hypermethylated).
/// High ratio indicates global methylation disruption (cancer hallmark).
pub fn extreme_methylation_ratio(&self) -> f32 {
if self.sites.is_empty() {
return 0.0;
}
let extreme_count = self
.sites
.iter()
.filter(|s| s.methylation_level < 0.1 || s.methylation_level > 0.9)
.count();
extreme_count as f32 / self.sites.len() as f32
}
}
/// Horvath epigenetic clock for biological age prediction
///
/// Uses a simplified linear model based on CpG site methylation levels
/// to predict biological age.
pub struct HorvathClock {
/// Intercept term
intercept: f64,
/// Coefficient per CpG site bin
coefficients: Vec<f64>,
/// Number of bins to partition sites into
num_bins: usize,
}
impl HorvathClock {
/// Create the default Horvath clock model
///
/// Uses a simplified model with binned methylation values.
/// Real implementation would use 353 specific CpG sites.
pub fn default_clock() -> Self {
Self {
intercept: 30.0,
coefficients: vec![
-15.0, // Low methylation bin (young)
10.0, // High methylation bin (age-associated)
0.5, // Neutral bin
],
num_bins: 3,
}
}
/// Predict biological age from a methylation profile
pub fn predict_age(&self, profile: &MethylationProfile) -> f64 {
if profile.sites.is_empty() {
return self.intercept;
}
// Partition sites into bins and compute mean methylation per bin
let bin_size = profile.sites.len() / self.num_bins.max(1);
let mut age = self.intercept;
for (bin_idx, coefficient) in self.coefficients.iter().enumerate() {
let start = bin_idx * bin_size;
let end = ((bin_idx + 1) * bin_size).min(profile.sites.len());
if start >= profile.sites.len() {
break;
}
let bin_sites = &profile.sites[start..end];
if !bin_sites.is_empty() {
let mean_meth: f64 = bin_sites
.iter()
.map(|s| s.methylation_level as f64)
.sum::<f64>()
/ bin_sites.len() as f64;
age += coefficient * mean_meth;
}
}
age.max(0.0)
}
/// Calculate age acceleration (difference between biological and chronological age)
///
/// Positive values indicate accelerated aging (associated with mortality risk).
/// Negative values indicate decelerated aging.
pub fn age_acceleration(predicted_age: f64, chronological_age: f64) -> f64 {
predicted_age - chronological_age
}
}
/// Cancer signal detector using methylation patterns
///
/// Combines methylation entropy and extreme methylation ratio
/// to produce a cancer risk score (0.0 to 1.0).
pub struct CancerSignalDetector {
/// Entropy weight in the combined score
entropy_weight: f64,
/// Extreme ratio weight
extreme_weight: f64,
/// Threshold for elevated cancer risk
risk_threshold: f64,
}
impl CancerSignalDetector {
/// Create with default parameters
pub fn new() -> Self {
Self {
entropy_weight: 0.4,
extreme_weight: 0.6,
risk_threshold: 0.3,
}
}
/// Detect cancer signal from methylation profile
///
/// Returns (risk_score, is_elevated) where risk_score is 0.0-1.0
/// and is_elevated indicates whether the score exceeds the threshold.
pub fn detect(&self, profile: &MethylationProfile) -> CancerSignalResult {
if profile.sites.is_empty() {
return CancerSignalResult {
risk_score: 0.0,
is_elevated: false,
entropy: 0.0,
extreme_ratio: 0.0,
};
}
let entropy = profile.methylation_entropy();
let extreme_ratio = profile.extreme_methylation_ratio() as f64;
// Normalize entropy to 0-1 range (max entropy for 10 bins = ln(10) ≈ 2.302)
let normalized_entropy = (entropy / 2.302).min(1.0);
let risk_score = (self.entropy_weight * normalized_entropy
+ self.extreme_weight * extreme_ratio)
.min(1.0);
CancerSignalResult {
risk_score,
is_elevated: risk_score >= self.risk_threshold,
entropy,
extreme_ratio,
}
}
}
impl Default for CancerSignalDetector {
fn default() -> Self {
Self::new()
}
}
/// Result from cancer signal detection
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CancerSignalResult {
/// Combined risk score (0.0 to 1.0)
pub risk_score: f64,
/// Whether the risk score exceeds the threshold
pub is_elevated: bool,
/// Raw methylation entropy
pub entropy: f64,
/// Fraction of extreme methylation sites
pub extreme_ratio: f64,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_methylation_profile() {
let positions = vec![(1, 1000), (1, 2000)];
let betas = vec![0.3, 0.7];
let profile = MethylationProfile::from_beta_values(positions, betas);
assert_eq!(profile.sites.len(), 2);
assert!((profile.mean_methylation() - 0.5).abs() < 0.001);
}
#[test]
fn test_horvath_clock() {
let clock = HorvathClock::default_clock();
let positions = vec![(1, 1000), (1, 2000), (1, 3000)];
let betas = vec![0.5, 0.5, 0.5];
let profile = MethylationProfile::from_beta_values(positions, betas);
let age = clock.predict_age(&profile);
assert!(age > 0.0);
}
#[test]
fn test_age_acceleration() {
let accel = HorvathClock::age_acceleration(55.0, 50.0);
assert!((accel - 5.0).abs() < 0.001);
let decel = HorvathClock::age_acceleration(40.0, 50.0);
assert!((decel - (-10.0)).abs() < 0.001);
}
#[test]
fn test_methylation_entropy() {
// Uniform methylation = low entropy
let positions: Vec<(u8, u64)> = (0..100).map(|i| (1u8, i as u64)).collect();
let betas = vec![0.5; 100];
let profile = MethylationProfile::from_beta_values(positions, betas);
let entropy = profile.methylation_entropy();
assert!(
entropy < 0.1,
"Uniform should have low entropy: {}",
entropy
);
// Spread methylation = high entropy
let positions2: Vec<(u8, u64)> = (0..100).map(|i| (1u8, i as u64)).collect();
let betas2: Vec<f32> = (0..100).map(|i| i as f32 / 100.0).collect();
let profile2 = MethylationProfile::from_beta_values(positions2, betas2);
let entropy2 = profile2.methylation_entropy();
assert!(
entropy2 > 1.0,
"Spread should have high entropy: {}",
entropy2
);
}
#[test]
fn test_cancer_signal_detector() {
let detector = CancerSignalDetector::new();
// Normal profile (moderate methylation)
let positions: Vec<(u8, u64)> = (0..100).map(|i| (1u8, i as u64)).collect();
let betas = vec![0.5; 100];
let profile = MethylationProfile::from_beta_values(positions, betas);
let result = detector.detect(&profile);
assert!(!result.is_elevated, "Normal profile should not be elevated");
assert!(result.risk_score < 0.3);
// Cancerous profile (extreme methylation)
let positions2: Vec<(u8, u64)> = (0..100).map(|i| (1u8, i as u64)).collect();
let betas2: Vec<f32> = (0..100)
.map(|i| if i % 2 == 0 { 0.95 } else { 0.05 })
.collect();
let profile2 = MethylationProfile::from_beta_values(positions2, betas2);
let result2 = detector.detect(&profile2);
assert!(result2.is_elevated, "Cancer profile should be elevated");
assert!(result2.extreme_ratio > 0.8);
}
}