Files
wifi-densepose/crates/rvf/rvf-federation/src/diff_privacy.rs
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

417 lines
13 KiB
Rust

//! Differential privacy primitives for federated learning.
//!
//! Provides calibrated noise injection, gradient clipping, and a Renyi
//! Differential Privacy (RDP) accountant for tracking cumulative privacy loss.
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use rand_distr::{Distribution, Normal};
use crate::error::FederationError;
use crate::types::{DiffPrivacyProof, NoiseMechanism};
/// Differential privacy engine for adding calibrated noise.
pub struct DiffPrivacyEngine {
/// Target epsilon (privacy loss bound).
epsilon: f64,
/// Target delta (probability of exceeding epsilon).
delta: f64,
/// L2 sensitivity bound.
sensitivity: f64,
/// Gradient clipping norm.
clipping_norm: f64,
/// Noise mechanism.
mechanism: NoiseMechanism,
/// Random number generator.
rng: StdRng,
}
impl DiffPrivacyEngine {
/// Create a new DP engine with Gaussian mechanism.
///
/// Default: epsilon=1.0, delta=1e-5 (strong privacy).
pub fn gaussian(
epsilon: f64,
delta: f64,
sensitivity: f64,
clipping_norm: f64,
) -> Result<Self, FederationError> {
if epsilon <= 0.0 {
return Err(FederationError::InvalidEpsilon(epsilon));
}
if delta <= 0.0 || delta >= 1.0 {
return Err(FederationError::InvalidDelta(delta));
}
Ok(Self {
epsilon,
delta,
sensitivity,
clipping_norm,
mechanism: NoiseMechanism::Gaussian,
rng: StdRng::from_rng(rand::thread_rng()).unwrap(),
})
}
/// Create a new DP engine with Laplace mechanism.
pub fn laplace(
epsilon: f64,
sensitivity: f64,
clipping_norm: f64,
) -> Result<Self, FederationError> {
if epsilon <= 0.0 {
return Err(FederationError::InvalidEpsilon(epsilon));
}
Ok(Self {
epsilon,
delta: 0.0,
sensitivity,
clipping_norm,
mechanism: NoiseMechanism::Laplace,
rng: StdRng::from_rng(rand::thread_rng()).unwrap(),
})
}
/// Create with a deterministic seed (for testing).
pub fn with_seed(mut self, seed: u64) -> Self {
self.rng = StdRng::seed_from_u64(seed);
self
}
/// Compute the Gaussian noise standard deviation (sigma).
fn gaussian_sigma(&self) -> f64 {
self.sensitivity * (2.0_f64 * (1.25_f64 / self.delta).ln()).sqrt() / self.epsilon
}
/// Compute the Laplace noise scale (b).
fn laplace_scale(&self) -> f64 {
self.sensitivity / self.epsilon
}
/// Clip a gradient vector to the configured L2 norm bound.
pub fn clip_gradients(&self, gradients: &mut [f64]) {
let norm: f64 = gradients.iter().map(|x| x * x).sum::<f64>().sqrt();
if norm > self.clipping_norm {
let scale = self.clipping_norm / norm;
for g in gradients.iter_mut() {
*g *= scale;
}
}
}
/// Add calibrated noise to a vector of parameters.
///
/// Clips gradients first, then adds noise per the configured mechanism.
pub fn add_noise(&mut self, params: &mut [f64]) -> DiffPrivacyProof {
self.clip_gradients(params);
match self.mechanism {
NoiseMechanism::Gaussian => {
let sigma = self.gaussian_sigma();
let normal = Normal::new(0.0, sigma).unwrap();
for p in params.iter_mut() {
*p += normal.sample(&mut self.rng);
}
DiffPrivacyProof {
epsilon: self.epsilon,
delta: self.delta,
mechanism: NoiseMechanism::Gaussian,
sensitivity: self.sensitivity,
clipping_norm: self.clipping_norm,
noise_scale: sigma,
noised_parameter_count: params.len() as u64,
}
}
NoiseMechanism::Laplace => {
let b = self.laplace_scale();
for p in params.iter_mut() {
// Laplace noise via inverse CDF: b * sign(u-0.5) * ln(1 - 2|u-0.5|)
let u: f64 = self.rng.gen::<f64>() - 0.5;
let noise = -b * u.signum() * (1.0 - 2.0 * u.abs()).ln();
*p += noise;
}
DiffPrivacyProof {
epsilon: self.epsilon,
delta: 0.0,
mechanism: NoiseMechanism::Laplace,
sensitivity: self.sensitivity,
clipping_norm: self.clipping_norm,
noise_scale: b,
noised_parameter_count: params.len() as u64,
}
}
}
}
/// Add noise to a single scalar value.
pub fn add_noise_scalar(&mut self, value: &mut f64) -> f64 {
let mut v = [*value];
self.add_noise(&mut v);
*value = v[0];
v[0]
}
/// Current epsilon setting.
pub fn epsilon(&self) -> f64 {
self.epsilon
}
/// Current delta setting.
pub fn delta(&self) -> f64 {
self.delta
}
}
// -- Privacy Accountant (RDP) ------------------------------------------------
/// Renyi Differential Privacy (RDP) accountant for tracking cumulative privacy loss.
///
/// Tracks privacy budget across multiple export rounds using RDP composition,
/// which provides tighter bounds than naive (epsilon, delta)-DP composition.
pub struct PrivacyAccountant {
/// Maximum allowed cumulative epsilon.
epsilon_limit: f64,
/// Target delta for conversion from RDP to (epsilon, delta)-DP.
target_delta: f64,
/// Accumulated RDP values at various alpha orders.
/// Each entry: (alpha_order, accumulated_rdp_epsilon)
rdp_alphas: Vec<(f64, f64)>,
/// History of exports: (timestamp, epsilon_spent, mechanism).
history: Vec<ExportRecord>,
}
/// Record of a single privacy-consuming export.
#[derive(Clone, Debug)]
pub struct ExportRecord {
/// UNIX timestamp of the export.
pub timestamp_s: u64,
/// Epsilon consumed by this export.
pub epsilon: f64,
/// Delta for this export (0 for pure epsilon-DP).
pub delta: f64,
/// Mechanism used.
pub mechanism: NoiseMechanism,
/// Number of parameters.
pub parameter_count: u64,
}
impl PrivacyAccountant {
/// Create a new accountant with the given budget.
pub fn new(epsilon_limit: f64, target_delta: f64) -> Self {
// Standard RDP alpha orders for accounting
let alphas: Vec<f64> = vec![
1.5, 1.75, 2.0, 2.5, 3.0, 4.0, 5.0, 6.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0,
1024.0,
];
let rdp_alphas = alphas.into_iter().map(|a| (a, 0.0)).collect();
Self {
epsilon_limit,
target_delta,
rdp_alphas,
history: Vec::new(),
}
}
/// Compute RDP epsilon for the Gaussian mechanism at a given alpha order.
fn gaussian_rdp(alpha: f64, sigma: f64) -> f64 {
alpha / (2.0 * sigma * sigma)
}
/// Convert RDP to (epsilon, delta)-DP for a given alpha order.
fn rdp_to_dp(alpha: f64, rdp_epsilon: f64, delta: f64) -> f64 {
rdp_epsilon - (delta.ln()) / (alpha - 1.0)
}
/// Record a Gaussian mechanism query.
pub fn record_gaussian(&mut self, sigma: f64, epsilon: f64, delta: f64, parameter_count: u64) {
// Accumulate RDP at each alpha order
for (alpha, rdp_eps) in &mut self.rdp_alphas {
*rdp_eps += Self::gaussian_rdp(*alpha, sigma);
}
self.history.push(ExportRecord {
timestamp_s: 0,
epsilon,
delta,
mechanism: NoiseMechanism::Gaussian,
parameter_count,
});
}
/// Record a Laplace mechanism query.
pub fn record_laplace(&mut self, epsilon: f64, parameter_count: u64) {
// For Laplace, RDP epsilon at order alpha is: alpha * eps / (alpha - 1)
// when alpha > 1
for (alpha, rdp_eps) in &mut self.rdp_alphas {
if *alpha > 1.0 {
*rdp_eps += *alpha * epsilon / (*alpha - 1.0);
}
}
self.history.push(ExportRecord {
timestamp_s: 0,
epsilon,
delta: 0.0,
mechanism: NoiseMechanism::Laplace,
parameter_count,
});
}
/// Get the current best (tightest) epsilon estimate.
pub fn current_epsilon(&self) -> f64 {
self.rdp_alphas
.iter()
.map(|(alpha, rdp_eps)| Self::rdp_to_dp(*alpha, *rdp_eps, self.target_delta))
.fold(f64::INFINITY, f64::min)
}
/// Remaining privacy budget.
pub fn remaining_budget(&self) -> f64 {
(self.epsilon_limit - self.current_epsilon()).max(0.0)
}
/// Check if we can afford another export with the given epsilon.
pub fn can_afford(&self, additional_epsilon: f64) -> bool {
self.current_epsilon() + additional_epsilon <= self.epsilon_limit
}
/// Check if budget is exhausted.
pub fn is_exhausted(&self) -> bool {
self.current_epsilon() >= self.epsilon_limit
}
/// Fraction of budget consumed (0.0 to 1.0+).
pub fn budget_fraction_used(&self) -> f64 {
self.current_epsilon() / self.epsilon_limit
}
/// Number of exports recorded.
pub fn export_count(&self) -> usize {
self.history.len()
}
/// Export history.
pub fn history(&self) -> &[ExportRecord] {
&self.history
}
/// Epsilon limit.
pub fn epsilon_limit(&self) -> f64 {
self.epsilon_limit
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn gaussian_engine_creates() {
let engine = DiffPrivacyEngine::gaussian(1.0, 1e-5, 1.0, 1.0);
assert!(engine.is_ok());
}
#[test]
fn invalid_epsilon_rejected() {
let engine = DiffPrivacyEngine::gaussian(0.0, 1e-5, 1.0, 1.0);
assert!(engine.is_err());
let engine = DiffPrivacyEngine::gaussian(-1.0, 1e-5, 1.0, 1.0);
assert!(engine.is_err());
}
#[test]
fn invalid_delta_rejected() {
let engine = DiffPrivacyEngine::gaussian(1.0, 0.0, 1.0, 1.0);
assert!(engine.is_err());
let engine = DiffPrivacyEngine::gaussian(1.0, 1.0, 1.0, 1.0);
assert!(engine.is_err());
}
#[test]
fn gradient_clipping() {
let engine = DiffPrivacyEngine::gaussian(1.0, 1e-5, 1.0, 1.0).unwrap();
let mut grads = vec![3.0, 4.0]; // norm = 5.0
engine.clip_gradients(&mut grads);
let norm: f64 = grads.iter().map(|x| x * x).sum::<f64>().sqrt();
assert!((norm - 1.0).abs() < 1e-6); // clipped to norm 1.0
}
#[test]
fn gradient_no_clip_when_small() {
let engine = DiffPrivacyEngine::gaussian(1.0, 1e-5, 1.0, 10.0).unwrap();
let mut grads = vec![3.0, 4.0]; // norm = 5.0, clip = 10.0
engine.clip_gradients(&mut grads);
assert!((grads[0] - 3.0).abs() < 1e-10);
assert!((grads[1] - 4.0).abs() < 1e-10);
}
#[test]
fn add_noise_gaussian_deterministic() {
let mut engine = DiffPrivacyEngine::gaussian(1.0, 1e-5, 1.0, 100.0)
.unwrap()
.with_seed(42);
let mut params = vec![1.0, 2.0, 3.0];
let original = params.clone();
let proof = engine.add_noise(&mut params);
assert_eq!(proof.mechanism, NoiseMechanism::Gaussian);
assert_eq!(proof.noised_parameter_count, 3);
// Params should be different from original (noise added)
assert!(params
.iter()
.zip(original.iter())
.any(|(a, b)| (a - b).abs() > 1e-10));
}
#[test]
fn add_noise_laplace_deterministic() {
let mut engine = DiffPrivacyEngine::laplace(1.0, 1.0, 100.0)
.unwrap()
.with_seed(42);
let mut params = vec![1.0, 2.0, 3.0];
let proof = engine.add_noise(&mut params);
assert_eq!(proof.mechanism, NoiseMechanism::Laplace);
assert_eq!(proof.noised_parameter_count, 3);
}
#[test]
fn privacy_accountant_initial_state() {
let acc = PrivacyAccountant::new(10.0, 1e-5);
assert_eq!(acc.export_count(), 0);
assert!(!acc.is_exhausted());
assert!(acc.can_afford(1.0));
assert!(acc.remaining_budget() > 9.9);
}
#[test]
fn privacy_accountant_tracks_gaussian() {
let mut acc = PrivacyAccountant::new(10.0, 1e-5);
// sigma=1.0 with epsilon=1.0 per query
acc.record_gaussian(1.0, 1.0, 1e-5, 100);
assert_eq!(acc.export_count(), 1);
let eps = acc.current_epsilon();
assert!(eps > 0.0);
assert!(eps < 10.0);
}
#[test]
fn privacy_accountant_composition() {
let mut acc = PrivacyAccountant::new(10.0, 1e-5);
let eps_after_1 = {
acc.record_gaussian(1.0, 1.0, 1e-5, 100);
acc.current_epsilon()
};
acc.record_gaussian(1.0, 1.0, 1e-5, 100);
let eps_after_2 = acc.current_epsilon();
// After 2 queries, epsilon should be larger
assert!(eps_after_2 > eps_after_1);
}
#[test]
fn privacy_accountant_exhaustion() {
let mut acc = PrivacyAccountant::new(1.0, 1e-5);
// Use a very small sigma to burn budget fast
for _ in 0..100 {
acc.record_gaussian(0.1, 10.0, 1e-5, 10);
}
assert!(acc.is_exhausted());
assert!(!acc.can_afford(0.1));
}
}