Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
416
crates/rvf/rvf-federation/src/diff_privacy.rs
Normal file
416
crates/rvf/rvf-federation/src/diff_privacy.rs
Normal file
@@ -0,0 +1,416 @@
|
||||
//! Differential privacy primitives for federated learning.
|
||||
//!
|
||||
//! Provides calibrated noise injection, gradient clipping, and a Renyi
|
||||
//! Differential Privacy (RDP) accountant for tracking cumulative privacy loss.
|
||||
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use rand_distr::{Distribution, Normal};
|
||||
|
||||
use crate::error::FederationError;
|
||||
use crate::types::{DiffPrivacyProof, NoiseMechanism};
|
||||
|
||||
/// Differential privacy engine for adding calibrated noise.
|
||||
pub struct DiffPrivacyEngine {
|
||||
/// Target epsilon (privacy loss bound).
|
||||
epsilon: f64,
|
||||
/// Target delta (probability of exceeding epsilon).
|
||||
delta: f64,
|
||||
/// L2 sensitivity bound.
|
||||
sensitivity: f64,
|
||||
/// Gradient clipping norm.
|
||||
clipping_norm: f64,
|
||||
/// Noise mechanism.
|
||||
mechanism: NoiseMechanism,
|
||||
/// Random number generator.
|
||||
rng: StdRng,
|
||||
}
|
||||
|
||||
impl DiffPrivacyEngine {
|
||||
/// Create a new DP engine with Gaussian mechanism.
|
||||
///
|
||||
/// Default: epsilon=1.0, delta=1e-5 (strong privacy).
|
||||
pub fn gaussian(
|
||||
epsilon: f64,
|
||||
delta: f64,
|
||||
sensitivity: f64,
|
||||
clipping_norm: f64,
|
||||
) -> Result<Self, FederationError> {
|
||||
if epsilon <= 0.0 {
|
||||
return Err(FederationError::InvalidEpsilon(epsilon));
|
||||
}
|
||||
if delta <= 0.0 || delta >= 1.0 {
|
||||
return Err(FederationError::InvalidDelta(delta));
|
||||
}
|
||||
Ok(Self {
|
||||
epsilon,
|
||||
delta,
|
||||
sensitivity,
|
||||
clipping_norm,
|
||||
mechanism: NoiseMechanism::Gaussian,
|
||||
rng: StdRng::from_rng(rand::thread_rng()).unwrap(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a new DP engine with Laplace mechanism.
|
||||
pub fn laplace(
|
||||
epsilon: f64,
|
||||
sensitivity: f64,
|
||||
clipping_norm: f64,
|
||||
) -> Result<Self, FederationError> {
|
||||
if epsilon <= 0.0 {
|
||||
return Err(FederationError::InvalidEpsilon(epsilon));
|
||||
}
|
||||
Ok(Self {
|
||||
epsilon,
|
||||
delta: 0.0,
|
||||
sensitivity,
|
||||
clipping_norm,
|
||||
mechanism: NoiseMechanism::Laplace,
|
||||
rng: StdRng::from_rng(rand::thread_rng()).unwrap(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Create with a deterministic seed (for testing).
|
||||
pub fn with_seed(mut self, seed: u64) -> Self {
|
||||
self.rng = StdRng::seed_from_u64(seed);
|
||||
self
|
||||
}
|
||||
|
||||
/// Compute the Gaussian noise standard deviation (sigma).
|
||||
fn gaussian_sigma(&self) -> f64 {
|
||||
self.sensitivity * (2.0_f64 * (1.25_f64 / self.delta).ln()).sqrt() / self.epsilon
|
||||
}
|
||||
|
||||
/// Compute the Laplace noise scale (b).
|
||||
fn laplace_scale(&self) -> f64 {
|
||||
self.sensitivity / self.epsilon
|
||||
}
|
||||
|
||||
/// Clip a gradient vector to the configured L2 norm bound.
|
||||
pub fn clip_gradients(&self, gradients: &mut [f64]) {
|
||||
let norm: f64 = gradients.iter().map(|x| x * x).sum::<f64>().sqrt();
|
||||
if norm > self.clipping_norm {
|
||||
let scale = self.clipping_norm / norm;
|
||||
for g in gradients.iter_mut() {
|
||||
*g *= scale;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Add calibrated noise to a vector of parameters.
|
||||
///
|
||||
/// Clips gradients first, then adds noise per the configured mechanism.
|
||||
pub fn add_noise(&mut self, params: &mut [f64]) -> DiffPrivacyProof {
|
||||
self.clip_gradients(params);
|
||||
|
||||
match self.mechanism {
|
||||
NoiseMechanism::Gaussian => {
|
||||
let sigma = self.gaussian_sigma();
|
||||
let normal = Normal::new(0.0, sigma).unwrap();
|
||||
for p in params.iter_mut() {
|
||||
*p += normal.sample(&mut self.rng);
|
||||
}
|
||||
DiffPrivacyProof {
|
||||
epsilon: self.epsilon,
|
||||
delta: self.delta,
|
||||
mechanism: NoiseMechanism::Gaussian,
|
||||
sensitivity: self.sensitivity,
|
||||
clipping_norm: self.clipping_norm,
|
||||
noise_scale: sigma,
|
||||
noised_parameter_count: params.len() as u64,
|
||||
}
|
||||
}
|
||||
NoiseMechanism::Laplace => {
|
||||
let b = self.laplace_scale();
|
||||
for p in params.iter_mut() {
|
||||
// Laplace noise via inverse CDF: b * sign(u-0.5) * ln(1 - 2|u-0.5|)
|
||||
let u: f64 = self.rng.gen::<f64>() - 0.5;
|
||||
let noise = -b * u.signum() * (1.0 - 2.0 * u.abs()).ln();
|
||||
*p += noise;
|
||||
}
|
||||
DiffPrivacyProof {
|
||||
epsilon: self.epsilon,
|
||||
delta: 0.0,
|
||||
mechanism: NoiseMechanism::Laplace,
|
||||
sensitivity: self.sensitivity,
|
||||
clipping_norm: self.clipping_norm,
|
||||
noise_scale: b,
|
||||
noised_parameter_count: params.len() as u64,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Add noise to a single scalar value.
|
||||
pub fn add_noise_scalar(&mut self, value: &mut f64) -> f64 {
|
||||
let mut v = [*value];
|
||||
self.add_noise(&mut v);
|
||||
*value = v[0];
|
||||
v[0]
|
||||
}
|
||||
|
||||
/// Current epsilon setting.
|
||||
pub fn epsilon(&self) -> f64 {
|
||||
self.epsilon
|
||||
}
|
||||
|
||||
/// Current delta setting.
|
||||
pub fn delta(&self) -> f64 {
|
||||
self.delta
|
||||
}
|
||||
}
|
||||
|
||||
// -- Privacy Accountant (RDP) ------------------------------------------------
|
||||
|
||||
/// Renyi Differential Privacy (RDP) accountant for tracking cumulative privacy loss.
|
||||
///
|
||||
/// Tracks privacy budget across multiple export rounds using RDP composition,
|
||||
/// which provides tighter bounds than naive (epsilon, delta)-DP composition.
|
||||
pub struct PrivacyAccountant {
|
||||
/// Maximum allowed cumulative epsilon.
|
||||
epsilon_limit: f64,
|
||||
/// Target delta for conversion from RDP to (epsilon, delta)-DP.
|
||||
target_delta: f64,
|
||||
/// Accumulated RDP values at various alpha orders.
|
||||
/// Each entry: (alpha_order, accumulated_rdp_epsilon)
|
||||
rdp_alphas: Vec<(f64, f64)>,
|
||||
/// History of exports: (timestamp, epsilon_spent, mechanism).
|
||||
history: Vec<ExportRecord>,
|
||||
}
|
||||
|
||||
/// Record of a single privacy-consuming export.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ExportRecord {
|
||||
/// UNIX timestamp of the export.
|
||||
pub timestamp_s: u64,
|
||||
/// Epsilon consumed by this export.
|
||||
pub epsilon: f64,
|
||||
/// Delta for this export (0 for pure epsilon-DP).
|
||||
pub delta: f64,
|
||||
/// Mechanism used.
|
||||
pub mechanism: NoiseMechanism,
|
||||
/// Number of parameters.
|
||||
pub parameter_count: u64,
|
||||
}
|
||||
|
||||
impl PrivacyAccountant {
|
||||
/// Create a new accountant with the given budget.
|
||||
pub fn new(epsilon_limit: f64, target_delta: f64) -> Self {
|
||||
// Standard RDP alpha orders for accounting
|
||||
let alphas: Vec<f64> = vec![
|
||||
1.5, 1.75, 2.0, 2.5, 3.0, 4.0, 5.0, 6.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0,
|
||||
1024.0,
|
||||
];
|
||||
let rdp_alphas = alphas.into_iter().map(|a| (a, 0.0)).collect();
|
||||
Self {
|
||||
epsilon_limit,
|
||||
target_delta,
|
||||
rdp_alphas,
|
||||
history: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// RDP epsilon of one Gaussian query at order `alpha`: `alpha / (2 * sigma^2)`.
///
/// NOTE(review): no sensitivity term appears, so `sigma` is presumably
/// expressed relative to a unit sensitivity — confirm with callers.
fn gaussian_rdp(alpha: f64, sigma: f64) -> f64 {
    let twice_variance = 2.0 * sigma * sigma;
    alpha / twice_variance
}
|
||||
|
||||
/// Convert an RDP epsilon at order `alpha` to an (epsilon, delta)-DP epsilon:
/// `rdp_epsilon - ln(delta) / (alpha - 1)`.
fn rdp_to_dp(alpha: f64, rdp_epsilon: f64, delta: f64) -> f64 {
    let log_delta = delta.ln();
    let order_gap = alpha - 1.0;
    rdp_epsilon - log_delta / order_gap
}
|
||||
|
||||
/// Record a Gaussian mechanism query.
|
||||
pub fn record_gaussian(&mut self, sigma: f64, epsilon: f64, delta: f64, parameter_count: u64) {
|
||||
// Accumulate RDP at each alpha order
|
||||
for (alpha, rdp_eps) in &mut self.rdp_alphas {
|
||||
*rdp_eps += Self::gaussian_rdp(*alpha, sigma);
|
||||
}
|
||||
self.history.push(ExportRecord {
|
||||
timestamp_s: 0,
|
||||
epsilon,
|
||||
delta,
|
||||
mechanism: NoiseMechanism::Gaussian,
|
||||
parameter_count,
|
||||
});
|
||||
}
|
||||
|
||||
/// Record a Laplace mechanism query.
|
||||
pub fn record_laplace(&mut self, epsilon: f64, parameter_count: u64) {
|
||||
// For Laplace, RDP epsilon at order alpha is: alpha * eps / (alpha - 1)
|
||||
// when alpha > 1
|
||||
for (alpha, rdp_eps) in &mut self.rdp_alphas {
|
||||
if *alpha > 1.0 {
|
||||
*rdp_eps += *alpha * epsilon / (*alpha - 1.0);
|
||||
}
|
||||
}
|
||||
self.history.push(ExportRecord {
|
||||
timestamp_s: 0,
|
||||
epsilon,
|
||||
delta: 0.0,
|
||||
mechanism: NoiseMechanism::Laplace,
|
||||
parameter_count,
|
||||
});
|
||||
}
|
||||
|
||||
/// Get the current best (tightest) epsilon estimate.
|
||||
pub fn current_epsilon(&self) -> f64 {
|
||||
self.rdp_alphas
|
||||
.iter()
|
||||
.map(|(alpha, rdp_eps)| Self::rdp_to_dp(*alpha, *rdp_eps, self.target_delta))
|
||||
.fold(f64::INFINITY, f64::min)
|
||||
}
|
||||
|
||||
/// Remaining privacy budget.
|
||||
pub fn remaining_budget(&self) -> f64 {
|
||||
(self.epsilon_limit - self.current_epsilon()).max(0.0)
|
||||
}
|
||||
|
||||
/// Check if we can afford another export with the given epsilon.
|
||||
pub fn can_afford(&self, additional_epsilon: f64) -> bool {
|
||||
self.current_epsilon() + additional_epsilon <= self.epsilon_limit
|
||||
}
|
||||
|
||||
/// Check if budget is exhausted.
|
||||
pub fn is_exhausted(&self) -> bool {
|
||||
self.current_epsilon() >= self.epsilon_limit
|
||||
}
|
||||
|
||||
/// Fraction of budget consumed (0.0 to 1.0+).
|
||||
pub fn budget_fraction_used(&self) -> f64 {
|
||||
self.current_epsilon() / self.epsilon_limit
|
||||
}
|
||||
|
||||
/// Number of exports recorded.
|
||||
pub fn export_count(&self) -> usize {
|
||||
self.history.len()
|
||||
}
|
||||
|
||||
/// Export history.
|
||||
pub fn history(&self) -> &[ExportRecord] {
|
||||
&self.history
|
||||
}
|
||||
|
||||
/// Epsilon limit.
|
||||
pub fn epsilon_limit(&self) -> f64 {
|
||||
self.epsilon_limit
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Euclidean norm of a slice, used to check clipping results.
    fn l2_norm(values: &[f64]) -> f64 {
        values.iter().map(|v| v * v).sum::<f64>().sqrt()
    }

    #[test]
    fn gaussian_engine_creates() {
        assert!(DiffPrivacyEngine::gaussian(1.0, 1e-5, 1.0, 1.0).is_ok());
    }

    #[test]
    fn invalid_epsilon_rejected() {
        // Zero and negative epsilon must both be refused.
        for &bad_epsilon in &[0.0, -1.0] {
            assert!(DiffPrivacyEngine::gaussian(bad_epsilon, 1e-5, 1.0, 1.0).is_err());
        }
    }

    #[test]
    fn invalid_delta_rejected() {
        // Delta must lie strictly between 0 and 1.
        for &bad_delta in &[0.0, 1.0] {
            assert!(DiffPrivacyEngine::gaussian(1.0, bad_delta, 1.0, 1.0).is_err());
        }
    }

    #[test]
    fn gradient_clipping() {
        let engine = DiffPrivacyEngine::gaussian(1.0, 1e-5, 1.0, 1.0).unwrap();
        // A 3-4-5 triangle: the norm is 5.0 before clipping.
        let mut grads = vec![3.0, 4.0];
        engine.clip_gradients(&mut grads);
        // The norm is clipped down to 1.0.
        assert!((l2_norm(&grads) - 1.0).abs() < 1e-6);
    }

    #[test]
    fn gradient_no_clip_when_small() {
        let engine = DiffPrivacyEngine::gaussian(1.0, 1e-5, 1.0, 10.0).unwrap();
        // Norm 5.0 is under the bound of 10.0, so nothing changes.
        let mut grads = vec![3.0, 4.0];
        engine.clip_gradients(&mut grads);
        assert!((grads[0] - 3.0).abs() < 1e-10);
        assert!((grads[1] - 4.0).abs() < 1e-10);
    }

    #[test]
    fn add_noise_gaussian_deterministic() {
        let mut engine = DiffPrivacyEngine::gaussian(1.0, 1e-5, 1.0, 100.0)
            .unwrap()
            .with_seed(42);
        let original = vec![1.0, 2.0, 3.0];
        let mut params = original.clone();
        let proof = engine.add_noise(&mut params);
        assert_eq!(proof.mechanism, NoiseMechanism::Gaussian);
        assert_eq!(proof.noised_parameter_count, 3);
        // At least one parameter must have moved once noise is added.
        let changed = params
            .iter()
            .zip(original.iter())
            .any(|(after, before)| (after - before).abs() > 1e-10);
        assert!(changed);
    }

    #[test]
    fn add_noise_laplace_deterministic() {
        let mut engine = DiffPrivacyEngine::laplace(1.0, 1.0, 100.0)
            .unwrap()
            .with_seed(42);
        let mut params = vec![1.0, 2.0, 3.0];
        let proof = engine.add_noise(&mut params);
        assert_eq!(proof.mechanism, NoiseMechanism::Laplace);
        assert_eq!(proof.noised_parameter_count, 3);
    }

    #[test]
    fn privacy_accountant_initial_state() {
        let accountant = PrivacyAccountant::new(10.0, 1e-5);
        assert_eq!(accountant.export_count(), 0);
        assert!(!accountant.is_exhausted());
        assert!(accountant.can_afford(1.0));
        assert!(accountant.remaining_budget() > 9.9);
    }

    #[test]
    fn privacy_accountant_tracks_gaussian() {
        let mut accountant = PrivacyAccountant::new(10.0, 1e-5);
        // One query with sigma = 1.0 and epsilon = 1.0.
        accountant.record_gaussian(1.0, 1.0, 1e-5, 100);
        assert_eq!(accountant.export_count(), 1);
        let spent = accountant.current_epsilon();
        assert!(spent > 0.0);
        assert!(spent < 10.0);
    }

    #[test]
    fn privacy_accountant_composition() {
        let mut accountant = PrivacyAccountant::new(10.0, 1e-5);
        accountant.record_gaussian(1.0, 1.0, 1e-5, 100);
        let after_first = accountant.current_epsilon();
        accountant.record_gaussian(1.0, 1.0, 1e-5, 100);
        let after_second = accountant.current_epsilon();
        // A second query must increase the cumulative epsilon.
        assert!(after_second > after_first);
    }

    #[test]
    fn privacy_accountant_exhaustion() {
        let mut accountant = PrivacyAccountant::new(1.0, 1e-5);
        // A very small sigma burns through the budget quickly.
        (0..100).for_each(|_| accountant.record_gaussian(0.1, 10.0, 1e-5, 10));
        assert!(accountant.is_exhausted());
        assert!(!accountant.can_afford(0.1));
    }
}
|
||||
Reference in New Issue
Block a user