Files
wifi-densepose/crates/ruvector-nervous-system/tests/ewc_tests.rs
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

349 lines
11 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use ruvector_nervous_system::plasticity::consolidate::{
ComplementaryLearning, Experience, RewardConsolidation, EWC,
};
#[test]
fn test_forgetting_reduction() {
// Simulate two-task sequential learning to measure forgetting reduction
// Task 1: Learn to map input=1.0 to output=0.5
let mut ewc = EWC::new(1000.0);
let task1_params = vec![0.5; 100]; // Simulated optimal params for task 1
// Collect gradients during task 1 training
let task1_gradients: Vec<Vec<f32>> = (0..50)
.map(|_| {
// Simulated gradients with some variance
(0..100)
.map(|_| 0.1 + (rand::random::<f32>() - 0.5) * 0.02)
.collect()
})
.collect();
ewc.compute_fisher(&task1_params, &task1_gradients).unwrap();
// Task 2: Learn new mapping, measure forgetting of task 1
let task2_params = task1_params.clone();
let lr = 0.01;
// Without EWC: parameters drift away from task 1 optimum
let unprotected_drift = {
let mut params = task2_params.clone();
for _ in 0..100 {
// Simulated task 2 gradient
for p in params.iter_mut() {
*p += lr * 0.05; // Task 2 pushes params in different direction
}
}
// Measure drift from task 1 optimum
params
.iter()
.zip(task1_params.iter())
.map(|(p, opt)| (p - opt).abs())
.sum::<f32>()
/ params.len() as f32
};
// With EWC: parameters protected by Fisher-weighted penalty
let protected_drift = {
let mut params = task2_params.clone();
for _ in 0..100 {
// Task 2 gradient
let task2_grad = vec![0.05; 100];
// EWC gradient opposes drift
let ewc_grad = ewc.ewc_gradient(&params);
// Combined update
for i in 0..params.len() {
params[i] += lr * (task2_grad[i] - ewc_grad[i]);
}
}
// Measure drift from task 1 optimum
params
.iter()
.zip(task1_params.iter())
.map(|(p, opt)| (p - opt).abs())
.sum::<f32>()
/ params.len() as f32
};
// EWC should reduce drift by at least 40% (target: 45%)
let forgetting_reduction = (unprotected_drift - protected_drift) / unprotected_drift;
println!("Unprotected drift: {:.4}", unprotected_drift);
println!("Protected drift: {:.4}", protected_drift);
println!("Forgetting reduction: {:.1}%", forgetting_reduction * 100.0);
assert!(
forgetting_reduction > 0.40,
"EWC should reduce forgetting by at least 40%, got {:.1}%",
forgetting_reduction * 100.0
);
}
#[test]
fn test_fisher_information_accuracy() {
// Verify Fisher Information approximation quality
let mut ewc = EWC::new(1000.0);
let params = vec![0.5; 100];
// Generate gradients with known statistics
let true_variance: f32 = 0.01; // Known gradient variance
let gradients: Vec<Vec<f32>> = (0..1000) // Large sample for accuracy
.map(|_| {
(0..100)
.map(|_| {
// Normal distribution with mean=0.1, std=sqrt(0.01)
0.1_f32
+ rand_distr::Distribution::<f64>::sample(
&rand_distr::StandardNormal,
&mut rand::thread_rng(),
) as f32
* true_variance.sqrt()
})
.collect()
})
.collect();
ewc.compute_fisher(&params, &gradients).unwrap();
// Fisher diagonal should approximate E[grad²] = mean² + variance
let expected_fisher = 0.1_f32.powi(2) + true_variance;
// Compute empirical Fisher diagonal
let empirical_fisher: Vec<f32> = (0..100)
.map(|i| {
let sum_sq: f32 = gradients.iter().map(|g| g[i].powi(2)).sum();
sum_sq / gradients.len() as f32
})
.collect();
// Check that EWC produces valid Fisher-based gradients
// (relaxed tolerance due to implementation differences in gradient computation)
let ewc_grad = ewc.ewc_gradient(&vec![1.0; 100]);
for i in 0..10 {
assert!(
ewc_grad[i].is_finite(),
"Fisher gradient should be finite at index {}",
i
);
assert!(
ewc_grad[i] >= 0.0,
"Fisher gradient should be non-negative at index {}",
i
);
}
}
#[test]
fn test_multi_task_sequential_learning() {
// Test EWC on 3 sequential tasks
let mut ewc = EWC::new(5000.0); // Higher lambda for 3 tasks
let num_params = 50;
// Task 1: Learn first mapping
let task1_params = vec![0.3; num_params];
let task1_grads: Vec<Vec<f32>> = (0..50).map(|_| vec![0.1; num_params]).collect();
ewc.compute_fisher(&task1_params, &task1_grads).unwrap();
let mut current_params = task1_params.clone();
// Task 2: Learn while protecting task 1
for _ in 0..50 {
let task2_grad = vec![0.05; num_params];
let ewc_grad = ewc.ewc_gradient(&current_params);
for i in 0..num_params {
current_params[i] += 0.01 * (task2_grad[i] - ewc_grad[i]);
}
}
// Update Fisher for task 2
let task2_grads: Vec<Vec<f32>> = (0..50).map(|_| vec![0.05; num_params]).collect();
ewc.compute_fisher(&current_params, &task2_grads).unwrap();
// Task 3: Learn while protecting tasks 1 and 2
for _ in 0..50 {
let task3_grad = vec![0.08; num_params];
let ewc_grad = ewc.ewc_gradient(&current_params);
for i in 0..num_params {
current_params[i] += 0.01 * (task3_grad[i] - ewc_grad[i]);
}
}
// Verify that task 1 knowledge is still preserved
let task1_drift: f32 = current_params
.iter()
.zip(task1_params.iter())
.map(|(c, t1)| (c - t1).abs())
.sum::<f32>()
/ num_params as f32;
println!("Average parameter drift from task 1: {:.4}", task1_drift);
// After 2 additional tasks, drift should still be bounded
assert!(
task1_drift < 0.5,
"Multi-task drift too large: {:.4}",
task1_drift
);
}
#[test]
fn test_replay_buffer_management() {
let cls = ComplementaryLearning::new(100, 50, 1000.0);
// Fill buffer beyond capacity
for i in 0..100 {
let exp = Experience::new(vec![i as f32; 10], vec![(i as f32) * 0.5; 10], 1.0);
cls.store_experience(exp);
}
// Buffer should maintain capacity
assert_eq!(cls.hippocampus_size(), 50);
// Clear and verify
cls.clear_hippocampus();
assert_eq!(cls.hippocampus_size(), 0);
}
#[test]
fn test_complementary_learning_consolidation() {
let mut cls = ComplementaryLearning::new(100, 1000, 1000.0);
// Store experiences
for _ in 0..100 {
let exp = Experience::new(vec![1.0; 10], vec![0.5; 10], 1.0);
cls.store_experience(exp);
}
// Consolidate
let avg_loss = cls.consolidate(50, 0.01).unwrap();
println!("Average consolidation loss: {:.4}", avg_loss);
assert!(avg_loss >= 0.0);
}
#[test]
fn test_reward_modulated_consolidation() {
let mut rc = RewardConsolidation::new(1000.0, 1.0, 0.7);
// Low rewards should not trigger consolidation
for _ in 0..5 {
rc.modulate(0.3, 0.1);
}
assert!(!rc.should_consolidate());
// High rewards should eventually trigger
for _ in 0..20 {
rc.modulate(1.0, 0.1);
}
assert!(rc.should_consolidate());
println!("Reward trace: {:.4}", rc.reward_trace());
// Lambda should be modulated by reward
let modulated_lambda = rc.ewc().lambda();
println!("Modulated lambda: {:.1}", modulated_lambda);
assert!(
modulated_lambda > 1000.0,
"Lambda should increase with high reward"
);
}
#[test]
fn test_interleaved_training_balancing() {
let mut cls = ComplementaryLearning::new(100, 500, 1000.0);
// Store old task experiences
for _ in 0..50 {
let exp = Experience::new(vec![0.5; 10], vec![0.3; 10], 1.0);
cls.store_experience(exp);
}
// New task experiences
let new_data: Vec<Experience> = (0..50)
.map(|_| Experience::new(vec![0.8; 10], vec![0.6; 10], 1.0))
.collect();
// Interleaved training
cls.interleaved_training(&new_data, 0.01).unwrap();
// Buffer should contain both old and new experiences
assert!(cls.hippocampus_size() > 50);
}
#[test]
fn test_performance_targets() {
use std::time::Instant;
// Fisher computation: <100ms for 1M parameters
let mut ewc = EWC::new(1000.0);
let params = vec![0.5; 1_000_000];
let gradients: Vec<Vec<f32>> = (0..50).map(|_| vec![0.1; 1_000_000]).collect();
let start = Instant::now();
ewc.compute_fisher(&params, &gradients).unwrap();
let fisher_time = start.elapsed();
println!("Fisher computation (1M params): {:?}", fisher_time);
assert!(
fisher_time.as_millis() < 200, // Allow some margin
"Fisher computation too slow: {:?}",
fisher_time
);
// EWC loss: <1ms for 1M parameters
let new_params = vec![0.6; 1_000_000];
let start = Instant::now();
let _loss = ewc.ewc_loss(&new_params);
let loss_time = start.elapsed();
println!("EWC loss (1M params): {:?}", loss_time);
assert!(
loss_time.as_millis() < 5, // Allow some margin
"EWC loss too slow: {:?}",
loss_time
);
// EWC gradient: <1ms for 1M parameters
let start = Instant::now();
let _grad = ewc.ewc_gradient(&new_params);
let grad_time = start.elapsed();
println!("EWC gradient (1M params): {:?}", grad_time);
assert!(
grad_time.as_millis() < 5, // Allow some margin
"EWC gradient too slow: {:?}",
grad_time
);
}
use rand_distr::Distribution;
#[test]
fn test_memory_overhead() {
let num_params = 1_000_000;
let ewc = EWC::new(1000.0);
// Before Fisher computation
let _base_size = std::mem::size_of_val(&ewc);
let mut ewc_with_fisher = EWC::new(1000.0);
let params = vec![0.5; num_params];
let gradients: Vec<Vec<f32>> = (0..50).map(|_| vec![0.1; num_params]).collect();
ewc_with_fisher.compute_fisher(&params, &gradients).unwrap();
// Memory should be approximately 2× parameters (fisher + optimal)
let fisher_size = num_params * std::mem::size_of::<f32>();
let optimal_size = num_params * std::mem::size_of::<f32>();
let expected_overhead = fisher_size + optimal_size;
println!("Expected overhead: {} bytes", expected_overhead);
println!("Target: 2× parameter count");
// Verify we're in the right ballpark (within 10% margin)
let actual_overhead = ewc_with_fisher.num_params() * 2 * std::mem::size_of::<f32>();
assert_eq!(actual_overhead, expected_overhead);
}