Files
wifi-densepose/vendor/ruvector/crates/prime-radiant/benches/sona_bench.rs

550 lines
16 KiB
Rust

//! Benchmarks for SONA Micro-LoRA instant adaptation
//!
//! ADR-014 Performance Target: < 0.05ms (50us) for instant adaptation
//!
//! SONA provides self-optimizing threshold tuning with:
//! - Micro-LoRA: Ultra-low rank (1-2) for instant learning
//! - Base-LoRA: Standard LoRA for background learning
//! - EWC++: Elastic Weight Consolidation to prevent forgetting
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
// ============================================================================
// SONA Types (Simulated for benchmarking)
// ============================================================================
/// Micro-LoRA layer (rank 1-2 for instant adaptation)
pub struct MicroLoRA {
/// Low-rank factor A (dim x rank)
pub a: Vec<f32>,
/// Low-rank factor B (rank x dim)
pub b: Vec<f32>,
/// Scaling factor
pub scale: f32,
/// Input dimension
pub dim: usize,
/// Rank (typically 1-2)
pub rank: usize,
}
impl MicroLoRA {
pub fn new(dim: usize, rank: usize) -> Self {
// Initialize with small random values
let a: Vec<f32> = (0..dim * rank)
.map(|i| ((i as f32 * 0.1234).sin() * 0.01))
.collect();
let b: Vec<f32> = (0..rank * dim)
.map(|i| ((i as f32 * 0.5678).cos() * 0.01))
.collect();
Self {
a,
b,
scale: 0.1,
dim,
rank,
}
}
/// Apply micro-LoRA transform: y = x + scale * B @ A @ x
#[inline]
pub fn apply(&self, input: &[f32], output: &mut [f32]) {
debug_assert_eq!(input.len(), self.dim);
debug_assert_eq!(output.len(), self.dim);
// Copy input to output first (identity component)
output.copy_from_slice(input);
// Compute A @ x -> hidden (rank-dimensional)
let mut hidden = vec![0.0f32; self.rank];
for r in 0..self.rank {
for i in 0..self.dim {
hidden[r] += self.a[i * self.rank + r] * input[i];
}
}
// Compute B @ hidden and add to output
for i in 0..self.dim {
let mut delta = 0.0f32;
for r in 0..self.rank {
delta += self.b[r * self.dim + i] * hidden[r];
}
output[i] += self.scale * delta;
}
}
/// Apply with pre-allocated hidden buffer (zero allocation)
#[inline]
pub fn apply_zero_alloc(&self, input: &[f32], hidden: &mut [f32], output: &mut [f32]) {
debug_assert_eq!(hidden.len(), self.rank);
// Copy input
output.copy_from_slice(input);
// A @ x
hidden.fill(0.0);
for r in 0..self.rank {
for i in 0..self.dim {
hidden[r] += self.a[i * self.rank + r] * input[i];
}
}
// B @ hidden
for i in 0..self.dim {
let mut delta = 0.0f32;
for r in 0..self.rank {
delta += self.b[r * self.dim + i] * hidden[r];
}
output[i] += self.scale * delta;
}
}
/// Update weights from gradient (instant learning)
#[inline]
pub fn update(&mut self, grad_a: &[f32], grad_b: &[f32], learning_rate: f32) {
for i in 0..self.a.len() {
self.a[i] -= learning_rate * grad_a[i];
}
for i in 0..self.b.len() {
self.b[i] -= learning_rate * grad_b[i];
}
}
}
/// Base-LoRA layer (higher rank for background learning)
pub struct BaseLoRA {
pub a: Vec<f32>,
pub b: Vec<f32>,
pub scale: f32,
pub dim: usize,
pub rank: usize,
}
impl BaseLoRA {
pub fn new(dim: usize, rank: usize) -> Self {
let a: Vec<f32> = (0..dim * rank)
.map(|i| ((i as f32 * 0.3456).sin() * 0.01))
.collect();
let b: Vec<f32> = (0..rank * dim)
.map(|i| ((i as f32 * 0.7890).cos() * 0.01))
.collect();
Self {
a,
b,
scale: 0.05,
dim,
rank,
}
}
#[inline]
pub fn apply(&self, input: &[f32], output: &mut [f32]) {
output.copy_from_slice(input);
let mut hidden = vec![0.0f32; self.rank];
for r in 0..self.rank {
for i in 0..self.dim {
hidden[r] += self.a[i * self.rank + r] * input[i];
}
}
for i in 0..self.dim {
let mut delta = 0.0f32;
for r in 0..self.rank {
delta += self.b[r * self.dim + i] * hidden[r];
}
output[i] += self.scale * delta;
}
}
}
/// EWC++ weight importance
pub struct EwcPlusPlus {
/// Fisher information diagonal
pub fisher: Vec<f32>,
/// Optimal weights from previous tasks
pub optimal_weights: Vec<f32>,
/// Regularization strength
pub lambda: f32,
}
impl EwcPlusPlus {
pub fn new(param_count: usize, lambda: f32) -> Self {
Self {
fisher: vec![1.0; param_count],
optimal_weights: vec![0.0; param_count],
lambda,
}
}
/// Compute EWC penalty for given weights
#[inline]
pub fn penalty(&self, weights: &[f32]) -> f32 {
let mut penalty = 0.0f32;
for i in 0..weights.len().min(self.fisher.len()) {
let diff = weights[i] - self.optimal_weights[i];
penalty += self.fisher[i] * diff * diff;
}
self.lambda * 0.5 * penalty
}
/// Update Fisher information (consolidation)
pub fn consolidate(&mut self, weights: &[f32], new_fisher: &[f32]) {
for i in 0..self.fisher.len().min(new_fisher.len()) {
// Online Fisher update (running average)
self.fisher[i] = 0.9 * self.fisher[i] + 0.1 * new_fisher[i];
self.optimal_weights[i] = weights[i];
}
}
}
/// Trajectory step for learning
#[derive(Clone)]
pub struct TrajectoryStep {
pub state: Vec<f32>,
pub action_embedding: Vec<f32>,
pub reward: f32,
}
/// Trajectory builder
pub struct TrajectoryBuilder {
pub initial_state: Vec<f32>,
pub steps: Vec<TrajectoryStep>,
}
impl TrajectoryBuilder {
pub fn new(initial_state: Vec<f32>) -> Self {
Self {
initial_state,
steps: Vec::new(),
}
}
pub fn add_step(&mut self, state: Vec<f32>, action: Vec<f32>, reward: f32) {
self.steps.push(TrajectoryStep {
state,
action_embedding: action,
reward,
});
}
}
/// SONA engine (simplified for benchmarking)
pub struct SonaEngine {
pub micro_lora: MicroLoRA,
pub base_lora: BaseLoRA,
pub ewc: EwcPlusPlus,
pub dim: usize,
}
impl SonaEngine {
pub fn new(dim: usize) -> Self {
let micro_rank = 2;
let base_rank = 8;
let param_count = dim * micro_rank * 2 + dim * base_rank * 2;
Self {
micro_lora: MicroLoRA::new(dim, micro_rank),
base_lora: BaseLoRA::new(dim, base_rank),
ewc: EwcPlusPlus::new(param_count, 0.4),
dim,
}
}
/// Begin trajectory
pub fn begin_trajectory(&self, initial_state: Vec<f32>) -> TrajectoryBuilder {
TrajectoryBuilder::new(initial_state)
}
/// End trajectory and trigger learning
pub fn end_trajectory(&mut self, builder: TrajectoryBuilder, final_reward: f32) {
// Simplified learning: update micro-LoRA based on reward
let lr = 0.001 * final_reward.max(0.0);
// Pseudo-gradient (simplified)
let grad_a: Vec<f32> = self.micro_lora.a.iter().map(|w| w * lr).collect();
let grad_b: Vec<f32> = self.micro_lora.b.iter().map(|w| w * lr).collect();
self.micro_lora.update(&grad_a, &grad_b, lr);
}
/// Apply micro-LoRA (instant)
#[inline]
pub fn apply_micro(&self, input: &[f32], output: &mut [f32]) {
self.micro_lora.apply(input, output);
}
/// Apply base-LoRA (background)
pub fn apply_base(&self, input: &[f32], output: &mut [f32]) {
self.base_lora.apply(input, output);
}
/// Apply both LoRAs combined
pub fn apply_combined(&self, input: &[f32], output: &mut [f32]) {
// Apply micro first
let mut intermediate = vec![0.0f32; self.dim];
self.micro_lora.apply(input, &mut intermediate);
// Then base
self.base_lora.apply(&intermediate, output);
}
}
// ============================================================================
// Benchmarks
// ============================================================================
fn generate_state(dim: usize, seed: u64) -> Vec<f32> {
(0..dim)
.map(|i| ((seed as f32 * 0.123 + i as f32 * 0.456).sin()))
.collect()
}
/// Benchmark Micro-LoRA application (target: <50us)
fn bench_micro_lora_apply(c: &mut Criterion) {
let mut group = c.benchmark_group("sona_micro_lora_apply");
group.throughput(Throughput::Elements(1));
for dim in [64, 128, 256, 512] {
let lora = MicroLoRA::new(dim, 2); // Rank 2
let input = generate_state(dim, 42);
let mut output = vec![0.0f32; dim];
group.bench_with_input(BenchmarkId::new("dim", dim), &dim, |b, _| {
b.iter(|| lora.apply(black_box(&input), black_box(&mut output)))
});
}
// Different ranks
let dim = 256;
for rank in [1, 2, 4] {
let lora = MicroLoRA::new(dim, rank);
let input = generate_state(dim, 42);
let mut output = vec![0.0f32; dim];
group.bench_with_input(BenchmarkId::new("rank", rank), &rank, |b, _| {
b.iter(|| lora.apply(black_box(&input), black_box(&mut output)))
});
}
group.finish();
}
/// Benchmark zero-allocation Micro-LoRA
fn bench_micro_lora_zero_alloc(c: &mut Criterion) {
let mut group = c.benchmark_group("sona_micro_lora_zero_alloc");
group.throughput(Throughput::Elements(1));
for dim in [64, 128, 256, 512] {
let lora = MicroLoRA::new(dim, 2);
let input = generate_state(dim, 42);
let mut hidden = vec![0.0f32; 2];
let mut output = vec![0.0f32; dim];
group.bench_with_input(BenchmarkId::new("dim", dim), &dim, |b, _| {
b.iter(|| {
lora.apply_zero_alloc(
black_box(&input),
black_box(&mut hidden),
black_box(&mut output),
)
})
});
}
group.finish();
}
/// Benchmark Base-LoRA application
fn bench_base_lora_apply(c: &mut Criterion) {
let mut group = c.benchmark_group("sona_base_lora_apply");
group.throughput(Throughput::Elements(1));
for dim in [64, 128, 256, 512] {
let lora = BaseLoRA::new(dim, 8); // Rank 8
let input = generate_state(dim, 42);
let mut output = vec![0.0f32; dim];
group.bench_with_input(BenchmarkId::new("dim", dim), &dim, |b, _| {
b.iter(|| lora.apply(black_box(&input), black_box(&mut output)))
});
}
// Different ranks
let dim = 256;
for rank in [4, 8, 16, 32] {
let lora = BaseLoRA::new(dim, rank);
let input = generate_state(dim, 42);
let mut output = vec![0.0f32; dim];
group.bench_with_input(BenchmarkId::new("rank", rank), &rank, |b, _| {
b.iter(|| lora.apply(black_box(&input), black_box(&mut output)))
});
}
group.finish();
}
/// Benchmark EWC++ penalty computation
fn bench_ewc_penalty(c: &mut Criterion) {
let mut group = c.benchmark_group("sona_ewc_penalty");
group.throughput(Throughput::Elements(1));
for param_count in [1000, 10000, 100000] {
let ewc = EwcPlusPlus::new(param_count, 0.4);
let weights: Vec<f32> = (0..param_count).map(|i| (i as f32 * 0.001).sin()).collect();
group.bench_with_input(
BenchmarkId::new("params", param_count),
&param_count,
|b, _| b.iter(|| black_box(ewc.penalty(black_box(&weights)))),
);
}
group.finish();
}
/// Benchmark EWC++ consolidation
fn bench_ewc_consolidate(c: &mut Criterion) {
let mut group = c.benchmark_group("sona_ewc_consolidate");
for param_count in [1000, 10000, 100000] {
let mut ewc = EwcPlusPlus::new(param_count, 0.4);
let weights: Vec<f32> = (0..param_count).map(|i| (i as f32 * 0.001).sin()).collect();
let new_fisher: Vec<f32> = (0..param_count)
.map(|i| (i as f32 * 0.002).cos().abs())
.collect();
group.bench_with_input(
BenchmarkId::new("params", param_count),
&param_count,
|b, _| b.iter(|| ewc.consolidate(black_box(&weights), black_box(&new_fisher))),
);
}
group.finish();
}
/// Benchmark full trajectory learning cycle
fn bench_trajectory_learning(c: &mut Criterion) {
let mut group = c.benchmark_group("sona_trajectory_learning");
let dim = 256;
let mut engine = SonaEngine::new(dim);
// Single step trajectory
group.bench_function("single_step_trajectory", |b| {
b.iter(|| {
let mut builder = engine.begin_trajectory(generate_state(dim, 42));
builder.add_step(generate_state(dim, 43), vec![], 0.8);
engine.end_trajectory(builder, black_box(0.85));
})
});
// Multi-step trajectory
group.bench_function("10_step_trajectory", |b| {
b.iter(|| {
let mut builder = engine.begin_trajectory(generate_state(dim, 42));
for i in 0..10 {
builder.add_step(generate_state(dim, 43 + i), vec![], 0.5 + (i as f32) * 0.05);
}
engine.end_trajectory(builder, black_box(0.9));
})
});
group.finish();
}
/// Benchmark combined LoRA application
fn bench_combined_lora(c: &mut Criterion) {
let mut group = c.benchmark_group("sona_combined_lora");
for dim in [64, 128, 256, 512] {
let engine = SonaEngine::new(dim);
let input = generate_state(dim, 42);
let mut output = vec![0.0f32; dim];
// Micro only
group.bench_with_input(BenchmarkId::new("micro_only", dim), &dim, |b, _| {
b.iter(|| engine.apply_micro(black_box(&input), black_box(&mut output)))
});
// Base only
group.bench_with_input(BenchmarkId::new("base_only", dim), &dim, |b, _| {
b.iter(|| engine.apply_base(black_box(&input), black_box(&mut output)))
});
// Combined
group.bench_with_input(BenchmarkId::new("combined", dim), &dim, |b, _| {
b.iter(|| engine.apply_combined(black_box(&input), black_box(&mut output)))
});
}
group.finish();
}
/// Benchmark batch inference
fn bench_batch_inference(c: &mut Criterion) {
let mut group = c.benchmark_group("sona_batch_inference");
let dim = 256;
let engine = SonaEngine::new(dim);
for batch_size in [1, 10, 100, 1000] {
let inputs: Vec<Vec<f32>> = (0..batch_size)
.map(|i| generate_state(dim, i as u64))
.collect();
let mut outputs: Vec<Vec<f32>> = (0..batch_size).map(|_| vec![0.0f32; dim]).collect();
group.throughput(Throughput::Elements(batch_size as u64));
group.bench_with_input(
BenchmarkId::new("batch", batch_size),
&batch_size,
|b, _| {
b.iter(|| {
for (input, output) in inputs.iter().zip(outputs.iter_mut()) {
engine.apply_micro(input, output);
}
black_box(outputs.len())
})
},
);
}
group.finish();
}
/// Benchmark weight update (instant learning)
fn bench_weight_update(c: &mut Criterion) {
let mut group = c.benchmark_group("sona_weight_update");
for dim in [64, 128, 256, 512] {
let mut lora = MicroLoRA::new(dim, 2);
let grad_a: Vec<f32> = (0..dim * 2).map(|i| (i as f32 * 0.001).sin()).collect();
let grad_b: Vec<f32> = (0..2 * dim).map(|i| (i as f32 * 0.002).cos()).collect();
group.bench_with_input(BenchmarkId::new("dim", dim), &dim, |b, _| {
b.iter(|| {
lora.update(black_box(&grad_a), black_box(&grad_b), black_box(0.001));
})
});
}
group.finish();
}
criterion_group!(
benches,
bench_micro_lora_apply,
bench_micro_lora_zero_alloc,
bench_base_lora_apply,
bench_ewc_penalty,
bench_ewc_consolidate,
bench_trajectory_learning,
bench_combined_lora,
bench_batch_inference,
bench_weight_update,
);
criterion_main!(benches);