Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
317
vendor/ruvector/examples/ruvLLM/src/sona/engine.rs
vendored
Normal file
317
vendor/ruvector/examples/ruvLLM/src/sona/engine.rs
vendored
Normal file
@@ -0,0 +1,317 @@
|
||||
//! SONA Engine - Main interface for self-optimizing neural architecture
|
||||
|
||||
use crate::sona::loops::coordinator::{CoordinatorStats, LoopCoordinator};
|
||||
use crate::sona::lora::MicroLoRA;
|
||||
use crate::sona::trajectory::TrajectoryBuilder;
|
||||
use crate::sona::types::{QueryTrajectory, SonaConfig};
|
||||
use parking_lot::RwLock;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Main SONA engine integrating all components.
///
/// Thin facade over a [`LoopCoordinator`]: records query trajectories,
/// applies micro/base LoRA adjustments, and drives background learning.
pub struct SonaEngine {
    /// Loop coordinator orchestrating the instant and background learning loops.
    coordinator: LoopCoordinator,
    /// Configuration the engine was constructed with (a clone is also handed
    /// to the coordinator in `with_config`).
    config: SonaConfig,
    /// Master switch: when `false`, trajectory submission, LoRA application
    /// and `tick` become no-ops (see the `impl` methods).
    enabled: bool,
}
|
||||
|
||||
impl SonaEngine {
|
||||
/// Create new SONA engine with default config
|
||||
pub fn new(hidden_dim: usize) -> Self {
|
||||
Self::with_config(SonaConfig {
|
||||
hidden_dim,
|
||||
embedding_dim: hidden_dim,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
/// Create with custom config
|
||||
pub fn with_config(config: SonaConfig) -> Self {
|
||||
Self {
|
||||
coordinator: LoopCoordinator::with_config(config.clone()),
|
||||
config,
|
||||
enabled: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Start trajectory recording for a query
|
||||
pub fn begin_trajectory(&self, query_embedding: Vec<f32>) -> TrajectoryBuilder {
|
||||
let id = self.coordinator.next_trajectory_id();
|
||||
TrajectoryBuilder::new(id, query_embedding)
|
||||
}
|
||||
|
||||
/// Complete trajectory and submit for learning
|
||||
pub fn end_trajectory(&self, builder: TrajectoryBuilder, quality: f32) {
|
||||
if !self.enabled {
|
||||
return;
|
||||
}
|
||||
|
||||
let trajectory = builder.build(quality);
|
||||
self.coordinator.on_inference(trajectory);
|
||||
}
|
||||
|
||||
/// Submit pre-built trajectory
|
||||
pub fn submit_trajectory(&self, trajectory: QueryTrajectory) {
|
||||
if self.enabled {
|
||||
self.coordinator.on_inference(trajectory);
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply micro-LoRA to hidden states
|
||||
pub fn apply_micro_lora(&self, input: &[f32], output: &mut [f32]) {
|
||||
if !self.enabled {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(lora) = self.coordinator.micro_lora().try_read() {
|
||||
lora.forward(input, output);
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply base-LoRA to layer output
|
||||
pub fn apply_base_lora(&self, layer_idx: usize, input: &[f32], output: &mut [f32]) {
|
||||
if !self.enabled {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(lora) = self.coordinator.base_lora().try_read() {
|
||||
lora.forward_layer(layer_idx, input, output);
|
||||
}
|
||||
}
|
||||
|
||||
/// Run background learning cycle if due
|
||||
pub fn tick(&self) -> Option<String> {
|
||||
if !self.enabled {
|
||||
return None;
|
||||
}
|
||||
|
||||
if let Some(result) = self.coordinator.maybe_run_background() {
|
||||
Some(format!(
|
||||
"Background cycle: {} trajectories -> {} patterns in {:?}",
|
||||
result.trajectories_processed, result.patterns_extracted, result.elapsed
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Force background learning cycle
|
||||
pub fn force_learn(&self) -> String {
|
||||
let result = self.coordinator.force_background();
|
||||
format!(
|
||||
"Forced learning: {} trajectories -> {} patterns, status: {}",
|
||||
result.trajectories_processed, result.patterns_extracted, result.status
|
||||
)
|
||||
}
|
||||
|
||||
/// Flush instant loop updates
|
||||
pub fn flush(&self) {
|
||||
self.coordinator.flush_instant();
|
||||
}
|
||||
|
||||
/// Find similar patterns to query
|
||||
pub fn find_patterns(
|
||||
&self,
|
||||
query_embedding: &[f32],
|
||||
k: usize,
|
||||
) -> Vec<crate::sona::LearnedPattern> {
|
||||
self.coordinator
|
||||
.reasoning_bank()
|
||||
.read()
|
||||
.find_similar(query_embedding, k)
|
||||
.into_iter()
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get engine statistics
|
||||
pub fn stats(&self) -> CoordinatorStats {
|
||||
self.coordinator.stats()
|
||||
}
|
||||
|
||||
/// Enable/disable engine
|
||||
pub fn set_enabled(&mut self, enabled: bool) {
|
||||
self.enabled = enabled;
|
||||
}
|
||||
|
||||
/// Check if enabled
|
||||
pub fn is_enabled(&self) -> bool {
|
||||
self.enabled
|
||||
}
|
||||
|
||||
/// Get config
|
||||
pub fn config(&self) -> &SonaConfig {
|
||||
&self.config
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for [`SonaEngine`].
///
/// Collects configuration via chained setters and finalizes with `build()`.
pub struct SonaEngineBuilder {
    /// Configuration accumulated by the setter methods; consumed by `build()`.
    config: SonaConfig,
}
|
||||
|
||||
impl SonaEngineBuilder {
|
||||
/// Create new builder
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
config: SonaConfig::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set hidden dimension
|
||||
pub fn hidden_dim(mut self, dim: usize) -> Self {
|
||||
self.config.hidden_dim = dim;
|
||||
self.config.embedding_dim = dim;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set micro-LoRA rank
|
||||
pub fn micro_lora_rank(mut self, rank: usize) -> Self {
|
||||
self.config.micro_lora_rank = rank.clamp(1, 2);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set base-LoRA rank
|
||||
pub fn base_lora_rank(mut self, rank: usize) -> Self {
|
||||
self.config.base_lora_rank = rank;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set micro-LoRA learning rate
|
||||
pub fn micro_lr(mut self, lr: f32) -> Self {
|
||||
self.config.micro_lora_lr = lr;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set base-LoRA learning rate
|
||||
pub fn base_lr(mut self, lr: f32) -> Self {
|
||||
self.config.base_lora_lr = lr;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set EWC lambda
|
||||
pub fn ewc_lambda(mut self, lambda: f32) -> Self {
|
||||
self.config.ewc_lambda = lambda;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set pattern clusters
|
||||
pub fn pattern_clusters(mut self, k: usize) -> Self {
|
||||
self.config.pattern_clusters = k;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set trajectory buffer capacity
|
||||
pub fn buffer_capacity(mut self, capacity: usize) -> Self {
|
||||
self.config.trajectory_capacity = capacity;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set quality threshold
|
||||
pub fn quality_threshold(mut self, threshold: f32) -> Self {
|
||||
self.config.quality_threshold = threshold;
|
||||
self
|
||||
}
|
||||
|
||||
/// Build the engine
|
||||
pub fn build(self) -> SonaEngine {
|
||||
SonaEngine::with_config(self.config)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for SonaEngineBuilder {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_engine_creation() {
        let engine = SonaEngine::new(256);
        assert!(engine.is_enabled());
    }

    #[test]
    fn test_builder() {
        let engine = SonaEngineBuilder::new()
            .hidden_dim(512)
            .micro_lora_rank(2)
            .base_lora_rank(16)
            .micro_lr(0.002)
            .ewc_lambda(500.0)
            .build();

        assert_eq!(engine.config().hidden_dim, 512);
        assert_eq!(engine.config().micro_lora_rank, 2);
    }

    #[test]
    fn test_trajectory_workflow() {
        let engine = SonaEngine::new(64);

        // Begin trajectory
        let mut builder = engine.begin_trajectory(vec![0.1; 64]);
        builder.add_step(vec![0.5; 64], vec![], 0.8);
        builder.add_step(vec![0.6; 64], vec![], 0.9);

        // End trajectory
        engine.end_trajectory(builder, 0.85);

        let stats = engine.stats();
        assert_eq!(stats.trajectories_buffered, 1);
    }

    #[test]
    fn test_micro_lora_application() {
        let engine = SonaEngine::new(64);

        // Train a bit first (loop index unused — `_` avoids the
        // unused-variable warning the original `for i` produced).
        for _ in 0..10 {
            let mut builder = engine.begin_trajectory(vec![0.1; 64]);
            builder.add_step(vec![0.5; 64], vec![], 0.8);
            engine.end_trajectory(builder, 0.8);
        }
        engine.flush();

        // Apply LoRA
        let input = vec![1.0; 64];
        let mut output = vec![0.0; 64];
        engine.apply_micro_lora(&input, &mut output);

        // Output may or may not be modified depending on accumulated gradients
    }

    #[test]
    fn test_force_learn() {
        let engine = SonaEngine::new(256);

        for _ in 0..150 {
            let mut builder = engine.begin_trajectory(vec![0.1; 256]);
            builder.add_step(vec![0.5; 256], vec![], 0.8);
            engine.end_trajectory(builder, 0.8);
        }

        let result = engine.force_learn();
        assert!(result.contains("150 trajectories"));
    }

    #[test]
    fn test_disabled_engine() {
        let mut engine = SonaEngine::new(64);
        engine.set_enabled(false);

        let builder = engine.begin_trajectory(vec![0.1; 64]);
        engine.end_trajectory(builder, 0.8);

        // Should not record when disabled
        let stats = engine.stats();
        assert_eq!(stats.trajectories_buffered, 0);
    }
}
|
||||
494
vendor/ruvector/examples/ruvLLM/src/sona/ewc.rs
vendored
Normal file
494
vendor/ruvector/examples/ruvLLM/src/sona/ewc.rs
vendored
Normal file
@@ -0,0 +1,494 @@
|
||||
//! EWC++ (Enhanced Elastic Weight Consolidation) for SONA
|
||||
//!
|
||||
//! Prevents catastrophic forgetting with:
|
||||
//! - Online Fisher information estimation
|
||||
//! - Multi-task memory with circular buffer
|
||||
//! - Automatic task boundary detection
|
||||
//! - Adaptive lambda scheduling
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::VecDeque;
|
||||
|
||||
/// EWC++ configuration.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct EwcConfig {
    /// Number of parameters covered by the Fisher diagonal; gradient inputs
    /// of any other length are rejected by the update/constraint methods.
    pub param_count: usize,
    /// Maximum tasks to remember; older tasks are evicted FIFO from the
    /// circular buffer.
    pub max_tasks: usize,
    /// Initial lambda (regularization strength).
    pub initial_lambda: f32,
    /// Lower clamp applied whenever lambda is adapted or set manually.
    pub min_lambda: f32,
    /// Upper clamp applied whenever lambda is adapted or set manually.
    pub max_lambda: f32,
    /// Fisher EMA decay factor; closer to 1.0 means a slower-moving
    /// online Fisher estimate.
    pub fisher_ema_decay: f32,
    /// Task boundary detection threshold — average per-parameter z-score
    /// above which a new task is declared.
    pub boundary_threshold: f32,
    /// Number of recent gradient vectors retained for boundary detection.
    pub gradient_history_size: usize,
}
|
||||
|
||||
impl Default for EwcConfig {
|
||||
fn default() -> Self {
|
||||
// OPTIMIZED DEFAULTS based on @ruvector/sona v0.1.1 benchmarks:
|
||||
// - Lambda 2000 optimal for catastrophic forgetting prevention
|
||||
// - Higher max_lambda (15000) for aggressive protection when needed
|
||||
Self {
|
||||
param_count: 1000,
|
||||
max_tasks: 10,
|
||||
initial_lambda: 2000.0, // OPTIMIZED: Better forgetting prevention
|
||||
min_lambda: 100.0,
|
||||
max_lambda: 15000.0, // OPTIMIZED: Higher ceiling for multi-task
|
||||
fisher_ema_decay: 0.999,
|
||||
boundary_threshold: 2.0,
|
||||
gradient_history_size: 100,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Task-specific Fisher information snapshot, saved when a task ends.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TaskFisher {
    /// Task ID at the time the snapshot was taken.
    pub task_id: usize,
    /// Fisher diagonal (one importance estimate per parameter).
    pub fisher: Vec<f32>,
    /// Parameter values considered optimal for this task; the EWC penalty
    /// pulls current weights back toward these.
    pub optimal_weights: Vec<f32>,
    /// Task importance — scales this task's contribution in weighted
    /// consolidation and constraint application.
    pub importance: f32,
}
|
||||
|
||||
/// EWC++ implementation.
///
/// Maintains an online (EMA) Fisher estimate for the current task plus a
/// bounded memory of past-task snapshots, and detects task boundaries from
/// gradient distribution shift.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct EwcPlusPlus {
    /// Configuration (sizes, decay, lambda bounds).
    config: EwcConfig,
    /// Current Fisher information (online EMA of squared gradients).
    current_fisher: Vec<f32>,
    /// Current optimal-weight reference, updated via `set_optimal_weights`.
    current_weights: Vec<f32>,
    /// Task memory (circular buffer, capped at `config.max_tasks`).
    task_memory: VecDeque<TaskFisher>,
    /// Monotonically increasing ID of the task currently being learned.
    current_task_id: usize,
    /// Current regularization strength; adapted as tasks accumulate.
    lambda: f32,
    /// Recent raw gradient vectors, capped at `config.gradient_history_size`.
    /// NOTE(review): within this file the history is only pushed/popped/
    /// cleared, never read — confirm whether external code consumes it.
    gradient_history: VecDeque<Vec<f32>>,
    /// Running per-parameter gradient mean (Welford-style update).
    gradient_mean: Vec<f32>,
    /// Running per-parameter variance accumulator (Welford M2; initialized
    /// to 1.0 as a weak prior so early z-scores stay finite).
    gradient_var: Vec<f32>,
    /// Gradient samples observed for the current task.
    samples_seen: u64,
}
|
||||
|
||||
impl EwcPlusPlus {
    /// Create new EWC++ state sized for `config.param_count` parameters,
    /// starting at task 0 with `config.initial_lambda`.
    pub fn new(config: EwcConfig) -> Self {
        let param_count = config.param_count;
        let initial_lambda = config.initial_lambda;

        Self {
            config: config.clone(),
            current_fisher: vec![0.0; param_count],
            current_weights: vec![0.0; param_count],
            task_memory: VecDeque::with_capacity(config.max_tasks),
            current_task_id: 0,
            lambda: initial_lambda,
            gradient_history: VecDeque::with_capacity(config.gradient_history_size),
            gradient_mean: vec![0.0; param_count],
            // 1.0 (not 0.0) acts as a weak variance prior; see struct docs.
            gradient_var: vec![1.0; param_count],
            samples_seen: 0,
        }
    }

    /// Update the Fisher information online using an EMA of squared
    /// gradients. Silently ignores gradient vectors of the wrong length.
    pub fn update_fisher(&mut self, gradients: &[f32]) {
        if gradients.len() != self.config.param_count {
            return;
        }

        let decay = self.config.fisher_ema_decay;

        // Online Fisher update: F_t = decay * F_{t-1} + (1 - decay) * g^2
        for (i, &g) in gradients.iter().enumerate() {
            self.current_fisher[i] = decay * self.current_fisher[i] + (1.0 - decay) * g * g;
        }

        // Update gradient statistics for boundary detection
        // (must run before samples_seen is bumped: the Welford update
        // below uses samples_seen + 1 as the new sample count).
        self.update_gradient_stats(gradients);
        self.samples_seen += 1;
    }

    /// Update running mean/variance statistics used by
    /// `detect_task_boundary`, and record the gradient in the history buffer.
    fn update_gradient_stats(&mut self, gradients: &[f32]) {
        // Store in history (bounded FIFO).
        if self.gradient_history.len() >= self.config.gradient_history_size {
            self.gradient_history.pop_front();
        }
        self.gradient_history.push_back(gradients.to_vec());

        // Update running mean and variance (Welford's algorithm);
        // gradient_var accumulates the M2 sum of squared deviations.
        let n = self.samples_seen as f32 + 1.0;

        for (i, &g) in gradients.iter().enumerate() {
            let delta = g - self.gradient_mean[i];
            self.gradient_mean[i] += delta / n;
            let delta2 = g - self.gradient_mean[i];
            self.gradient_var[i] += delta * delta2;
        }
    }

    /// Detect a task boundary: returns `true` when the average per-parameter
    /// z-score of `gradients` against the running statistics exceeds
    /// `config.boundary_threshold`. Requires at least 50 samples of warmup.
    pub fn detect_task_boundary(&self, gradients: &[f32]) -> bool {
        if self.samples_seen < 50 || gradients.len() != self.config.param_count {
            return false;
        }

        // Compute z-score of current gradients vs running stats.
        let mut z_score_sum = 0.0f32;
        let mut count = 0;

        for (i, &g) in gradients.iter().enumerate() {
            // Population-style variance: M2 / n.
            let var = self.gradient_var[i] / self.samples_seen as f32;
            if var > 1e-8 {
                let std = var.sqrt();
                let z = (g - self.gradient_mean[i]).abs() / std;
                z_score_sum += z;
                count += 1;
            }
        }

        if count == 0 {
            return false;
        }

        let avg_z = z_score_sum / count as f32;
        avg_z > self.config.boundary_threshold
    }

    /// Start a new task: snapshot the current Fisher/weights into task
    /// memory (evicting the oldest if full), reset per-task state, and
    /// adapt lambda to the new task count.
    pub fn start_new_task(&mut self) {
        // Save current task's Fisher.
        let task_fisher = TaskFisher {
            task_id: self.current_task_id,
            fisher: self.current_fisher.clone(),
            optimal_weights: self.current_weights.clone(),
            importance: 1.0,
        };

        // Add to circular buffer.
        if self.task_memory.len() >= self.config.max_tasks {
            self.task_memory.pop_front();
        }
        self.task_memory.push_back(task_fisher);

        // Reset for new task (variance back to its 1.0 prior).
        self.current_task_id += 1;
        self.current_fisher.fill(0.0);
        self.gradient_history.clear();
        self.gradient_mean.fill(0.0);
        self.gradient_var.fill(1.0);
        self.samples_seen = 0;

        // Adapt lambda based on task count.
        self.adapt_lambda();
    }

    /// Scale lambda up as tasks accumulate (+10% of the initial value per
    /// remembered task), clamped to the configured [min, max] range.
    fn adapt_lambda(&mut self) {
        let task_count = self.task_memory.len();
        if task_count == 0 {
            return;
        }

        // Increase lambda as more tasks accumulate (more to protect).
        let scale = 1.0 + 0.1 * task_count as f32;
        self.lambda = (self.config.initial_lambda * scale)
            .clamp(self.config.min_lambda, self.config.max_lambda);
    }

    /// Apply EWC++ constraints to `gradients`, returning a damped copy.
    ///
    /// Each parameter's gradient is shrunk by 1/(1 + lambda * importance)
    /// for every remembered task, plus a down-weighted (0.1x) contribution
    /// from the current task's online Fisher. Wrong-length inputs are
    /// returned unchanged.
    pub fn apply_constraints(&self, gradients: &[f32]) -> Vec<f32> {
        if gradients.len() != self.config.param_count {
            return gradients.to_vec();
        }

        let mut constrained = gradients.to_vec();

        // Apply constraint from each remembered task.
        for task in &self.task_memory {
            for (i, g) in constrained.iter_mut().enumerate() {
                // Penalty: lambda * F_i * (w_i - w*_i)
                // Gradient of penalty: lambda * F_i
                // Project gradient to preserve important weights
                let importance = task.fisher[i] * task.importance;
                if importance > 1e-8 {
                    let penalty_grad = self.lambda * importance;
                    // Reduce gradient magnitude for important parameters
                    *g *= 1.0 / (1.0 + penalty_grad);
                }
            }
        }

        // Also apply current task's Fisher (online).
        for (i, g) in constrained.iter_mut().enumerate() {
            if self.current_fisher[i] > 1e-8 {
                let penalty_grad = self.lambda * self.current_fisher[i] * 0.1; // Lower weight for current
                *g *= 1.0 / (1.0 + penalty_grad);
            }
        }

        constrained
    }

    /// Compute the EWC regularization loss
    /// (lambda/2) * sum over tasks of F_i * importance * (w_i - w*_i)^2.
    /// Returns 0.0 for wrong-length input.
    pub fn regularization_loss(&self, current_weights: &[f32]) -> f32 {
        if current_weights.len() != self.config.param_count {
            return 0.0;
        }

        let mut loss = 0.0f32;

        for task in &self.task_memory {
            for i in 0..self.config.param_count {
                let diff = current_weights[i] - task.optimal_weights[i];
                loss += task.fisher[i] * diff * diff * task.importance;
            }
        }

        self.lambda * loss / 2.0
    }

    /// Update the optimal-weight reference used when the current task is
    /// snapshotted. Silently ignores wrong-length input.
    pub fn set_optimal_weights(&mut self, weights: &[f32]) {
        if weights.len() == self.config.param_count {
            self.current_weights.copy_from_slice(weights);
        }
    }

    /// Merge all remembered tasks into a single consolidated snapshot whose
    /// Fisher is the importance-weighted average; the consolidated task keeps
    /// the summed importance. No-op when memory is empty.
    pub fn consolidate_all_tasks(&mut self) {
        if self.task_memory.is_empty() {
            return;
        }

        // Compute weighted average of Fisher matrices.
        let mut consolidated_fisher = vec![0.0f32; self.config.param_count];
        let mut total_importance = 0.0f32;

        for task in &self.task_memory {
            for (i, &f) in task.fisher.iter().enumerate() {
                consolidated_fisher[i] += f * task.importance;
            }
            total_importance += task.importance;
        }

        if total_importance > 0.0 {
            for f in &mut consolidated_fisher {
                *f /= total_importance;
            }
        }

        // Store as single consolidated task (task_id reset to 0).
        let consolidated = TaskFisher {
            task_id: 0,
            fisher: consolidated_fisher,
            optimal_weights: self.current_weights.clone(),
            importance: total_importance,
        };

        self.task_memory.clear();
        self.task_memory.push_back(consolidated);
    }

    /// Current regularization strength.
    pub fn lambda(&self) -> f32 {
        self.lambda
    }

    /// Set lambda manually (clamped to the configured range).
    pub fn set_lambda(&mut self, lambda: f32) {
        self.lambda = lambda.clamp(self.config.min_lambda, self.config.max_lambda);
    }

    /// Number of tasks currently held in memory.
    pub fn task_count(&self) -> usize {
        self.task_memory.len()
    }

    /// ID of the task currently being learned.
    pub fn current_task_id(&self) -> usize {
        self.current_task_id
    }

    /// Gradient samples observed for the current task.
    pub fn samples_seen(&self) -> u64 {
        self.samples_seen
    }

    /// Per-parameter importance: current Fisher plus the importance-weighted
    /// Fisher of every remembered task.
    pub fn importance_scores(&self) -> Vec<f32> {
        let mut scores = self.current_fisher.clone();

        for task in &self.task_memory {
            for (i, &f) in task.fisher.iter().enumerate() {
                scores[i] += f * task.importance;
            }
        }

        scores
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_ewc_creation() {
        let config = EwcConfig {
            param_count: 100,
            ..Default::default()
        };
        let ewc = EwcPlusPlus::new(config);

        assert_eq!(ewc.task_count(), 0);
        assert_eq!(ewc.current_task_id(), 0);
    }

    #[test]
    fn test_fisher_update() {
        let config = EwcConfig {
            param_count: 10,
            ..Default::default()
        };
        let mut ewc = EwcPlusPlus::new(config);

        let gradients = vec![0.5; 10];
        ewc.update_fisher(&gradients);

        assert!(ewc.samples_seen() > 0);
        assert!(ewc.current_fisher.iter().any(|&f| f > 0.0));
    }

    #[test]
    fn test_task_boundary() {
        let config = EwcConfig {
            param_count: 10,
            gradient_history_size: 10,
            boundary_threshold: 2.0,
            ..Default::default()
        };
        let mut ewc = EwcPlusPlus::new(config);

        // Train on consistent gradients
        for _ in 0..60 {
            let gradients = vec![0.1; 10];
            ewc.update_fisher(&gradients);
        }

        // Normal gradient should not trigger boundary
        let normal = vec![0.1; 10];
        assert!(!ewc.detect_task_boundary(&normal));

        // Very different gradient might trigger a boundary; exercise the
        // call but don't assert (it depends on accumulated variance).
        // The original bound this to an unused variable, producing a warning.
        let different = vec![10.0; 10];
        let _ = ewc.detect_task_boundary(&different);
    }

    #[test]
    fn test_constraint_application() {
        let config = EwcConfig {
            param_count: 5,
            ..Default::default()
        };
        let mut ewc = EwcPlusPlus::new(config);

        // Build up some Fisher information
        // (array literals coerce to slices; no Vec allocation needed).
        for _ in 0..10 {
            ewc.update_fisher(&[1.0; 5]);
        }
        ewc.start_new_task();

        // Apply constraints
        let gradients = vec![1.0; 5];
        let constrained = ewc.apply_constraints(&gradients);

        // Constrained gradients should be smaller
        let orig_mag: f32 = gradients.iter().map(|x| x.abs()).sum();
        let const_mag: f32 = constrained.iter().map(|x| x.abs()).sum();
        assert!(const_mag <= orig_mag);
    }

    #[test]
    fn test_regularization_loss() {
        let config = EwcConfig {
            param_count: 5,
            initial_lambda: 100.0,
            ..Default::default()
        };
        let mut ewc = EwcPlusPlus::new(config);

        // Set up optimal weights and Fisher
        ewc.set_optimal_weights(&[0.0; 5]);
        for _ in 0..10 {
            ewc.update_fisher(&[1.0; 5]);
        }
        ewc.start_new_task();

        // Loss should be zero when at optimal
        let at_optimal = ewc.regularization_loss(&[0.0; 5]);

        // Loss should be positive when deviated
        let deviated = ewc.regularization_loss(&[1.0; 5]);
        assert!(deviated > at_optimal);
    }

    #[test]
    fn test_task_consolidation() {
        let config = EwcConfig {
            param_count: 5,
            max_tasks: 5,
            ..Default::default()
        };
        let mut ewc = EwcPlusPlus::new(config);

        // Create multiple tasks
        for _ in 0..3 {
            for _ in 0..10 {
                ewc.update_fisher(&[1.0; 5]);
            }
            ewc.start_new_task();
        }

        assert_eq!(ewc.task_count(), 3);

        ewc.consolidate_all_tasks();
        assert_eq!(ewc.task_count(), 1);
    }

    #[test]
    fn test_lambda_adaptation() {
        let config = EwcConfig {
            param_count: 5,
            initial_lambda: 1000.0,
            ..Default::default()
        };
        let mut ewc = EwcPlusPlus::new(config);

        let initial_lambda = ewc.lambda();

        // Add tasks
        for _ in 0..5 {
            ewc.start_new_task();
        }

        // Lambda should have increased
        assert!(ewc.lambda() >= initial_lambda);
    }
}
|
||||
233
vendor/ruvector/examples/ruvLLM/src/sona/loops/background.rs
vendored
Normal file
233
vendor/ruvector/examples/ruvLLM/src/sona/loops/background.rs
vendored
Normal file
@@ -0,0 +1,233 @@
|
||||
//! Loop B - Background Learning
|
||||
//!
|
||||
//! Hourly pattern extraction and base LoRA updates.
|
||||
|
||||
use crate::sona::ewc::EwcPlusPlus;
|
||||
use crate::sona::lora::BaseLoRA;
|
||||
use crate::sona::reasoning_bank::ReasoningBank;
|
||||
use crate::sona::types::{LearnedPattern, QueryTrajectory, SonaConfig};
|
||||
use parking_lot::RwLock;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
/// Background loop configuration.
#[derive(Clone, Debug)]
pub struct BackgroundLoopConfig {
    /// Minimum trajectories required before a cycle runs; below this,
    /// `run_cycle` returns a "skipped" result.
    pub min_trajectories: usize,
    /// Base LoRA learning rate applied when nudging `up_proj` weights.
    pub base_lora_lr: f32,
    /// EWC lambda (regularization strength) passed through from SonaConfig.
    pub ewc_lambda: f32,
    /// Minimum wall-clock interval between pattern-extraction cycles.
    pub extraction_interval: Duration,
}
|
||||
|
||||
impl Default for BackgroundLoopConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
min_trajectories: 100,
|
||||
base_lora_lr: 0.0001,
|
||||
ewc_lambda: 1000.0,
|
||||
extraction_interval: Duration::from_secs(3600),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&SonaConfig> for BackgroundLoopConfig {
|
||||
fn from(config: &SonaConfig) -> Self {
|
||||
Self {
|
||||
min_trajectories: 100,
|
||||
base_lora_lr: config.base_lora_lr,
|
||||
ewc_lambda: config.ewc_lambda,
|
||||
extraction_interval: Duration::from_millis(config.background_interval_ms),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Background cycle result — summary returned by `BackgroundLoop::run_cycle`.
#[derive(Debug)]
pub struct BackgroundResult {
    /// Number of trajectories ingested this cycle (0 if skipped).
    pub trajectories_processed: usize,
    /// Number of patterns extracted from the reasoning bank.
    pub patterns_extracted: usize,
    /// Whether the EWC Fisher estimate was updated.
    pub ewc_updated: bool,
    /// Wall-clock duration of the cycle (Duration::ZERO if skipped).
    pub elapsed: Duration,
    /// Human-readable status: "completed" or "skipped: <reason>".
    pub status: String,
}
|
||||
|
||||
impl BackgroundResult {
|
||||
fn skipped(reason: &str) -> Self {
|
||||
Self {
|
||||
trajectories_processed: 0,
|
||||
patterns_extracted: 0,
|
||||
ewc_updated: false,
|
||||
elapsed: Duration::ZERO,
|
||||
status: format!("skipped: {}", reason),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Background learning loop (Loop B).
///
/// Periodically ingests buffered trajectories, extracts patterns, and
/// applies EWC-constrained updates to the base LoRA. Shares the reasoning
/// bank, EWC state, and base LoRA with other loops via `Arc<RwLock<_>>`.
pub struct BackgroundLoop {
    /// Configuration (interval, learning rate, trajectory floor).
    config: BackgroundLoopConfig,
    /// ReasoningBank for pattern storage (shared with other loops).
    reasoning_bank: Arc<RwLock<ReasoningBank>>,
    /// EWC++ for forgetting prevention (shared).
    ewc: Arc<RwLock<EwcPlusPlus>>,
    /// Base LoRA updated by each cycle (shared).
    base_lora: Arc<RwLock<BaseLoRA>>,
    /// Timestamp of the last completed extraction; drives `should_run`.
    last_extraction: RwLock<Instant>,
}
|
||||
|
||||
impl BackgroundLoop {
|
||||
/// Create new background loop
|
||||
pub fn new(
|
||||
config: BackgroundLoopConfig,
|
||||
reasoning_bank: Arc<RwLock<ReasoningBank>>,
|
||||
ewc: Arc<RwLock<EwcPlusPlus>>,
|
||||
base_lora: Arc<RwLock<BaseLoRA>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
config,
|
||||
reasoning_bank,
|
||||
ewc,
|
||||
base_lora,
|
||||
last_extraction: RwLock::new(Instant::now()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if it's time for background cycle
|
||||
pub fn should_run(&self) -> bool {
|
||||
self.last_extraction.read().elapsed() >= self.config.extraction_interval
|
||||
}
|
||||
|
||||
/// Run background learning cycle
|
||||
pub fn run_cycle(&self, trajectories: Vec<QueryTrajectory>) -> BackgroundResult {
|
||||
if trajectories.len() < self.config.min_trajectories {
|
||||
return BackgroundResult::skipped("insufficient trajectories");
|
||||
}
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
// 1. Add trajectories to reasoning bank
|
||||
{
|
||||
let mut bank = self.reasoning_bank.write();
|
||||
for trajectory in &trajectories {
|
||||
bank.add_trajectory(trajectory);
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Extract patterns
|
||||
let patterns = {
|
||||
let mut bank = self.reasoning_bank.write();
|
||||
bank.extract_patterns()
|
||||
};
|
||||
|
||||
// 3. Compute gradients from patterns
|
||||
let gradients = self.compute_pattern_gradients(&patterns);
|
||||
|
||||
// 4. Apply EWC++ constraints
|
||||
let constrained_gradients = {
|
||||
let ewc = self.ewc.read();
|
||||
ewc.apply_constraints(&gradients)
|
||||
};
|
||||
|
||||
// 5. Check for task boundary
|
||||
let task_boundary = {
|
||||
let ewc = self.ewc.read();
|
||||
ewc.detect_task_boundary(&gradients)
|
||||
};
|
||||
|
||||
if task_boundary {
|
||||
let mut ewc = self.ewc.write();
|
||||
ewc.start_new_task();
|
||||
}
|
||||
|
||||
// 6. Update EWC++ Fisher
|
||||
{
|
||||
let mut ewc = self.ewc.write();
|
||||
ewc.update_fisher(&constrained_gradients);
|
||||
}
|
||||
|
||||
// 7. Update base LoRA
|
||||
self.update_base_lora(&constrained_gradients);
|
||||
|
||||
// Update last extraction time
|
||||
*self.last_extraction.write() = Instant::now();
|
||||
|
||||
BackgroundResult {
|
||||
trajectories_processed: trajectories.len(),
|
||||
patterns_extracted: patterns.len(),
|
||||
ewc_updated: true,
|
||||
elapsed: start.elapsed(),
|
||||
status: "completed".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_pattern_gradients(&self, patterns: &[LearnedPattern]) -> Vec<f32> {
|
||||
if patterns.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let dim = patterns[0].centroid.len();
|
||||
let mut gradient = vec![0.0f32; dim];
|
||||
let mut total_weight = 0.0f32;
|
||||
|
||||
for pattern in patterns {
|
||||
let weight = pattern.avg_quality * pattern.cluster_size as f32;
|
||||
for (i, &v) in pattern.centroid.iter().enumerate() {
|
||||
if i < dim {
|
||||
gradient[i] += v * weight;
|
||||
}
|
||||
}
|
||||
total_weight += weight;
|
||||
}
|
||||
|
||||
if total_weight > 0.0 {
|
||||
for g in &mut gradient {
|
||||
*g /= total_weight;
|
||||
}
|
||||
}
|
||||
|
||||
gradient
|
||||
}
|
||||
|
||||
fn update_base_lora(&self, gradients: &[f32]) {
|
||||
let mut lora = self.base_lora.write();
|
||||
let num_layers = lora.num_layers();
|
||||
|
||||
if num_layers == 0 || gradients.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let per_layer = gradients.len() / num_layers;
|
||||
|
||||
for (layer_idx, layer) in lora.layers.iter_mut().enumerate() {
|
||||
let start = layer_idx * per_layer;
|
||||
let end = (start + per_layer).min(gradients.len());
|
||||
|
||||
for (i, &grad) in gradients[start..end].iter().enumerate() {
|
||||
if i < layer.up_proj.len() {
|
||||
layer.up_proj[i] += grad * self.config.base_lora_lr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get reasoning bank reference
|
||||
pub fn reasoning_bank(&self) -> &Arc<RwLock<ReasoningBank>> {
|
||||
&self.reasoning_bank
|
||||
}
|
||||
|
||||
/// Get EWC reference
|
||||
pub fn ewc(&self) -> &Arc<RwLock<EwcPlusPlus>> {
|
||||
&self.ewc
|
||||
}
|
||||
|
||||
/// Get base LoRA reference
|
||||
pub fn base_lora(&self) -> &Arc<RwLock<BaseLoRA>> {
|
||||
&self.base_lora
|
||||
}
|
||||
}
|
||||
222
vendor/ruvector/examples/ruvLLM/src/sona/loops/coordinator.rs
vendored
Normal file
222
vendor/ruvector/examples/ruvLLM/src/sona/loops/coordinator.rs
vendored
Normal file
@@ -0,0 +1,222 @@
|
||||
//! Loop Coordinator - Orchestrates all learning loops
|
||||
|
||||
use crate::sona::ewc::{EwcConfig, EwcPlusPlus};
|
||||
use crate::sona::loops::background::{BackgroundLoop, BackgroundLoopConfig, BackgroundResult};
|
||||
use crate::sona::loops::instant::{InstantLoop, InstantLoopConfig};
|
||||
use crate::sona::lora::{BaseLoRA, MicroLoRA};
|
||||
use crate::sona::reasoning_bank::{PatternConfig, ReasoningBank};
|
||||
use crate::sona::types::{QueryTrajectory, SonaConfig};
|
||||
use parking_lot::RwLock;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
/// Loop coordinator managing all learning loops
///
/// Owns the instant (Loop A, per-request) and background (Loop B, batch)
/// loops plus the components they share. Shared state is wrapped in
/// `Arc<RwLock<...>>` because the same handles are cloned into the
/// background loop at construction time.
pub struct LoopCoordinator {
    /// Configuration used to build all sub-components
    config: SonaConfig,
    /// Instant loop (Loop A): per-request trajectory recording and micro-LoRA
    instant: InstantLoop,
    /// Background loop (Loop B): batch pattern extraction and base-LoRA updates
    background: BackgroundLoop,
    /// Shared pattern store (also held by the background loop)
    reasoning_bank: Arc<RwLock<ReasoningBank>>,
    /// Shared EWC++ state (also held by the background loop)
    ewc: Arc<RwLock<EwcPlusPlus>>,
    /// Shared base LoRA (also held by the background loop)
    base_lora: Arc<RwLock<BaseLoRA>>,
    /// When false, `on_inference` becomes a no-op
    instant_enabled: bool,
    /// When false, `maybe_run_background` never runs a cycle
    background_enabled: bool,
}
|
||||
|
||||
impl LoopCoordinator {
|
||||
/// Create new coordinator with default config
|
||||
pub fn new(hidden_dim: usize) -> Self {
|
||||
Self::with_config(SonaConfig {
|
||||
hidden_dim,
|
||||
embedding_dim: hidden_dim,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
/// Create with custom config
|
||||
pub fn with_config(config: SonaConfig) -> Self {
|
||||
let reasoning_bank = Arc::new(RwLock::new(ReasoningBank::new(PatternConfig {
|
||||
embedding_dim: config.embedding_dim,
|
||||
k_clusters: config.pattern_clusters,
|
||||
..Default::default()
|
||||
})));
|
||||
|
||||
let ewc = Arc::new(RwLock::new(EwcPlusPlus::new(EwcConfig {
|
||||
param_count: config.hidden_dim * config.base_lora_rank * 2,
|
||||
initial_lambda: config.ewc_lambda,
|
||||
..Default::default()
|
||||
})));
|
||||
|
||||
let base_lora = Arc::new(RwLock::new(BaseLoRA::new(
|
||||
config.hidden_dim,
|
||||
config.base_lora_rank,
|
||||
12, // Default number of layers
|
||||
)));
|
||||
|
||||
let instant = InstantLoop::from_sona_config(&config);
|
||||
let background = BackgroundLoop::new(
|
||||
BackgroundLoopConfig::from(&config),
|
||||
reasoning_bank.clone(),
|
||||
ewc.clone(),
|
||||
base_lora.clone(),
|
||||
);
|
||||
|
||||
Self {
|
||||
config,
|
||||
instant,
|
||||
background,
|
||||
reasoning_bank,
|
||||
ewc,
|
||||
base_lora,
|
||||
instant_enabled: true,
|
||||
background_enabled: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Process inference trajectory (Loop A)
|
||||
pub fn on_inference(&self, trajectory: QueryTrajectory) {
|
||||
if self.instant_enabled {
|
||||
self.instant.on_trajectory(trajectory);
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate next trajectory ID
|
||||
pub fn next_trajectory_id(&self) -> u64 {
|
||||
self.instant.next_id()
|
||||
}
|
||||
|
||||
/// Run background cycle if needed (Loop B)
|
||||
pub fn maybe_run_background(&self) -> Option<BackgroundResult> {
|
||||
if !self.background_enabled {
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.background.should_run() {
|
||||
let trajectories = self.instant.drain_trajectories();
|
||||
if !trajectories.is_empty() {
|
||||
return Some(self.background.run_cycle(trajectories));
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Force background cycle
|
||||
pub fn force_background(&self) -> BackgroundResult {
|
||||
let trajectories = self.instant.drain_trajectories();
|
||||
self.background.run_cycle(trajectories)
|
||||
}
|
||||
|
||||
/// Flush instant loop updates
|
||||
pub fn flush_instant(&self) {
|
||||
self.instant.flush();
|
||||
}
|
||||
|
||||
/// Get micro-LoRA for inference
|
||||
pub fn micro_lora(&self) -> &Arc<RwLock<MicroLoRA>> {
|
||||
self.instant.micro_lora()
|
||||
}
|
||||
|
||||
/// Get base-LoRA for inference
|
||||
pub fn base_lora(&self) -> &Arc<RwLock<BaseLoRA>> {
|
||||
&self.base_lora
|
||||
}
|
||||
|
||||
/// Get reasoning bank
|
||||
pub fn reasoning_bank(&self) -> &Arc<RwLock<ReasoningBank>> {
|
||||
&self.reasoning_bank
|
||||
}
|
||||
|
||||
/// Get EWC++
|
||||
pub fn ewc(&self) -> &Arc<RwLock<EwcPlusPlus>> {
|
||||
&self.ewc
|
||||
}
|
||||
|
||||
/// Enable/disable instant loop
|
||||
pub fn set_instant_enabled(&mut self, enabled: bool) {
|
||||
self.instant_enabled = enabled;
|
||||
}
|
||||
|
||||
/// Enable/disable background loop
|
||||
pub fn set_background_enabled(&mut self, enabled: bool) {
|
||||
self.background_enabled = enabled;
|
||||
}
|
||||
|
||||
/// Get statistics
|
||||
pub fn stats(&self) -> CoordinatorStats {
|
||||
let (buffer_len, dropped, success_rate) = self.instant.buffer_stats();
|
||||
|
||||
CoordinatorStats {
|
||||
trajectories_buffered: buffer_len,
|
||||
trajectories_dropped: dropped,
|
||||
buffer_success_rate: success_rate,
|
||||
patterns_stored: self.reasoning_bank.read().pattern_count(),
|
||||
ewc_tasks: self.ewc.read().task_count(),
|
||||
instant_enabled: self.instant_enabled,
|
||||
background_enabled: self.background_enabled,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Coordinator statistics
///
/// Point-in-time snapshot produced by `LoopCoordinator::stats`.
#[derive(Debug, Clone)]
pub struct CoordinatorStats {
    /// Trajectories currently waiting in the instant-loop buffer
    pub trajectories_buffered: usize,
    /// Total trajectories dropped by the buffer (see `TrajectoryBuffer::dropped_count`)
    pub trajectories_dropped: u64,
    /// Buffer record success rate, as reported by `TrajectoryBuffer::success_rate`
    pub buffer_success_rate: f64,
    /// Patterns currently stored in the reasoning bank
    pub patterns_stored: usize,
    /// Number of tasks tracked by EWC++
    pub ewc_tasks: usize,
    /// Whether the instant loop (Loop A) is enabled
    pub instant_enabled: bool,
    /// Whether the background loop (Loop B) is enabled
    pub background_enabled: bool,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sona::types::TrajectoryStep;

    /// Build a minimal finalized trajectory (one step, quality 0.8).
    fn make_trajectory(id: u64) -> QueryTrajectory {
        let mut t = QueryTrajectory::new(id, vec![0.1; 256]);
        t.add_step(TrajectoryStep::new(vec![0.5; 256], vec![], 0.8, 0));
        t.finalize(0.8, 1000);
        t
    }

    #[test]
    fn test_coordinator_creation() {
        let coord = LoopCoordinator::new(256);
        let stats = coord.stats();
        assert_eq!(stats.trajectories_buffered, 0);
    }

    #[test]
    fn test_inference_processing() {
        let coord = LoopCoordinator::new(256);

        // Fixed: the loop binding was `i` but never used, producing an
        // unused-variable warning; `_` states the intent.
        for _ in 0..10 {
            let t = make_trajectory(coord.next_trajectory_id());
            coord.on_inference(t);
        }

        let stats = coord.stats();
        assert_eq!(stats.trajectories_buffered, 10);
    }

    #[test]
    fn test_force_background() {
        let coord = LoopCoordinator::new(256);

        // Fixed: same unused-binding warning as above.
        for _ in 0..150 {
            let t = make_trajectory(coord.next_trajectory_id());
            coord.on_inference(t);
        }

        let result = coord.force_background();
        assert_eq!(result.trajectories_processed, 150);
        assert!(result.patterns_extracted > 0);
    }
}
|
||||
247
vendor/ruvector/examples/ruvLLM/src/sona/loops/instant.rs
vendored
Normal file
247
vendor/ruvector/examples/ruvLLM/src/sona/loops/instant.rs
vendored
Normal file
@@ -0,0 +1,247 @@
|
||||
//! Loop A - Instant Learning
|
||||
//!
|
||||
//! Per-request adaptation with <1ms overhead.
|
||||
|
||||
use crate::sona::lora::MicroLoRA;
|
||||
use crate::sona::trajectory::{TrajectoryBuffer, TrajectoryIdGen};
|
||||
use crate::sona::types::{LearningSignal, QueryTrajectory, SonaConfig};
|
||||
use parking_lot::RwLock;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Configuration for instant loop
///
/// Tunables for Loop A (per-request learning).
#[derive(Clone, Debug)]
pub struct InstantLoopConfig {
    /// Micro-LoRA rank (MicroLoRA::new asserts this is 1 or 2)
    pub micro_lora_rank: usize,
    /// Micro-LoRA learning rate used when accumulated updates are applied
    pub micro_lora_lr: f32,
    /// Buffer capacity (max trajectories held before draining)
    pub buffer_capacity: usize,
    /// Flush threshold (apply updates every N signals)
    pub flush_threshold: usize,
}
|
||||
|
||||
impl Default for InstantLoopConfig {
    /// Defaults: rank-1 micro-LoRA, lr 0.001, 10k-trajectory buffer,
    /// flush every 100 accumulated signals.
    fn default() -> Self {
        Self {
            micro_lora_rank: 1,
            micro_lora_lr: 0.001,
            buffer_capacity: 10000,
            flush_threshold: 100,
        }
    }
}
|
||||
|
||||
impl From<&SonaConfig> for InstantLoopConfig {
    /// Derive instant-loop settings from the global SONA config.
    ///
    /// `flush_threshold` has no counterpart in `SonaConfig` and keeps the
    /// default of 100.
    fn from(config: &SonaConfig) -> Self {
        Self {
            micro_lora_rank: config.micro_lora_rank,
            micro_lora_lr: config.micro_lora_lr,
            buffer_capacity: config.trajectory_capacity,
            flush_threshold: 100,
        }
    }
}
|
||||
|
||||
/// Instant loop metrics
///
/// All counters are updated with `Ordering::Relaxed`, so they are cheap
/// but only eventually consistent across threads.
#[derive(Debug, Default)]
pub struct InstantLoopMetrics {
    /// Total trajectories processed (incremented on every `on_trajectory`)
    pub trajectories_processed: AtomicU64,
    /// Total signals accumulated (only when the micro-LoRA lock was acquired)
    pub signals_accumulated: AtomicU64,
    /// Total flushes performed
    pub flushes_performed: AtomicU64,
    /// Total updates applied across all flushes
    pub updates_applied: AtomicU64,
}
|
||||
|
||||
/// Instant learning loop (Loop A)
///
/// Records per-request trajectories into a shared buffer and accumulates
/// micro-LoRA gradients opportunistically (via `try_write`, never blocking).
pub struct InstantLoop {
    /// Configuration
    config: InstantLoopConfig,
    /// Trajectory buffer, drained by the background loop
    trajectory_buffer: Arc<TrajectoryBuffer>,
    /// Micro-LoRA adapter, shared with inference
    micro_lora: Arc<RwLock<MicroLoRA>>,
    /// ID generator for trajectory ids
    id_gen: TrajectoryIdGen,
    /// Signals accumulated since the last flush (drives auto-flush)
    pending_signals: AtomicU64,
    /// Metrics (public so callers can read counters directly)
    pub metrics: InstantLoopMetrics,
}
|
||||
|
||||
impl InstantLoop {
|
||||
/// Create new instant loop
|
||||
pub fn new(hidden_dim: usize, config: InstantLoopConfig) -> Self {
|
||||
Self {
|
||||
trajectory_buffer: Arc::new(TrajectoryBuffer::new(config.buffer_capacity)),
|
||||
micro_lora: Arc::new(RwLock::new(MicroLoRA::new(
|
||||
hidden_dim,
|
||||
config.micro_lora_rank,
|
||||
))),
|
||||
id_gen: TrajectoryIdGen::new(),
|
||||
pending_signals: AtomicU64::new(0),
|
||||
config,
|
||||
metrics: InstantLoopMetrics::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create from SONA config
|
||||
pub fn from_sona_config(config: &SonaConfig) -> Self {
|
||||
Self::new(config.hidden_dim, InstantLoopConfig::from(config))
|
||||
}
|
||||
|
||||
/// Generate next trajectory ID
|
||||
pub fn next_id(&self) -> u64 {
|
||||
self.id_gen.next()
|
||||
}
|
||||
|
||||
/// Process completed trajectory
|
||||
pub fn on_trajectory(&self, trajectory: QueryTrajectory) {
|
||||
// Record to buffer
|
||||
self.trajectory_buffer.record(trajectory.clone());
|
||||
self.metrics
|
||||
.trajectories_processed
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
// Generate learning signal
|
||||
let signal = LearningSignal::from_trajectory(&trajectory);
|
||||
|
||||
// Accumulate gradient (non-blocking)
|
||||
if let Some(mut lora) = self.micro_lora.try_write() {
|
||||
lora.accumulate_gradient(&signal);
|
||||
self.metrics
|
||||
.signals_accumulated
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
let pending = self.pending_signals.fetch_add(1, Ordering::Relaxed) + 1;
|
||||
|
||||
// Auto-flush if threshold reached
|
||||
if pending >= self.config.flush_threshold as u64 {
|
||||
self.flush_internal(&mut lora);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Manually flush accumulated updates
|
||||
pub fn flush(&self) {
|
||||
if let Some(mut lora) = self.micro_lora.try_write() {
|
||||
self.flush_internal(&mut lora);
|
||||
}
|
||||
}
|
||||
|
||||
fn flush_internal(&self, lora: &mut MicroLoRA) {
|
||||
let pending = lora.pending_updates();
|
||||
if pending > 0 {
|
||||
lora.apply_accumulated(self.config.micro_lora_lr);
|
||||
self.pending_signals.store(0, Ordering::Relaxed);
|
||||
self.metrics
|
||||
.flushes_performed
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
self.metrics
|
||||
.updates_applied
|
||||
.fetch_add(pending as u64, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
/// Drain trajectories for background processing
|
||||
pub fn drain_trajectories(&self) -> Vec<QueryTrajectory> {
|
||||
self.trajectory_buffer.drain()
|
||||
}
|
||||
|
||||
/// Drain up to N trajectories
|
||||
pub fn drain_trajectories_n(&self, n: usize) -> Vec<QueryTrajectory> {
|
||||
self.trajectory_buffer.drain_n(n)
|
||||
}
|
||||
|
||||
/// Get micro-LoRA reference for inference
|
||||
pub fn micro_lora(&self) -> &Arc<RwLock<MicroLoRA>> {
|
||||
&self.micro_lora
|
||||
}
|
||||
|
||||
/// Get trajectory buffer reference
|
||||
pub fn buffer(&self) -> &Arc<TrajectoryBuffer> {
|
||||
&self.trajectory_buffer
|
||||
}
|
||||
|
||||
/// Get pending trajectory count
|
||||
pub fn pending_count(&self) -> usize {
|
||||
self.trajectory_buffer.len()
|
||||
}
|
||||
|
||||
/// Get buffer stats
|
||||
pub fn buffer_stats(&self) -> (usize, u64, f64) {
|
||||
(
|
||||
self.trajectory_buffer.len(),
|
||||
self.trajectory_buffer.dropped_count(),
|
||||
self.trajectory_buffer.success_rate(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sona::types::TrajectoryStep;

    /// Build a minimal finalized trajectory (one step, quality 0.8).
    fn make_trajectory(id: u64) -> QueryTrajectory {
        let mut t = QueryTrajectory::new(id, vec![0.1; 64]);
        t.add_step(TrajectoryStep::new(vec![0.5; 64], vec![], 0.8, 0));
        t.finalize(0.8, 1000);
        t
    }

    #[test]
    fn test_instant_loop_creation() {
        let loop_a = InstantLoop::new(64, InstantLoopConfig::default());
        assert_eq!(loop_a.pending_count(), 0);
    }

    #[test]
    fn test_trajectory_processing() {
        let loop_a = InstantLoop::new(64, InstantLoopConfig::default());

        let t = make_trajectory(loop_a.next_id());
        loop_a.on_trajectory(t);

        // One trajectory buffered and counted in the metrics.
        assert_eq!(loop_a.pending_count(), 1);
        assert_eq!(
            loop_a
                .metrics
                .trajectories_processed
                .load(Ordering::Relaxed),
            1
        );
    }

    #[test]
    fn test_auto_flush() {
        // Low threshold so 5 trajectories must trigger at least one flush.
        let config = InstantLoopConfig {
            flush_threshold: 3,
            ..Default::default()
        };
        let loop_a = InstantLoop::new(64, config);

        for i in 0..5 {
            loop_a.on_trajectory(make_trajectory(i));
        }

        assert!(loop_a.metrics.flushes_performed.load(Ordering::Relaxed) >= 1);
    }

    #[test]
    fn test_drain() {
        let loop_a = InstantLoop::new(64, InstantLoopConfig::default());

        for i in 0..10 {
            loop_a.on_trajectory(make_trajectory(i));
        }

        // Drain returns everything and empties the buffer.
        let drained = loop_a.drain_trajectories();
        assert_eq!(drained.len(), 10);
        assert_eq!(loop_a.pending_count(), 0);
    }
}
|
||||
14
vendor/ruvector/examples/ruvLLM/src/sona/loops/mod.rs
vendored
Normal file
14
vendor/ruvector/examples/ruvLLM/src/sona/loops/mod.rs
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
//! SONA Learning Loops
|
||||
//!
|
||||
//! Three-tier temporal learning architecture:
|
||||
//! - Loop A (Instant): Per-request trajectory recording and micro-LoRA updates
|
||||
//! - Loop B (Background): Hourly pattern extraction and base LoRA updates
|
||||
//! - Loop C (Deep): Weekly dream consolidation and full EWC++ update
|
||||
|
||||
pub mod background;
|
||||
pub mod coordinator;
|
||||
pub mod instant;
|
||||
|
||||
pub use background::BackgroundLoop;
|
||||
pub use coordinator::LoopCoordinator;
|
||||
pub use instant::InstantLoop;
|
||||
551
vendor/ruvector/examples/ruvLLM/src/sona/lora.rs
vendored
Normal file
551
vendor/ruvector/examples/ruvLLM/src/sona/lora.rs
vendored
Normal file
@@ -0,0 +1,551 @@
|
||||
//! LoRA (Low-Rank Adaptation) implementations for SONA
|
||||
//!
|
||||
//! Two-tier LoRA system:
|
||||
//! - MicroLoRA: Rank 1-2, per-request adaptation (<100μs)
|
||||
//! - BaseLoRA: Rank 4-16, background adaptation (hourly)
|
||||
|
||||
use crate::sona::types::LearningSignal;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Optimal batch size for processing (benchmark-validated)
///
/// Used by `MicroLoRA::forward_batch_optimal` to chunk its inputs.
pub const OPTIMAL_BATCH_SIZE: usize = 32;
|
||||
|
||||
/// Micro-LoRA for per-request adaptation
///
/// Uses rank 1-2 for ultra-low latency updates.
/// Forward pass: output += scale * (input @ down) @ up
///
/// **Performance notes (from benchmarks):**
/// - Rank-2 is ~5% faster than Rank-1 due to better SIMD vectorization
/// - Batch size 32 optimal: 0.447ms per-vector, 2,236 ops/sec throughput
/// - SIMD-enabled: +10% speedup over scalar
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MicroLoRA {
    /// Down projection (hidden_dim -> rank); rank-major layout:
    /// element (r, i) lives at index `r * hidden_dim + i`
    down_proj: Vec<f32>,
    /// Up projection (rank -> hidden_dim); rank-major layout:
    /// element (r, i) lives at index `r * hidden_dim + i`
    up_proj: Vec<f32>,
    /// Rank (1-2 for micro updates)
    rank: usize,
    /// Hidden dimension
    hidden_dim: usize,
    /// Accumulated gradients for down (not serialized; transient state)
    #[serde(skip)]
    grad_down: Vec<f32>,
    /// Accumulated gradients for up (not serialized; transient state)
    #[serde(skip)]
    grad_up: Vec<f32>,
    /// Update count for averaging accumulated gradients at apply time
    #[serde(skip)]
    update_count: usize,
    /// Scaling factor applied on the up projection (init: 1/sqrt(rank))
    scale: f32,
    /// Performance stats
    /// NOTE(review): not incremented anywhere in the visible impl —
    /// confirm whether these counters are wired up elsewhere.
    #[serde(skip)]
    stats: MicroLoRAStats,
}
|
||||
|
||||
/// Performance statistics for MicroLoRA
///
/// NOTE(review): the visible `MicroLoRA` impl never updates these fields;
/// they appear to be placeholders — confirm before relying on them.
#[derive(Clone, Debug, Default)]
pub struct MicroLoRAStats {
    /// Total forward passes
    pub forward_count: u64,
    /// Total time in forward passes (nanoseconds)
    pub forward_time_ns: u64,
    /// Total gradient accumulations
    pub gradient_count: u64,
    /// Total apply operations
    pub apply_count: u64,
}
|
||||
|
||||
impl MicroLoRA {
    /// Create new Micro-LoRA adapter
    ///
    /// # Arguments
    /// * `hidden_dim` - Model hidden dimension
    /// * `rank` - LoRA rank (must be 1-2)
    ///
    /// # Panics
    /// Panics if rank > 2
    pub fn new(hidden_dim: usize, rank: usize) -> Self {
        assert!(
            rank >= 1 && rank <= 2,
            "MicroLoRA rank must be 1-2, got {}",
            rank
        );

        // Initialize down with small deterministic pseudo-random values
        // (golden-ratio stride over [−0.01, 0.01]) for reproducibility.
        let down_proj: Vec<f32> = (0..hidden_dim * rank)
            .map(|i| {
                let x = (i as f32 * 0.618033988749895) % 1.0;
                (x - 0.5) * 0.02
            })
            .collect();

        // Initialize up to zero (standard LoRA init: adapter starts as identity).
        let up_proj = vec![0.0f32; rank * hidden_dim];

        Self {
            down_proj,
            up_proj,
            rank,
            hidden_dim,
            grad_down: vec![0.0; hidden_dim * rank],
            grad_up: vec![0.0; rank * hidden_dim],
            update_count: 0,
            scale: 1.0 / (rank as f32).sqrt(),
            stats: MicroLoRAStats::default(),
        }
    }

    /// Batch forward pass - process multiple inputs efficiently
    ///
    /// `inputs` and `outputs` must have the same length (asserted).
    /// Optimal batch size is 32 (0.447ms per-vector, 2,236 throughput)
    pub fn forward_batch(&self, inputs: &[Vec<f32>], outputs: &mut [Vec<f32>]) {
        assert_eq!(inputs.len(), outputs.len());
        for (input, output) in inputs.iter().zip(outputs.iter_mut()) {
            self.forward(input, output);
        }
    }

    /// Batch forward with optimal chunking
    ///
    /// Allocates zeroed outputs and processes inputs in chunks of
    /// `OPTIMAL_BATCH_SIZE` (cache-friendliness; results are identical to
    /// a straight loop).
    pub fn forward_batch_optimal(&self, inputs: &[Vec<f32>]) -> Vec<Vec<f32>> {
        let mut outputs: Vec<Vec<f32>> = inputs
            .iter()
            .map(|_| vec![0.0f32; self.hidden_dim])
            .collect();

        // Process in optimal batch sizes
        for chunk_start in (0..inputs.len()).step_by(OPTIMAL_BATCH_SIZE) {
            let chunk_end = (chunk_start + OPTIMAL_BATCH_SIZE).min(inputs.len());
            for i in chunk_start..chunk_end {
                self.forward(&inputs[i], &mut outputs[i]);
            }
        }

        outputs
    }

    /// Scalar forward pass (fallback)
    ///
    /// Computes `output += scale * up(down(input))`. Both slices must be
    /// exactly `hidden_dim` long (asserted). Output is accumulated, not
    /// overwritten.
    pub fn forward_scalar(&self, input: &[f32], output: &mut [f32]) {
        assert_eq!(input.len(), self.hidden_dim);
        assert_eq!(output.len(), self.hidden_dim);

        // Down projection: hidden_dim -> rank
        // (down_proj is rank-major: row r at offset r * hidden_dim)
        let mut intermediate = vec![0.0f32; self.rank];
        for r in 0..self.rank {
            let mut sum = 0.0f32;
            let offset = r * self.hidden_dim;
            for i in 0..self.hidden_dim {
                sum += input[i] * self.down_proj[offset + i];
            }
            intermediate[r] = sum;
        }

        // Up projection: rank -> hidden_dim, accumulated into output
        for i in 0..self.hidden_dim {
            let mut sum = 0.0f32;
            for r in 0..self.rank {
                sum += intermediate[r] * self.up_proj[r * self.hidden_dim + i];
            }
            output[i] += sum * self.scale;
        }
    }

    /// SIMD-optimized forward pass (AVX2)
    ///
    /// Same contract as `forward_scalar`; uses 8-lane FMA with a scalar
    /// tail for `hidden_dim` not divisible by 8. Only compiled when the
    /// build targets x86_64 with the `avx2` target feature enabled.
    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
    pub fn forward_simd(&self, input: &[f32], output: &mut [f32]) {
        use std::arch::x86_64::*;

        assert_eq!(input.len(), self.hidden_dim);
        assert_eq!(output.len(), self.hidden_dim);

        // SAFETY: the asserts above pin input/output to hidden_dim elements,
        // and down_proj/up_proj are allocated as rank * hidden_dim in `new`,
        // so every unaligned 8-float load/store below stays in bounds
        // (the `i + 8 <= hidden_dim` loop conditions guarantee this); the
        // avx2 target feature is guaranteed by the cfg gate.
        unsafe {
            // Down projection: hidden_dim -> rank
            let mut intermediate = vec![0.0f32; self.rank];

            for r in 0..self.rank {
                let mut sum = _mm256_setzero_ps();
                let offset = r * self.hidden_dim;

                let mut i = 0;
                while i + 8 <= self.hidden_dim {
                    let inp = _mm256_loadu_ps(input[i..].as_ptr());
                    let weight = _mm256_loadu_ps(self.down_proj[offset + i..].as_ptr());
                    sum = _mm256_fmadd_ps(inp, weight, sum);
                    i += 8;
                }

                // Horizontal sum of the 8 accumulator lanes
                let mut result = [0.0f32; 8];
                _mm256_storeu_ps(result.as_mut_ptr(), sum);
                intermediate[r] = result.iter().sum();

                // Handle remaining elements (scalar tail)
                for j in i..self.hidden_dim {
                    intermediate[r] += input[j] * self.down_proj[offset + j];
                }
            }

            // Up projection: rank -> hidden_dim
            let scale_vec = _mm256_set1_ps(self.scale);

            let mut i = 0;
            while i + 8 <= self.hidden_dim {
                let mut sum = _mm256_setzero_ps();

                for r in 0..self.rank {
                    let up_offset = r * self.hidden_dim;
                    let weight = _mm256_loadu_ps(self.up_proj[up_offset + i..].as_ptr());
                    let inter = _mm256_set1_ps(intermediate[r]);
                    sum = _mm256_fmadd_ps(inter, weight, sum);
                }

                // Scale and add to output (accumulating semantics)
                sum = _mm256_mul_ps(sum, scale_vec);
                let existing = _mm256_loadu_ps(output[i..].as_ptr());
                let result = _mm256_add_ps(existing, sum);
                _mm256_storeu_ps(output[i..].as_mut_ptr(), result);

                i += 8;
            }

            // Handle remaining elements (scalar tail)
            for j in i..self.hidden_dim {
                let mut val = 0.0;
                for r in 0..self.rank {
                    val += intermediate[r] * self.up_proj[r * self.hidden_dim + j];
                }
                output[j] += val * self.scale;
            }
        }
    }

    /// Forward pass with compile-time SIMD selection
    ///
    /// NOTE: this is *not* runtime CPU detection — the AVX2 path is chosen
    /// at compile time via `target_feature`; builds without `avx2` always
    /// use the scalar path.
    pub fn forward(&self, input: &[f32], output: &mut [f32]) {
        #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
        {
            self.forward_simd(input, output);
            return;
        }

        // Unreachable only when the AVX2 block above was compiled in.
        #[allow(unreachable_code)]
        self.forward_scalar(input, output);
    }

    /// Accumulate gradient from learning signal
    ///
    /// Silently ignores signals whose gradient length does not match
    /// `hidden_dim`. Only `grad_up` is accumulated (quality-weighted);
    /// `grad_down` is left untouched by design — the down projection is
    /// frozen at its init values.
    pub fn accumulate_gradient(&mut self, signal: &LearningSignal) {
        if signal.gradient_estimate.len() != self.hidden_dim {
            return;
        }

        let quality = signal.quality_score;

        // Simplified gradient: outer product scaled by quality
        // This approximates the true gradient for rank-1 LoRA
        for r in 0..self.rank {
            for i in 0..self.hidden_dim {
                let grad_idx = r * self.hidden_dim + i;
                // Update up projection gradient (main target)
                self.grad_up[grad_idx] += signal.gradient_estimate[i] * quality;
            }
        }

        self.update_count += 1;
    }

    /// Apply accumulated gradients with learning rate
    ///
    /// Averages the accumulated gradient over `update_count` signals, adds
    /// it (scaled by `learning_rate`) to `up_proj`, then resets all
    /// accumulators. No-op when nothing was accumulated.
    pub fn apply_accumulated(&mut self, learning_rate: f32) {
        if self.update_count == 0 {
            return;
        }

        let scale = learning_rate / self.update_count as f32;

        // Update up projection (main adaptation target)
        for (w, g) in self.up_proj.iter_mut().zip(self.grad_up.iter()) {
            *w += g * scale;
        }

        // Reset accumulators
        self.grad_up.fill(0.0);
        self.grad_down.fill(0.0);
        self.update_count = 0;
    }

    /// Reset adapter to initial state
    ///
    /// Zeroes `up_proj` and all accumulators; `down_proj` keeps its
    /// deterministic init values, restoring the post-`new` state.
    pub fn reset(&mut self) {
        self.up_proj.fill(0.0);
        self.grad_up.fill(0.0);
        self.grad_down.fill(0.0);
        self.update_count = 0;
    }

    /// Get rank
    pub fn rank(&self) -> usize {
        self.rank
    }

    /// Get hidden dimension
    pub fn hidden_dim(&self) -> usize {
        self.hidden_dim
    }

    /// Get parameter count (down + up)
    pub fn param_count(&self) -> usize {
        self.down_proj.len() + self.up_proj.len()
    }

    /// Get scale factor
    pub fn scale(&self) -> f32 {
        self.scale
    }

    /// Set scale factor
    pub fn set_scale(&mut self, scale: f32) {
        self.scale = scale;
    }

    /// Get pending update count (signals accumulated since last apply)
    pub fn pending_updates(&self) -> usize {
        self.update_count
    }
}
|
||||
|
||||
/// Base LoRA for background adaptation
///
/// Higher rank (4-16) for more expressive adaptation.
/// Applied hourly during background learning cycles.
///
/// Effective per-layer update: `output += (alpha / rank) * up(down(input))`
/// (see `forward_layer`).
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BaseLoRA {
    /// LoRA layers, one per model layer (index matches `LoRALayer::layer_idx`)
    pub layers: Vec<LoRALayer>,
    /// Rank shared by all layers
    pub rank: usize,
    /// Hidden dimension shared by all layers
    pub hidden_dim: usize,
    /// Alpha scaling factor (initialized to `rank`, giving scale 1.0)
    pub alpha: f32,
}
|
||||
|
||||
/// Single LoRA layer
///
/// Weight layout is rank-major, as read by `BaseLoRA::forward_layer`:
/// element (r, i) of either matrix lives at index `r * hidden_dim + i`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LoRALayer {
    /// Down projection weights (rank rows of hidden_dim each)
    pub down_proj: Vec<f32>,
    /// Up projection weights (rank rows of hidden_dim each)
    pub up_proj: Vec<f32>,
    /// Layer index within the model
    pub layer_idx: usize,
}
|
||||
|
||||
impl BaseLoRA {
|
||||
/// Create new Base LoRA
|
||||
pub fn new(hidden_dim: usize, rank: usize, num_layers: usize) -> Self {
|
||||
let layers = (0..num_layers)
|
||||
.map(|idx| LoRALayer {
|
||||
down_proj: vec![0.0; hidden_dim * rank],
|
||||
up_proj: vec![0.0; rank * hidden_dim],
|
||||
layer_idx: idx,
|
||||
})
|
||||
.collect();
|
||||
|
||||
Self {
|
||||
layers,
|
||||
rank,
|
||||
hidden_dim,
|
||||
alpha: rank as f32,
|
||||
}
|
||||
}
|
||||
|
||||
/// Forward pass for single layer
|
||||
pub fn forward_layer(&self, layer_idx: usize, input: &[f32], output: &mut [f32]) {
|
||||
if layer_idx >= self.layers.len() {
|
||||
return;
|
||||
}
|
||||
|
||||
let layer = &self.layers[layer_idx];
|
||||
let scale = self.alpha / self.rank as f32;
|
||||
|
||||
// Down projection
|
||||
let mut intermediate = vec![0.0f32; self.rank];
|
||||
for r in 0..self.rank {
|
||||
let offset = r * self.hidden_dim;
|
||||
intermediate[r] = input
|
||||
.iter()
|
||||
.zip(&layer.down_proj[offset..offset + self.hidden_dim])
|
||||
.map(|(a, b)| a * b)
|
||||
.sum();
|
||||
}
|
||||
|
||||
// Up projection
|
||||
for i in 0..self.hidden_dim {
|
||||
let mut sum = 0.0f32;
|
||||
for r in 0..self.rank {
|
||||
sum += intermediate[r] * layer.up_proj[r * self.hidden_dim + i];
|
||||
}
|
||||
output[i] += sum * scale;
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge LoRA weights into model weights (for inference optimization)
|
||||
pub fn merge_into(&self, model_weights: &mut [f32], layer_idx: usize) {
|
||||
if layer_idx >= self.layers.len() {
|
||||
return;
|
||||
}
|
||||
|
||||
let layer = &self.layers[layer_idx];
|
||||
let scale = self.alpha / self.rank as f32;
|
||||
|
||||
// W' = W + scale * (down @ up)
|
||||
// Assumes model_weights is [hidden_dim x hidden_dim]
|
||||
for i in 0..self.hidden_dim {
|
||||
for j in 0..self.hidden_dim {
|
||||
let mut delta = 0.0f32;
|
||||
for r in 0..self.rank {
|
||||
delta +=
|
||||
layer.down_proj[i * self.rank + r] * layer.up_proj[r * self.hidden_dim + j];
|
||||
}
|
||||
model_weights[i * self.hidden_dim + j] += delta * scale;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get number of layers
|
||||
pub fn num_layers(&self) -> usize {
|
||||
self.layers.len()
|
||||
}
|
||||
|
||||
/// Get total parameter count
|
||||
pub fn param_count(&self) -> usize {
|
||||
self.layers.len() * (self.hidden_dim * self.rank + self.rank * self.hidden_dim)
|
||||
}
|
||||
}
|
||||
|
||||
/// Combined LoRA engine managing both tiers
///
/// Applies micro-LoRA first, then base LoRA, each gated by its enable flag.
#[derive(Clone, Debug)]
pub struct LoRAEngine {
    /// Micro-LoRA for instant adaptation (rank clamped to 1-2 at construction)
    pub micro: MicroLoRA,
    /// Base LoRA for background adaptation
    pub base: BaseLoRA,
    /// Whether micro-LoRA is enabled
    pub micro_enabled: bool,
    /// Whether base LoRA is enabled
    pub base_enabled: bool,
}
|
||||
|
||||
impl LoRAEngine {
|
||||
/// Create new LoRA engine
|
||||
pub fn new(hidden_dim: usize, micro_rank: usize, base_rank: usize, num_layers: usize) -> Self {
|
||||
Self {
|
||||
micro: MicroLoRA::new(hidden_dim, micro_rank.clamp(1, 2)),
|
||||
base: BaseLoRA::new(hidden_dim, base_rank, num_layers),
|
||||
micro_enabled: true,
|
||||
base_enabled: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply both LoRA tiers
|
||||
pub fn forward(&self, layer_idx: usize, input: &[f32], output: &mut [f32]) {
|
||||
if self.micro_enabled {
|
||||
self.micro.forward(input, output);
|
||||
}
|
||||
if self.base_enabled && layer_idx < self.base.num_layers() {
|
||||
self.base.forward_layer(layer_idx, input, output);
|
||||
}
|
||||
}
|
||||
|
||||
/// Accumulate micro-LoRA gradient
|
||||
pub fn accumulate_micro(&mut self, signal: &LearningSignal) {
|
||||
if self.micro_enabled {
|
||||
self.micro.accumulate_gradient(signal);
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply micro-LoRA updates
|
||||
pub fn apply_micro(&mut self, learning_rate: f32) {
|
||||
if self.micro_enabled {
|
||||
self.micro.apply_accumulated(learning_rate);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_micro_lora_creation() {
        let lora = MicroLoRA::new(256, 1);
        assert_eq!(lora.rank(), 1);
        assert_eq!(lora.hidden_dim(), 256);
        // rank-1: one down row + one up row of 256 each
        assert_eq!(lora.param_count(), 256 + 256);
    }

    #[test]
    fn test_micro_lora_forward() {
        let lora = MicroLoRA::new(64, 1);
        let input = vec![1.0f32; 64];
        let mut output = vec![0.0f32; 64];

        lora.forward(&input, &mut output);

        // Output should be modified (even if small due to init)
        // With zero-init up_proj, output should still be zero
        let sum: f32 = output.iter().sum();
        assert!(
            sum.abs() < 1e-6,
            "Expected ~0 with zero up_proj, got {}",
            sum
        );
    }

    #[test]
    fn test_micro_lora_learning() {
        let mut lora = MicroLoRA::new(64, 1);

        let signal = LearningSignal::with_gradient(vec![0.1; 64], vec![0.5; 64], 0.8);

        // One accumulated signal, cleared by apply.
        lora.accumulate_gradient(&signal);
        assert_eq!(lora.pending_updates(), 1);

        lora.apply_accumulated(0.01);
        assert_eq!(lora.pending_updates(), 0);

        // Now forward should produce non-zero output
        let input = vec![1.0f32; 64];
        let mut output = vec![0.0f32; 64];
        lora.forward(&input, &mut output);

        let sum: f32 = output.iter().map(|x| x.abs()).sum();
        assert!(sum > 0.0, "Expected non-zero output after learning");
    }

    #[test]
    fn test_base_lora() {
        let lora = BaseLoRA::new(64, 4, 12);
        assert_eq!(lora.num_layers(), 12);
        assert_eq!(lora.rank, 4);
    }

    #[test]
    fn test_lora_engine() {
        let mut engine = LoRAEngine::new(64, 1, 4, 12);

        let signal = LearningSignal::with_gradient(vec![0.1; 64], vec![0.5; 64], 0.9);

        engine.accumulate_micro(&signal);
        engine.apply_micro(0.01);

        // Smoke test: combined forward must not panic.
        let input = vec![1.0f32; 64];
        let mut output = vec![0.0f32; 64];
        engine.forward(0, &input, &mut output);
    }

    #[test]
    #[should_panic(expected = "MicroLoRA rank must be 1-2")]
    fn test_invalid_rank() {
        MicroLoRA::new(64, 5);
    }
}
|
||||
23
vendor/ruvector/examples/ruvLLM/src/sona/mod.rs
vendored
Normal file
23
vendor/ruvector/examples/ruvLLM/src/sona/mod.rs
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
//! SONA (Self-Optimizing Neural Architecture)
//!
//! Adaptive learning system with ReasoningBank integration.

// Submodules that make up the SONA subsystem.
pub mod engine;
pub mod ewc;
pub mod loops;
pub mod lora;
pub mod reasoning_bank;
pub mod trajectory;
pub mod types;

// Re-export main types so callers can `use crate::sona::*` without
// knowing the submodule layout.
pub use engine::SonaEngine;
pub use ewc::{EwcConfig, EwcPlusPlus, TaskFisher};
pub use loops::{BackgroundLoop, InstantLoop, LoopCoordinator};
pub use lora::{BaseLoRA, LoRAEngine, LoRALayer, MicroLoRA};
pub use reasoning_bank::{PatternConfig, ReasoningBank};
pub use trajectory::{TrajectoryBuffer, TrajectoryBuilder, TrajectoryIdGen};
pub use types::{
    LearnedPattern, LearningSignal, PatternType, QueryTrajectory, SignalMetadata, SonaConfig,
    TrajectoryStep,
};
|
||||
549
vendor/ruvector/examples/ruvLLM/src/sona/reasoning_bank.rs
vendored
Normal file
549
vendor/ruvector/examples/ruvLLM/src/sona/reasoning_bank.rs
vendored
Normal file
@@ -0,0 +1,549 @@
|
||||
//! ReasoningBank - Pattern storage and extraction for SONA
|
||||
//!
|
||||
//! Implements trajectory clustering using K-means++ for pattern discovery.
|
||||
|
||||
use crate::sona::types::{LearnedPattern, PatternType, QueryTrajectory};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// ReasoningBank configuration
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PatternConfig {
    /// Number of clusters for K-means++
    pub k_clusters: usize,
    /// Embedding dimension (trajectory embeddings are padded/truncated to this)
    pub embedding_dim: usize,
    /// Maximum K-means iterations
    pub max_iterations: usize,
    /// Convergence threshold (max centroid shift, Euclidean distance)
    pub convergence_threshold: f32,
    /// Minimum cluster size to keep; smaller clusters yield no pattern
    pub min_cluster_size: usize,
    /// Maximum trajectories to store (oldest are evicted FIFO when exceeded)
    pub max_trajectories: usize,
    /// Quality threshold for pattern (clusters below this average are dropped)
    pub quality_threshold: f32,
}
|
||||
|
||||
impl Default for PatternConfig {
|
||||
fn default() -> Self {
|
||||
// OPTIMIZED DEFAULTS based on @ruvector/sona v0.1.1 benchmarks:
|
||||
// - 100 clusters = 1.3ms search vs 50 clusters = 3.0ms (2.3x faster)
|
||||
// - Quality threshold 0.3 balances learning vs noise filtering
|
||||
Self {
|
||||
k_clusters: 100, // OPTIMIZED: 2.3x faster search (1.3ms vs 3.0ms)
|
||||
embedding_dim: 256,
|
||||
max_iterations: 100,
|
||||
convergence_threshold: 0.001,
|
||||
min_cluster_size: 5,
|
||||
max_trajectories: 10000,
|
||||
quality_threshold: 0.3, // OPTIMIZED: Lower threshold for more learning
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// ReasoningBank for pattern storage and extraction
#[derive(Clone, Debug)]
pub struct ReasoningBank {
    /// Configuration
    config: PatternConfig,
    /// Stored trajectories (FIFO-bounded by `config.max_trajectories`)
    trajectories: Vec<TrajectoryEntry>,
    /// Extracted patterns, keyed by pattern ID
    patterns: HashMap<u64, LearnedPattern>,
    /// Next pattern ID (monotonically increasing, never reused)
    next_pattern_id: u64,
    /// Pattern index (embedding -> pattern_id)
    pattern_index: Vec<(Vec<f32>, u64)>,
}
|
||||
|
||||
/// Internal trajectory entry with embedding
#[derive(Clone, Debug)]
struct TrajectoryEntry {
    /// Trajectory embedding (query + reward-weighted avg activations, L2-normalized)
    embedding: Vec<f32>,
    /// Quality score copied from the trajectory's `final_quality`
    quality: f32,
    /// Cluster assignment (None until `extract_patterns` runs)
    cluster: Option<usize>,
    /// Original trajectory ID
    trajectory_id: u64,
}
|
||||
|
||||
impl ReasoningBank {
    /// Create new, empty ReasoningBank with the given configuration.
    pub fn new(config: PatternConfig) -> Self {
        Self {
            config,
            trajectories: Vec::new(),
            patterns: HashMap::new(),
            next_pattern_id: 0,
            pattern_index: Vec::new(),
        }
    }

    /// Add trajectory to bank, evicting the oldest entries (FIFO) when full.
    pub fn add_trajectory(&mut self, trajectory: &QueryTrajectory) {
        // Compute embedding from trajectory
        let embedding = self.compute_embedding(trajectory);

        let entry = TrajectoryEntry {
            embedding,
            quality: trajectory.final_quality,
            cluster: None,
            trajectory_id: trajectory.id,
        };

        // Enforce capacity: remove enough of the oldest entries to leave
        // room for exactly one more.
        if self.trajectories.len() >= self.config.max_trajectories {
            let to_remove = self.trajectories.len() - self.config.max_trajectories + 1;
            self.trajectories.drain(0..to_remove);
        }

        self.trajectories.push(entry);
    }

    /// Compute a fixed-size, L2-normalized embedding for a trajectory:
    /// the query embedding plus a reward-weighted average of step activations.
    fn compute_embedding(&self, trajectory: &QueryTrajectory) -> Vec<f32> {
        let dim = self.config.embedding_dim;
        let mut embedding = vec![0.0f32; dim];

        // Start with query embedding (truncated or zero-padded to `dim`).
        let query_len = trajectory.query_embedding.len().min(dim);
        embedding[..query_len].copy_from_slice(&trajectory.query_embedding[..query_len]);

        // Average in step activations, weighted by reward (negative rewards
        // are clamped to zero weight).
        if !trajectory.steps.is_empty() {
            let mut total_reward = 0.0f32;

            for step in &trajectory.steps {
                let weight = step.reward.max(0.0);
                total_reward += weight;

                for (i, &act) in step.activations.iter().enumerate() {
                    if i < dim {
                        embedding[i] += act * weight;
                    }
                }
            }

            if total_reward > 0.0 {
                for e in &mut embedding {
                    *e /= total_reward + 1.0; // +1 for query contribution
                }
            }
        }

        // L2 normalize; skip near-zero vectors to avoid division blow-up.
        let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm > 1e-8 {
            for e in &mut embedding {
                *e /= norm;
            }
        }

        embedding
    }

    /// Extract patterns using K-means++ clustering over stored trajectories.
    ///
    /// Clusters smaller than `min_cluster_size` or with average quality
    /// below `quality_threshold` yield no pattern. Also updates each
    /// trajectory's cluster assignment as a side effect.
    pub fn extract_patterns(&mut self) -> Vec<LearnedPattern> {
        if self.trajectories.is_empty() {
            return Vec::new();
        }

        let k = self.config.k_clusters.min(self.trajectories.len());
        if k == 0 {
            return Vec::new();
        }

        // K-means++ initialization
        let centroids = self.kmeans_plus_plus_init(k);

        // Run K-means
        let (final_centroids, assignments) = self.run_kmeans(centroids);

        // Create patterns from clusters
        let mut patterns = Vec::new();

        for (cluster_idx, centroid) in final_centroids.into_iter().enumerate() {
            // Collect cluster members
            let members: Vec<_> = self
                .trajectories
                .iter()
                .enumerate()
                .filter(|(i, _)| assignments.get(*i) == Some(&cluster_idx))
                .map(|(_, t)| t)
                .collect();

            if members.len() < self.config.min_cluster_size {
                continue;
            }

            // Compute cluster statistics
            let cluster_size = members.len();
            let total_weight: f32 = members.iter().map(|t| t.quality).sum();
            let avg_quality = total_weight / cluster_size as f32;

            if avg_quality < self.config.quality_threshold {
                continue;
            }

            let pattern_id = self.next_pattern_id;
            self.next_pattern_id += 1;

            let pattern = LearnedPattern {
                id: pattern_id,
                centroid,
                cluster_size,
                total_weight,
                avg_quality,
                // Wall-clock seconds since the Unix epoch; defaults to 0 if
                // the system clock is before the epoch.
                created_at: std::time::SystemTime::now()
                    .duration_since(std::time::UNIX_EPOCH)
                    .unwrap_or_default()
                    .as_secs(),
                last_accessed: std::time::SystemTime::now()
                    .duration_since(std::time::UNIX_EPOCH)
                    .unwrap_or_default()
                    .as_secs(),
                access_count: 0,
                pattern_type: PatternType::General,
            };

            self.patterns.insert(pattern_id, pattern.clone());
            self.pattern_index
                .push((pattern.centroid.clone(), pattern_id));
            patterns.push(pattern);
        }

        // Update trajectory cluster assignments
        for (i, cluster) in assignments.into_iter().enumerate() {
            if i < self.trajectories.len() {
                self.trajectories[i].cluster = Some(cluster);
            }
        }

        patterns
    }

    /// K-means++ initialization (deterministic variant).
    ///
    /// The first centroid is always trajectory 0, and each subsequent
    /// centroid is the point farthest from all chosen centroids (max-D^2
    /// instead of random D^2-weighted sampling), for reproducibility.
    fn kmeans_plus_plus_init(&self, k: usize) -> Vec<Vec<f32>> {
        let mut centroids = Vec::with_capacity(k);
        let n = self.trajectories.len();

        if n == 0 || k == 0 {
            return centroids;
        }

        // First centroid: deterministic selection for reproducibility.
        let first_idx = 0;
        centroids.push(self.trajectories[first_idx].embedding.clone());

        // Remaining centroids: D^2 weighting
        for _ in 1..k {
            // Distance from each point to its nearest chosen centroid.
            let mut distances: Vec<f32> = self
                .trajectories
                .iter()
                .map(|t| {
                    centroids
                        .iter()
                        .map(|c| self.squared_distance(&t.embedding, c))
                        .fold(f32::MAX, f32::min)
                })
                .collect();

            // Normalize to probabilities (argmax below is unaffected, but
            // this keeps the values in a sampling-friendly range).
            let total: f32 = distances.iter().sum();
            if total > 0.0 {
                for d in &mut distances {
                    *d /= total;
                }
            }

            // Select next centroid (deterministic: highest distance)
            let (next_idx, _) = distances
                .iter()
                .enumerate()
                .max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
                .unwrap_or((0, &0.0));

            centroids.push(self.trajectories[next_idx].embedding.clone());
        }

        centroids
    }

    /// Run Lloyd's K-means iterations from the given initial centroids.
    ///
    /// Returns the final centroids and the per-trajectory cluster indices.
    /// Stops when assignments stabilize, centroids shift less than
    /// `convergence_threshold`, or `max_iterations` is reached.
    ///
    /// NOTE(review): if a cluster ends up empty, its centroid is replaced
    /// with the all-zero vector (the `counts[i] == 0` branch skips the
    /// average, leaving the zero-initialized accumulator) rather than kept
    /// at its previous position — confirm this is intended.
    fn run_kmeans(&self, mut centroids: Vec<Vec<f32>>) -> (Vec<Vec<f32>>, Vec<usize>) {
        let n = self.trajectories.len();
        let k = centroids.len();
        let dim = self.config.embedding_dim;

        let mut assignments = vec![0usize; n];

        for _iter in 0..self.config.max_iterations {
            // Assign points to nearest centroid
            let mut changed = false;
            for (i, t) in self.trajectories.iter().enumerate() {
                let (nearest, _) = centroids
                    .iter()
                    .enumerate()
                    .map(|(j, c)| (j, self.squared_distance(&t.embedding, c)))
                    .min_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
                    .unwrap_or((0, 0.0));

                if assignments[i] != nearest {
                    assignments[i] = nearest;
                    changed = true;
                }
            }

            if !changed {
                break;
            }

            // Update centroids: accumulate member embeddings per cluster.
            let mut new_centroids = vec![vec![0.0f32; dim]; k];
            let mut counts = vec![0usize; k];

            for (i, t) in self.trajectories.iter().enumerate() {
                let cluster = assignments[i];
                counts[cluster] += 1;
                for (j, &e) in t.embedding.iter().enumerate() {
                    new_centroids[cluster][j] += e;
                }
            }

            // Average and check convergence (max centroid shift).
            let mut max_shift = 0.0f32;
            for (i, new_c) in new_centroids.iter_mut().enumerate() {
                if counts[i] > 0 {
                    for e in new_c.iter_mut() {
                        *e /= counts[i] as f32;
                    }
                    let shift = self.squared_distance(new_c, &centroids[i]).sqrt();
                    max_shift = max_shift.max(shift);
                }
            }

            centroids = new_centroids;

            if max_shift < self.config.convergence_threshold {
                break;
            }
        }

        (centroids, assignments)
    }

    /// Squared Euclidean distance (zips to the shorter of the two slices).
    fn squared_distance(&self, a: &[f32], b: &[f32]) -> f32 {
        a.iter()
            .zip(b.iter())
            .map(|(&x, &y)| (x - y) * (x - y))
            .sum()
    }

    /// Find the k patterns most similar to `query`, best first.
    pub fn find_similar(&self, query: &[f32], k: usize) -> Vec<&LearnedPattern> {
        let mut scored: Vec<_> = self
            .patterns
            .values()
            .map(|p| (p, p.similarity(query)))
            .collect();

        // Descending by similarity; NaN scores compare as equal.
        scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));

        scored.into_iter().take(k).map(|(p, _)| p).collect()
    }

    /// Get pattern by ID
    pub fn get_pattern(&self, id: u64) -> Option<&LearnedPattern> {
        self.patterns.get(&id)
    }

    /// Get mutable pattern by ID
    pub fn get_pattern_mut(&mut self, id: u64) -> Option<&mut LearnedPattern> {
        self.patterns.get_mut(&id)
    }

    /// Get trajectory count
    pub fn trajectory_count(&self) -> usize {
        self.trajectories.len()
    }

    /// Get pattern count
    pub fn pattern_count(&self) -> usize {
        self.patterns.len()
    }

    /// Clear trajectories (keep patterns)
    pub fn clear_trajectories(&mut self) {
        self.trajectories.clear();
    }

    /// Prune patterns that fail the `should_prune` criteria, then drop
    /// their entries from the pattern index.
    pub fn prune_patterns(&mut self, min_quality: f32, min_accesses: u32, max_age_secs: u64) {
        let to_remove: Vec<u64> = self
            .patterns
            .iter()
            .filter(|(_, p)| p.should_prune(min_quality, min_accesses, max_age_secs))
            .map(|(id, _)| *id)
            .collect();

        for id in to_remove {
            self.patterns.remove(&id);
        }

        // Update index to drop stale entries.
        self.pattern_index
            .retain(|(_, id)| self.patterns.contains_key(id));
    }

    /// Consolidate similar patterns: any pair with similarity above the
    /// threshold is merged (second into first) and the second removed.
    /// O(n^2) over patterns; each pattern merges at most once per call.
    pub fn consolidate(&mut self, similarity_threshold: f32) {
        let pattern_ids: Vec<u64> = self.patterns.keys().copied().collect();
        let mut merged = Vec::new();

        for i in 0..pattern_ids.len() {
            for j in i + 1..pattern_ids.len() {
                let id1 = pattern_ids[i];
                let id2 = pattern_ids[j];

                // Skip anything already consumed by an earlier merge.
                if merged.contains(&id1) || merged.contains(&id2) {
                    continue;
                }

                if let (Some(p1), Some(p2)) = (self.patterns.get(&id1), self.patterns.get(&id2)) {
                    let sim = p1.similarity(&p2.centroid);
                    if sim > similarity_threshold {
                        // Merge p2 into p1
                        let merged_pattern = p1.merge(p2);
                        self.patterns.insert(id1, merged_pattern);
                        merged.push(id2);
                    }
                }
            }
        }

        // Remove merged patterns
        for id in merged {
            self.patterns.remove(&id);
        }

        // Update index
        self.pattern_index
            .retain(|(_, id)| self.patterns.contains_key(id));
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Helper: build a trajectory finalized with `quality` and a fixed 1000us latency.
    fn make_trajectory(id: u64, embedding: Vec<f32>, quality: f32) -> QueryTrajectory {
        let mut t = QueryTrajectory::new(id, embedding);
        t.finalize(quality, 1000);
        t
    }

    #[test]
    fn test_bank_creation() {
        // A fresh bank holds no trajectories and no patterns.
        let bank = ReasoningBank::new(PatternConfig::default());
        assert_eq!(bank.trajectory_count(), 0);
        assert_eq!(bank.pattern_count(), 0);
    }

    #[test]
    fn test_add_trajectory() {
        let config = PatternConfig {
            embedding_dim: 4,
            ..Default::default()
        };
        let mut bank = ReasoningBank::new(config);

        let t = make_trajectory(1, vec![0.1, 0.2, 0.3, 0.4], 0.8);
        bank.add_trajectory(&t);

        assert_eq!(bank.trajectory_count(), 1);
    }

    #[test]
    fn test_extract_patterns() {
        let config = PatternConfig {
            embedding_dim: 4,
            k_clusters: 2,
            min_cluster_size: 2,
            quality_threshold: 0.0,
            ..Default::default()
        };
        let mut bank = ReasoningBank::new(config);

        // Add clustered trajectories: two clearly separated groups of five.
        for i in 0..5 {
            let t = make_trajectory(i, vec![1.0, 0.0, 0.0, 0.0], 0.8);
            bank.add_trajectory(&t);
        }
        for i in 5..10 {
            let t = make_trajectory(i, vec![0.0, 1.0, 0.0, 0.0], 0.7);
            bank.add_trajectory(&t);
        }

        let patterns = bank.extract_patterns();
        assert!(!patterns.is_empty());
    }

    #[test]
    fn test_find_similar() {
        let config = PatternConfig {
            embedding_dim: 4,
            k_clusters: 2,
            min_cluster_size: 2,
            quality_threshold: 0.0,
            ..Default::default()
        };
        let mut bank = ReasoningBank::new(config);

        for i in 0..10 {
            let emb = if i < 5 {
                vec![1.0, 0.0, 0.0, 0.0]
            } else {
                vec![0.0, 1.0, 0.0, 0.0]
            };
            bank.add_trajectory(&make_trajectory(i, emb, 0.8));
        }

        bank.extract_patterns();

        // A query near the first cluster should surface at least one pattern.
        let query = vec![0.9, 0.1, 0.0, 0.0];
        let similar = bank.find_similar(&query, 1);
        assert!(!similar.is_empty());
    }

    #[test]
    fn test_consolidate() {
        let config = PatternConfig {
            embedding_dim: 4,
            k_clusters: 3,
            min_cluster_size: 1,
            quality_threshold: 0.0,
            ..Default::default()
        };
        let mut bank = ReasoningBank::new(config);

        // Create very similar trajectories so extracted patterns nearly coincide.
        for i in 0..9 {
            let emb = vec![1.0 + (i as f32 * 0.001), 0.0, 0.0, 0.0];
            bank.add_trajectory(&make_trajectory(i, emb, 0.8));
        }

        bank.extract_patterns();
        let before = bank.pattern_count();

        // Merging at 0.99 similarity can only reduce (or keep) the count.
        bank.consolidate(0.99);
        let after = bank.pattern_count();

        assert!(after <= before);
    }
}
|
||||
362
vendor/ruvector/examples/ruvLLM/src/sona/trajectory.rs
vendored
Normal file
362
vendor/ruvector/examples/ruvLLM/src/sona/trajectory.rs
vendored
Normal file
@@ -0,0 +1,362 @@
|
||||
//! Lock-free trajectory buffer for SONA
|
||||
//!
|
||||
//! Provides efficient, non-blocking trajectory recording during inference.
|
||||
|
||||
use crate::sona::types::{QueryTrajectory, TrajectoryStep};
|
||||
use crossbeam::queue::ArrayQueue;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::time::Instant;
|
||||
|
||||
/// Lock-free trajectory buffer using crossbeam ArrayQueue.
///
/// When the queue is full, new trajectories are dropped (counted, not
/// blocked on) so the inference path never stalls.
pub struct TrajectoryBuffer {
    /// Internal queue (fixed-capacity MPMC)
    buffer: ArrayQueue<QueryTrajectory>,
    /// Capacity (cached; ArrayQueue's capacity is fixed at construction)
    capacity: usize,
    /// Count of dropped trajectories
    dropped: AtomicU64,
    /// Total trajectories seen (recorded + dropped)
    total_seen: AtomicU64,
}
|
||||
|
||||
impl TrajectoryBuffer {
|
||||
/// Create new buffer with capacity
|
||||
pub fn new(capacity: usize) -> Self {
|
||||
Self {
|
||||
buffer: ArrayQueue::new(capacity),
|
||||
capacity,
|
||||
dropped: AtomicU64::new(0),
|
||||
total_seen: AtomicU64::new(0),
|
||||
}
|
||||
}
|
||||
|
||||
/// Record trajectory (non-blocking)
|
||||
///
|
||||
/// Returns true if recorded, false if buffer full
|
||||
pub fn record(&self, trajectory: QueryTrajectory) -> bool {
|
||||
self.total_seen.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
match self.buffer.push(trajectory) {
|
||||
Ok(()) => true,
|
||||
Err(_) => {
|
||||
self.dropped.fetch_add(1, Ordering::Relaxed);
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to pop single trajectory
|
||||
pub fn pop(&self) -> Option<QueryTrajectory> {
|
||||
self.buffer.pop()
|
||||
}
|
||||
|
||||
/// Drain all trajectories
|
||||
pub fn drain(&self) -> Vec<QueryTrajectory> {
|
||||
let mut result = Vec::with_capacity(self.len());
|
||||
while let Some(t) = self.buffer.pop() {
|
||||
result.push(t);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Drain up to n trajectories
|
||||
pub fn drain_n(&self, n: usize) -> Vec<QueryTrajectory> {
|
||||
let mut result = Vec::with_capacity(n.min(self.len()));
|
||||
for _ in 0..n {
|
||||
match self.buffer.pop() {
|
||||
Some(t) => result.push(t),
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Get current length
|
||||
pub fn len(&self) -> usize {
|
||||
self.buffer.len()
|
||||
}
|
||||
|
||||
/// Check if empty
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.buffer.is_empty()
|
||||
}
|
||||
|
||||
/// Check if full
|
||||
pub fn is_full(&self) -> bool {
|
||||
self.buffer.is_full()
|
||||
}
|
||||
|
||||
/// Get capacity
|
||||
pub fn capacity(&self) -> usize {
|
||||
self.capacity
|
||||
}
|
||||
|
||||
/// Get dropped count
|
||||
pub fn dropped_count(&self) -> u64 {
|
||||
self.dropped.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// Get total seen count
|
||||
pub fn total_seen(&self) -> u64 {
|
||||
self.total_seen.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// Get success rate
|
||||
pub fn success_rate(&self) -> f64 {
|
||||
let total = self.total_seen.load(Ordering::Relaxed);
|
||||
let dropped = self.dropped.load(Ordering::Relaxed);
|
||||
if total == 0 {
|
||||
1.0
|
||||
} else {
|
||||
(total - dropped) as f64 / total as f64
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset statistics (not the buffer contents)
|
||||
pub fn reset_stats(&self) {
|
||||
self.dropped.store(0, Ordering::Relaxed);
|
||||
self.total_seen.store(0, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for constructing trajectories during inference.
///
/// Created via `SonaEngine::begin_trajectory`; consumed by `build`/
/// `build_with_latency` to produce a `QueryTrajectory`.
pub struct TrajectoryBuilder {
    /// Trajectory ID
    id: u64,
    /// Query embedding
    query_embedding: Vec<f32>,
    /// Steps collected so far
    steps: Vec<TrajectoryStep>,
    /// Start time (used by `build` to measure latency)
    start_time: Instant,
    /// Model route, if one was set
    model_route: Option<String>,
    /// Context IDs attached to this query
    context_ids: Vec<String>,
}
|
||||
|
||||
impl TrajectoryBuilder {
|
||||
/// Start new trajectory
|
||||
pub fn new(id: u64, query_embedding: Vec<f32>) -> Self {
|
||||
Self {
|
||||
id,
|
||||
query_embedding,
|
||||
steps: Vec::with_capacity(16),
|
||||
start_time: Instant::now(),
|
||||
model_route: None,
|
||||
context_ids: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add execution step
|
||||
pub fn add_step(&mut self, activations: Vec<f32>, attention_weights: Vec<f32>, reward: f32) {
|
||||
let step_idx = self.steps.len();
|
||||
self.steps.push(TrajectoryStep::new(
|
||||
activations,
|
||||
attention_weights,
|
||||
reward,
|
||||
step_idx,
|
||||
));
|
||||
}
|
||||
|
||||
/// Add step with layer name
|
||||
pub fn add_named_step(
|
||||
&mut self,
|
||||
name: &str,
|
||||
activations: Vec<f32>,
|
||||
attention_weights: Vec<f32>,
|
||||
reward: f32,
|
||||
) {
|
||||
let step_idx = self.steps.len();
|
||||
self.steps.push(
|
||||
TrajectoryStep::new(activations, attention_weights, reward, step_idx).with_layer(name),
|
||||
);
|
||||
}
|
||||
|
||||
/// Set model route
|
||||
pub fn set_model_route(&mut self, route: &str) {
|
||||
self.model_route = Some(route.to_string());
|
||||
}
|
||||
|
||||
/// Add context ID
|
||||
pub fn add_context(&mut self, context_id: &str) {
|
||||
self.context_ids.push(context_id.to_string());
|
||||
}
|
||||
|
||||
/// Get current step count
|
||||
pub fn step_count(&self) -> usize {
|
||||
self.steps.len()
|
||||
}
|
||||
|
||||
/// Get elapsed time
|
||||
pub fn elapsed(&self) -> std::time::Duration {
|
||||
self.start_time.elapsed()
|
||||
}
|
||||
|
||||
/// Finalize and build trajectory
|
||||
pub fn build(self, final_quality: f32) -> QueryTrajectory {
|
||||
let latency_us = self.start_time.elapsed().as_micros() as u64;
|
||||
|
||||
QueryTrajectory {
|
||||
id: self.id,
|
||||
query_embedding: self.query_embedding,
|
||||
steps: self.steps,
|
||||
final_quality,
|
||||
latency_us,
|
||||
model_route: self.model_route,
|
||||
context_ids: self.context_ids,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build with explicit latency
|
||||
pub fn build_with_latency(self, final_quality: f32, latency_us: u64) -> QueryTrajectory {
|
||||
QueryTrajectory {
|
||||
id: self.id,
|
||||
query_embedding: self.query_embedding,
|
||||
steps: self.steps,
|
||||
final_quality,
|
||||
latency_us,
|
||||
model_route: self.model_route,
|
||||
context_ids: self.context_ids,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Trajectory ID generator: a thread-safe monotonically increasing counter.
pub struct TrajectoryIdGen {
    // Next ID to hand out; `next()` returns-then-increments atomically.
    counter: AtomicU64,
}
|
||||
|
||||
impl TrajectoryIdGen {
|
||||
/// Create new generator
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
counter: AtomicU64::new(0),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create with starting ID
|
||||
pub fn with_start(start: u64) -> Self {
|
||||
Self {
|
||||
counter: AtomicU64::new(start),
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate next ID
|
||||
pub fn next(&self) -> u64 {
|
||||
self.counter.fetch_add(1, Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// Get current value without incrementing
|
||||
pub fn current(&self) -> u64 {
|
||||
self.counter.load(Ordering::Relaxed)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for TrajectoryIdGen {
    /// Equivalent to `TrajectoryIdGen::new()`: counter starts at zero.
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_buffer_basic_ops() {
        let buffer = TrajectoryBuffer::new(10);

        assert!(buffer.is_empty());
        assert_eq!(buffer.capacity(), 10);

        let trajectory = QueryTrajectory::new(1, vec![0.1, 0.2]);
        assert!(buffer.record(trajectory));

        assert_eq!(buffer.len(), 1);
        assert!(!buffer.is_empty());
    }

    #[test]
    fn test_buffer_overflow() {
        // Capacity 3, five records: the last two must be dropped and counted.
        let buffer = TrajectoryBuffer::new(3);

        for i in 0..5 {
            let trajectory = QueryTrajectory::new(i, vec![0.1]);
            buffer.record(trajectory);
        }

        assert_eq!(buffer.len(), 3);
        assert_eq!(buffer.dropped_count(), 2);
        assert_eq!(buffer.total_seen(), 5);
    }

    #[test]
    fn test_buffer_drain() {
        let buffer = TrajectoryBuffer::new(10);

        for i in 0..5 {
            let trajectory = QueryTrajectory::new(i, vec![0.1]);
            buffer.record(trajectory);
        }

        // drain() empties the queue and returns everything.
        let drained = buffer.drain();
        assert_eq!(drained.len(), 5);
        assert!(buffer.is_empty());
    }

    #[test]
    fn test_buffer_drain_n() {
        let buffer = TrajectoryBuffer::new(10);

        for i in 0..5 {
            let trajectory = QueryTrajectory::new(i, vec![0.1]);
            buffer.record(trajectory);
        }

        // drain_n(3) takes exactly three, leaving two behind.
        let partial = buffer.drain_n(3);
        assert_eq!(partial.len(), 3);
        assert_eq!(buffer.len(), 2);
    }

    #[test]
    fn test_builder() {
        let mut builder = TrajectoryBuilder::new(42, vec![0.1, 0.2, 0.3]);

        builder.add_step(vec![0.5], vec![0.4, 0.6], 0.7);
        builder.add_step(vec![0.6], vec![0.3, 0.7], 0.8);
        builder.set_model_route("llama-7b");
        builder.add_context("ctx-123");

        assert_eq!(builder.step_count(), 2);

        let trajectory = builder.build(0.85);

        assert_eq!(trajectory.id, 42);
        assert_eq!(trajectory.steps.len(), 2);
        assert_eq!(trajectory.final_quality, 0.85);
        assert_eq!(trajectory.model_route, Some("llama-7b".to_string()));
        // build() measures latency from builder creation, so it must be > 0.
        assert!(trajectory.latency_us > 0);
    }

    #[test]
    fn test_id_generator() {
        // next() returns the pre-increment value.
        let gen = TrajectoryIdGen::new();

        assert_eq!(gen.next(), 0);
        assert_eq!(gen.next(), 1);
        assert_eq!(gen.next(), 2);
        assert_eq!(gen.current(), 3);
    }

    #[test]
    fn test_success_rate() {
        // Capacity 2, four offers: 2 recorded / 4 seen = 0.5.
        let buffer = TrajectoryBuffer::new(2);

        for i in 0..4 {
            buffer.record(QueryTrajectory::new(i, vec![]));
        }

        assert!((buffer.success_rate() - 0.5).abs() < 1e-6);
    }
}
|
||||
531
vendor/ruvector/examples/ruvLLM/src/sona/types.rs
vendored
Normal file
531
vendor/ruvector/examples/ruvLLM/src/sona/types.rs
vendored
Normal file
@@ -0,0 +1,531 @@
|
||||
//! SONA Core Types
|
||||
//!
|
||||
//! Defines the fundamental data structures for the Self-Optimizing Neural Architecture.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::time::Instant;
|
||||
|
||||
/// Learning signal generated from inference trajectory
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LearningSignal {
    /// Query embedding vector
    pub query_embedding: Vec<f32>,
    /// Estimated gradient direction (L2-normalized by `estimate_gradient`)
    pub gradient_estimate: Vec<f32>,
    /// Quality score [0.0, 1.0]
    pub quality_score: f32,
    /// Signal generation timestamp. Not serialized (`serde(skip)`), so it
    /// deserializes as None.
    #[serde(skip)]
    pub timestamp: Option<Instant>,
    /// Additional metadata
    pub metadata: SignalMetadata,
}
|
||||
|
||||
/// Metadata for learning signals
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct SignalMetadata {
    /// Source trajectory ID
    pub trajectory_id: u64,
    /// Number of steps in the source trajectory
    pub step_count: usize,
    /// Model route taken, if recorded
    pub model_route: Option<String>,
    /// Custom tags (free-form key/value annotations)
    pub tags: HashMap<String, String>,
}
|
||||
|
||||
impl LearningSignal {
    /// Create signal from query trajectory using REINFORCE gradient estimation.
    pub fn from_trajectory(trajectory: &QueryTrajectory) -> Self {
        let gradient = Self::estimate_gradient(trajectory);

        Self {
            query_embedding: trajectory.query_embedding.clone(),
            gradient_estimate: gradient,
            quality_score: trajectory.final_quality,
            timestamp: Some(Instant::now()),
            metadata: SignalMetadata {
                trajectory_id: trajectory.id,
                step_count: trajectory.steps.len(),
                model_route: trajectory.model_route.clone(),
                tags: HashMap::new(),
            },
        }
    }

    /// Create signal with a pre-computed gradient (metadata left at defaults).
    pub fn with_gradient(embedding: Vec<f32>, gradient: Vec<f32>, quality: f32) -> Self {
        Self {
            query_embedding: embedding,
            gradient_estimate: gradient,
            quality_score: quality,
            timestamp: Some(Instant::now()),
            metadata: SignalMetadata::default(),
        }
    }

    /// Estimate gradient using REINFORCE with an average-reward baseline.
    ///
    /// Falls back to a plain copy of the query embedding (no normalization)
    /// when the trajectory has no steps.
    fn estimate_gradient(trajectory: &QueryTrajectory) -> Vec<f32> {
        if trajectory.steps.is_empty() {
            return trajectory.query_embedding.clone();
        }

        let dim = trajectory.query_embedding.len();
        let mut gradient = vec![0.0f32; dim];

        // Compute baseline (average reward)
        let baseline =
            trajectory.steps.iter().map(|s| s.reward).sum::<f32>() / trajectory.steps.len() as f32;

        // REINFORCE: gradient = sum((reward - baseline) * activation).
        // Activations longer than the query embedding are truncated to `dim`.
        for step in &trajectory.steps {
            let advantage = step.reward - baseline;
            let activation_len = step.activations.len().min(dim);
            for i in 0..activation_len {
                gradient[i] += advantage * step.activations[i];
            }
        }

        // L2 normalize; near-zero gradients are returned as-is.
        let norm: f32 = gradient.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm > 1e-8 {
            gradient.iter_mut().for_each(|x| *x /= norm);
        }

        gradient
    }

    /// Gradient scaled element-wise by the quality score.
    pub fn scaled_gradient(&self) -> Vec<f32> {
        self.gradient_estimate
            .iter()
            .map(|&g| g * self.quality_score)
            .collect()
    }
}
|
||||
|
||||
/// Query trajectory recording
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct QueryTrajectory {
    /// Unique trajectory identifier
    pub id: u64,
    /// Query embedding vector
    pub query_embedding: Vec<f32>,
    /// Execution steps, in order
    pub steps: Vec<TrajectoryStep>,
    /// Final quality score [0.0, 1.0] (0.0 until `finalize` is called)
    pub final_quality: f32,
    /// Total latency in microseconds (0 until `finalize` is called)
    pub latency_us: u64,
    /// Model route taken
    pub model_route: Option<String>,
    /// IDs of contexts used while answering the query
    pub context_ids: Vec<String>,
}
|
||||
|
||||
impl QueryTrajectory {
|
||||
/// Create new trajectory
|
||||
pub fn new(id: u64, query_embedding: Vec<f32>) -> Self {
|
||||
Self {
|
||||
id,
|
||||
query_embedding,
|
||||
steps: Vec::with_capacity(16),
|
||||
final_quality: 0.0,
|
||||
latency_us: 0,
|
||||
model_route: None,
|
||||
context_ids: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add execution step
|
||||
pub fn add_step(&mut self, step: TrajectoryStep) {
|
||||
self.steps.push(step);
|
||||
}
|
||||
|
||||
/// Finalize trajectory with quality score
|
||||
pub fn finalize(&mut self, quality: f32, latency_us: u64) {
|
||||
self.final_quality = quality;
|
||||
self.latency_us = latency_us;
|
||||
}
|
||||
|
||||
/// Get total reward
|
||||
pub fn total_reward(&self) -> f32 {
|
||||
self.steps.iter().map(|s| s.reward).sum()
|
||||
}
|
||||
|
||||
/// Get average reward
|
||||
pub fn avg_reward(&self) -> f32 {
|
||||
if self.steps.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
self.total_reward() / self.steps.len() as f32
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Single step in a trajectory
///
/// One recorded point of the execution trace: a (possibly truncated)
/// activation sample, flattened attention weights, and a reward signal.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TrajectoryStep {
    /// Layer/module activations (subset for efficiency)
    pub activations: Vec<f32>,
    /// Attention weights (flattened)
    pub attention_weights: Vec<f32>,
    /// Reward signal for this step
    pub reward: f32,
    /// Step index within the trajectory
    pub step_idx: usize,
    /// Optional layer name (set via `with_layer`)
    pub layer_name: Option<String>,
}
|
||||
|
||||
impl TrajectoryStep {
|
||||
/// Create new step
|
||||
pub fn new(
|
||||
activations: Vec<f32>,
|
||||
attention_weights: Vec<f32>,
|
||||
reward: f32,
|
||||
step_idx: usize,
|
||||
) -> Self {
|
||||
Self {
|
||||
activations,
|
||||
attention_weights,
|
||||
reward,
|
||||
step_idx,
|
||||
layer_name: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create step with layer name
|
||||
pub fn with_layer(mut self, name: &str) -> Self {
|
||||
self.layer_name = Some(name.to_string());
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Learned pattern from trajectory clustering
///
/// Aggregated statistics for one cluster of trajectories: its centroid in
/// embedding space plus bookkeeping (size, weight, quality, timestamps)
/// used for retrieval, decay, and pruning decisions.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LearnedPattern {
    /// Pattern identifier
    pub id: u64,
    /// Cluster centroid embedding
    pub centroid: Vec<f32>,
    /// Number of trajectories in cluster
    pub cluster_size: usize,
    /// Sum of trajectory weights (multiplicatively reduced by `decay`)
    pub total_weight: f32,
    /// Average quality of member trajectories
    pub avg_quality: f32,
    /// Creation timestamp (Unix seconds)
    pub created_at: u64,
    /// Last access timestamp (Unix seconds; refreshed by `touch`)
    pub last_accessed: u64,
    /// Total access count (incremented by `touch`)
    pub access_count: u32,
    /// Pattern type/category
    pub pattern_type: PatternType,
}
|
||||
|
||||
/// Pattern classification
///
/// Coarse category label attached to a `LearnedPattern`. `General` is the
/// catch-all default when no more specific category has been assigned.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
pub enum PatternType {
    /// Catch-all category (the default).
    #[default]
    General,
    Reasoning,
    Factual,
    Creative,
    CodeGen,
    Conversational,
}
|
||||
|
||||
impl LearnedPattern {
|
||||
/// Create new pattern
|
||||
pub fn new(id: u64, centroid: Vec<f32>) -> Self {
|
||||
let now = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs();
|
||||
|
||||
Self {
|
||||
id,
|
||||
centroid,
|
||||
cluster_size: 1,
|
||||
total_weight: 1.0,
|
||||
avg_quality: 0.0,
|
||||
created_at: now,
|
||||
last_accessed: now,
|
||||
access_count: 0,
|
||||
pattern_type: PatternType::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge two patterns
|
||||
pub fn merge(&self, other: &Self) -> Self {
|
||||
let total_size = self.cluster_size + other.cluster_size;
|
||||
let w1 = self.cluster_size as f32 / total_size as f32;
|
||||
let w2 = other.cluster_size as f32 / total_size as f32;
|
||||
|
||||
let centroid: Vec<f32> = self
|
||||
.centroid
|
||||
.iter()
|
||||
.zip(&other.centroid)
|
||||
.map(|(&a, &b)| a * w1 + b * w2)
|
||||
.collect();
|
||||
|
||||
Self {
|
||||
id: self.id,
|
||||
centroid,
|
||||
cluster_size: total_size,
|
||||
total_weight: self.total_weight + other.total_weight,
|
||||
avg_quality: self.avg_quality * w1 + other.avg_quality * w2,
|
||||
created_at: self.created_at.min(other.created_at),
|
||||
last_accessed: self.last_accessed.max(other.last_accessed),
|
||||
access_count: self.access_count + other.access_count,
|
||||
pattern_type: self.pattern_type.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Decay pattern importance
|
||||
pub fn decay(&mut self, factor: f32) {
|
||||
self.total_weight *= factor;
|
||||
}
|
||||
|
||||
/// Record access
|
||||
pub fn touch(&mut self) {
|
||||
self.access_count += 1;
|
||||
self.last_accessed = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs();
|
||||
}
|
||||
|
||||
/// Check if pattern should be pruned
|
||||
pub fn should_prune(&self, min_quality: f32, min_accesses: u32, max_age_secs: u64) -> bool {
|
||||
let now = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs();
|
||||
let age = now.saturating_sub(self.last_accessed);
|
||||
|
||||
self.avg_quality < min_quality && self.access_count < min_accesses && age > max_age_secs
|
||||
}
|
||||
|
||||
/// Compute cosine similarity with query
|
||||
pub fn similarity(&self, query: &[f32]) -> f32 {
|
||||
if self.centroid.len() != query.len() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let dot: f32 = self.centroid.iter().zip(query).map(|(a, b)| a * b).sum();
|
||||
let norm_a: f32 = self.centroid.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let norm_b: f32 = query.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
|
||||
if norm_a > 1e-8 && norm_b > 1e-8 {
|
||||
dot / (norm_a * norm_b)
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// SONA configuration
///
/// All tunable knobs for the engine. `Default` provides benchmark-tuned
/// values; the preset constructors (`max_throughput`, `max_quality`,
/// `edge_deployment`, `batch_processing`) cover common deployment profiles.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SonaConfig {
    /// Hidden dimension
    pub hidden_dim: usize,
    /// Embedding dimension
    pub embedding_dim: usize,
    /// Micro-LoRA rank
    pub micro_lora_rank: usize,
    /// Base LoRA rank
    pub base_lora_rank: usize,
    /// Micro-LoRA learning rate
    pub micro_lora_lr: f32,
    /// Base LoRA learning rate
    pub base_lora_lr: f32,
    /// EWC lambda (catastrophic-forgetting penalty strength)
    pub ewc_lambda: f32,
    /// Pattern extraction clusters
    pub pattern_clusters: usize,
    /// Trajectory buffer capacity
    pub trajectory_capacity: usize,
    /// Background learning interval (ms)
    pub background_interval_ms: u64,
    /// Quality threshold for learning (trajectories below it are skipped)
    pub quality_threshold: f32,
    /// Enable SIMD optimizations
    pub enable_simd: bool,
}
|
||||
|
||||
impl Default for SonaConfig {
|
||||
fn default() -> Self {
|
||||
// OPTIMIZED DEFAULTS based on @ruvector/sona v0.1.1 benchmarks:
|
||||
// - Rank-2 is 5% faster than Rank-1 due to better SIMD vectorization
|
||||
// - Learning rate 0.002 yields +55% quality improvement
|
||||
// - 100 clusters = 1.3ms search vs 50 clusters = 3.0ms (2.3x faster)
|
||||
// - EWC lambda 2000 optimal for catastrophic forgetting prevention
|
||||
// - Quality threshold 0.3 balances learning vs noise filtering
|
||||
Self {
|
||||
hidden_dim: 256,
|
||||
embedding_dim: 256,
|
||||
micro_lora_rank: 2, // OPTIMIZED: Rank-2 faster than Rank-1 (2,211 vs 2,100 ops/sec)
|
||||
base_lora_rank: 8, // Balanced for production
|
||||
micro_lora_lr: 0.002, // OPTIMIZED: +55.3% quality improvement
|
||||
base_lora_lr: 0.0001,
|
||||
ewc_lambda: 2000.0, // OPTIMIZED: Better forgetting prevention
|
||||
pattern_clusters: 100, // OPTIMIZED: 2.3x faster search (1.3ms vs 3.0ms)
|
||||
trajectory_capacity: 10000,
|
||||
background_interval_ms: 3600000, // 1 hour
|
||||
quality_threshold: 0.3, // OPTIMIZED: Lower threshold for more learning
|
||||
enable_simd: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SonaConfig {
|
||||
/// Create config optimized for maximum throughput (real-time chat)
|
||||
pub fn max_throughput() -> Self {
|
||||
Self {
|
||||
hidden_dim: 256,
|
||||
embedding_dim: 256,
|
||||
micro_lora_rank: 2, // Rank-2 + SIMD = 2,211 ops/sec
|
||||
base_lora_rank: 4, // Minimal base for speed
|
||||
micro_lora_lr: 0.0005, // Conservative for stability
|
||||
base_lora_lr: 0.0001,
|
||||
ewc_lambda: 2000.0,
|
||||
pattern_clusters: 100,
|
||||
trajectory_capacity: 5000,
|
||||
background_interval_ms: 7200000, // 2 hours
|
||||
quality_threshold: 0.4,
|
||||
enable_simd: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create config optimized for maximum quality (research/batch)
|
||||
pub fn max_quality() -> Self {
|
||||
Self {
|
||||
hidden_dim: 256,
|
||||
embedding_dim: 256,
|
||||
micro_lora_rank: 2,
|
||||
base_lora_rank: 16, // Higher rank for expressiveness
|
||||
micro_lora_lr: 0.002, // Optimal learning rate
|
||||
base_lora_lr: 0.001, // Aggressive base learning
|
||||
ewc_lambda: 2000.0,
|
||||
pattern_clusters: 100,
|
||||
trajectory_capacity: 20000,
|
||||
background_interval_ms: 1800000, // 30 minutes
|
||||
quality_threshold: 0.2, // Learn from more trajectories
|
||||
enable_simd: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create config for edge/mobile deployment (<5MB memory)
|
||||
pub fn edge_deployment() -> Self {
|
||||
Self {
|
||||
hidden_dim: 256,
|
||||
embedding_dim: 256,
|
||||
micro_lora_rank: 1, // Minimal rank for memory
|
||||
base_lora_rank: 4,
|
||||
micro_lora_lr: 0.001,
|
||||
base_lora_lr: 0.0001,
|
||||
ewc_lambda: 1000.0,
|
||||
pattern_clusters: 50,
|
||||
trajectory_capacity: 200, // Small buffer
|
||||
background_interval_ms: 3600000,
|
||||
quality_threshold: 0.5,
|
||||
enable_simd: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create config for batch processing (50+ inferences/sec)
|
||||
pub fn batch_processing() -> Self {
|
||||
Self {
|
||||
hidden_dim: 256,
|
||||
embedding_dim: 256,
|
||||
micro_lora_rank: 2,
|
||||
base_lora_rank: 8,
|
||||
micro_lora_lr: 0.001,
|
||||
base_lora_lr: 0.0001,
|
||||
ewc_lambda: 2000.0,
|
||||
pattern_clusters: 100,
|
||||
trajectory_capacity: 10000,
|
||||
background_interval_ms: 3600000,
|
||||
quality_threshold: 0.3,
|
||||
enable_simd: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_learning_signal_from_trajectory() {
        let mut traj = QueryTrajectory::new(1, vec![0.1, 0.2, 0.3]);
        let step = TrajectoryStep::new(vec![0.5, 0.3, 0.2], vec![0.4, 0.4, 0.2], 0.8, 0);
        traj.add_step(step);
        traj.finalize(0.8, 1000);

        let signal = LearningSignal::from_trajectory(&traj);
        assert_eq!(signal.quality_score, 0.8);
        assert_eq!(signal.gradient_estimate.len(), 3);
        assert_eq!(signal.metadata.trajectory_id, 1);
    }

    #[test]
    fn test_pattern_merge() {
        // Factory for two equal-size, orthogonal-centroid patterns.
        let make = |id, centroid, quality, created, accessed, count| LearnedPattern {
            id,
            centroid,
            cluster_size: 10,
            total_weight: 5.0,
            avg_quality: quality,
            created_at: created,
            last_accessed: accessed,
            access_count: count,
            pattern_type: PatternType::General,
        };
        let left = make(1, vec![1.0, 0.0], 0.8, 100, 200, 5);
        let right = make(2, vec![0.0, 1.0], 0.9, 150, 250, 3);

        // Equal sizes → both weights 0.5, so centroid and quality are means.
        let merged = left.merge(&right);
        assert_eq!(merged.cluster_size, 20);
        assert!((merged.centroid[0] - 0.5).abs() < 1e-6);
        assert!((merged.centroid[1] - 0.5).abs() < 1e-6);
        assert!((merged.avg_quality - 0.85).abs() < 1e-6);
    }

    #[test]
    fn test_pattern_similarity() {
        let pattern = LearnedPattern::new(1, vec![1.0, 0.0, 0.0]);

        // Parallel vectors → similarity 1; orthogonal vectors → 0.
        assert!((pattern.similarity(&[1.0, 0.0, 0.0]) - 1.0).abs() < 1e-6);
        assert!(pattern.similarity(&[0.0, 1.0, 0.0]).abs() < 1e-6);
    }

    #[test]
    fn test_trajectory_rewards() {
        let mut traj = QueryTrajectory::new(1, vec![0.1]);
        for (i, &reward) in [0.5, 0.7, 0.9].iter().enumerate() {
            traj.add_step(TrajectoryStep::new(Vec::new(), Vec::new(), reward, i));
        }

        assert!((traj.total_reward() - 2.1).abs() < 1e-6);
        assert!((traj.avg_reward() - 0.7).abs() < 1e-6);
    }
}
|
||||
Reference in New Issue
Block a user