Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
215
vendor/ruvector/crates/ruvector-sparse-inference/src/precision/lanes.rs
vendored
Normal file
215
vendor/ruvector/crates/ruvector-sparse-inference/src/precision/lanes.rs
vendored
Normal file
@@ -0,0 +1,215 @@
|
||||
//! Precision Lane definitions and configuration
|
||||
//!
|
||||
//! Defines the three quantized precision lanes (3/5/7-bit), plus a float lane, that map to intelligence roles.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Precision lanes for layered quantization
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub enum PrecisionLane {
|
||||
/// 3-bit lane: Reflex signals, gating, boundaries, health metrics
|
||||
/// Uses signed int4 container restricted to 3-bit domain
|
||||
/// LUT activation for speed
|
||||
Bit3,
|
||||
|
||||
/// 5-bit lane: Streaming embeddings, semantic motion, drift detection
|
||||
/// Uses signed int8 container with values in -16..15
|
||||
/// Per-channel or per-block scale
|
||||
Bit5,
|
||||
|
||||
/// 7-bit lane: Reasoning, synthesis, memory writes, micro-LoRA
|
||||
/// Uses signed int8 container with values in -64..63
|
||||
/// Stable accumulators, close to int8 quality
|
||||
Bit7,
|
||||
|
||||
/// Float lane: Training, calibration, aggregation boundaries only
|
||||
Float32,
|
||||
}
|
||||
|
||||
impl PrecisionLane {
|
||||
/// Get the number of bits for this lane
|
||||
pub fn bits(&self) -> u8 {
|
||||
match self {
|
||||
Self::Bit3 => 3,
|
||||
Self::Bit5 => 5,
|
||||
Self::Bit7 => 7,
|
||||
Self::Float32 => 32,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the value range for this lane
|
||||
pub fn value_range(&self) -> (i32, i32) {
|
||||
match self {
|
||||
Self::Bit3 => (-4, 3), // 3-bit signed: -4 to 3
|
||||
Self::Bit5 => (-16, 15), // 5-bit signed: -16 to 15
|
||||
Self::Bit7 => (-64, 63), // 7-bit signed: -64 to 63
|
||||
Self::Float32 => (i32::MIN, i32::MAX),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get bytes per element (storage container)
|
||||
pub fn bytes_per_element(&self) -> f32 {
|
||||
match self {
|
||||
Self::Bit3 => 0.5, // Packed into int4
|
||||
Self::Bit5 => 1.0, // int8 container
|
||||
Self::Bit7 => 1.0, // int8 container
|
||||
Self::Float32 => 4.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the default scale factor for this lane
|
||||
pub fn default_scale(&self) -> f32 {
|
||||
match self {
|
||||
Self::Bit3 => 0.25, // Conservative for reflexes
|
||||
Self::Bit5 => 0.0625, // 1/16 for streaming
|
||||
Self::Bit7 => 0.015625, // 1/64 for reasoning
|
||||
Self::Float32 => 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if this lane supports memory writes
|
||||
pub fn allows_memory_writes(&self) -> bool {
|
||||
matches!(self, Self::Bit7 | Self::Float32)
|
||||
}
|
||||
|
||||
/// Check if this lane is event-driven vs continuous
|
||||
pub fn is_event_driven(&self) -> bool {
|
||||
matches!(self, Self::Bit5 | Self::Bit7)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for PrecisionLane {
|
||||
fn default() -> Self {
|
||||
Self::Bit7 // Default to reasoning lane
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration for precision lane behavior
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct LaneConfig {
|
||||
/// Default lane for new operations
|
||||
pub default_lane: PrecisionLane,
|
||||
|
||||
/// Time budget per tick for 3-bit lane (microseconds)
|
||||
pub bit3_tick_budget_us: u64,
|
||||
|
||||
/// Maximum consecutive 5-bit updates before forced graduation check
|
||||
pub bit5_max_updates: usize,
|
||||
|
||||
/// Minimum stability steps before demotion
|
||||
pub min_stability_steps: usize,
|
||||
|
||||
/// Novelty threshold for escalation (0.0 to 1.0)
|
||||
pub novelty_threshold: f32,
|
||||
|
||||
/// Drift persistence threshold (steps)
|
||||
pub drift_persistence_threshold: usize,
|
||||
|
||||
/// Confidence threshold for graduation (0.0 to 1.0)
|
||||
pub confidence_threshold: f32,
|
||||
|
||||
/// Cost budget for escalation (arbitrary units)
|
||||
pub escalation_budget: f32,
|
||||
|
||||
/// Enable automatic lane selection
|
||||
pub auto_lane_selection: bool,
|
||||
}
|
||||
|
||||
impl Default for LaneConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
default_lane: PrecisionLane::Bit5, // Start at streaming lane
|
||||
bit3_tick_budget_us: 100, // 100μs per tick for reflexes
|
||||
bit5_max_updates: 10, // Check graduation every 10 updates
|
||||
min_stability_steps: 5, // 5 stable steps before demotion
|
||||
novelty_threshold: 0.3, // 30% novelty triggers escalation
|
||||
drift_persistence_threshold: 3, // 3 steps of drift
|
||||
confidence_threshold: 0.7, // 70% confidence required
|
||||
escalation_budget: 1.0, // Normalized budget
|
||||
auto_lane_selection: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Hardware target for lane optimization
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum HardwareTarget {
|
||||
/// ESP32: 3-bit only, tiny models
|
||||
Esp32,
|
||||
/// V0 Appliance: 5-bit streaming + 7-bit reasoning
|
||||
V0Appliance,
|
||||
/// Desktop/Server: Full lane support
|
||||
Desktop,
|
||||
/// FPGA: Deterministic 7-bit with witness logging
|
||||
Fpga,
|
||||
}
|
||||
|
||||
impl HardwareTarget {
|
||||
/// Get supported lanes for this hardware
|
||||
pub fn supported_lanes(&self) -> Vec<PrecisionLane> {
|
||||
match self {
|
||||
Self::Esp32 => vec![PrecisionLane::Bit3],
|
||||
Self::V0Appliance => vec![
|
||||
PrecisionLane::Bit3,
|
||||
PrecisionLane::Bit5,
|
||||
PrecisionLane::Bit7,
|
||||
],
|
||||
Self::Desktop => vec![
|
||||
PrecisionLane::Bit3,
|
||||
PrecisionLane::Bit5,
|
||||
PrecisionLane::Bit7,
|
||||
PrecisionLane::Float32,
|
||||
],
|
||||
Self::Fpga => vec![PrecisionLane::Bit7],
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the default lane for this hardware
|
||||
pub fn default_lane(&self) -> PrecisionLane {
|
||||
match self {
|
||||
Self::Esp32 => PrecisionLane::Bit3,
|
||||
Self::V0Appliance => PrecisionLane::Bit5,
|
||||
Self::Desktop => PrecisionLane::Bit7,
|
||||
Self::Fpga => PrecisionLane::Bit7,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Each lane reports its nominal bit width.
    #[test]
    fn test_lane_bits() {
        let expected = [
            (PrecisionLane::Bit3, 3u8),
            (PrecisionLane::Bit5, 5),
            (PrecisionLane::Bit7, 7),
            (PrecisionLane::Float32, 32),
        ];
        for &(lane, bits) in expected.iter() {
            assert_eq!(lane.bits(), bits);
        }
    }

    /// Quantized lanes expose their signed two's-complement range.
    #[test]
    fn test_lane_ranges() {
        let expected = [
            (PrecisionLane::Bit3, (-4, 3)),
            (PrecisionLane::Bit5, (-16, 15)),
            (PrecisionLane::Bit7, (-64, 63)),
        ];
        for &(lane, range) in expected.iter() {
            assert_eq!(lane.value_range(), range);
        }
    }

    /// Only the 7-bit and float lanes may write memory.
    #[test]
    fn test_memory_write_permission() {
        for lane in [PrecisionLane::Bit3, PrecisionLane::Bit5].iter() {
            assert!(!lane.allows_memory_writes());
        }
        for lane in [PrecisionLane::Bit7, PrecisionLane::Float32].iter() {
            assert!(lane.allows_memory_writes());
        }
    }

    /// ESP32 is 3-bit only; desktop includes the float lane.
    #[test]
    fn test_hardware_targets() {
        let esp32_lanes = HardwareTarget::Esp32.supported_lanes();
        assert_eq!(esp32_lanes, vec![PrecisionLane::Bit3]);

        let desktop_lanes = HardwareTarget::Desktop.supported_lanes();
        assert!(desktop_lanes.contains(&PrecisionLane::Float32));
    }
}
|
||||
41
vendor/ruvector/crates/ruvector-sparse-inference/src/precision/mod.rs
vendored
Normal file
41
vendor/ruvector/crates/ruvector-sparse-inference/src/precision/mod.rs
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
//! Precision Lanes Module - Layered Quantization for Sparse Inference
|
||||
//!
|
||||
//! This module implements a 3/5/7-bit layered quantization system that turns
|
||||
//! activation locality into a complete control theory for inference.
|
||||
//!
|
||||
//! # Intelligence Roles by Precision Lane
|
||||
//!
|
||||
//! - **3-bit Lane**: Reflex signals, gating, anomaly boundaries, mincut triggers, health metrics
|
||||
//! - **5-bit Lane**: Streaming embeddings, semantic motion, drift detection, lightweight perception
|
||||
//! - **7-bit Lane**: Reasoning, synthesis, memory writes, micro-LoRA adaptation, summaries
|
||||
//! - **Float Lane**: Training, offline calibration, rare aggregation boundaries
|
||||
//!
|
||||
//! # Graduation Rules
|
||||
//!
|
||||
//! Signals move UP lanes when:
|
||||
//! - Novelty exceeds threshold
|
||||
//! - Drift persists for N steps
|
||||
//! - Confidence and stability metrics pass
|
||||
//! - Cost budget allows escalation
|
||||
//!
|
||||
//! Signals move DOWN lanes when:
|
||||
//! - Stability returns
|
||||
//! - Velocity stalls
|
||||
//! - Active set shrinks
|
||||
//! - Uncertainty is high but no action needed
|
||||
//!
|
||||
//! # Key Insight
|
||||
//!
|
||||
//! The active neuron set decides WHAT to compute.
|
||||
//! The lane decides HOW PRECISELY to compute it.
|
||||
//! The graduation rules decide WHEN computation is allowed to become expensive.
|
||||
|
||||
pub mod lanes;
|
||||
pub mod policy;
|
||||
pub mod quantizers;
|
||||
pub mod telemetry;
|
||||
|
||||
pub use lanes::{LaneConfig, PrecisionLane};
|
||||
pub use policy::{GraduationDecision, GraduationMetrics, GraduationPolicy};
|
||||
pub use quantizers::{QuantizedBlock, Quantizer3Bit, Quantizer5Bit, Quantizer7Bit};
|
||||
pub use telemetry::{LaneStats, LaneTelemetry};
|
||||
418
vendor/ruvector/crates/ruvector-sparse-inference/src/precision/policy.rs
vendored
Normal file
418
vendor/ruvector/crates/ruvector-sparse-inference/src/precision/policy.rs
vendored
Normal file
@@ -0,0 +1,418 @@
|
||||
//! Graduation Policy - Rules for lane transitions
|
||||
//!
|
||||
//! Implements the control theory for when signals should move between precision lanes.
|
||||
|
||||
use super::lanes::{LaneConfig, PrecisionLane};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Metrics used for graduation decisions
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct GraduationMetrics {
|
||||
/// Novelty score (0.0 to 1.0) - how different from recent patterns
|
||||
pub novelty: f32,
|
||||
|
||||
/// Drift score (0.0 to 1.0) - how much the signal has drifted
|
||||
pub drift: f32,
|
||||
|
||||
/// Number of steps drift has persisted
|
||||
pub drift_steps: usize,
|
||||
|
||||
/// Confidence score (0.0 to 1.0)
|
||||
pub confidence: f32,
|
||||
|
||||
/// Stability score (0.0 to 1.0) - inverse of variance
|
||||
pub stability: f32,
|
||||
|
||||
/// Number of stable steps
|
||||
pub stable_steps: usize,
|
||||
|
||||
/// Velocity (rate of change)
|
||||
pub velocity: f32,
|
||||
|
||||
/// Active set size (number of active neurons)
|
||||
pub active_set_size: usize,
|
||||
|
||||
/// Uncertainty score (0.0 to 1.0)
|
||||
pub uncertainty: f32,
|
||||
|
||||
/// Current cost usage (0.0 to 1.0)
|
||||
pub cost_usage: f32,
|
||||
|
||||
/// Whether action is needed
|
||||
pub action_needed: bool,
|
||||
}
|
||||
|
||||
impl GraduationMetrics {
|
||||
/// Create new metrics with default values
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Update metrics with a new observation
|
||||
pub fn update(&mut self, observation: &ObservationMetrics, ema_alpha: f32) {
|
||||
// Exponential moving average for smooth updates
|
||||
self.novelty = ema_alpha * observation.novelty + (1.0 - ema_alpha) * self.novelty;
|
||||
self.drift = ema_alpha * observation.drift + (1.0 - ema_alpha) * self.drift;
|
||||
self.confidence = ema_alpha * observation.confidence + (1.0 - ema_alpha) * self.confidence;
|
||||
self.stability = ema_alpha * observation.stability + (1.0 - ema_alpha) * self.stability;
|
||||
self.velocity = ema_alpha * observation.velocity + (1.0 - ema_alpha) * self.velocity;
|
||||
self.uncertainty =
|
||||
ema_alpha * observation.uncertainty + (1.0 - ema_alpha) * self.uncertainty;
|
||||
|
||||
self.active_set_size = observation.active_set_size;
|
||||
self.action_needed = observation.action_needed;
|
||||
|
||||
// Update drift persistence
|
||||
if observation.drift > 0.1 {
|
||||
self.drift_steps += 1;
|
||||
} else {
|
||||
self.drift_steps = 0;
|
||||
}
|
||||
|
||||
// Update stability persistence
|
||||
if observation.stability > 0.8 {
|
||||
self.stable_steps += 1;
|
||||
} else {
|
||||
self.stable_steps = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Unsmoothed metrics measured during a single step.
#[derive(Debug, Clone, Default)]
pub struct ObservationMetrics {
    /// Novelty measured this step.
    pub novelty: f32,
    /// Drift measured this step.
    pub drift: f32,
    /// Confidence measured this step.
    pub confidence: f32,
    /// Stability measured this step.
    pub stability: f32,
    /// Velocity measured this step.
    pub velocity: f32,
    /// Uncertainty measured this step.
    pub uncertainty: f32,
    /// Size of the active set this step.
    pub active_set_size: usize,
    /// Whether an action is needed this step.
    pub action_needed: bool,
}
|
||||
|
||||
/// Decision from graduation policy
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum GraduationDecision {
|
||||
/// Stay in current lane
|
||||
Stay,
|
||||
/// Escalate to higher precision lane
|
||||
Escalate(PrecisionLane),
|
||||
/// Demote to lower precision lane
|
||||
Demote(PrecisionLane),
|
||||
}
|
||||
|
||||
/// Graduation policy for lane transitions
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct GraduationPolicy {
|
||||
/// Current precision lane
|
||||
pub current_lane: PrecisionLane,
|
||||
/// Configuration
|
||||
pub config: LaneConfig,
|
||||
/// Accumulated metrics
|
||||
pub metrics: GraduationMetrics,
|
||||
/// EMA smoothing factor
|
||||
pub ema_alpha: f32,
|
||||
}
|
||||
|
||||
impl GraduationPolicy {
|
||||
/// Create a new graduation policy
|
||||
pub fn new(initial_lane: PrecisionLane, config: LaneConfig) -> Self {
|
||||
Self {
|
||||
current_lane: initial_lane,
|
||||
config,
|
||||
metrics: GraduationMetrics::new(),
|
||||
ema_alpha: 0.3,
|
||||
}
|
||||
}
|
||||
|
||||
/// Evaluate and return graduation decision
|
||||
pub fn evaluate(&mut self, observation: &ObservationMetrics) -> GraduationDecision {
|
||||
// Update metrics
|
||||
self.metrics.update(observation, self.ema_alpha);
|
||||
|
||||
// Check for escalation
|
||||
if self.should_escalate() {
|
||||
if let Some(next_lane) = self.next_higher_lane() {
|
||||
return GraduationDecision::Escalate(next_lane);
|
||||
}
|
||||
}
|
||||
|
||||
// Check for demotion
|
||||
if self.should_demote() {
|
||||
if let Some(prev_lane) = self.next_lower_lane() {
|
||||
return GraduationDecision::Demote(prev_lane);
|
||||
}
|
||||
}
|
||||
|
||||
GraduationDecision::Stay
|
||||
}
|
||||
|
||||
/// Apply a graduation decision
|
||||
pub fn apply_decision(&mut self, decision: GraduationDecision) {
|
||||
match decision {
|
||||
GraduationDecision::Stay => {}
|
||||
GraduationDecision::Escalate(lane) | GraduationDecision::Demote(lane) => {
|
||||
self.current_lane = lane;
|
||||
// Reset stability counters on lane change
|
||||
self.metrics.stable_steps = 0;
|
||||
self.metrics.drift_steps = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if escalation conditions are met
|
||||
fn should_escalate(&self) -> bool {
|
||||
// Escalate when:
|
||||
// 1. Novelty exceeds threshold
|
||||
let novelty_trigger = self.metrics.novelty > self.config.novelty_threshold;
|
||||
|
||||
// 2. Drift persists
|
||||
let drift_trigger = self.metrics.drift_steps >= self.config.drift_persistence_threshold;
|
||||
|
||||
// 3. Confidence and stability pass
|
||||
let quality_pass = self.metrics.confidence >= self.config.confidence_threshold
|
||||
&& self.metrics.stability >= 0.5;
|
||||
|
||||
// 4. Cost budget allows
|
||||
let budget_allows = self.metrics.cost_usage < self.config.escalation_budget;
|
||||
|
||||
// Escalate if any trigger fires AND quality/budget conditions are met
|
||||
(novelty_trigger || drift_trigger) && quality_pass && budget_allows
|
||||
}
|
||||
|
||||
/// Check if demotion conditions are met
|
||||
fn should_demote(&self) -> bool {
|
||||
// Demote when:
|
||||
// 1. Stability returns
|
||||
let stability_returned = self.metrics.stable_steps >= self.config.min_stability_steps;
|
||||
|
||||
// 2. Velocity stalls
|
||||
let velocity_stalled = self.metrics.velocity.abs() < 0.01;
|
||||
|
||||
// 3. Active set shrinks (not using the precision)
|
||||
let active_set_shrunk = self.metrics.active_set_size < 10;
|
||||
|
||||
// 4. High uncertainty but no action needed
|
||||
let uncertain_idle = self.metrics.uncertainty > 0.7 && !self.metrics.action_needed;
|
||||
|
||||
// Demote if stability AND (velocity stall OR active shrink OR uncertain idle)
|
||||
stability_returned && (velocity_stalled || active_set_shrunk || uncertain_idle)
|
||||
}
|
||||
|
||||
/// Get the next higher precision lane
|
||||
fn next_higher_lane(&self) -> Option<PrecisionLane> {
|
||||
match self.current_lane {
|
||||
PrecisionLane::Bit3 => Some(PrecisionLane::Bit5),
|
||||
PrecisionLane::Bit5 => Some(PrecisionLane::Bit7),
|
||||
PrecisionLane::Bit7 => Some(PrecisionLane::Float32),
|
||||
PrecisionLane::Float32 => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the next lower precision lane
|
||||
fn next_lower_lane(&self) -> Option<PrecisionLane> {
|
||||
match self.current_lane {
|
||||
PrecisionLane::Bit3 => None,
|
||||
PrecisionLane::Bit5 => Some(PrecisionLane::Bit3),
|
||||
PrecisionLane::Bit7 => Some(PrecisionLane::Bit5),
|
||||
PrecisionLane::Float32 => Some(PrecisionLane::Bit7),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Event processor with precision lane awareness
|
||||
pub struct LanedEventProcessor {
|
||||
/// Graduation policy
|
||||
policy: GraduationPolicy,
|
||||
/// Event counter
|
||||
event_count: usize,
|
||||
}
|
||||
|
||||
impl LanedEventProcessor {
|
||||
/// Create a new event processor
|
||||
pub fn new(config: LaneConfig) -> Self {
|
||||
Self {
|
||||
policy: GraduationPolicy::new(config.default_lane, config),
|
||||
event_count: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Process an event through the appropriate precision lane
|
||||
pub fn process_event(&mut self, event: &Event) -> ProcessResult {
|
||||
self.event_count += 1;
|
||||
|
||||
// 3-bit reflex check (always runs first)
|
||||
let reflex_result = self.reflex_3bit(event);
|
||||
if !reflex_result.boundary_crossed {
|
||||
return ProcessResult::Reflexed(reflex_result);
|
||||
}
|
||||
|
||||
// 5-bit embedding update (event-driven)
|
||||
let embed_result = self.embed_5bit(event);
|
||||
|
||||
// Check for graduation to 7-bit
|
||||
let observation = self.compute_observation(&reflex_result, &embed_result);
|
||||
let decision = self.policy.evaluate(&observation);
|
||||
|
||||
if matches!(decision, GraduationDecision::Escalate(PrecisionLane::Bit7))
|
||||
|| self.policy.current_lane == PrecisionLane::Bit7
|
||||
{
|
||||
// 7-bit reasoning
|
||||
let reason_result = self.reason_7bit(event, &embed_result);
|
||||
self.policy.apply_decision(decision);
|
||||
return ProcessResult::Reasoned(reason_result);
|
||||
}
|
||||
|
||||
self.policy.apply_decision(decision);
|
||||
ProcessResult::Embedded(embed_result)
|
||||
}
|
||||
|
||||
fn reflex_3bit(&self, _event: &Event) -> ReflexResult {
|
||||
// 3-bit reflex processing
|
||||
ReflexResult {
|
||||
boundary_crossed: true, // Simplified
|
||||
health_ok: true,
|
||||
anomaly_detected: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn embed_5bit(&self, _event: &Event) -> EmbedResult {
|
||||
// 5-bit embedding update
|
||||
EmbedResult {
|
||||
embedding_delta: vec![0.0; 64],
|
||||
drift_detected: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn reason_7bit(&self, _event: &Event, _embed: &EmbedResult) -> ReasonResult {
|
||||
// 7-bit reasoning
|
||||
ReasonResult {
|
||||
should_write_memory: false,
|
||||
summary: String::new(),
|
||||
actions: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_observation(
|
||||
&self,
|
||||
_reflex: &ReflexResult,
|
||||
_embed: &EmbedResult,
|
||||
) -> ObservationMetrics {
|
||||
ObservationMetrics::default()
|
||||
}
|
||||
|
||||
/// Get current lane
|
||||
pub fn current_lane(&self) -> PrecisionLane {
|
||||
self.policy.current_lane
|
||||
}
|
||||
}
|
||||
|
||||
/// Minimal event fed to the laned processor.
#[derive(Debug, Clone)]
pub struct Event {
    /// Payload values carried by the event.
    pub data: Vec<f32>,
    /// Timestamp of the event.
    pub timestamp: u64,
}
|
||||
|
||||
/// Outcome of the 3-bit reflex stage.
#[derive(Debug, Clone)]
pub struct ReflexResult {
    /// Whether a boundary was crossed; processing stops at the reflex stage when `false`.
    pub boundary_crossed: bool,
    /// Whether the health check passed.
    pub health_ok: bool,
    /// Whether an anomaly was detected.
    pub anomaly_detected: bool,
}
|
||||
|
||||
/// Outcome of the 5-bit embedding stage.
#[derive(Debug, Clone)]
pub struct EmbedResult {
    /// Change applied to the embedding.
    pub embedding_delta: Vec<f32>,
    /// Whether drift was detected.
    pub drift_detected: bool,
}
|
||||
|
||||
/// Outcome of the 7-bit reasoning stage.
#[derive(Debug, Clone)]
pub struct ReasonResult {
    /// Whether the result should be written to memory.
    pub should_write_memory: bool,
    /// Textual summary produced by reasoning.
    pub summary: String,
    /// Actions produced by reasoning.
    pub actions: Vec<String>,
}
|
||||
|
||||
/// Overall processing result
|
||||
#[derive(Debug)]
|
||||
pub enum ProcessResult {
|
||||
Reflexed(ReflexResult),
|
||||
Embedded(EmbedResult),
|
||||
Reasoned(ReasonResult),
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A new policy starts in the lane it was given.
    #[test]
    fn test_graduation_policy_creation() {
        let policy = GraduationPolicy::new(PrecisionLane::Bit5, LaneConfig::default());

        assert_eq!(policy.current_lane, PrecisionLane::Bit5);
    }

    /// High novelty with good confidence escalates 5-bit to 7-bit.
    #[test]
    fn test_escalation_on_novelty() {
        let mut config = LaneConfig::default();
        config.novelty_threshold = 0.3;
        config.confidence_threshold = 0.5;

        let mut policy = GraduationPolicy::new(PrecisionLane::Bit5, config);
        // With alpha = 1.0 the metrics equal the latest observation.
        policy.ema_alpha = 1.0;

        let mut observation = ObservationMetrics::default();
        observation.novelty = 0.9;
        observation.confidence = 0.9;
        observation.stability = 0.6;

        let decision = policy.evaluate(&observation);
        assert!(matches!(
            decision,
            GraduationDecision::Escalate(PrecisionLane::Bit7)
        ));
    }

    /// Sustained stability with stalled velocity demotes 7-bit to 5-bit.
    #[test]
    fn test_demotion_on_stability() {
        let config = LaneConfig {
            min_stability_steps: 2,
            ..Default::default()
        };
        let mut policy = GraduationPolicy::new(PrecisionLane::Bit7, config);

        let calm = ObservationMetrics {
            stability: 0.9,
            velocity: 0.001,
            active_set_size: 5,
            ..Default::default()
        };

        // Accumulate stable steps without applying any decision.
        for _ in 0..5 {
            policy.evaluate(&calm);
        }

        let decision = policy.evaluate(&calm);
        assert!(matches!(
            decision,
            GraduationDecision::Demote(PrecisionLane::Bit5)
        ));
    }
}
|
||||
438
vendor/ruvector/crates/ruvector-sparse-inference/src/precision/quantizers.rs
vendored
Normal file
438
vendor/ruvector/crates/ruvector-sparse-inference/src/precision/quantizers.rs
vendored
Normal file
@@ -0,0 +1,438 @@
|
||||
//! Quantizers for 3/5/7-bit precision lanes
|
||||
//!
|
||||
//! Implements pack/unpack operations for each precision lane with
|
||||
//! per-block or per-channel scaling.
|
||||
|
||||
use super::lanes::PrecisionLane;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Quantized block with scale factor
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct QuantizedBlock {
|
||||
/// Quantized data
|
||||
pub data: Vec<i8>,
|
||||
/// Scale factor for dequantization
|
||||
pub scale: f32,
|
||||
/// Zero point offset
|
||||
pub zero_point: i8,
|
||||
/// Block size
|
||||
pub block_size: usize,
|
||||
/// Precision lane
|
||||
pub lane: PrecisionLane,
|
||||
}
|
||||
|
||||
impl QuantizedBlock {
|
||||
/// Create a new quantized block
|
||||
pub fn new(lane: PrecisionLane, block_size: usize) -> Self {
|
||||
Self {
|
||||
data: Vec::with_capacity(block_size),
|
||||
scale: lane.default_scale(),
|
||||
zero_point: 0,
|
||||
block_size,
|
||||
lane,
|
||||
}
|
||||
}
|
||||
|
||||
/// Dequantize to f32 values
|
||||
pub fn dequantize(&self) -> Vec<f32> {
|
||||
self.data
|
||||
.iter()
|
||||
.map(|&q| ((q as i32 - self.zero_point as i32) as f32) * self.scale)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get memory size in bytes
|
||||
pub fn size_bytes(&self) -> usize {
|
||||
self.data.len() + 4 + 1 // data + scale + zero_point
|
||||
}
|
||||
}
|
||||
|
||||
/// 3-bit quantizer for reflex signals.
///
/// Values are quantized to the signed range -4..=3 and packed two per byte,
/// each in a 4-bit two's-complement nibble (first value in the low nibble).
/// Every block of `block_size` values has its own scale factor, and every
/// block starts on a byte boundary: a block with an odd number of values ends
/// with a zero padding nibble.
#[derive(Debug, Clone)]
pub struct Quantizer3Bit {
    /// Per-block scale factors, replaced by each call to [`quantize`](Self::quantize).
    pub scales: Vec<f32>,
    /// Number of values per block (typically 32). A value of 0 is treated as 1.
    pub block_size: usize,
    /// Optional activation lookup table, indexed by `q + 4` for `q` in -4..=3.
    pub activation_lut: Option<[f32; 8]>,
}

impl Quantizer3Bit {
    /// Smallest quantized value of the 3-bit domain.
    const Q_MIN: i8 = -4;
    /// Largest quantized value of the 3-bit domain.
    const Q_MAX: i8 = 3;

    /// Creates a quantizer with the given block size, no scales and no LUT.
    pub fn new(block_size: usize) -> Self {
        Self {
            scales: Vec::new(),
            block_size,
            activation_lut: None,
        }
    }

    /// Installs an activation LUT (e.g. for ReLU) that `dequantize` uses
    /// instead of scaling.
    pub fn with_activation_lut(mut self, lut: [f32; 8]) -> Self {
        self.activation_lut = Some(lut);
        self
    }

    /// Block size actually used: a configured size of 0 falls back to 1 so
    /// that chunking never divides by zero.
    fn effective_block_size(&self) -> usize {
        self.block_size.max(1)
    }

    /// Quantizes `values` to packed 3-bit data and records one scale per block.
    ///
    /// Each block's scale is `max_abs / 3` (or 1.0 for an all-zero block).
    /// Returns the packed bytes; a block of `k` values occupies
    /// `ceil(k / 2)` bytes.
    pub fn quantize(&mut self, values: &[f32]) -> Vec<u8> {
        let block_size = self.effective_block_size();
        let num_blocks = values.len() / block_size + usize::from(values.len() % block_size != 0);
        self.scales = Vec::with_capacity(num_blocks);

        // At most one padding nibble (hence one extra byte) per block.
        let mut packed = Vec::with_capacity(values.len() / 2 + num_blocks);

        for block in values.chunks(block_size) {
            let max_abs = block.iter().map(|v| v.abs()).fold(0.0f32, f32::max);
            let scale = if max_abs > 0.0 {
                max_abs / f32::from(Self::Q_MAX)
            } else {
                1.0
            };
            self.scales.push(scale);

            packed.extend(block.chunks(2).map(|pair| {
                let low = Self::quantize_value(pair[0], scale);
                // A lone trailing value is paired with a zero padding nibble.
                let high = pair.get(1).map_or(0, |&v| Self::quantize_value(v, scale));
                ((high as u8) << 4) | (low as u8 & 0x0F)
            }));
        }

        packed
    }

    /// Quantizes a single value: divide by `scale`, round, clamp to -4..=3.
    fn quantize_value(value: f32, scale: f32) -> i8 {
        // The float-to-int cast saturates (and maps NaN to 0) before the clamp.
        let scaled = (value / scale).round() as i8;
        scaled.clamp(Self::Q_MIN, Self::Q_MAX)
    }

    /// Decodes one 4-bit nibble: sign-extend it, then either look it up in the
    /// activation LUT or multiply by `scale`.
    fn decode_nibble(&self, nibble: u8, scale: f32) -> f32 {
        // Move the nibble to the top of the byte, then shift back arithmetically
        // to sign-extend the 4-bit two's-complement value.
        let q = ((nibble << 4) as i8) >> 4;
        match &self.activation_lut {
            // Clamp so nibbles outside the 3-bit domain cannot index out of bounds.
            Some(lut) => lut[(q.clamp(Self::Q_MIN, Self::Q_MAX) - Self::Q_MIN) as usize],
            None => f32::from(q) * scale,
        }
    }

    /// Dequantizes packed 3-bit `data` back to at most `num_values` floats.
    ///
    /// Expects the layout produced by [`quantize`](Self::quantize) with the
    /// same block size. Blocks without a recorded scale use 1.0. When an
    /// activation LUT is set, each value is the LUT entry for its quantized
    /// code and the scale is not applied. Fewer than `num_values` values are
    /// returned if `data` runs out first.
    pub fn dequantize(&self, data: &[u8], num_values: usize) -> Vec<f32> {
        let block_size = self.effective_block_size();
        let bytes_per_block = block_size / 2 + block_size % 2;

        // Never reserve more than the data could possibly yield.
        let mut result = Vec::with_capacity(num_values.min(data.len().saturating_mul(2)));

        for (block_idx, block_bytes) in data.chunks(bytes_per_block).enumerate() {
            let remaining = num_values.saturating_sub(result.len());
            if remaining == 0 {
                break;
            }

            let scale = self.scales.get(block_idx).copied().unwrap_or(1.0);
            let nibbles = block_bytes
                .iter()
                .flat_map(|&byte| [byte & 0x0F, byte >> 4]);

            // Taking at most `block_size` values drops the padding nibble of
            // an odd-sized block.
            result.extend(
                nibbles
                    .take(block_size.min(remaining))
                    .map(|nibble| self.decode_nibble(nibble, scale)),
            );
        }

        result
    }
}
|
||||
|
||||
/// 5-bit quantizer for streaming embeddings.
///
/// Values are quantized to the signed range -16..=15 and stored one per
/// `i8`. Scales are kept either per block of `block_size` values or, in
/// per-channel mode, one per value.
#[derive(Debug, Clone)]
pub struct Quantizer5Bit {
    /// Scale factors, replaced by each call to [`quantize`](Self::quantize):
    /// one per block, or one per value in per-channel mode.
    pub scales: Vec<f32>,
    /// Number of values per block. A value of 0 is treated as 1.
    pub block_size: usize,
    /// Use per-channel scaling instead of per-block scaling.
    pub per_channel: bool,
}

impl Quantizer5Bit {
    /// Smallest quantized value of the 5-bit domain.
    const Q_MIN: i8 = -16;
    /// Largest quantized value of the 5-bit domain.
    const Q_MAX: i8 = 15;

    /// Creates a per-block quantizer with the given block size and no scales.
    pub fn new(block_size: usize) -> Self {
        Self {
            scales: Vec::new(),
            block_size,
            per_channel: false,
        }
    }

    /// Switches the quantizer to per-channel scaling.
    pub fn with_per_channel(mut self) -> Self {
        self.per_channel = true;
        self
    }

    /// Quantizes `values` to 5-bit codes stored in `i8`, recording the scales
    /// needed to dequantize them.
    pub fn quantize(&mut self, values: &[f32]) -> Vec<i8> {
        if self.per_channel {
            self.quantize_per_channel(values)
        } else {
            self.quantize_per_block(values)
        }
    }

    /// Block size actually used: a configured size of 0 falls back to 1 so
    /// that chunking never divides by zero.
    fn effective_block_size(&self) -> usize {
        self.block_size.max(1)
    }

    /// Scale mapping `max_abs` onto the largest code; 1.0 when `max_abs` is
    /// not positive.
    fn scale_for(max_abs: f32) -> f32 {
        if max_abs > 0.0 {
            max_abs / f32::from(Self::Q_MAX)
        } else {
            1.0
        }
    }

    /// Quantizes a single value: divide by `scale`, round, clamp to -16..=15.
    fn quantize_value(value: f32, scale: f32) -> i8 {
        // The float-to-int cast saturates (and maps NaN to 0) before the clamp.
        ((value / scale).round() as i8).clamp(Self::Q_MIN, Self::Q_MAX)
    }

    /// Quantizes with one scale per block, derived from the block's largest magnitude.
    fn quantize_per_block(&mut self, values: &[f32]) -> Vec<i8> {
        let block_size = self.effective_block_size();
        let num_blocks = values.len() / block_size + usize::from(values.len() % block_size != 0);
        self.scales = Vec::with_capacity(num_blocks);

        let mut result = Vec::with_capacity(values.len());
        for block in values.chunks(block_size) {
            let max_abs = block.iter().map(|v| v.abs()).fold(0.0f32, f32::max);
            let scale = Self::scale_for(max_abs);
            self.scales.push(scale);
            result.extend(block.iter().map(|&v| Self::quantize_value(v, scale)));
        }

        result
    }

    /// Quantizes with one scale per value, derived from that value's magnitude.
    fn quantize_per_channel(&mut self, values: &[f32]) -> Vec<i8> {
        self.scales = values.iter().map(|v| Self::scale_for(v.abs())).collect();

        values
            .iter()
            .zip(&self.scales)
            .map(|(&value, &scale)| Self::quantize_value(value, scale))
            .collect()
    }

    /// Dequantizes 5-bit codes back to `f32` using the recorded scales.
    ///
    /// In per-channel mode the output stops at the shorter of `data` and the
    /// recorded scales. In per-block mode, blocks without a recorded scale
    /// use 1.0.
    pub fn dequantize(&self, data: &[i8]) -> Vec<f32> {
        if self.per_channel {
            return data
                .iter()
                .zip(&self.scales)
                .map(|(&q, &scale)| f32::from(q) * scale)
                .collect();
        }

        let mut result = Vec::with_capacity(data.len());
        for (block_idx, block) in data.chunks(self.effective_block_size()).enumerate() {
            let scale = self.scales.get(block_idx).copied().unwrap_or(1.0);
            result.extend(block.iter().map(|&q| f32::from(q) * scale));
        }
        result
    }
}
|
||||
|
||||
/// 7-bit quantizer for reasoning.
///
/// Values are quantized to the signed range -64..=63 and stored one per
/// `i8`, with one scale factor per block of `block_size` values.
#[derive(Debug, Clone)]
pub struct Quantizer7Bit {
    /// Per-block scale factors, replaced by each call to [`quantize`](Self::quantize).
    pub scales: Vec<f32>,
    /// Number of values per block. A value of 0 is treated as 1.
    pub block_size: usize,
}

impl Quantizer7Bit {
    /// Smallest quantized value of the 7-bit domain.
    const Q_MIN: i8 = -64;
    /// Largest quantized value of the 7-bit domain.
    const Q_MAX: i8 = 63;

    /// Creates a quantizer with the given block size and no scales.
    pub fn new(block_size: usize) -> Self {
        Self {
            scales: Vec::new(),
            block_size,
        }
    }

    /// Block size actually used: a configured size of 0 falls back to 1 so
    /// that chunking never divides by zero.
    fn effective_block_size(&self) -> usize {
        self.block_size.max(1)
    }

    /// Rounds `value` and clamps it into the 7-bit domain.
    fn to_code(value: f32) -> i8 {
        // The float-to-int cast saturates (and maps NaN to 0) before the clamp.
        (value.round() as i8).clamp(Self::Q_MIN, Self::Q_MAX)
    }

    /// Quantizes `values` to 7-bit codes stored in `i8` and records one scale
    /// per block.
    ///
    /// Each block's scale is `max_abs / 63` (or 1.0 for an all-zero block).
    pub fn quantize(&mut self, values: &[f32]) -> Vec<i8> {
        let block_size = self.effective_block_size();
        let num_blocks = values.len() / block_size + usize::from(values.len() % block_size != 0);
        self.scales = Vec::with_capacity(num_blocks);

        let mut result = Vec::with_capacity(values.len());
        for block in values.chunks(block_size) {
            let max_abs = block.iter().map(|v| v.abs()).fold(0.0f32, f32::max);
            let scale = if max_abs > 0.0 {
                max_abs / f32::from(Self::Q_MAX)
            } else {
                1.0
            };
            self.scales.push(scale);
            result.extend(block.iter().map(|&v| Self::to_code(v / scale)));
        }

        result
    }

    /// Dequantizes 7-bit codes back to `f32`; blocks without a recorded scale
    /// use 1.0.
    pub fn dequantize(&self, data: &[i8]) -> Vec<f32> {
        let mut result = Vec::with_capacity(data.len());
        for (block_idx, block) in data.chunks(self.effective_block_size()).enumerate() {
            let scale = self.scales.get(block_idx).copied().unwrap_or(1.0);
            result.extend(block.iter().map(|&q| f32::from(q) * scale));
        }
        result
    }

    /// Applies a micro-LoRA delta in 7-bit precision.
    ///
    /// Computes `base + delta * alpha` element-wise on the quantized codes,
    /// rounding and clamping each result to -64..=63. The output has the
    /// length of the shorter input. The quantizer's own state is not used.
    pub fn apply_lora_delta(&mut self, base: &[i8], delta: &[i8], alpha: f32) -> Vec<i8> {
        base.iter()
            .zip(delta)
            .map(|(&b, &d)| Self::to_code(f32::from(b) + f32::from(d) * alpha))
            .collect()
    }
}
|
||||
|
||||
/// Unified quantizer that selects appropriate implementation
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum LaneQuantizer {
|
||||
Bit3(Quantizer3Bit),
|
||||
Bit5(Quantizer5Bit),
|
||||
Bit7(Quantizer7Bit),
|
||||
}
|
||||
|
||||
impl LaneQuantizer {
|
||||
/// Create quantizer for a specific lane
|
||||
pub fn for_lane(lane: PrecisionLane, block_size: usize) -> Self {
|
||||
match lane {
|
||||
PrecisionLane::Bit3 => Self::Bit3(Quantizer3Bit::new(block_size)),
|
||||
PrecisionLane::Bit5 => Self::Bit5(Quantizer5Bit::new(block_size)),
|
||||
PrecisionLane::Bit7 => Self::Bit7(Quantizer7Bit::new(block_size)),
|
||||
PrecisionLane::Float32 => Self::Bit7(Quantizer7Bit::new(block_size)), // Fallback
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the precision lane
|
||||
pub fn lane(&self) -> PrecisionLane {
|
||||
match self {
|
||||
Self::Bit3(_) => PrecisionLane::Bit3,
|
||||
Self::Bit5(_) => PrecisionLane::Bit5,
|
||||
Self::Bit7(_) => PrecisionLane::Bit7,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// 64 evenly spaced samples starting at -3.2 with a step of 0.1.
    fn ramp() -> Vec<f32> {
        (0..64).map(|i| (i as f32 - 32.0) * 0.1).collect()
    }

    /// Assert that every restored value is within `limit` of its original.
    fn assert_error_below(original: &[f32], restored: &[f32], limit: f32) {
        for (orig, deq) in original.iter().zip(restored.iter()) {
            let error = (orig - deq).abs();
            assert!(error < limit, "Error too large: {} vs {}", orig, deq);
        }
    }

    #[test]
    fn test_3bit_roundtrip() {
        let values = ramp();
        let mut quantizer = Quantizer3Bit::new(32);

        let quantized = quantizer.quantize(&values);
        let dequantized = quantizer.dequantize(&quantized, values.len());

        assert_eq!(dequantized.len(), values.len());

        // Check error is bounded (3-bit is very lossy - only 8 levels)
        // With range ~6.4 (-3.2 to 3.2), each level is ~0.8, so max error is ~0.4
        // But with grouping, it can be higher
        assert_error_below(&values, &dequantized, 1.0);
    }

    #[test]
    fn test_5bit_roundtrip() {
        let values = ramp();
        let mut quantizer = Quantizer5Bit::new(32);

        let quantized = quantizer.quantize(&values);
        let dequantized = quantizer.dequantize(&quantized);

        assert_eq!(dequantized.len(), values.len());
        assert_error_below(&values, &dequantized, 0.2);
    }

    #[test]
    fn test_7bit_roundtrip() {
        let values = ramp();
        let mut quantizer = Quantizer7Bit::new(32);

        let quantized = quantizer.quantize(&values);
        let dequantized = quantizer.dequantize(&quantized);

        assert_eq!(dequantized.len(), values.len());
        assert_error_below(&values, &dequantized, 0.1);
    }

    #[test]
    fn test_7bit_lora_delta() {
        let base: Vec<i8> = vec![10, 20, 30, 40];
        let delta: Vec<i8> = vec![1, 2, 3, 4];
        let mut quantizer = Quantizer7Bit::new(32);

        let result = quantizer.apply_lora_delta(&base, &delta, 0.5);

        assert_eq!(result[0], 11); // 10 + 1*0.5 = 10.5 -> 11
        assert_eq!(result[1], 21); // 20 + 2*0.5 = 21
        assert_eq!(result[2], 32); // 30 + 3*0.5 = 31.5 -> 32
        assert_eq!(result[3], 42); // 40 + 4*0.5 = 42
    }
}
|
||||
345
vendor/ruvector/crates/ruvector-sparse-inference/src/precision/telemetry.rs
vendored
Normal file
345
vendor/ruvector/crates/ruvector-sparse-inference/src/precision/telemetry.rs
vendored
Normal file
@@ -0,0 +1,345 @@
|
||||
//! Telemetry and statistics for precision lanes
|
||||
//!
|
||||
//! Tracks lane usage, transitions, and performance metrics.
|
||||
|
||||
use super::lanes::PrecisionLane;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
/// Statistics for a single precision lane
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct LaneStats {
|
||||
/// Total operations in this lane
|
||||
pub operations: u64,
|
||||
|
||||
/// Total time spent in this lane (nanoseconds)
|
||||
pub total_time_ns: u64,
|
||||
|
||||
/// Average operation time (nanoseconds)
|
||||
pub avg_time_ns: u64,
|
||||
|
||||
/// Peak operation time (nanoseconds)
|
||||
pub peak_time_ns: u64,
|
||||
|
||||
/// Total bytes processed
|
||||
pub bytes_processed: u64,
|
||||
|
||||
/// Average active set size
|
||||
pub avg_active_set_size: f32,
|
||||
|
||||
/// Error count
|
||||
pub errors: u64,
|
||||
|
||||
/// Escalations from this lane
|
||||
pub escalations: u64,
|
||||
|
||||
/// Demotions to this lane
|
||||
pub demotions: u64,
|
||||
}
|
||||
|
||||
impl LaneStats {
|
||||
/// Record a new operation
|
||||
pub fn record_operation(&mut self, duration_ns: u64, bytes: u64, active_set_size: usize) {
|
||||
self.operations += 1;
|
||||
self.total_time_ns += duration_ns;
|
||||
self.bytes_processed += bytes;
|
||||
|
||||
// Update average
|
||||
let ops = self.operations as f32;
|
||||
self.avg_time_ns = (self.total_time_ns / self.operations) as u64;
|
||||
self.avg_active_set_size =
|
||||
(self.avg_active_set_size * (ops - 1.0) + active_set_size as f32) / ops;
|
||||
|
||||
// Update peak
|
||||
if duration_ns > self.peak_time_ns {
|
||||
self.peak_time_ns = duration_ns;
|
||||
}
|
||||
}
|
||||
|
||||
/// Record an error
|
||||
pub fn record_error(&mut self) {
|
||||
self.errors += 1;
|
||||
}
|
||||
|
||||
/// Record an escalation from this lane
|
||||
pub fn record_escalation(&mut self) {
|
||||
self.escalations += 1;
|
||||
}
|
||||
|
||||
/// Record a demotion to this lane
|
||||
pub fn record_demotion(&mut self) {
|
||||
self.demotions += 1;
|
||||
}
|
||||
|
||||
/// Get throughput in bytes per second
|
||||
pub fn throughput_bps(&self) -> f64 {
|
||||
if self.total_time_ns == 0 {
|
||||
return 0.0;
|
||||
}
|
||||
(self.bytes_processed as f64 * 1_000_000_000.0) / self.total_time_ns as f64
|
||||
}
|
||||
}
|
||||
|
||||
/// Comprehensive telemetry for all precision lanes
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LaneTelemetry {
|
||||
/// Per-lane statistics
|
||||
pub lane_stats: HashMap<PrecisionLane, LaneStats>,
|
||||
|
||||
/// Current lane
|
||||
pub current_lane: PrecisionLane,
|
||||
|
||||
/// Total lane transitions
|
||||
pub transitions: u64,
|
||||
|
||||
/// Transition history (recent 100)
|
||||
transition_history: Vec<LaneTransition>,
|
||||
|
||||
/// Start time
|
||||
start_time: Option<Instant>,
|
||||
|
||||
/// Session duration (seconds)
|
||||
pub session_duration_secs: f64,
|
||||
}
|
||||
|
||||
/// Record of a lane transition
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct LaneTransition {
|
||||
/// Source lane
|
||||
pub from: PrecisionLane,
|
||||
|
||||
/// Destination lane
|
||||
pub to: PrecisionLane,
|
||||
|
||||
/// Reason for transition
|
||||
pub reason: TransitionReason,
|
||||
|
||||
/// Timestamp (seconds since session start)
|
||||
pub timestamp_secs: f64,
|
||||
}
|
||||
|
||||
/// Reason for lane transition
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||
pub enum TransitionReason {
|
||||
/// Novelty threshold exceeded
|
||||
Novelty,
|
||||
/// Drift persisted
|
||||
DriftPersistence,
|
||||
/// Stability returned
|
||||
StabilityReturned,
|
||||
/// Velocity stalled
|
||||
VelocityStalled,
|
||||
/// Active set shrunk
|
||||
ActiveSetShrunk,
|
||||
/// Manual override
|
||||
Manual,
|
||||
/// Initialization
|
||||
Init,
|
||||
}
|
||||
|
||||
impl LaneTelemetry {
|
||||
/// Create new telemetry tracker
|
||||
pub fn new(initial_lane: PrecisionLane) -> Self {
|
||||
let mut lane_stats = HashMap::new();
|
||||
lane_stats.insert(PrecisionLane::Bit3, LaneStats::default());
|
||||
lane_stats.insert(PrecisionLane::Bit5, LaneStats::default());
|
||||
lane_stats.insert(PrecisionLane::Bit7, LaneStats::default());
|
||||
lane_stats.insert(PrecisionLane::Float32, LaneStats::default());
|
||||
|
||||
Self {
|
||||
lane_stats,
|
||||
current_lane: initial_lane,
|
||||
transitions: 0,
|
||||
transition_history: Vec::with_capacity(100),
|
||||
start_time: Some(Instant::now()),
|
||||
session_duration_secs: 0.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Start a new session
|
||||
pub fn start_session(&mut self) {
|
||||
self.start_time = Some(Instant::now());
|
||||
}
|
||||
|
||||
/// Record an operation in the current lane
|
||||
pub fn record_operation(&mut self, duration: Duration, bytes: u64, active_set_size: usize) {
|
||||
let duration_ns = duration.as_nanos() as u64;
|
||||
|
||||
if let Some(stats) = self.lane_stats.get_mut(&self.current_lane) {
|
||||
stats.record_operation(duration_ns, bytes, active_set_size);
|
||||
}
|
||||
|
||||
// Update session duration
|
||||
if let Some(start) = self.start_time {
|
||||
self.session_duration_secs = start.elapsed().as_secs_f64();
|
||||
}
|
||||
}
|
||||
|
||||
/// Record a lane transition
|
||||
pub fn record_transition(
|
||||
&mut self,
|
||||
from: PrecisionLane,
|
||||
to: PrecisionLane,
|
||||
reason: TransitionReason,
|
||||
) {
|
||||
self.transitions += 1;
|
||||
self.current_lane = to;
|
||||
|
||||
// Record escalation/demotion in stats
|
||||
if to.bits() > from.bits() {
|
||||
if let Some(stats) = self.lane_stats.get_mut(&from) {
|
||||
stats.record_escalation();
|
||||
}
|
||||
} else {
|
||||
if let Some(stats) = self.lane_stats.get_mut(&to) {
|
||||
stats.record_demotion();
|
||||
}
|
||||
}
|
||||
|
||||
// Add to history
|
||||
let timestamp_secs = self
|
||||
.start_time
|
||||
.map(|s| s.elapsed().as_secs_f64())
|
||||
.unwrap_or(0.0);
|
||||
|
||||
let transition = LaneTransition {
|
||||
from,
|
||||
to,
|
||||
reason,
|
||||
timestamp_secs,
|
||||
};
|
||||
|
||||
if self.transition_history.len() >= 100 {
|
||||
self.transition_history.remove(0);
|
||||
}
|
||||
self.transition_history.push(transition);
|
||||
}
|
||||
|
||||
/// Record an error in the current lane
|
||||
pub fn record_error(&mut self) {
|
||||
if let Some(stats) = self.lane_stats.get_mut(&self.current_lane) {
|
||||
stats.record_error();
|
||||
}
|
||||
}
|
||||
|
||||
/// Get statistics for a specific lane
|
||||
pub fn get_lane_stats(&self, lane: PrecisionLane) -> Option<&LaneStats> {
|
||||
self.lane_stats.get(&lane)
|
||||
}
|
||||
|
||||
/// Get total operations across all lanes
|
||||
pub fn total_operations(&self) -> u64 {
|
||||
self.lane_stats.values().map(|s| s.operations).sum()
|
||||
}
|
||||
|
||||
/// Get total errors across all lanes
|
||||
pub fn total_errors(&self) -> u64 {
|
||||
self.lane_stats.values().map(|s| s.errors).sum()
|
||||
}
|
||||
|
||||
/// Get lane usage distribution (percentage)
|
||||
pub fn lane_distribution(&self) -> HashMap<PrecisionLane, f32> {
|
||||
let total = self.total_operations() as f32;
|
||||
if total == 0.0 {
|
||||
return HashMap::new();
|
||||
}
|
||||
|
||||
self.lane_stats
|
||||
.iter()
|
||||
.map(|(lane, stats)| (*lane, (stats.operations as f32 / total) * 100.0))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get transition history
|
||||
pub fn transition_history(&self) -> &[LaneTransition] {
|
||||
&self.transition_history
|
||||
}
|
||||
|
||||
/// Generate summary report
|
||||
pub fn summary_report(&self) -> TelemetrySummary {
|
||||
TelemetrySummary {
|
||||
session_duration_secs: self.session_duration_secs,
|
||||
total_operations: self.total_operations(),
|
||||
total_transitions: self.transitions,
|
||||
total_errors: self.total_errors(),
|
||||
lane_distribution: self.lane_distribution(),
|
||||
avg_operations_per_sec: if self.session_duration_secs > 0.0 {
|
||||
self.total_operations() as f64 / self.session_duration_secs
|
||||
} else {
|
||||
0.0
|
||||
},
|
||||
current_lane: self.current_lane,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Summary of telemetry data
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct TelemetrySummary {
|
||||
pub session_duration_secs: f64,
|
||||
pub total_operations: u64,
|
||||
pub total_transitions: u64,
|
||||
pub total_errors: u64,
|
||||
pub lane_distribution: HashMap<PrecisionLane, f32>,
|
||||
pub avg_operations_per_sec: f64,
|
||||
pub current_lane: PrecisionLane,
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Switch to `lane` and record `count` identical operations there.
    fn record_many(
        telemetry: &mut LaneTelemetry,
        lane: PrecisionLane,
        count: usize,
        nanos: u64,
        bytes: u64,
        active_set_size: usize,
    ) {
        telemetry.current_lane = lane;
        for _ in 0..count {
            telemetry.record_operation(Duration::from_nanos(nanos), bytes, active_set_size);
        }
    }

    #[test]
    fn test_lane_stats_recording() {
        let mut stats = LaneStats::default();

        for &duration_ns in &[1000u64, 2000] {
            stats.record_operation(duration_ns, 64, 100);
        }

        assert_eq!(stats.operations, 2);
        assert_eq!(stats.total_time_ns, 3000);
        assert_eq!(stats.avg_time_ns, 1500);
        assert_eq!(stats.bytes_processed, 128);
    }

    #[test]
    fn test_telemetry_transitions() {
        let mut telemetry = LaneTelemetry::new(PrecisionLane::Bit5);

        telemetry.record_transition(
            PrecisionLane::Bit5,
            PrecisionLane::Bit7,
            TransitionReason::Novelty,
        );

        assert_eq!(telemetry.transitions, 1);
        assert_eq!(telemetry.current_lane, PrecisionLane::Bit7);
        assert_eq!(telemetry.transition_history.len(), 1);
    }

    #[test]
    fn test_lane_distribution() {
        let mut telemetry = LaneTelemetry::new(PrecisionLane::Bit5);

        // Simulate operations in different lanes
        record_many(&mut telemetry, PrecisionLane::Bit3, 30, 100, 8, 10);
        record_many(&mut telemetry, PrecisionLane::Bit5, 50, 200, 16, 50);
        record_many(&mut telemetry, PrecisionLane::Bit7, 20, 500, 32, 100);

        let distribution = telemetry.lane_distribution();

        assert!((distribution[&PrecisionLane::Bit3] - 30.0).abs() < 0.1);
        assert!((distribution[&PrecisionLane::Bit5] - 50.0).abs() < 0.1);
        assert!((distribution[&PrecisionLane::Bit7] - 20.0).abs() < 0.1);
    }
}
|
||||
Reference in New Issue
Block a user