Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,215 @@
//! Precision Lane definitions and configuration
//!
//! Defines the three precision lanes (3/5/7-bit) that map to intelligence roles.
use serde::{Deserialize, Serialize};
/// Precision lanes for layered quantization.
///
/// Each lane pairs a bit width with an intelligence role; signals move
/// between lanes according to the graduation rules in the module docs.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PrecisionLane {
    /// 3-bit lane: Reflex signals, gating, boundaries, health metrics.
    /// Uses signed int4 container restricted to 3-bit domain (-4..=3).
    /// LUT activation for speed.
    Bit3,
    /// 5-bit lane: Streaming embeddings, semantic motion, drift detection.
    /// Uses signed int8 container with values in -16..15.
    /// Per-channel or per-block scale.
    Bit5,
    /// 7-bit lane: Reasoning, synthesis, memory writes, micro-LoRA.
    /// Uses signed int8 container with values in -64..63.
    /// Stable accumulators, close to int8 quality.
    Bit7,
    /// Float lane: Training, calibration, aggregation boundaries only.
    Float32,
}
impl PrecisionLane {
/// Get the number of bits for this lane
pub fn bits(&self) -> u8 {
match self {
Self::Bit3 => 3,
Self::Bit5 => 5,
Self::Bit7 => 7,
Self::Float32 => 32,
}
}
/// Get the value range for this lane
pub fn value_range(&self) -> (i32, i32) {
match self {
Self::Bit3 => (-4, 3), // 3-bit signed: -4 to 3
Self::Bit5 => (-16, 15), // 5-bit signed: -16 to 15
Self::Bit7 => (-64, 63), // 7-bit signed: -64 to 63
Self::Float32 => (i32::MIN, i32::MAX),
}
}
/// Get bytes per element (storage container)
pub fn bytes_per_element(&self) -> f32 {
match self {
Self::Bit3 => 0.5, // Packed into int4
Self::Bit5 => 1.0, // int8 container
Self::Bit7 => 1.0, // int8 container
Self::Float32 => 4.0,
}
}
/// Get the default scale factor for this lane
pub fn default_scale(&self) -> f32 {
match self {
Self::Bit3 => 0.25, // Conservative for reflexes
Self::Bit5 => 0.0625, // 1/16 for streaming
Self::Bit7 => 0.015625, // 1/64 for reasoning
Self::Float32 => 1.0,
}
}
/// Check if this lane supports memory writes
pub fn allows_memory_writes(&self) -> bool {
matches!(self, Self::Bit7 | Self::Float32)
}
/// Check if this lane is event-driven vs continuous
pub fn is_event_driven(&self) -> bool {
matches!(self, Self::Bit5 | Self::Bit7)
}
}
impl Default for PrecisionLane {
fn default() -> Self {
Self::Bit7 // Default to reasoning lane
}
}
/// Configuration for precision lane behavior.
///
/// Consumed by `GraduationPolicy` to decide lane transitions; see that type
/// for how each threshold is applied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LaneConfig {
    /// Default lane for new operations
    pub default_lane: PrecisionLane,
    /// Time budget per tick for 3-bit lane (microseconds)
    pub bit3_tick_budget_us: u64,
    /// Maximum consecutive 5-bit updates before forced graduation check
    pub bit5_max_updates: usize,
    /// Minimum stability steps before demotion
    pub min_stability_steps: usize,
    /// Novelty threshold for escalation (0.0 to 1.0)
    pub novelty_threshold: f32,
    /// Drift persistence threshold (steps)
    pub drift_persistence_threshold: usize,
    /// Confidence threshold for graduation (0.0 to 1.0)
    pub confidence_threshold: f32,
    /// Cost budget for escalation (arbitrary units); compared against the
    /// smoothed `cost_usage` metric
    pub escalation_budget: f32,
    /// Enable automatic lane selection
    pub auto_lane_selection: bool,
}
impl Default for LaneConfig {
    /// Conservative defaults that start new signals in the streaming lane.
    fn default() -> Self {
        Self {
            default_lane: PrecisionLane::Bit5, // Start at streaming lane
            bit3_tick_budget_us: 100,          // 100μs per tick for reflexes
            bit5_max_updates: 10,              // Check graduation every 10 updates
            min_stability_steps: 5,            // 5 stable steps before demotion
            novelty_threshold: 0.3,            // 30% novelty triggers escalation
            drift_persistence_threshold: 3,    // 3 steps of drift
            confidence_threshold: 0.7,         // 70% confidence required
            escalation_budget: 1.0,            // Normalized budget
            auto_lane_selection: true,
        }
    }
}
/// Hardware target for lane optimization.
///
/// Each target constrains which precision lanes are available
/// (see `supported_lanes`) and which lane it starts in (`default_lane`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum HardwareTarget {
    /// ESP32: 3-bit only, tiny models
    Esp32,
    /// V0 Appliance: 5-bit streaming + 7-bit reasoning
    V0Appliance,
    /// Desktop/Server: Full lane support
    Desktop,
    /// FPGA: Deterministic 7-bit with witness logging
    Fpga,
}
impl HardwareTarget {
/// Get supported lanes for this hardware
pub fn supported_lanes(&self) -> Vec<PrecisionLane> {
match self {
Self::Esp32 => vec![PrecisionLane::Bit3],
Self::V0Appliance => vec![
PrecisionLane::Bit3,
PrecisionLane::Bit5,
PrecisionLane::Bit7,
],
Self::Desktop => vec![
PrecisionLane::Bit3,
PrecisionLane::Bit5,
PrecisionLane::Bit7,
PrecisionLane::Float32,
],
Self::Fpga => vec![PrecisionLane::Bit7],
}
}
/// Get the default lane for this hardware
pub fn default_lane(&self) -> PrecisionLane {
match self {
Self::Esp32 => PrecisionLane::Bit3,
Self::V0Appliance => PrecisionLane::Bit5,
Self::Desktop => PrecisionLane::Bit7,
Self::Fpga => PrecisionLane::Bit7,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_lane_bits() {
        let cases = [
            (PrecisionLane::Bit3, 3u8),
            (PrecisionLane::Bit5, 5),
            (PrecisionLane::Bit7, 7),
            (PrecisionLane::Float32, 32),
        ];
        for &(lane, bits) in cases.iter() {
            assert_eq!(lane.bits(), bits);
        }
    }

    #[test]
    fn test_lane_ranges() {
        let cases = [
            (PrecisionLane::Bit3, (-4, 3)),
            (PrecisionLane::Bit5, (-16, 15)),
            (PrecisionLane::Bit7, (-64, 63)),
        ];
        for &(lane, range) in cases.iter() {
            assert_eq!(lane.value_range(), range);
        }
    }

    #[test]
    fn test_memory_write_permission() {
        // Only the 7-bit and float lanes may persist memory writes.
        assert!(PrecisionLane::Bit7.allows_memory_writes());
        assert!(PrecisionLane::Float32.allows_memory_writes());
        assert!(!PrecisionLane::Bit3.allows_memory_writes());
        assert!(!PrecisionLane::Bit5.allows_memory_writes());
    }

    #[test]
    fn test_hardware_targets() {
        assert_eq!(
            HardwareTarget::Esp32.supported_lanes(),
            vec![PrecisionLane::Bit3]
        );
        let desktop = HardwareTarget::Desktop.supported_lanes();
        assert!(desktop.contains(&PrecisionLane::Float32));
    }
}

View File

@@ -0,0 +1,41 @@
//! Precision Lanes Module - Layered Quantization for Sparse Inference
//!
//! This module implements a 3/5/7-bit layered quantization system that turns
//! activation locality into a complete control theory for inference.
//!
//! # Intelligence Roles by Precision Lane
//!
//! - **3-bit Lane**: Reflex signals, gating, anomaly boundaries, mincut triggers, health metrics
//! - **5-bit Lane**: Streaming embeddings, semantic motion, drift detection, lightweight perception
//! - **7-bit Lane**: Reasoning, synthesis, memory writes, micro-LoRA adaptation, summaries
//! - **Float Lane**: Training, offline calibration, rare aggregation boundaries
//!
//! # Graduation Rules
//!
//! Signals move UP lanes when:
//! - Novelty exceeds threshold
//! - Drift persists for N steps
//! - Confidence and stability metrics pass
//! - Cost budget allows escalation
//!
//! Signals move DOWN lanes when:
//! - Stability returns
//! - Velocity stalls
//! - Active set shrinks
//! - Uncertainty is high but no action needed
//!
//! # Key Insight
//!
//! The active neuron set decides WHAT to compute.
//! The lane decides HOW PRECISELY to compute it.
//! The graduation rules decide WHEN computation is allowed to become expensive.

// Submodule layout: one file per concern.
pub mod lanes; // lane enum, config, hardware targets
pub mod policy; // graduation metrics and decision logic
pub mod quantizers; // 3/5/7-bit pack/unpack implementations
pub mod telemetry; // per-lane stats and transition history

// Flattened re-exports of the commonly used types.
pub use lanes::{LaneConfig, PrecisionLane};
pub use policy::{GraduationDecision, GraduationMetrics, GraduationPolicy};
pub use quantizers::{QuantizedBlock, Quantizer3Bit, Quantizer5Bit, Quantizer7Bit};
pub use telemetry::{LaneStats, LaneTelemetry};

View File

@@ -0,0 +1,418 @@
//! Graduation Policy - Rules for lane transitions
//!
//! Implements the control theory for when signals should move between precision lanes.
use super::lanes::{LaneConfig, PrecisionLane};
use serde::{Deserialize, Serialize};
/// Metrics used for graduation decisions.
///
/// Continuous scores are EMA-smoothed by `update`; the `*_steps` counters
/// track consecutive raw readings of drift / stability.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GraduationMetrics {
    /// Novelty score (0.0 to 1.0) - how different from recent patterns
    pub novelty: f32,
    /// Drift score (0.0 to 1.0) - how much the signal has drifted
    pub drift: f32,
    /// Number of consecutive steps raw drift exceeded 0.1 (see `update`)
    pub drift_steps: usize,
    /// Confidence score (0.0 to 1.0)
    pub confidence: f32,
    /// Stability score (0.0 to 1.0) - inverse of variance
    pub stability: f32,
    /// Number of consecutive steps raw stability exceeded 0.8 (see `update`)
    pub stable_steps: usize,
    /// Velocity (rate of change)
    pub velocity: f32,
    /// Active set size (number of active neurons)
    pub active_set_size: usize,
    /// Uncertainty score (0.0 to 1.0)
    pub uncertainty: f32,
    /// Current cost usage (0.0 to 1.0).
    /// NOTE(review): `update` never writes this field, so it stays at its
    /// default unless set externally — confirm intended owner.
    pub cost_usage: f32,
    /// Whether action is needed
    pub action_needed: bool,
}
impl GraduationMetrics {
    /// Fresh metrics with everything zeroed.
    pub fn new() -> Self {
        Self::default()
    }

    /// Fold one raw observation into the smoothed metrics.
    ///
    /// Continuous scores are blended with an exponential moving average
    /// (`ema_alpha` is the weight of the new observation); the discrete
    /// fields are copied as-is, and the persistence counters count
    /// consecutive raw drift / stability readings.
    pub fn update(&mut self, observation: &ObservationMetrics, ema_alpha: f32) {
        let blend = |current: f32, fresh: f32| ema_alpha * fresh + (1.0 - ema_alpha) * current;
        self.novelty = blend(self.novelty, observation.novelty);
        self.drift = blend(self.drift, observation.drift);
        self.confidence = blend(self.confidence, observation.confidence);
        self.stability = blend(self.stability, observation.stability);
        self.velocity = blend(self.velocity, observation.velocity);
        self.uncertainty = blend(self.uncertainty, observation.uncertainty);
        self.active_set_size = observation.active_set_size;
        self.action_needed = observation.action_needed;
        // Persistence counters use the raw (unsmoothed) observation values.
        self.drift_steps = if observation.drift > 0.1 {
            self.drift_steps + 1
        } else {
            0
        };
        self.stable_steps = if observation.stability > 0.8 {
            self.stable_steps + 1
        } else {
            0
        };
    }
}
/// Raw observation metrics from a single step.
///
/// These are the unsmoothed inputs that `GraduationMetrics::update` folds
/// into its EMA state; field semantics mirror `GraduationMetrics`.
#[derive(Debug, Clone, Default)]
pub struct ObservationMetrics {
    /// Novelty score (0.0 to 1.0)
    pub novelty: f32,
    /// Drift score (0.0 to 1.0); raw values above 0.1 advance `drift_steps`
    pub drift: f32,
    /// Confidence score (0.0 to 1.0)
    pub confidence: f32,
    /// Stability score (0.0 to 1.0); raw values above 0.8 advance `stable_steps`
    pub stability: f32,
    /// Rate of change of the signal
    pub velocity: f32,
    /// Uncertainty score (0.0 to 1.0)
    pub uncertainty: f32,
    /// Number of active neurons this step
    pub active_set_size: usize,
    /// Whether the signal requires action this step
    pub action_needed: bool,
}
/// Decision from graduation policy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GraduationDecision {
    /// Stay in current lane
    Stay,
    /// Escalate to higher precision lane (payload = destination lane)
    Escalate(PrecisionLane),
    /// Demote to lower precision lane (payload = destination lane)
    Demote(PrecisionLane),
}
/// Graduation policy for lane transitions.
///
/// Call `evaluate` each step with fresh observations, then `apply_decision`
/// to commit the returned decision.
#[derive(Debug, Clone)]
pub struct GraduationPolicy {
    /// Current precision lane
    pub current_lane: PrecisionLane,
    /// Configuration thresholds
    pub config: LaneConfig,
    /// Accumulated (EMA-smoothed) metrics
    pub metrics: GraduationMetrics,
    /// EMA smoothing factor (weight of the newest observation)
    pub ema_alpha: f32,
}
impl GraduationPolicy {
    /// Default EMA smoothing factor for metric updates.
    const DEFAULT_EMA_ALPHA: f32 = 0.3;
    /// Minimum smoothed stability required before an escalation is trusted.
    const ESCALATION_MIN_STABILITY: f32 = 0.5;
    /// |velocity| below this counts as a stall (a demotion signal).
    const VELOCITY_STALL_THRESHOLD: f32 = 0.01;
    /// Active sets smaller than this no longer justify high precision.
    const SMALL_ACTIVE_SET: usize = 10;
    /// Uncertainty above this, with no action needed, signals idle work.
    const IDLE_UNCERTAINTY_THRESHOLD: f32 = 0.7;

    /// Create a new graduation policy starting in `initial_lane`.
    pub fn new(initial_lane: PrecisionLane, config: LaneConfig) -> Self {
        Self {
            current_lane: initial_lane,
            config,
            metrics: GraduationMetrics::new(),
            ema_alpha: Self::DEFAULT_EMA_ALPHA,
        }
    }

    /// Fold `observation` into the smoothed metrics and decide whether the
    /// signal should escalate, demote, or stay in its current lane.
    ///
    /// Escalation is checked before demotion, so a signal that qualifies
    /// for both moves up. The decision is NOT applied here; call
    /// `apply_decision` to commit it.
    pub fn evaluate(&mut self, observation: &ObservationMetrics) -> GraduationDecision {
        self.metrics.update(observation, self.ema_alpha);
        if self.should_escalate() {
            if let Some(next_lane) = self.next_higher_lane() {
                return GraduationDecision::Escalate(next_lane);
            }
        }
        if self.should_demote() {
            if let Some(prev_lane) = self.next_lower_lane() {
                return GraduationDecision::Demote(prev_lane);
            }
        }
        GraduationDecision::Stay
    }

    /// Apply a graduation decision, switching lanes and resetting the
    /// persistence counters so the new lane starts with clean history.
    pub fn apply_decision(&mut self, decision: GraduationDecision) {
        match decision {
            GraduationDecision::Stay => {}
            GraduationDecision::Escalate(lane) | GraduationDecision::Demote(lane) => {
                self.current_lane = lane;
                self.metrics.stable_steps = 0;
                self.metrics.drift_steps = 0;
            }
        }
    }

    /// Escalate when novelty or persistent drift fires, AND the signal is
    /// confident/stable enough to deserve precision, AND budget remains.
    fn should_escalate(&self) -> bool {
        let novelty_trigger = self.metrics.novelty > self.config.novelty_threshold;
        let drift_trigger = self.metrics.drift_steps >= self.config.drift_persistence_threshold;
        let quality_pass = self.metrics.confidence >= self.config.confidence_threshold
            && self.metrics.stability >= Self::ESCALATION_MIN_STABILITY;
        let budget_allows = self.metrics.cost_usage < self.config.escalation_budget;
        (novelty_trigger || drift_trigger) && quality_pass && budget_allows
    }

    /// Demote when stability has persisted AND at least one "precision is
    /// wasted" signal fires: stalled velocity, shrunken active set, or
    /// uncertain-but-idle.
    fn should_demote(&self) -> bool {
        let stability_returned = self.metrics.stable_steps >= self.config.min_stability_steps;
        let velocity_stalled = self.metrics.velocity.abs() < Self::VELOCITY_STALL_THRESHOLD;
        let active_set_shrunk = self.metrics.active_set_size < Self::SMALL_ACTIVE_SET;
        let uncertain_idle = self.metrics.uncertainty > Self::IDLE_UNCERTAINTY_THRESHOLD
            && !self.metrics.action_needed;
        stability_returned && (velocity_stalled || active_set_shrunk || uncertain_idle)
    }

    /// Next lane up in precision order (None at the top).
    fn next_higher_lane(&self) -> Option<PrecisionLane> {
        match self.current_lane {
            PrecisionLane::Bit3 => Some(PrecisionLane::Bit5),
            PrecisionLane::Bit5 => Some(PrecisionLane::Bit7),
            PrecisionLane::Bit7 => Some(PrecisionLane::Float32),
            PrecisionLane::Float32 => None,
        }
    }

    /// Next lane down in precision order (None at the bottom).
    fn next_lower_lane(&self) -> Option<PrecisionLane> {
        match self.current_lane {
            PrecisionLane::Bit3 => None,
            PrecisionLane::Bit5 => Some(PrecisionLane::Bit3),
            PrecisionLane::Bit7 => Some(PrecisionLane::Bit5),
            PrecisionLane::Float32 => Some(PrecisionLane::Bit7),
        }
    }
}
/// Event processor with precision lane awareness.
///
/// Drives events through the 3-bit reflex gate, 5-bit embedding update,
/// and (when the policy allows) 7-bit reasoning; see `process_event`.
pub struct LanedEventProcessor {
    /// Graduation policy deciding lane transitions
    policy: GraduationPolicy,
    /// Total number of events processed so far
    event_count: usize,
}
impl LanedEventProcessor {
    /// Create a new event processor starting in the configured default lane.
    pub fn new(config: LaneConfig) -> Self {
        let initial_lane = config.default_lane;
        Self {
            policy: GraduationPolicy::new(initial_lane, config),
            event_count: 0,
        }
    }

    /// Run one event through the lane pipeline.
    ///
    /// Order: the 3-bit reflex gate always runs first; if no boundary is
    /// crossed the event is fully handled there. Otherwise a 5-bit embedding
    /// update runs, and the graduation policy decides whether 7-bit
    /// reasoning is warranted (it also runs if already in the 7-bit lane).
    pub fn process_event(&mut self, event: &Event) -> ProcessResult {
        self.event_count += 1;

        // Stage 1: reflex gate.
        let reflex = self.reflex_3bit(event);
        if !reflex.boundary_crossed {
            return ProcessResult::Reflexed(reflex);
        }

        // Stage 2: event-driven embedding update.
        let embedding = self.embed_5bit(event);

        // Stage 3: graduation check for 7-bit reasoning.
        let observation = self.compute_observation(&reflex, &embedding);
        let decision = self.policy.evaluate(&observation);
        let escalating_to_bit7 =
            matches!(decision, GraduationDecision::Escalate(PrecisionLane::Bit7));
        let already_in_bit7 = self.policy.current_lane == PrecisionLane::Bit7;
        if escalating_to_bit7 || already_in_bit7 {
            let reasoned = self.reason_7bit(event, &embedding);
            self.policy.apply_decision(decision);
            return ProcessResult::Reasoned(reasoned);
        }
        self.policy.apply_decision(decision);
        ProcessResult::Embedded(embedding)
    }

    /// Stage 1 stub: 3-bit reflex processing (returns placeholder values).
    fn reflex_3bit(&self, _event: &Event) -> ReflexResult {
        ReflexResult {
            boundary_crossed: true, // Simplified
            health_ok: true,
            anomaly_detected: false,
        }
    }

    /// Stage 2 stub: 5-bit embedding update (returns placeholder values).
    fn embed_5bit(&self, _event: &Event) -> EmbedResult {
        EmbedResult {
            embedding_delta: vec![0.0; 64],
            drift_detected: false,
        }
    }

    /// Stage 3 stub: 7-bit reasoning (returns placeholder values).
    fn reason_7bit(&self, _event: &Event, _embed: &EmbedResult) -> ReasonResult {
        ReasonResult {
            should_write_memory: false,
            summary: String::new(),
            actions: Vec::new(),
        }
    }

    /// Stub: derive observation metrics from stage results (all defaults).
    fn compute_observation(
        &self,
        _reflex: &ReflexResult,
        _embed: &EmbedResult,
    ) -> ObservationMetrics {
        ObservationMetrics::default()
    }

    /// The lane the policy currently operates in.
    pub fn current_lane(&self) -> PrecisionLane {
        self.policy.current_lane
    }
}
/// Simple event type for processing.
#[derive(Debug, Clone)]
pub struct Event {
    /// Raw payload carried by the event (interpretation is up to the stages)
    pub data: Vec<f32>,
    /// Event timestamp; unit/epoch not specified here — presumably a
    /// monotonically increasing tick. TODO(review): confirm with producers.
    pub timestamp: u64,
}
/// Result of 3-bit reflex processing (stage 1).
#[derive(Debug, Clone)]
pub struct ReflexResult {
    /// Whether a boundary was crossed (gates the later stages)
    pub boundary_crossed: bool,
    /// Health check outcome
    pub health_ok: bool,
    /// Whether an anomaly was flagged
    pub anomaly_detected: bool,
}
/// Result of 5-bit embedding (stage 2).
#[derive(Debug, Clone)]
pub struct EmbedResult {
    /// Change applied to the streaming embedding
    pub embedding_delta: Vec<f32>,
    /// Whether semantic drift was detected
    pub drift_detected: bool,
}
/// Result of 7-bit reasoning (stage 3).
#[derive(Debug, Clone)]
pub struct ReasonResult {
    /// Whether the reasoning pass requests a memory write
    pub should_write_memory: bool,
    /// Generated summary text
    pub summary: String,
    /// Proposed follow-up actions
    pub actions: Vec<String>,
}
/// Overall processing result: which stage handled the event.
#[derive(Debug)]
pub enum ProcessResult {
    Reflexed(ReflexResult),
    Embedded(EmbedResult),
    Reasoned(ReasonResult),
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_graduation_policy_creation() {
        let policy = GraduationPolicy::new(PrecisionLane::Bit5, LaneConfig::default());
        assert_eq!(policy.current_lane, PrecisionLane::Bit5);
    }

    #[test]
    fn test_escalation_on_novelty() {
        let config = LaneConfig {
            novelty_threshold: 0.3,
            confidence_threshold: 0.5,
            ..Default::default()
        };
        let mut policy = GraduationPolicy::new(PrecisionLane::Bit5, config);
        // alpha = 1.0 makes the EMA track each observation exactly.
        policy.ema_alpha = 1.0;
        // High novelty and confidence, adequate stability.
        let observation = ObservationMetrics {
            novelty: 0.9,
            confidence: 0.9,
            stability: 0.6,
            ..Default::default()
        };
        let decision = policy.evaluate(&observation);
        assert!(matches!(
            decision,
            GraduationDecision::Escalate(PrecisionLane::Bit7)
        ));
    }

    #[test]
    fn test_demotion_on_stability() {
        let config = LaneConfig {
            min_stability_steps: 2,
            ..Default::default()
        };
        let mut policy = GraduationPolicy::new(PrecisionLane::Bit7, config);
        // Stable, stalled, shrunken-active-set observation.
        let stable_observation = || ObservationMetrics {
            stability: 0.9,
            velocity: 0.001,
            active_set_size: 5,
            ..Default::default()
        };
        // Accumulate enough consecutive stable steps to pass the gate.
        for _ in 0..5 {
            policy.evaluate(&stable_observation());
        }
        let decision = policy.evaluate(&stable_observation());
        assert!(matches!(
            decision,
            GraduationDecision::Demote(PrecisionLane::Bit5)
        ));
    }
}

View File

@@ -0,0 +1,438 @@
//! Quantizers for 3/5/7-bit precision lanes
//!
//! Implements pack/unpack operations for each precision lane with
//! per-block or per-channel scaling.
use super::lanes::PrecisionLane;
use serde::{Deserialize, Serialize};
/// Quantized block with scale factor.
///
/// Stores quantized codes in an int8 container together with the affine
/// dequantization parameters (scale, zero point) and the originating lane.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuantizedBlock {
    /// Quantized codes (int8 container regardless of logical bit width)
    pub data: Vec<i8>,
    /// Scale factor for dequantization
    pub scale: f32,
    /// Zero point offset subtracted before scaling
    pub zero_point: i8,
    /// Block size the container was created for
    pub block_size: usize,
    /// Precision lane this block belongs to
    pub lane: PrecisionLane,
}
impl QuantizedBlock {
    /// Create an empty block for `lane`, pre-sized for `block_size` codes
    /// and initialized with the lane's default scale.
    pub fn new(lane: PrecisionLane, block_size: usize) -> Self {
        Self {
            data: Vec::with_capacity(block_size),
            scale: lane.default_scale(),
            zero_point: 0,
            block_size,
            lane,
        }
    }

    /// Reconstruct f32 values: `(q - zero_point) * scale` per element.
    pub fn dequantize(&self) -> Vec<f32> {
        let zero = self.zero_point as i32;
        self.data
            .iter()
            .map(|&q| ((q as i32 - zero) as f32) * self.scale)
            .collect()
    }

    /// Approximate storage footprint: payload bytes plus the f32 scale
    /// (4 bytes) and i8 zero point (1 byte).
    pub fn size_bytes(&self) -> usize {
        let metadata_bytes = 4 + 1;
        self.data.len() + metadata_bytes
    }
}
/// 3-bit quantizer for reflex signals
///
/// Values are restricted to the signed 3-bit domain -4..=3 and packed two
/// nibbles per byte inside a signed int4 container. An optional 8-entry LUT
/// maps each quantized level directly to an activation output.
#[derive(Debug, Clone)]
pub struct Quantizer3Bit {
    /// Per-block scale factors (one per `block_size` chunk of input)
    pub scales: Vec<f32>,
    /// Block size (typically 32)
    pub block_size: usize,
    /// LUT for activation (optional); indexed by quantized level + 4
    pub activation_lut: Option<[f32; 8]>,
}
impl Quantizer3Bit {
    /// Create a new 3-bit quantizer with per-block scaling.
    pub fn new(block_size: usize) -> Self {
        Self {
            scales: Vec::new(),
            block_size,
            activation_lut: None,
        }
    }

    /// Attach an activation LUT (e.g. a precomputed ReLU over the 8 levels).
    ///
    /// When set, `dequantize` returns `lut[level + 4]` instead of
    /// `level * scale`.
    pub fn with_activation_lut(mut self, lut: [f32; 8]) -> Self {
        self.activation_lut = Some(lut);
        self
    }

    /// Quantize f32 values to 3-bit codes, packed two nibbles per byte.
    ///
    /// Packing never crosses a block boundary: a block with an odd number
    /// of values pads the high nibble of its final byte with zero.
    pub fn quantize(&mut self, values: &[f32]) -> Vec<u8> {
        let num_blocks = (values.len() + self.block_size - 1) / self.block_size;
        self.scales = Vec::with_capacity(num_blocks);
        // Each block contributes ceil(block_len / 2) bytes.
        let mut result = Vec::with_capacity(values.len() / 2 + num_blocks);
        for block in values.chunks(self.block_size) {
            // Per-block scale so the largest magnitude maps to level 3;
            // all-zero blocks keep scale 1.0.
            let max_abs = block.iter().map(|x| x.abs()).fold(0.0f32, f32::max);
            let scale = if max_abs > 0.0 { max_abs / 3.0 } else { 1.0 };
            self.scales.push(scale);
            for pair in block.chunks(2) {
                let q0 = Self::quantize_value(pair[0], scale);
                let q1 = if pair.len() > 1 {
                    Self::quantize_value(pair[1], scale)
                } else {
                    0 // padding nibble for an odd-length block
                };
                // Low nibble = first value, high nibble = second value.
                result.push(((q1 as u8) << 4) | (q0 as u8 & 0x0F));
            }
        }
        result
    }

    /// Quantize a single value into the 3-bit domain -4..=3.
    fn quantize_value(value: f32, scale: f32) -> i8 {
        let scaled = (value / scale).round() as i8;
        scaled.clamp(-4, 3)
    }

    /// Sign-extend the low 4 bits of `nibble` to i8.
    fn sign_extend(nibble: u8) -> i8 {
        let q = (nibble & 0x0F) as i8;
        if q > 7 {
            q - 16
        } else {
            q
        }
    }

    /// Decode one quantized level: LUT lookup if configured, else level * scale.
    fn decode_level(&self, q: i8, scale: f32) -> f32 {
        match &self.activation_lut {
            // The clamp defends against out-of-domain nibbles (-8..-5, 4..7)
            // in corrupted input; valid codes are always in -4..=3.
            Some(lut) => lut[(q + 4).clamp(0, 7) as usize],
            None => (q as f32) * scale,
        }
    }

    /// Dequantize packed 3-bit codes back to f32.
    ///
    /// Bug fix: bytes are now consumed block by block, so the zero padding
    /// nibble at the end of an odd-length block is skipped instead of being
    /// emitted as a spurious value, and each block's scale is applied to
    /// exactly that block's values. (The previous code also only advanced
    /// the block index after the second nibble of a byte, desynchronizing
    /// the scales for odd block sizes.) Behavior for even `block_size`,
    /// including the default 32, is unchanged.
    pub fn dequantize(&self, data: &[u8], num_values: usize) -> Vec<f32> {
        let mut result = Vec::with_capacity(num_values);
        let mut bytes = data.iter();
        let mut block_idx = 0;
        while result.len() < num_values {
            let scale = self.scales.get(block_idx).copied().unwrap_or(1.0);
            // Number of real values stored in this block.
            let mut remaining = (num_values - result.len()).min(self.block_size);
            while remaining > 0 {
                let byte = match bytes.next() {
                    Some(&b) => b,
                    None => return result, // ran out of data; return what decoded
                };
                result.push(self.decode_level(Self::sign_extend(byte), scale));
                remaining -= 1;
                if remaining > 0 {
                    result.push(self.decode_level(Self::sign_extend(byte >> 4), scale));
                    remaining -= 1;
                }
            }
            block_idx += 1;
        }
        result
    }
}
/// 5-bit quantizer for streaming embeddings
///
/// Uses a signed int8 container with values in -16..15, with either one
/// scale per block or one scale per value (per-channel) for stable
/// streaming updates.
#[derive(Debug, Clone)]
pub struct Quantizer5Bit {
    /// Per-block (or per-value, in per-channel mode) scale factors
    pub scales: Vec<f32>,
    /// Block size
    pub block_size: usize,
    /// Use per-channel scaling (instead of per-block)
    pub per_channel: bool,
}
impl Quantizer5Bit {
    /// Create a new 5-bit quantizer using per-block scaling.
    pub fn new(block_size: usize) -> Self {
        Self {
            scales: Vec::new(),
            block_size,
            per_channel: false,
        }
    }

    /// Switch this quantizer into per-channel (one scale per value) mode.
    pub fn with_per_channel(mut self) -> Self {
        self.per_channel = true;
        self
    }

    /// Quantize f32 values into the 5-bit domain -16..=15 (one i8 each).
    pub fn quantize(&mut self, values: &[f32]) -> Vec<i8> {
        match self.per_channel {
            true => self.quantize_per_channel(values),
            false => self.quantize_per_block(values),
        }
    }

    /// One shared scale per `block_size` chunk of input.
    fn quantize_per_block(&mut self, values: &[f32]) -> Vec<i8> {
        let num_blocks = (values.len() + self.block_size - 1) / self.block_size;
        self.scales = Vec::with_capacity(num_blocks);
        let mut out = Vec::with_capacity(values.len());
        for chunk in values.chunks(self.block_size) {
            // The block's largest magnitude maps to level 15; all-zero
            // blocks keep unit scale.
            let peak = chunk.iter().fold(0.0f32, |m, v| m.max(v.abs()));
            let scale = if peak > 0.0 { peak / 15.0 } else { 1.0 };
            self.scales.push(scale);
            out.extend(
                chunk
                    .iter()
                    .map(|&v| ((v / scale).round() as i8).clamp(-16, 15)),
            );
        }
        out
    }

    /// One scale per value; every nonzero value lands on level ±15.
    fn quantize_per_channel(&mut self, values: &[f32]) -> Vec<i8> {
        self.scales = Vec::with_capacity(values.len());
        let mut out = Vec::with_capacity(values.len());
        for &v in values {
            let magnitude = v.abs();
            let scale = if magnitude > 0.0 { magnitude / 15.0 } else { 1.0 };
            self.scales.push(scale);
            out.push(((v / scale).round() as i8).clamp(-16, 15));
        }
        out
    }

    /// Reconstruct f32 values by applying the recorded scales.
    pub fn dequantize(&self, data: &[i8]) -> Vec<f32> {
        if self.per_channel {
            data.iter()
                .zip(&self.scales)
                .map(|(&q, &scale)| f32::from(q) * scale)
                .collect()
        } else {
            data.iter()
                .enumerate()
                .map(|(i, &q)| {
                    // i / block_size is the block this code belongs to;
                    // unknown blocks fall back to unit scale.
                    let scale = self.scales.get(i / self.block_size).copied().unwrap_or(1.0);
                    f32::from(q) * scale
                })
                .collect()
        }
    }
}
/// 7-bit quantizer for reasoning
///
/// Uses a signed int8 container with values in -64..63, one scale per
/// block. Stable accumulators, close to int8 quality.
#[derive(Debug, Clone)]
pub struct Quantizer7Bit {
    /// Per-block scale factors
    pub scales: Vec<f32>,
    /// Block size
    pub block_size: usize,
}
impl Quantizer7Bit {
    /// Create a new 7-bit quantizer with per-block scaling.
    pub fn new(block_size: usize) -> Self {
        Self {
            scales: Vec::new(),
            block_size,
        }
    }

    /// Quantize f32 values to the 7-bit domain -64..=63 (stored in int8).
    ///
    /// Records one scale per block; the block's largest magnitude maps to 63.
    pub fn quantize(&mut self, values: &[f32]) -> Vec<i8> {
        let num_blocks = (values.len() + self.block_size - 1) / self.block_size;
        self.scales = Vec::with_capacity(num_blocks);
        let mut result = Vec::with_capacity(values.len());
        for block in values.chunks(self.block_size) {
            let max_abs = block.iter().map(|x| x.abs()).fold(0.0f32, f32::max);
            // All-zero blocks keep scale 1.0 so dequantization is a no-op.
            let scale = if max_abs > 0.0 { max_abs / 63.0 } else { 1.0 };
            self.scales.push(scale);
            for &value in block {
                let q = (value / scale).round() as i8;
                result.push(q.clamp(-64, 63));
            }
        }
        result
    }

    /// Dequantize 7-bit values to f32 using the recorded per-block scales.
    pub fn dequantize(&self, data: &[i8]) -> Vec<f32> {
        let mut result = Vec::with_capacity(data.len());
        let mut block_idx = 0;
        for (i, &q) in data.iter().enumerate() {
            // Unknown blocks (e.g. foreign data) fall back to unit scale.
            let scale = self.scales.get(block_idx).copied().unwrap_or(1.0);
            result.push((q as f32) * scale);
            if (i + 1) % self.block_size == 0 {
                block_idx += 1;
            }
        }
        result
    }

    /// Apply a micro-LoRA delta in 7-bit precision: `base + alpha * delta`
    /// element-wise, rounded and clamped back into -64..=63.
    ///
    /// Takes `&self` (was `&mut self`): the operation neither reads nor
    /// mutates quantizer state, so shared borrows suffice; existing
    /// `&mut` call sites still compile via the `&mut T -> &T` coercion.
    pub fn apply_lora_delta(&self, base: &[i8], delta: &[i8], alpha: f32) -> Vec<i8> {
        base.iter()
            .zip(delta.iter())
            .map(|(&b, &d)| {
                let blended = f32::from(b) + f32::from(d) * alpha;
                (blended.round() as i8).clamp(-64, 63)
            })
            .collect()
    }
}
/// Unified quantizer that selects the appropriate implementation per lane.
#[derive(Debug, Clone)]
pub enum LaneQuantizer {
    Bit3(Quantizer3Bit),
    Bit5(Quantizer5Bit),
    Bit7(Quantizer7Bit),
}
impl LaneQuantizer {
    /// Build the quantizer matching `lane`.
    ///
    /// `Float32` has no dedicated quantizer and falls back to the 7-bit
    /// implementation, which is why `lane()` can never report `Float32`.
    pub fn for_lane(lane: PrecisionLane, block_size: usize) -> Self {
        match lane {
            PrecisionLane::Bit3 => LaneQuantizer::Bit3(Quantizer3Bit::new(block_size)),
            PrecisionLane::Bit5 => LaneQuantizer::Bit5(Quantizer5Bit::new(block_size)),
            PrecisionLane::Bit7 | PrecisionLane::Float32 => {
                LaneQuantizer::Bit7(Quantizer7Bit::new(block_size))
            }
        }
    }

    /// The precision lane of the wrapped quantizer.
    pub fn lane(&self) -> PrecisionLane {
        match self {
            LaneQuantizer::Bit3(_) => PrecisionLane::Bit3,
            LaneQuantizer::Bit5(_) => PrecisionLane::Bit5,
            LaneQuantizer::Bit7(_) => PrecisionLane::Bit7,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// 64-point ramp from -3.2 to 3.1 in 0.1 steps, shared by the roundtrips.
    fn ramp() -> Vec<f32> {
        (0..64).map(|i| (i as f32 - 32.0) * 0.1).collect()
    }

    /// Assert every decoded value is within `bound` of its original.
    fn assert_roundtrip_error(values: &[f32], decoded: &[f32], bound: f32) {
        assert_eq!(decoded.len(), values.len());
        for (orig, deq) in values.iter().zip(decoded.iter()) {
            let error = (orig - deq).abs();
            assert!(error < bound, "Error too large: {} vs {}", orig, deq);
        }
    }

    #[test]
    fn test_3bit_roundtrip() {
        let mut quantizer = Quantizer3Bit::new(32);
        let values = ramp();
        let packed = quantizer.quantize(&values);
        let decoded = quantizer.dequantize(&packed, values.len());
        // 3-bit is very lossy (only 8 levels, ~0.8 apart over this range);
        // allow up to 1.0 of error per value.
        assert_roundtrip_error(&values, &decoded, 1.0);
    }

    #[test]
    fn test_5bit_roundtrip() {
        let mut quantizer = Quantizer5Bit::new(32);
        let values = ramp();
        let packed = quantizer.quantize(&values);
        let decoded = quantizer.dequantize(&packed);
        assert_roundtrip_error(&values, &decoded, 0.2);
    }

    #[test]
    fn test_7bit_roundtrip() {
        let mut quantizer = Quantizer7Bit::new(32);
        let values = ramp();
        let packed = quantizer.quantize(&values);
        let decoded = quantizer.dequantize(&packed);
        assert_roundtrip_error(&values, &decoded, 0.1);
    }

    #[test]
    fn test_7bit_lora_delta() {
        let mut quantizer = Quantizer7Bit::new(32);
        let base: Vec<i8> = vec![10, 20, 30, 40];
        let delta: Vec<i8> = vec![1, 2, 3, 4];
        // alpha = 0.5; half-values round away from zero.
        let result = quantizer.apply_lora_delta(&base, &delta, 0.5);
        assert_eq!(result, vec![11, 21, 32, 42]);
    }
}

View File

@@ -0,0 +1,345 @@
//! Telemetry and statistics for precision lanes
//!
//! Tracks lane usage, transitions, and performance metrics.
use super::lanes::PrecisionLane;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::{Duration, Instant};
/// Statistics for a single precision lane.
///
/// The `avg_*` fields are maintained incrementally by `record_operation`;
/// the remaining counters are bumped by the matching `record_*` methods.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct LaneStats {
    /// Total operations in this lane
    pub operations: u64,
    /// Total time spent in this lane (nanoseconds)
    pub total_time_ns: u64,
    /// Average operation time (nanoseconds); integer mean of total/count
    pub avg_time_ns: u64,
    /// Peak operation time (nanoseconds)
    pub peak_time_ns: u64,
    /// Total bytes processed
    pub bytes_processed: u64,
    /// Average active set size (running mean)
    pub avg_active_set_size: f32,
    /// Error count
    pub errors: u64,
    /// Escalations from this lane
    pub escalations: u64,
    /// Demotions to this lane
    pub demotions: u64,
}
impl LaneStats {
    /// Record one completed operation: bumps the counters and refreshes the
    /// average/peak time and running mean of the active set size.
    pub fn record_operation(&mut self, duration_ns: u64, bytes: u64, active_set_size: usize) {
        self.operations += 1;
        self.total_time_ns += duration_ns;
        self.bytes_processed += bytes;
        // operations >= 1 here, so this division is safe.
        self.avg_time_ns = self.total_time_ns / self.operations;
        // Incremental running mean over all operations so far.
        let count = self.operations as f32;
        self.avg_active_set_size =
            (self.avg_active_set_size * (count - 1.0) + active_set_size as f32) / count;
        self.peak_time_ns = self.peak_time_ns.max(duration_ns);
    }

    /// Record an error in this lane.
    pub fn record_error(&mut self) {
        self.errors += 1;
    }

    /// Record an escalation originating from this lane.
    pub fn record_escalation(&mut self) {
        self.escalations += 1;
    }

    /// Record a demotion landing in this lane.
    pub fn record_demotion(&mut self) {
        self.demotions += 1;
    }

    /// Throughput in bytes per second (0.0 when no time was recorded).
    pub fn throughput_bps(&self) -> f64 {
        match self.total_time_ns {
            0 => 0.0,
            elapsed_ns => (self.bytes_processed as f64 * 1_000_000_000.0) / elapsed_ns as f64,
        }
    }
}
/// Comprehensive telemetry for all precision lanes.
///
/// Not serializable (unlike `LaneStats`/`LaneTransition`) because it holds
/// an `Instant` for session timing.
#[derive(Debug, Clone)]
pub struct LaneTelemetry {
    /// Per-lane statistics
    pub lane_stats: HashMap<PrecisionLane, LaneStats>,
    /// Current lane
    pub current_lane: PrecisionLane,
    /// Total lane transitions
    pub transitions: u64,
    /// Transition history, capped at the most recent 100 entries
    transition_history: Vec<LaneTransition>,
    /// Session start time; None until a session has been started
    start_time: Option<Instant>,
    /// Session duration (seconds), refreshed on each recorded operation
    pub session_duration_secs: f64,
}
/// Record of a lane transition.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LaneTransition {
    /// Source lane
    pub from: PrecisionLane,
    /// Destination lane
    pub to: PrecisionLane,
    /// Reason for transition
    pub reason: TransitionReason,
    /// Timestamp (seconds since session start; 0.0 if no session started)
    pub timestamp_secs: f64,
}
/// Reason for lane transition.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum TransitionReason {
    /// Novelty threshold exceeded
    Novelty,
    /// Drift persisted
    DriftPersistence,
    /// Stability returned
    StabilityReturned,
    /// Velocity stalled
    VelocityStalled,
    /// Active set shrunk
    ActiveSetShrunk,
    /// Manual override
    Manual,
    /// Initialization
    Init,
}
impl LaneTelemetry {
    /// Maximum number of entries kept in `transition_history`.
    ///
    /// Single source of truth for the bound: `new` uses it as the initial
    /// capacity and `record_transition` as the eviction threshold, so the
    /// two can never drift apart.
    const MAX_TRANSITION_HISTORY: usize = 100;

    /// Create new telemetry tracker
    ///
    /// Seeds zeroed stats for all four lanes, sets `initial_lane` as the
    /// current lane, and starts the session clock immediately.
    pub fn new(initial_lane: PrecisionLane) -> Self {
        let mut lane_stats = HashMap::new();
        lane_stats.insert(PrecisionLane::Bit3, LaneStats::default());
        lane_stats.insert(PrecisionLane::Bit5, LaneStats::default());
        lane_stats.insert(PrecisionLane::Bit7, LaneStats::default());
        lane_stats.insert(PrecisionLane::Float32, LaneStats::default());
        Self {
            lane_stats,
            current_lane: initial_lane,
            transitions: 0,
            transition_history: Vec::with_capacity(Self::MAX_TRANSITION_HISTORY),
            start_time: Some(Instant::now()),
            session_duration_secs: 0.0,
        }
    }

    /// Start a new session.
    ///
    /// Resets only the session clock; accumulated stats, transitions, and
    /// history are intentionally kept.
    pub fn start_session(&mut self) {
        self.start_time = Some(Instant::now());
    }

    /// Record an operation in the current lane.
    ///
    /// `duration` is the operation's wall time, `bytes` the payload size, and
    /// `active_set_size` the active-set size at the time of the operation.
    /// Also refreshes `session_duration_secs` from the session clock.
    pub fn record_operation(&mut self, duration: Duration, bytes: u64, active_set_size: usize) {
        let duration_ns = duration.as_nanos() as u64;
        if let Some(stats) = self.lane_stats.get_mut(&self.current_lane) {
            stats.record_operation(duration_ns, bytes, active_set_size);
        }
        // Update session duration
        if let Some(start) = self.start_time {
            self.session_duration_secs = start.elapsed().as_secs_f64();
        }
    }

    /// Record a lane transition: bumps the counter, switches `current_lane`,
    /// updates escalation/demotion stats, and appends to the bounded history.
    ///
    /// NOTE(review): escalations are attributed to the *source* lane's stats
    /// while demotions are attributed to the *destination* lane's. That
    /// asymmetry is preserved as-is here — confirm it is intentional before
    /// relying on per-lane escalation/demotion counts.
    pub fn record_transition(
        &mut self,
        from: PrecisionLane,
        to: PrecisionLane,
        reason: TransitionReason,
    ) {
        self.transitions += 1;
        self.current_lane = to;
        // Record escalation/demotion in stats
        if to.bits() > from.bits() {
            if let Some(stats) = self.lane_stats.get_mut(&from) {
                stats.record_escalation();
            }
        } else if let Some(stats) = self.lane_stats.get_mut(&to) {
            stats.record_demotion();
        }
        // Timestamp relative to session start; 0.0 when no clock is running.
        let timestamp_secs = self
            .start_time
            .map(|s| s.elapsed().as_secs_f64())
            .unwrap_or(0.0);
        // Evict the oldest entry once the bound is reached. The O(len) shift
        // is acceptable at this small, fixed capacity.
        if self.transition_history.len() >= Self::MAX_TRANSITION_HISTORY {
            self.transition_history.remove(0);
        }
        self.transition_history.push(LaneTransition {
            from,
            to,
            reason,
            timestamp_secs,
        });
    }

    /// Record an error in the current lane
    pub fn record_error(&mut self) {
        if let Some(stats) = self.lane_stats.get_mut(&self.current_lane) {
            stats.record_error();
        }
    }

    /// Get statistics for a specific lane (`None` only if the lane was never seeded)
    pub fn get_lane_stats(&self, lane: PrecisionLane) -> Option<&LaneStats> {
        self.lane_stats.get(&lane)
    }

    /// Get total operations across all lanes
    pub fn total_operations(&self) -> u64 {
        self.lane_stats.values().map(|s| s.operations).sum()
    }

    /// Get total errors across all lanes
    pub fn total_errors(&self) -> u64 {
        self.lane_stats.values().map(|s| s.errors).sum()
    }

    /// Get lane usage distribution as a percentage of total operations.
    ///
    /// Returns an empty map when no operations have been recorded, avoiding
    /// a 0/0 division.
    pub fn lane_distribution(&self) -> HashMap<PrecisionLane, f32> {
        let total = self.total_operations() as f32;
        if total == 0.0 {
            return HashMap::new();
        }
        self.lane_stats
            .iter()
            .map(|(lane, stats)| (*lane, (stats.operations as f32 / total) * 100.0))
            .collect()
    }

    /// Get transition history (oldest first, at most `MAX_TRANSITION_HISTORY` entries)
    pub fn transition_history(&self) -> &[LaneTransition] {
        &self.transition_history
    }

    /// Generate a point-in-time summary of session-wide aggregates.
    pub fn summary_report(&self) -> TelemetrySummary {
        TelemetrySummary {
            session_duration_secs: self.session_duration_secs,
            total_operations: self.total_operations(),
            total_transitions: self.transitions,
            total_errors: self.total_errors(),
            lane_distribution: self.lane_distribution(),
            // Guard against division by zero before any operation is recorded.
            avg_operations_per_sec: if self.session_duration_secs > 0.0 {
                self.total_operations() as f64 / self.session_duration_secs
            } else {
                0.0
            },
            current_lane: self.current_lane,
        }
    }
}
/// Summary of telemetry data
///
/// Point-in-time snapshot produced by `LaneTelemetry::summary_report`;
/// serializable for export.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TelemetrySummary {
    /// Wall-clock length of the session in seconds
    pub session_duration_secs: f64,
    /// Operations recorded across all lanes
    pub total_operations: u64,
    /// Lane transitions recorded over the session
    pub total_transitions: u64,
    /// Errors recorded across all lanes
    pub total_errors: u64,
    /// Share of operations per lane, in percent
    pub lane_distribution: HashMap<PrecisionLane, f32>,
    /// Operations per second averaged over the session (0.0 for empty sessions)
    pub avg_operations_per_sec: f64,
    /// Lane active when the summary was taken
    pub current_lane: PrecisionLane,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two recorded operations should accumulate counts, time, and bytes,
    /// and the running average should reflect both.
    #[test]
    fn test_lane_stats_recording() {
        let mut stats = LaneStats::default();
        for (ns, bytes) in [(1000u64, 64u64), (2000, 64)] {
            stats.record_operation(ns, bytes, 100);
        }
        assert_eq!(stats.operations, 2);
        assert_eq!(stats.total_time_ns, 3000);
        assert_eq!(stats.avg_time_ns, 1500);
        assert_eq!(stats.bytes_processed, 128);
    }

    /// A single transition must bump the counter, switch the current lane,
    /// and leave one entry in the history.
    #[test]
    fn test_telemetry_transitions() {
        let mut telemetry = LaneTelemetry::new(PrecisionLane::Bit5);
        telemetry.record_transition(
            PrecisionLane::Bit5,
            PrecisionLane::Bit7,
            TransitionReason::Novelty,
        );
        assert_eq!(telemetry.transitions, 1);
        assert_eq!(telemetry.current_lane, PrecisionLane::Bit7);
        assert_eq!(telemetry.transition_history().len(), 1);
    }

    /// Operation counts of 30/50/20 across three lanes should yield a
    /// 30%/50%/20% distribution.
    #[test]
    fn test_lane_distribution() {
        let mut telemetry = LaneTelemetry::new(PrecisionLane::Bit5);
        // (lane, op count, per-op nanos, bytes, active-set size)
        let workload = [
            (PrecisionLane::Bit3, 30u32, 100u64, 8u64, 10usize),
            (PrecisionLane::Bit5, 50, 200, 16, 50),
            (PrecisionLane::Bit7, 20, 500, 32, 100),
        ];
        for (lane, ops, ns, bytes, active) in workload {
            telemetry.current_lane = lane;
            for _ in 0..ops {
                telemetry.record_operation(Duration::from_nanos(ns), bytes, active);
            }
        }
        let distribution = telemetry.lane_distribution();
        for (lane, expected) in [
            (PrecisionLane::Bit3, 30.0f32),
            (PrecisionLane::Bit5, 50.0),
            (PrecisionLane::Bit7, 20.0),
        ] {
            assert!((distribution[&lane] - expected).abs() < 0.1);
        }
    }
}