Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,130 @@
//! Binary Quantization - 32x Memory Compression
use heapless::Vec as HVec;
pub const MAX_BINARY_SIZE: usize = 64;
/// Binary quantized vector - 1 bit per dimension
///
/// Bits are packed LSB-first into `data`. `dim` is the logical dimension
/// count (the last byte may be only partially used), and `threshold`
/// records the i8 cut-off used during quantization.
#[derive(Debug, Clone)]
pub struct BinaryVector<const N: usize> {
    // Packed bits; heapless buffer with a capacity of N bytes.
    pub data: HVec<u8, N>,
    // Number of logical dimensions represented.
    pub dim: usize,
    // Source values >= threshold were mapped to bit 1.
    pub threshold: i8,
}
impl<const N: usize> BinaryVector<N> {
pub fn from_i8(values: &[i8], threshold: i8) -> crate::Result<Self> {
let dim = values.len();
let num_bytes = (dim + 7) / 8;
if num_bytes > N {
return Err(crate::Error::BufferOverflow);
}
let mut data = HVec::new();
for chunk_idx in 0..num_bytes {
let mut byte = 0u8;
for bit_idx in 0..8 {
let val_idx = chunk_idx * 8 + bit_idx;
if val_idx < dim && values[val_idx] >= threshold {
byte |= 1 << bit_idx;
}
}
data.push(byte).map_err(|_| crate::Error::BufferOverflow)?;
}
Ok(Self { data, dim, threshold })
}
pub fn num_bytes(&self) -> usize { self.data.len() }
pub fn compression_ratio(&self) -> f32 { self.dim as f32 / self.data.len() as f32 }
}
/// Binary embedding table (32x smaller than INT8)
///
/// NOTE(review): the `VOCAB` and `DIM_BYTES` const parameters are not used
/// to size the buffer — capacity is hard-coded to 32 KiB; confirm whether
/// they were intended to drive the allocation.
pub struct BinaryEmbedding<const VOCAB: usize, const DIM_BYTES: usize> {
    // Row-major packed bits, fixed 32 KiB capacity.
    data: HVec<u8, { 32 * 1024 }>,
    // Number of valid rows (token ids).
    vocab_size: usize,
    // Logical embedding dimension in bits.
    dim: usize,
    // Row stride in bytes: ceil(dim / 8).
    bytes_per_embed: usize,
}
impl<const VOCAB: usize, const DIM_BYTES: usize> BinaryEmbedding<VOCAB, DIM_BYTES> {
pub fn random(vocab_size: usize, dim: usize, seed: u32) -> crate::Result<Self> {
let bytes_per_embed = (dim + 7) / 8;
let total_bytes = vocab_size * bytes_per_embed;
let mut data = HVec::new();
let mut rng_state = seed;
for _ in 0..total_bytes {
rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
let byte = ((rng_state >> 16) & 0xFF) as u8;
data.push(byte).map_err(|_| crate::Error::BufferOverflow)?;
}
Ok(Self { data, vocab_size, dim, bytes_per_embed })
}
pub fn lookup(&self, token_id: u16, output: &mut [u8]) -> crate::Result<()> {
let id = token_id as usize;
if id >= self.vocab_size {
return Err(crate::Error::InvalidModel("Token ID out of range"));
}
let start = id * self.bytes_per_embed;
let end = start + self.bytes_per_embed;
if output.len() < self.bytes_per_embed {
return Err(crate::Error::BufferOverflow);
}
output[..self.bytes_per_embed].copy_from_slice(&self.data[start..end]);
Ok(())
}
pub fn memory_size(&self) -> usize { self.data.len() }
}
/// Hamming distance between packed binary vectors.
///
/// XORs corresponding bytes (differing bits become 1) and counts the set
/// bits. Panics if `b` is shorter than `a`, matching the original indexing.
#[inline]
pub fn hamming_distance(a: &[u8], b: &[u8]) -> u32 {
    // `count_ones` lowers to a native popcount (or LLVM's table-free
    // fallback), replacing the hand-unrolled 4-wide lookup loop.
    (0..a.len()).map(|i| (a[i] ^ b[i]).count_ones()).sum()
}
/// Normalized similarity in [0, 1]: 1.0 means identical bit patterns.
///
/// Fixed: returns 1.0 for empty inputs (two empty vectors are trivially
/// identical); the original divided by a zero bit count and produced NaN.
#[inline]
pub fn hamming_similarity(a: &[u8], b: &[u8]) -> f32 {
    if a.is_empty() {
        return 1.0;
    }
    let total_bits = (a.len() * 8) as f32;
    1.0 - (hamming_distance(a, b) as f32 / total_bits)
}
/// Count the set bits in a byte.
#[inline]
pub fn popcount8(x: u8) -> u32 {
    // Equivalent to the original 256-entry lookup table, but lets the
    // compiler emit a hardware popcount where the target has one.
    x.count_ones()
}
/// XNOR-popcount for binary neural network inference.
///
/// XNOR marks matching bits; the result is `matching - mismatching`,
/// i.e. `2 * matching - total_bits`. `total_bits` is derived from `a`
/// while the pairwise walk is `zip`-truncated, as in the original.
#[inline]
pub fn xnor_popcount(a: &[u8], b: &[u8]) -> i32 {
    let total_bits = (a.len() * 8) as i32;
    let matching: i32 = a
        .iter()
        .zip(b)
        .map(|(&x, &y)| (!(x ^ y)).count_ones() as i32)
        .sum();
    2 * matching - total_bits
}

View File

@@ -0,0 +1,124 @@
//! Lookup Tables for Fast Fixed-Point Operations
/// Softmax lookup table
///
/// Compile-time-built fixed-point approximation of exp(x) for x in
/// [-255, 0], used to run softmax without floating point.
pub struct SoftmaxLUT {
    // exp_table[i] approximates exp for input (i - 255), clamped to [1, 255]
    // so every probability stays strictly positive.
    exp_table: [u8; 256],
    // Fixed-point input scale hint for callers — TODO confirm: nothing in
    // this file reads it.
    pub input_scale: i32,
}
impl SoftmaxLUT {
    /// Build the table at compile time.
    ///
    /// The entry for input x = i - 255 is the linear ramp 255 + x clamped to
    /// [1, 255] — a coarse piecewise-linear stand-in for a scaled exp; the
    /// floor of 1 keeps every output non-zero.
    pub const fn new() -> Self {
        let mut exp_table = [0u8; 256];
        let mut i = 0;
        while i < 256 {
            // x runs over [-255, 0] as i runs over [0, 255].
            let x_scaled = i as i32 - 255;
            let mut exp_approx = 255 + x_scaled;
            if exp_approx < 1 { exp_approx = 1; }
            if exp_approx > 255 { exp_approx = 255; }
            exp_table[i] = exp_approx as u8;
            i += 1;
        }
        Self { exp_table, input_scale: 32 }
    }
    /// Table lookup for the exp approximation; input clamped to [-255, 0].
    #[inline]
    pub fn exp(&self, x: i32) -> u8 {
        let x_clamped = x.max(-255).min(0);
        self.exp_table[(x_clamped + 255) as usize]
    }
    /// Fixed-point softmax into `output` (values scaled so they sum to ~256).
    ///
    /// Subtracts the max logit first for numerical stability. If `output`
    /// and `logits` lengths differ, the zip silently truncates; the final
    /// normalization pass covers all of `output` — presumably callers pass
    /// equal lengths; TODO confirm.
    pub fn softmax(&self, logits: &[i32], output: &mut [u16]) {
        if logits.is_empty() { return; }
        let max_logit = logits.iter().cloned().max().unwrap_or(0);
        let mut sum: u32 = 0;
        for (&logit, out) in logits.iter().zip(output.iter_mut()) {
            let exp_val = self.exp(logit - max_logit) as u16;
            *out = exp_val;
            sum += exp_val as u32;
        }
        // Normalize to 8-bit fixed point: outputs sum to roughly 256.
        if sum > 0 {
            for out in output.iter_mut() {
                *out = ((*out as u32 * 256) / sum) as u16;
            }
        }
    }
    /// In-place variant: replaces logits with ~256-scaled probabilities.
    pub fn softmax_inplace(&self, logits: &mut [i32]) {
        if logits.is_empty() { return; }
        let max = logits.iter().cloned().max().unwrap_or(0);
        let mut sum: i32 = 0;
        for logit in logits.iter_mut() {
            // Clamp below at -255 so the table index stays in range.
            let x = (*logit - max).max(-255);
            *logit = self.exp_table[(x + 255) as usize] as i32;
            sum += *logit;
        }
        if sum > 0 {
            for logit in logits.iter_mut() {
                // << 8 then divide: same 256-scale normalization as `softmax`.
                *logit = (*logit << 8) / sum;
            }
        }
    }
}
impl Default for SoftmaxLUT {
fn default() -> Self { Self::new() }
}
/// Exponential lookup table
///
/// 256-entry table of a quadratic fixed-point exp approximation, built at
/// compile time (see `new` for the exact polynomial).
pub struct ExpLUT {
    // table[i] = 256 + 4*i + quadratic term, clamped to u16 (see `new`).
    table: [u16; 256],
}
impl ExpLUT {
    /// Build the table at compile time from a quadratic polynomial in fixed
    /// point: value = 256 + x_scaled + (x_scaled^2 >> 9) / 2, x_scaled = 4*i.
    ///
    /// NOTE(review): the shape suggests a Taylor expansion of exp with input
    /// scale 1/64 and output scale 256, but the quadratic coefficient is
    /// half of what 256*exp(i/64) would need — confirm the intended scale
    /// against callers.
    pub const fn new() -> Self {
        let mut table = [0u16; 256];
        let mut i = 0;
        while i < 256 {
            let x = i as i32;
            let x_scaled = x * 256 / 64;
            let x2 = (x_scaled * x_scaled) >> 9;
            let mut exp_val = 256 + x_scaled + (x2 >> 1);
            // Defensive clamp; with i <= 255 the value peaks at 2292.
            if exp_val > 65535 { exp_val = 65535; }
            table[i] = exp_val as u16;
            i += 1;
        }
        Self { table }
    }
    /// Direct table lookup.
    #[inline]
    pub fn exp(&self, x: u8) -> u16 { self.table[x as usize] }
}
/// Distance lookup table for L2 distance
///
/// NOTE(review): the `SIZE` parameter is unused — the table is fixed at 512
/// entries, covering every possible i8 difference — confirm whether it can
/// be dropped.
pub struct DistanceLUT<const SIZE: usize> {
    // sq_diff_table[d + 256] = min(d^2, 65535) for d in [-256, 255].
    sq_diff_table: [u16; 512],
}
impl<const SIZE: usize> DistanceLUT<SIZE> {
    /// Precompute min(diff^2, 65535) for every difference in [-256, 255].
    pub const fn new() -> Self {
        let mut table = [0u16; 512];
        let mut idx = 0usize;
        while idx < 512 {
            let delta = idx as i32 - 256;
            let squared = delta * delta;
            table[idx as usize] = if squared > 65535 { 65535 } else { squared as u16 };
            idx += 1;
        }
        Self { sq_diff_table: table }
    }
    /// Table-driven (a - b)^2 for two i8 values.
    #[inline]
    pub fn squared_diff(&self, a: i8, b: i8) -> u16 {
        // Difference lies in [-255, 255], so the +256 offset is in [1, 511].
        let offset = (a as i32 - b as i32 + 256) as usize;
        self.sq_diff_table[offset]
    }
    /// Sum of per-element squared differences over the zipped slices.
    pub fn l2_squared(&self, a: &[i8], b: &[i8]) -> u32 {
        let mut acc = 0u32;
        for (&x, &y) in a.iter().zip(b.iter()) {
            acc += self.squared_diff(x, y) as u32;
        }
        acc
    }
}
// Shared, compile-time-initialized tables; on embedded targets statics like
// these typically live in read-only memory rather than RAM — TODO confirm
// for the actual link script.
pub static SOFTMAX_LUT: SoftmaxLUT = SoftmaxLUT::new();
pub static EXP_LUT: ExpLUT = ExpLUT::new();
pub static DISTANCE_LUT: DistanceLUT<256> = DistanceLUT::new();

View File

@@ -0,0 +1,113 @@
//! MicroLoRA - Tiny Low-Rank Adaptation for ESP32
use heapless::Vec as HVec;
use crate::QuantParams;
pub const MAX_LORA_RANK: usize = 2;
pub const MAX_LORA_DIM: usize = 64;
/// Configuration for a single low-rank adapter.
#[derive(Debug, Clone, Copy)]
pub struct LoRAConfig {
    // Low-rank dimension r (at most MAX_LORA_RANK).
    pub rank: usize,
    // Feature dimension the adapter acts on (at most MAX_LORA_DIM).
    pub dim: usize,
    // Fixed-point output scale used by `MicroLoRA::apply` (result >> 8).
    pub scale: i8,
    // Presumably marks the adapter non-trainable — TODO confirm: nothing in
    // this file reads the flag.
    pub frozen: bool,
}
impl Default for LoRAConfig {
fn default() -> Self {
Self { rank: 1, dim: 32, scale: 8, frozen: true }
}
}
/// Low-rank adapter: delta(x) = B(Ax) in i8 fixed point, with an i32
/// scratch buffer reused across calls (no per-call allocation).
pub struct MicroLoRA {
    // A matrix, dim x rank, indexed a_weights[d * rank + r].
    a_weights: HVec<i8, { MAX_LORA_DIM * MAX_LORA_RANK }>,
    // B matrix, rank x dim, indexed b_weights[r * dim + d].
    b_weights: HVec<i8, { MAX_LORA_RANK * MAX_LORA_DIM }>,
    config: LoRAConfig,
    // Scratch for the down-projected vector computed in `apply`.
    intermediate: [i32; MAX_LORA_RANK],
}
impl MicroLoRA {
pub fn new(config: LoRAConfig, seed: u32) -> crate::Result<Self> {
if config.rank > MAX_LORA_RANK || config.dim > MAX_LORA_DIM {
return Err(crate::Error::InvalidModel("LoRA dimensions too large"));
}
let mut a_weights = HVec::new();
let mut b_weights = HVec::new();
let mut rng = seed;
for _ in 0..(config.dim * config.rank) {
rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
a_weights.push((((rng >> 16) & 0x3F) as i16 - 32) as i8)
.map_err(|_| crate::Error::BufferOverflow)?;
}
for _ in 0..(config.rank * config.dim) {
b_weights.push(0).map_err(|_| crate::Error::BufferOverflow)?;
}
Ok(Self { a_weights, b_weights, config, intermediate: [0; MAX_LORA_RANK] })
}
pub fn from_weights(config: LoRAConfig, a: &[i8], b: &[i8]) -> crate::Result<Self> {
let mut a_vec = HVec::new();
let mut b_vec = HVec::new();
for &w in a { a_vec.push(w).map_err(|_| crate::Error::BufferOverflow)?; }
for &w in b { b_vec.push(w).map_err(|_| crate::Error::BufferOverflow)?; }
Ok(Self { a_weights: a_vec, b_weights: b_vec, config, intermediate: [0; MAX_LORA_RANK] })
}
#[inline]
pub fn apply(&mut self, input: &[i8], output: &mut [i32]) {
let (dim, rank, scale) = (self.config.dim, self.config.rank, self.config.scale as i32);
for r in 0..rank {
let mut sum: i32 = 0;
for d in 0..dim {
sum += input[d] as i32 * self.a_weights[d * rank + r] as i32;
}
self.intermediate[r] = sum >> 4;
}
for d in 0..dim {
let mut sum: i32 = 0;
for r in 0..rank {
sum += self.intermediate[r] * self.b_weights[r * dim + d] as i32;
}
output[d] += (sum * scale) >> 8;
}
}
pub fn memory_size(&self) -> usize { self.a_weights.len() + self.b_weights.len() }
}
/// Per-layer collection of optional adapters.
pub struct LoRAStack<const NUM_LAYERS: usize> {
    // One optional adapter slot per layer.
    adapters: [Option<MicroLoRA>; NUM_LAYERS],
    // Number of occupied slots.
    active_count: usize,
}
impl<const NUM_LAYERS: usize> LoRAStack<NUM_LAYERS> {
    /// Empty stack with no adapters attached.
    pub fn new() -> Self {
        Self { adapters: core::array::from_fn(|_| None), active_count: 0 }
    }
    /// Attach (or replace) the adapter for `layer`.
    ///
    /// Fixed: `active_count` now only grows when the slot was previously
    /// empty; the original incremented on every call, so replacing an
    /// existing adapter inflated the count.
    pub fn add_adapter(&mut self, layer: usize, adapter: MicroLoRA) -> crate::Result<()> {
        if layer >= NUM_LAYERS { return Err(crate::Error::InvalidModel("Layer out of range")); }
        if self.adapters[layer].replace(adapter).is_none() {
            self.active_count += 1;
        }
        Ok(())
    }
    /// Mutable access to the adapter for `layer`, if present.
    pub fn get(&mut self, layer: usize) -> Option<&mut MicroLoRA> {
        self.adapters.get_mut(layer).and_then(|a| a.as_mut())
    }
    /// Sum of `memory_size()` over all attached adapters.
    pub fn total_memory(&self) -> usize {
        self.adapters.iter().filter_map(|a| a.as_ref()).map(|a| a.memory_size()).sum()
    }
}
impl<const N: usize> Default for LoRAStack<N> {
fn default() -> Self { Self::new() }
}

View File

@@ -0,0 +1,22 @@
//! Advanced Optimizations for ESP32
//!
//! - Binary quantization (32x compression)
//! - Product quantization (8-32x compression)
//! - Lookup tables (fixed-point softmax)
//! - MicroLoRA (on-device adaptation)
//! - Sparse attention patterns
//! - MinCut-inspired pruning
// One submodule per optimization technique.
pub mod binary_quant;
pub mod product_quant;
pub mod lookup_tables;
pub mod micro_lora;
pub mod sparse_attention;
pub mod pruning;
// Flat re-exports so callers don't have to name the submodules.
pub use binary_quant::{BinaryVector, BinaryEmbedding, hamming_distance, hamming_similarity, popcount8};
pub use product_quant::{ProductQuantizer, PQCode, PQConfig, PQDistanceTable};
pub use lookup_tables::{SoftmaxLUT, ExpLUT, DistanceLUT, SOFTMAX_LUT, EXP_LUT, DISTANCE_LUT};
pub use micro_lora::{MicroLoRA, LoRAConfig, LoRAStack};
pub use sparse_attention::{SparseAttention, AttentionPattern, AttentionPatternCache};
pub use pruning::{LayerPruner, PruningConfig, PruningMask, PruningStats, MinCutScorer};

View File

@@ -0,0 +1,149 @@
//! Product Quantization - 8-32x Memory Compression
use heapless::Vec as HVec;
pub const MAX_SUBQUANTIZERS: usize = 8;
pub const MAX_CODEBOOK_SIZE: usize = 16;
/// Product-quantization geometry: `dim` is split into `num_subquantizers`
/// contiguous sub-vectors of `subvec_dim` elements, each encoded against a
/// codebook of `codebook_size` centroids.
#[derive(Debug, Clone, Copy, Default)]
pub struct PQConfig {
    pub num_subquantizers: usize,
    pub codebook_size: usize,
    // Invariant (unchecked): subvec_dim * num_subquantizers == dim.
    pub subvec_dim: usize,
    pub dim: usize,
}
impl PQConfig {
    /// Convenience constructor: 16-entry codebooks (4-bit codes) with the
    /// dimension split evenly across subquantizers.
    ///
    /// NOTE(review): `dim / num_sub` panics when `num_sub == 0` and silently
    /// truncates when `dim` is not divisible by `num_sub` — presumably
    /// callers always pass an exact divisor; confirm upstream.
    pub fn new(dim: usize, num_sub: usize) -> Self {
        let subvec_dim = dim / num_sub;
        Self {
            dim,
            num_subquantizers: num_sub,
            codebook_size: 16,
            subvec_dim,
        }
    }
}
/// Encoded vector: one centroid index (0..codebook_size) per subquantizer.
#[derive(Debug, Clone)]
pub struct PQCode<const M: usize> {
    pub codes: HVec<u8, M>,
}
impl<const M: usize> PQCode<M> {
pub fn from_codes(codes: &[u8]) -> crate::Result<Self> {
let mut code_vec = HVec::new();
for &c in codes {
code_vec.push(c).map_err(|_| crate::Error::BufferOverflow)?;
}
Ok(Self { codes: code_vec })
}
#[inline]
pub fn get_code(&self, i: usize) -> u8 {
self.codes.get(i).copied().unwrap_or(0)
}
}
/// Codebook storage for product quantization.
///
/// NOTE(review): capacity is hard-coded to 8*16*8 = 1024 i8 entries; the
/// M/K/D const parameters do not size the buffer — confirm they are kept
/// consistent with `config` by callers.
pub struct ProductQuantizer<const M: usize, const K: usize, const D: usize> {
    // Layout: codebooks[(m * codebook_size + k) * subvec_dim + d],
    // see `get_centroid`.
    codebooks: HVec<i8, { 8 * 16 * 8 }>,
    config: PQConfig,
}
impl<const M: usize, const K: usize, const D: usize> ProductQuantizer<M, K, D> {
    /// Seeded-random codebooks (LCG), centroid values spanning the full i8
    /// range.
    ///
    /// NOTE(review): `config` is only validated indirectly — an oversized
    /// total fails with `BufferOverflow` on push; no check that it matches
    /// the M/K/D const parameters.
    pub fn random(config: PQConfig, seed: u32) -> crate::Result<Self> {
        let total = config.num_subquantizers * config.codebook_size * config.subvec_dim;
        let mut codebooks = HVec::new();
        let mut rng = seed;
        for _ in 0..total {
            rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
            // Map the 0..=255 byte to the full i8 range -128..=127.
            let val = (((rng >> 16) & 0xFF) as i16 - 128) as i8;
            codebooks.push(val).map_err(|_| crate::Error::BufferOverflow)?;
        }
        Ok(Self { codebooks, config })
    }
    /// Slice of centroid `k` of subquantizer `m` (`subvec_dim` values).
    /// Panics if the indices exceed the stored codebooks.
    #[inline]
    fn get_centroid(&self, m: usize, k: usize) -> &[i8] {
        let d = self.config.subvec_dim;
        let kk = self.config.codebook_size;
        let start = m * kk * d + k * d;
        &self.codebooks[start..start + d]
    }
    /// Encode `vector` as the nearest centroid (L2) per subquantizer.
    pub fn encode(&self, vector: &[i8]) -> crate::Result<PQCode<M>> {
        if vector.len() != self.config.dim {
            return Err(crate::Error::InvalidModel("Dimension mismatch"));
        }
        let mut codes = HVec::new();
        let d = self.config.subvec_dim;
        for m in 0..self.config.num_subquantizers {
            let subvec = &vector[m * d..(m + 1) * d];
            // Linear nearest-centroid search over the (small) codebook.
            let mut best_code = 0u8;
            let mut best_dist = i32::MAX;
            for k in 0..self.config.codebook_size {
                let dist = Self::l2_squared(subvec, self.get_centroid(m, k));
                if dist < best_dist {
                    best_dist = dist;
                    best_code = k as u8;
                }
            }
            codes.push(best_code).map_err(|_| crate::Error::BufferOverflow)?;
        }
        Ok(PQCode { codes })
    }
    /// L2 distance between a raw query and an encoded vector, measured
    /// against the code's centroids (asymmetric distance computation).
    ///
    /// NOTE(review): unlike `encode`, the query length is not validated —
    /// a short query panics on slicing; confirm callers guarantee length.
    pub fn asymmetric_distance(&self, query: &[i8], code: &PQCode<M>) -> i32 {
        let d = self.config.subvec_dim;
        let mut total: i32 = 0;
        for m in 0..self.config.num_subquantizers {
            let query_sub = &query[m * d..(m + 1) * d];
            let k = code.get_code(m) as usize;
            total += Self::l2_squared(query_sub, self.get_centroid(m, k));
        }
        total
    }
    /// Precompute all query-to-centroid distances for fast repeated scans.
    ///
    /// NOTE(review): `PQDistanceTable` holds at most 128 entries indexed by
    /// m * K + k — panics if the config/K combination exceeds that.
    pub fn build_distance_table(&self, query: &[i8]) -> PQDistanceTable<M, K> {
        let mut table = PQDistanceTable::new();
        let d = self.config.subvec_dim;
        for m in 0..self.config.num_subquantizers {
            let query_sub = &query[m * d..(m + 1) * d];
            for k in 0..self.config.codebook_size {
                let dist = Self::l2_squared(query_sub, self.get_centroid(m, k));
                table.set(m, k, dist);
            }
        }
        table
    }
    /// Integer squared L2 distance over the zipped elements.
    #[inline]
    fn l2_squared(a: &[i8], b: &[i8]) -> i32 {
        a.iter().zip(b.iter()).map(|(&x, &y)| {
            let diff = x as i32 - y as i32;
            diff * diff
        }).sum()
    }
    /// Input i8 elements per stored code byte.
    pub fn compression_ratio(&self) -> f32 {
        self.config.dim as f32 / self.config.num_subquantizers as f32
    }
}
/// Precomputed query-to-centroid distances: M subquantizers x K centroids.
///
/// NOTE(review): backing storage is fixed at 128 entries regardless of M
/// and K; `get`/`set` index with m * K + k and panic if M * K > 128.
pub struct PQDistanceTable<const M: usize, const K: usize> {
    distances: [i32; 128],
}
impl<const M: usize, const K: usize> PQDistanceTable<M, K> {
    /// Zero-filled table.
    pub fn new() -> Self { Self { distances: [0; 128] } }
    /// Distance for subquantizer `m`, centroid `k`; panics if m * K + k >= 128.
    #[inline]
    pub fn get(&self, m: usize, k: usize) -> i32 { self.distances[m * K + k] }
    /// Store the distance for subquantizer `m`, centroid `k`.
    #[inline]
    pub fn set(&mut self, m: usize, k: usize, dist: i32) { self.distances[m * K + k] = dist; }
}
impl<const M: usize, const K: usize> Default for PQDistanceTable<M, K> {
fn default() -> Self { Self::new() }
}

View File

@@ -0,0 +1,167 @@
//! MinCut-Inspired Layer Pruning
use heapless::Vec as HVec;
pub const MAX_PRUNING_UNITS: usize = 64;
pub const MAX_MASK_WORDS: usize = 64;
/// Parameters controlling how aggressively a layer is pruned.
#[derive(Debug, Clone, Copy)]
pub struct PruningConfig {
    // Fraction of units to remove, in [0, 1].
    pub target_sparsity: f32,
    // Presumably a minimum importance floor — TODO confirm: not read
    // anywhere in this file.
    pub importance_threshold: i8,
    // Presumably selects structured (whole-unit) pruning — TODO confirm:
    // not read anywhere in this file.
    pub structured: bool,
}
impl Default for PruningConfig {
fn default() -> Self {
Self { target_sparsity: 0.5, importance_threshold: 8, structured: true }
}
}
/// Keep/prune bitmask: a set bit means the unit is kept.
///
/// NOTE(review): the `N` parameter is unused — capacity comes from
/// MAX_MASK_WORDS; confirm whether N was meant to size the mask.
#[derive(Debug, Clone)]
pub struct PruningMask<const N: usize> {
    // One bit per unit, packed LSB-first into u32 words.
    pub mask: HVec<u32, MAX_MASK_WORDS>,
    // Number of valid bits.
    pub size: usize,
    // How many bits have been cleared via `prune`.
    pub pruned_count: usize,
}
impl<const N: usize> PruningMask<N> {
    /// All-kept mask for `size` units; unused high bits in the last word
    /// stay 0 so popcounts over the words remain accurate.
    pub fn new(size: usize) -> crate::Result<Self> {
        let num_words = (size + 31) / 32;
        let mut mask = HVec::new();
        for i in 0..num_words {
            // Last partial word: set only the valid low bits.
            let bits = if i == num_words - 1 && size % 32 != 0 {
                (1u32 << (size % 32)) - 1
            } else {
                u32::MAX
            };
            mask.push(bits).map_err(|_| crate::Error::BufferOverflow)?;
        }
        Ok(Self { mask, size, pruned_count: 0 })
    }
    /// True if the unit's bit is still set (out-of-range reads as pruned).
    #[inline]
    pub fn is_kept(&self, idx: usize) -> bool {
        let word = idx / 32;
        let bit = idx % 32;
        (self.mask.get(word).copied().unwrap_or(0) >> bit) & 1 == 1
    }
    /// Clear a unit's bit. Double-prunes and out-of-range indices are
    /// no-ops, so `pruned_count` stays accurate.
    pub fn prune(&mut self, idx: usize) {
        if idx < self.size && self.is_kept(idx) {
            let word = idx / 32;
            let bit = idx % 32;
            if let Some(w) = self.mask.get_mut(word) {
                *w &= !(1 << bit);
                self.pruned_count += 1;
            }
        }
    }
    /// Fraction of units pruned so far.
    ///
    /// Fixed: returns 0.0 for an empty mask; the original divided by zero
    /// and produced NaN when `size == 0`.
    pub fn sparsity(&self) -> f32 {
        if self.size == 0 {
            return 0.0;
        }
        self.pruned_count as f32 / self.size as f32
    }
}
/// Magnitude-based pruner for a single layer.
pub struct LayerPruner {
    config: PruningConfig,
    // |weight| per unit, capped at MAX_PRUNING_UNITS entries.
    importance_scores: HVec<i16, MAX_PRUNING_UNITS>,
}
impl LayerPruner {
    /// Pruner with no importance scores computed yet.
    pub fn new(config: PruningConfig) -> Self {
        Self { config, importance_scores: HVec::new() }
    }
    /// Importance = |weight| (magnitude pruning), computed over at most the
    /// first MAX_PRUNING_UNITS weights.
    pub fn compute_magnitude_importance(&mut self, weights: &[i8]) {
        self.importance_scores.clear();
        for &w in weights.iter().take(MAX_PRUNING_UNITS) {
            let _ = self.importance_scores.push((w as i16).abs());
        }
    }
    /// Build a mask with the lowest-importance units pruned so as to
    /// approach the configured target sparsity.
    pub fn create_mask<const N: usize>(&self, size: usize) -> crate::Result<PruningMask<N>> {
        let mut mask = PruningMask::new(size)?;
        let threshold = self.compute_threshold(size);
        for (idx, &score) in self.importance_scores.iter().enumerate() {
            if score < threshold { mask.prune(idx); }
        }
        Ok(mask)
    }
    /// Threshold = the k-th smallest importance score, where k is the
    /// target number of pruned units.
    fn compute_threshold(&self, size: usize) -> i16 {
        let target = (size as f32 * self.config.target_sparsity) as usize;
        if target == 0 || self.importance_scores.is_empty() { return 0; }
        let mut sorted: HVec<i16, MAX_PRUNING_UNITS> = self.importance_scores.clone();
        // Fixed: core's in-place slice sort (no allocator needed, still
        // no_std-friendly) replaces the original O(n^2) bubble sort; the
        // resulting order — and hence the selected k-th value — is identical.
        sorted.sort_unstable();
        sorted.get(target.min(sorted.len() - 1)).copied().unwrap_or(0)
    }
    /// Zero out every weight whose mask bit was cleared.
    pub fn apply_mask<const N: usize>(&self, weights: &mut [i8], mask: &PruningMask<N>) {
        for (idx, weight) in weights.iter_mut().enumerate() {
            if !mask.is_kept(idx) { *weight = 0; }
        }
    }
}
/// Summary of a pruning pass.
#[derive(Debug, Clone)]
pub struct PruningStats {
    pub total_weights: usize,
    pub pruned_weights: usize,
    // pruned_weights / total_weights.
    pub sparsity: f32,
    // Presumably bytes saved — TODO confirm units; nothing in this file
    // fills this struct in.
    pub memory_saved: usize,
}
/// Scores edge importance with a max-flow/min-cut intuition: an edge only
/// matters up to the weaker (bottleneck) of its endpoints' total flows.
pub struct MinCutScorer {
    // Per input unit: sum of |w| over its outgoing edges.
    input_flow: HVec<i32, MAX_PRUNING_UNITS>,
    // Per output unit: sum of |w| over its incoming edges.
    output_flow: HVec<i32, MAX_PRUNING_UNITS>,
}
impl MinCutScorer {
    /// Scorer with empty flow accumulators.
    pub fn new() -> Self {
        Self { input_flow: HVec::new(), output_flow: HVec::new() }
    }
    /// Edge importance for a row-major `output_dim x input_dim` weight
    /// matrix: importance(edge) = (|w| * min(input_flow, output_flow)) >> 10.
    ///
    /// NOTE(review): the result holds at most MAX_PRUNING_UNITS (64)
    /// entries, so for matrices with more edges only the first edges in
    /// row-major order get scored — confirm this truncation is intended.
    pub fn compute_edge_importance(&mut self, weights: &[i8], input_dim: usize, output_dim: usize)
        -> HVec<i16, MAX_PRUNING_UNITS>
    {
        self.input_flow.clear();
        self.output_flow.clear();
        // Column sums: total absolute weight leaving each input unit.
        for in_idx in 0..input_dim.min(MAX_PRUNING_UNITS) {
            let flow: i32 = (0..output_dim).map(|out_idx| {
                let w_idx = out_idx * input_dim + in_idx;
                if w_idx < weights.len() { (weights[w_idx] as i32).abs() } else { 0 }
            }).sum();
            let _ = self.input_flow.push(flow);
        }
        // Row sums: total absolute weight entering each output unit.
        for out_idx in 0..output_dim.min(MAX_PRUNING_UNITS) {
            let flow: i32 = (0..input_dim).map(|in_idx| {
                let w_idx = out_idx * input_dim + in_idx;
                if w_idx < weights.len() { (weights[w_idx] as i32).abs() } else { 0 }
            }).sum();
            let _ = self.output_flow.push(flow);
        }
        // Score each edge by its magnitude times the bottleneck flow.
        let mut importance: HVec<i16, MAX_PRUNING_UNITS> = HVec::new();
        for out_idx in 0..output_dim.min(self.output_flow.len()) {
            for in_idx in 0..input_dim.min(self.input_flow.len()) {
                let w_idx = out_idx * input_dim + in_idx;
                if w_idx < weights.len() && importance.len() < MAX_PRUNING_UNITS {
                    let w = (weights[w_idx] as i32).abs();
                    let bottleneck = self.input_flow[in_idx].min(self.output_flow[out_idx]);
                    // >> 10 rescales the i32 product back into i16 range.
                    let _ = importance.push(((w * bottleneck) >> 10) as i16);
                }
            }
        }
        importance
    }
}
impl Default for MinCutScorer {
fn default() -> Self { Self::new() }
}

View File

@@ -0,0 +1,120 @@
//! Sparse Attention Patterns for ESP32
use heapless::Vec as HVec;
pub const MAX_SPARSE_SEQ: usize = 32;
/// Supported sparse-attention layouts. All are applied on top of a causal
/// (j <= i) constraint by `SparseAttention::build_mask`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum AttentionPattern {
    // Every causal pair attends.
    Full,
    // Attend to the last `window_size` positions.
    SlidingWindow { window_size: usize },
    // Attend to every stride-th position plus the immediately previous token.
    Strided { stride: usize },
    // Local window plus strided global positions.
    Longformer { window_size: usize, stride: usize },
    // Attend only within the same fixed-size block.
    BlockDiagonal { block_size: usize },
    // Local window plus a prefix of always-visible global tokens.
    BigBird { window_size: usize, global_tokens: usize },
}
impl Default for AttentionPattern {
fn default() -> Self { Self::SlidingWindow { window_size: 4 } }
}
/// Precomputed causal sparse-attention mask for up to MAX_SPARSE_SEQ tokens.
pub struct SparseAttention {
    pattern: AttentionPattern,
    // Row i is a bitmask of key positions j that query i may attend to.
    mask_data: HVec<u32, MAX_SPARSE_SEQ>,
    seq_len: usize,
}
impl SparseAttention {
pub fn new(pattern: AttentionPattern, seq_len: usize) -> crate::Result<Self> {
if seq_len > MAX_SPARSE_SEQ { return Err(crate::Error::BufferOverflow); }
let mut sa = Self { pattern, mask_data: HVec::new(), seq_len };
sa.build_mask()?;
Ok(sa)
}
fn build_mask(&mut self) -> crate::Result<()> {
self.mask_data.clear();
for i in 0..self.seq_len {
let mut row_mask: u32 = 0;
for j in 0..self.seq_len {
if j <= i && self.should_attend(i, j) {
row_mask |= 1 << j;
}
}
self.mask_data.push(row_mask).map_err(|_| crate::Error::BufferOverflow)?;
}
Ok(())
}
fn should_attend(&self, i: usize, j: usize) -> bool {
match self.pattern {
AttentionPattern::Full => true,
AttentionPattern::SlidingWindow { window_size } => i.saturating_sub(window_size) <= j,
AttentionPattern::Strided { stride } => j % stride == 0 || i.saturating_sub(1) <= j,
AttentionPattern::Longformer { window_size, stride } =>
i.saturating_sub(window_size) <= j || j % stride == 0,
AttentionPattern::BlockDiagonal { block_size } => i / block_size == j / block_size,
AttentionPattern::BigBird { window_size, global_tokens } =>
i.saturating_sub(window_size) <= j || j < global_tokens,
}
}
#[inline]
pub fn should_attend_at(&self, i: usize, j: usize) -> bool {
if i >= self.seq_len || j >= self.seq_len { return false; }
(self.mask_data[i] >> j) & 1 == 1
}
#[inline]
pub fn get_mask_row(&self, i: usize) -> u32 {
self.mask_data.get(i).copied().unwrap_or(0)
}
pub fn sparse_qk(&self, query: &[i8], keys: &[&[i8]], scores: &mut [i32], query_pos: usize) {
let mask = self.get_mask_row(query_pos);
for (j, key) in keys.iter().enumerate() {
if (mask >> j) & 1 == 1 {
scores[j] = query.iter().zip(key.iter()).map(|(&q, &k)| q as i32 * k as i32).sum();
} else {
scores[j] = i32::MIN;
}
}
}
pub fn active_positions(&self) -> usize {
self.mask_data.iter().map(|m| m.count_ones() as usize).sum()
}
pub fn sparsity_ratio(&self) -> f32 {
let full = self.seq_len * (self.seq_len + 1) / 2;
self.active_positions() as f32 / full as f32
}
}
/// Masks precomputed for the sequence-length buckets 8/16/24/32.
pub struct AttentionPatternCache {
    // Index b covers sequence lengths (8*b, 8*(b+1)]; None if construction failed.
    patterns: [Option<SparseAttention>; 4],
}
impl AttentionPatternCache {
    /// Precompute sliding-window masks for the four supported bucket sizes
    /// (8, 16, 24, 32); a failed construction leaves that slot `None`.
    pub fn new_sliding(window: usize) -> Self {
        let pattern = AttentionPattern::SlidingWindow { window_size: window };
        let mut patterns: [Option<SparseAttention>; 4] = [None, None, None, None];
        for (slot, &len) in patterns.iter_mut().zip([8usize, 16, 24, 32].iter()) {
            *slot = SparseAttention::new(pattern, len).ok();
        }
        Self { patterns }
    }
    /// Smallest cached mask that covers `seq_len`; `None` when `seq_len` is
    /// 0 or larger than 32.
    pub fn get(&self, seq_len: usize) -> Option<&SparseAttention> {
        if seq_len == 0 || seq_len > 32 {
            return None;
        }
        // Bucket 0 serves 1..=8, bucket 1 serves 9..=16, and so on.
        let bucket = (seq_len - 1) / 8;
        self.patterns[bucket].as_ref()
    }
}