wifi-densepose/vendor/ruvector/examples/ruvLLM/esp32/src/model.rs

//! Model definition and loading for ESP32
//!
//! Supports tiny transformer models with INT8 quantization.
use crate::quantized::{QuantParams, QuantizationType};
use heapless::Vec as HVec;
use serde::{Deserialize, Serialize};
/// Maximum number of transformer layers
pub const MAX_LAYERS: usize = 2;
/// Maximum embedding table size (vocab * embed_dim bytes)
pub const MAX_EMBEDDING_SIZE: usize = 32 * 1024; // 32KB
/// Maximum weight size per layer
pub const MAX_LAYER_SIZE: usize = 16 * 1024; // 16KB
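// Illustrative compile-time sanity check (added as an assumption, not original API):
// the largest variant config below uses a 256 x 64 INT8 embedding table (16 KiB),
// which must fit within MAX_EMBEDDING_SIZE.
const _: () = assert!(256 * 64 <= MAX_EMBEDDING_SIZE);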
/// Model configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelConfig {
/// Vocabulary size
pub vocab_size: usize,
/// Embedding dimension
pub embed_dim: usize,
/// Hidden dimension in FFN
pub hidden_dim: usize,
/// Number of transformer layers
pub num_layers: usize,
/// Number of attention heads
pub num_heads: usize,
/// Maximum sequence length
pub max_seq_len: usize,
/// Quantization type
pub quant_type: QuantizationType,
}
impl Default for ModelConfig {
fn default() -> Self {
// Tiny model suitable for ESP32
Self {
vocab_size: 256,
embed_dim: 32,
hidden_dim: 64,
num_layers: 1,
num_heads: 2,
max_seq_len: 16,
quant_type: QuantizationType::Int8,
}
}
}
impl ModelConfig {
/// Validate configuration fits ESP32 constraints
pub fn validate(&self, variant: crate::Esp32Variant) -> crate::Result<()> {
let model_size = self.estimate_size();
let max_ram = variant.max_model_ram();
if model_size > max_ram {
return Err(crate::Error::ModelTooLarge {
required: model_size,
available: max_ram,
});
}
if self.embed_dim % self.num_heads != 0 {
return Err(crate::Error::InvalidModel(
"embed_dim must be divisible by num_heads"
));
}
if self.num_layers > MAX_LAYERS {
return Err(crate::Error::InvalidModel("Too many layers"));
}
Ok(())
}
/// Estimate total model size in bytes
pub fn estimate_size(&self) -> usize {
        let bytes_per_weight = match self.quant_type {
            QuantizationType::Int8 => 1,
            QuantizationType::Int4 => 1,   // 2 weights packed per byte; see `divisor` below
            QuantizationType::Binary => 1, // 8 weights packed per byte; see `divisor` below
            QuantizationType::Fixed16 => 2,
        };
let divisor = match self.quant_type {
QuantizationType::Int4 => 2,
QuantizationType::Binary => 8,
_ => 1,
};
// Embedding table
let embed_size = (self.vocab_size * self.embed_dim * bytes_per_weight) / divisor;
        // Per-layer attention weights: Q, K, V, and output projections
        // (four embed_dim x embed_dim matrices, matching LayerWeights)
        let attn_size = 4 * self.embed_dim * self.embed_dim * bytes_per_weight / divisor;
        // FFN up, gate, and down projections
        let ffn_size = 3 * self.embed_dim * self.hidden_dim * bytes_per_weight / divisor;
        let layer_size = attn_size + ffn_size;
// Output projection
let output_size = (self.vocab_size * self.embed_dim * bytes_per_weight) / divisor;
embed_size + (layer_size * self.num_layers) + output_size
}
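    // Worked example (illustrative): for the default INT8 config
    // (vocab 256, embed 32, hidden 64, 1 layer) this evaluates to
    //   embedding 256*32 + attention 4*32*32 + FFN 3*32*64 + output 256*32
    //   = 8192 + 4096 + 6144 + 8192 = 26,624 bytes (~26 KiB).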
/// Get recommended config for variant
pub fn for_variant(variant: crate::Esp32Variant) -> Self {
match variant {
crate::Esp32Variant::Esp32 | crate::Esp32Variant::Esp32S3 => {
                // ~300KB available; use a larger model (still fits on the stack)
Self {
vocab_size: 256,
embed_dim: 64,
hidden_dim: 128,
num_layers: 2,
num_heads: 4,
max_seq_len: 32,
quant_type: QuantizationType::Int8,
}
}
crate::Esp32Variant::Esp32S2 => {
// ~120KB available, use smaller model
Self {
vocab_size: 128,
embed_dim: 32,
hidden_dim: 64,
num_layers: 1,
num_heads: 2,
max_seq_len: 16,
quant_type: QuantizationType::Int8,
}
}
crate::Esp32Variant::Esp32C3 | crate::Esp32Variant::Esp32C6 => {
// ~200KB available
Self {
vocab_size: 256,
embed_dim: 48,
hidden_dim: 96,
num_layers: 2,
num_heads: 3,
max_seq_len: 24,
quant_type: QuantizationType::Int8,
}
}
}
}
}
/// Layer weights for a single transformer layer
#[derive(Clone)]
pub struct LayerWeights {
/// Query projection weights [embed_dim, embed_dim]
pub wq: HVec<i8, MAX_LAYER_SIZE>,
/// Key projection weights
pub wk: HVec<i8, MAX_LAYER_SIZE>,
/// Value projection weights
pub wv: HVec<i8, MAX_LAYER_SIZE>,
/// Output projection weights
pub wo: HVec<i8, MAX_LAYER_SIZE>,
/// FFN up projection [embed_dim, hidden_dim]
pub w_up: HVec<i8, MAX_LAYER_SIZE>,
/// FFN gate projection
pub w_gate: HVec<i8, MAX_LAYER_SIZE>,
/// FFN down projection [hidden_dim, embed_dim]
pub w_down: HVec<i8, MAX_LAYER_SIZE>,
/// Quantization params
pub q_params: QuantParams,
pub k_params: QuantParams,
pub v_params: QuantParams,
pub o_params: QuantParams,
pub up_params: QuantParams,
pub gate_params: QuantParams,
pub down_params: QuantParams,
}
impl Default for LayerWeights {
fn default() -> Self {
Self {
wq: HVec::new(),
wk: HVec::new(),
wv: HVec::new(),
wo: HVec::new(),
w_up: HVec::new(),
w_gate: HVec::new(),
w_down: HVec::new(),
q_params: QuantParams::default(),
k_params: QuantParams::default(),
v_params: QuantParams::default(),
o_params: QuantParams::default(),
up_params: QuantParams::default(),
gate_params: QuantParams::default(),
down_params: QuantParams::default(),
}
}
}
impl LayerWeights {
/// Initialize with random weights (for testing)
pub fn random(config: &ModelConfig, seed: u32) -> crate::Result<Self> {
let mut layer = Self::default();
let embed_dim = config.embed_dim;
let hidden_dim = config.hidden_dim;
// Simple LCG random number generator
let mut rng_state = seed;
let mut next_rand = || {
rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
// Get value in range 0-127, then map to -64 to 63
(((rng_state >> 16) & 0x7F) as i16 - 64) as i8
};
// QKV projections [embed_dim, embed_dim]
let qkv_size = embed_dim * embed_dim;
for _ in 0..qkv_size {
layer.wq.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
layer.wk.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
layer.wv.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
layer.wo.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
}
// FFN projections
let up_size = embed_dim * hidden_dim;
for _ in 0..up_size {
layer.w_up.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
layer.w_gate.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
}
let down_size = hidden_dim * embed_dim;
for _ in 0..down_size {
layer.w_down.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
}
// Initialize quant params with reasonable defaults
let scale = 1.0 / 64.0; // For weights in range [-64, 63]
layer.q_params = QuantParams { scale, zero_point: 0.0, min_val: -1.0, max_val: 1.0 };
layer.k_params = layer.q_params;
layer.v_params = layer.q_params;
layer.o_params = layer.q_params;
layer.up_params = layer.q_params;
layer.gate_params = layer.q_params;
layer.down_params = layer.q_params;
Ok(layer)
}
/// Memory size of this layer
pub fn memory_size(&self) -> usize {
self.wq.len() + self.wk.len() + self.wv.len() + self.wo.len()
+ self.w_up.len() + self.w_gate.len() + self.w_down.len()
}
}
/// Complete tiny model
pub struct TinyModel {
/// Model configuration
pub config: ModelConfig,
/// Embedding table [vocab_size, embed_dim]
pub embedding_table: HVec<i8, MAX_EMBEDDING_SIZE>,
/// Transformer layers
pub layers: [LayerWeights; MAX_LAYERS],
/// Output projection [embed_dim, vocab_size]
pub output_proj: HVec<i8, MAX_EMBEDDING_SIZE>,
/// Input quantization params
pub input_params: QuantParams,
/// Output quantization params
pub output_params: QuantParams,
}
impl TinyModel {
/// Create a new model with random weights
pub fn new(config: ModelConfig) -> crate::Result<Self> {
config.validate(crate::Esp32Variant::Esp32)?;
let mut embedding_table = HVec::new();
let mut output_proj = HVec::new();
// Initialize embedding table
let embed_size = config.vocab_size * config.embed_dim;
let mut rng_state = 12345u32;
let mut next_rand = || {
rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
// Get value in range 0-255, then map to -128 to 127
(((rng_state >> 16) & 0xFF) as i16 - 128) as i8
};
for _ in 0..embed_size {
embedding_table.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
}
// Initialize output projection
for _ in 0..embed_size {
output_proj.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
}
// Initialize layers
let mut layers: [LayerWeights; MAX_LAYERS] = Default::default();
for i in 0..config.num_layers {
layers[i] = LayerWeights::random(&config, (i * 1000) as u32)?;
}
Ok(Self {
config,
embedding_table,
layers,
output_proj,
input_params: QuantParams::default(),
output_params: QuantParams::default(),
})
}
/// Total memory size of model
pub fn memory_size(&self) -> usize {
let mut size = self.embedding_table.len();
size += self.output_proj.len();
for i in 0..self.config.num_layers {
size += self.layers[i].memory_size();
}
size
}
/// Load model from bytes (e.g., from flash)
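    ///
    /// Expected header layout (32 bytes, little-endian), as parsed below:
    /// bytes 0-3 magic `"RUVM"`, 4-5 `vocab_size` (u16), 6-7 `embed_dim` (u16),
    /// 8-9 `hidden_dim` (u16), byte 10 `num_layers`, 11 `num_heads`,
    /// 12 `max_seq_len`, 13 `quant_type`, 14-31 padding.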
pub fn from_bytes(data: &[u8]) -> crate::Result<Self> {
// Parse header
if data.len() < 32 {
return Err(crate::Error::InvalidModel("Data too small"));
}
// Magic number check
if &data[0..4] != b"RUVM" {
return Err(crate::Error::InvalidModel("Invalid magic number"));
}
// Parse config from header
let vocab_size = u16::from_le_bytes([data[4], data[5]]) as usize;
let embed_dim = u16::from_le_bytes([data[6], data[7]]) as usize;
let hidden_dim = u16::from_le_bytes([data[8], data[9]]) as usize;
let num_layers = data[10] as usize;
let num_heads = data[11] as usize;
let max_seq_len = data[12] as usize;
let quant_type = match data[13] {
0 => QuantizationType::Int8,
1 => QuantizationType::Int4,
2 => QuantizationType::Binary,
3 => QuantizationType::Fixed16,
_ => return Err(crate::Error::InvalidModel("Unknown quantization type")),
};
let config = ModelConfig {
vocab_size,
embed_dim,
hidden_dim,
num_layers,
num_heads,
max_seq_len,
quant_type,
};
config.validate(crate::Esp32Variant::Esp32)?;
        // For now, create random weights; a full implementation would parse them from `data`
Self::new(config)
}
    /// Export the model header to bytes (weight data is not yet serialized)
pub fn to_bytes(&self) -> HVec<u8, 256> {
let mut header: HVec<u8, 256> = HVec::new();
// Magic number
let _ = header.extend_from_slice(b"RUVM");
// Config
let _ = header.extend_from_slice(&(self.config.vocab_size as u16).to_le_bytes());
let _ = header.extend_from_slice(&(self.config.embed_dim as u16).to_le_bytes());
let _ = header.extend_from_slice(&(self.config.hidden_dim as u16).to_le_bytes());
let _ = header.push(self.config.num_layers as u8);
let _ = header.push(self.config.num_heads as u8);
let _ = header.push(self.config.max_seq_len as u8);
let _ = header.push(match self.config.quant_type {
QuantizationType::Int8 => 0,
QuantizationType::Int4 => 1,
QuantizationType::Binary => 2,
QuantizationType::Fixed16 => 3,
});
// Padding to 32 bytes
while header.len() < 32 {
let _ = header.push(0);
}
header
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_default_config() {
let config = ModelConfig::default();
assert!(config.validate(crate::Esp32Variant::Esp32S2).is_ok());
let size = config.estimate_size();
println!("Default model size: {} bytes ({:.1} KB)", size, size as f32 / 1024.0);
assert!(size < 50 * 1024); // < 50KB for testing
}
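    // Cross-check sketch (assumes estimate_size() counts the same seven per-layer
    // matrices that LayerWeights actually allocates): a freshly created INT8 model
    // should occupy exactly the estimated number of bytes.
    #[test]
    fn test_estimate_matches_actual_size() {
        let config = ModelConfig::default();
        let estimated = config.estimate_size();
        let model = TinyModel::new(config).unwrap();
        assert_eq!(model.memory_size(), estimated);
    }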
#[test]
fn test_variant_configs() {
for variant in [
crate::Esp32Variant::Esp32,
crate::Esp32Variant::Esp32S2,
crate::Esp32Variant::Esp32S3,
crate::Esp32Variant::Esp32C3,
crate::Esp32Variant::Esp32C6,
] {
let config = ModelConfig::for_variant(variant);
assert!(config.validate(variant).is_ok());
let size = config.estimate_size();
println!("{:?}: {} bytes ({:.1} KB)", variant, size, size as f32 / 1024.0);
}
}
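    // Negative-path sketch: validate() should reject a config whose embed_dim is
    // not divisible by num_heads (32 % 3 != 0 here).
    #[test]
    fn test_validate_rejects_bad_head_count() {
        let config = ModelConfig { num_heads: 3, ..ModelConfig::default() };
        assert!(config.validate(crate::Esp32Variant::Esp32).is_err());
    }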
#[test]
fn test_model_creation() {
let config = ModelConfig::default();
let model = TinyModel::new(config).unwrap();
let size = model.memory_size();
println!("Actual model size: {} bytes ({:.1} KB)", size, size as f32 / 1024.0);
}
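    // Shape-check sketch for the random test weights: four embed_dim x embed_dim
    // attention matrices plus the three FFN matrices documented on LayerWeights.
    #[test]
    fn test_layer_weight_shapes() {
        let config = ModelConfig::default();
        let layer = LayerWeights::random(&config, 42).unwrap();
        let (d, h) = (config.embed_dim, config.hidden_dim);
        assert_eq!(layer.wq.len(), d * d);
        assert_eq!(layer.wo.len(), d * d);
        assert_eq!(layer.w_up.len(), d * h);
        assert_eq!(layer.w_down.len(), h * d);
    }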
#[test]
fn test_serialization() {
let config = ModelConfig::default();
let model = TinyModel::new(config).unwrap();
let header = model.to_bytes();
assert_eq!(&header[0..4], b"RUVM");
}
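    // Round-trip sketch: the 32-byte header from to_bytes() should be accepted by
    // from_bytes() and reproduce the same configuration. (Weights are regenerated,
    // since from_bytes() does not yet parse weight data.)
    #[test]
    fn test_header_round_trip() {
        let model = TinyModel::new(ModelConfig::default()).unwrap();
        let header = model.to_bytes();
        let restored = TinyModel::from_bytes(&header).unwrap();
        assert_eq!(restored.config.vocab_size, model.config.vocab_size);
        assert_eq!(restored.config.embed_dim, model.config.embed_dim);
        assert_eq!(restored.config.num_layers, model.config.num_layers);
    }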
}