Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

vendor/ruvector/examples/ruvLLM/esp32/src/model.rs (vendored, new file, 444 lines)
@@ -0,0 +1,444 @@
//! Model definition and loading for ESP32
//!
//! Supports tiny transformer models with INT8 quantization.
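//!
//! Minimal usage sketch (illustrative, using only the API defined in this
//! module): pick a config for the target chip, validate it, then build a model.
//!
//! ```ignore
//! let variant = Esp32Variant::Esp32S3;
//! let config = ModelConfig::for_variant(variant);
//! config.validate(variant).expect("config fits this chip");
//! let model = TinyModel::new(config).expect("weights fit in RAM");
//! ```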

use crate::quantized::{QuantParams, QuantizationType};
use heapless::Vec as HVec;
use serde::{Deserialize, Serialize};

/// Maximum number of transformer layers
pub const MAX_LAYERS: usize = 2;
/// Maximum embedding table size (vocab * embed_dim bytes)
pub const MAX_EMBEDDING_SIZE: usize = 32 * 1024; // 32KB
/// Maximum weight size per layer
pub const MAX_LAYER_SIZE: usize = 16 * 1024; // 16KB

/// Model configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelConfig {
    /// Vocabulary size
    pub vocab_size: usize,
    /// Embedding dimension
    pub embed_dim: usize,
    /// Hidden dimension in FFN
    pub hidden_dim: usize,
    /// Number of transformer layers
    pub num_layers: usize,
    /// Number of attention heads
    pub num_heads: usize,
    /// Maximum sequence length
    pub max_seq_len: usize,
    /// Quantization type
    pub quant_type: QuantizationType,
}

impl Default for ModelConfig {
    fn default() -> Self {
        // Tiny model suitable for ESP32
        Self {
            vocab_size: 256,
            embed_dim: 32,
            hidden_dim: 64,
            num_layers: 1,
            num_heads: 2,
            max_seq_len: 16,
            quant_type: QuantizationType::Int8,
        }
    }
}

impl ModelConfig {
    /// Validate configuration fits ESP32 constraints
    pub fn validate(&self, variant: crate::Esp32Variant) -> crate::Result<()> {
        let model_size = self.estimate_size();
        let max_ram = variant.max_model_ram();

        if model_size > max_ram {
            return Err(crate::Error::ModelTooLarge {
                required: model_size,
                available: max_ram,
            });
        }

        if self.embed_dim % self.num_heads != 0 {
            return Err(crate::Error::InvalidModel(
                "embed_dim must be divisible by num_heads",
            ));
        }

        if self.num_layers > MAX_LAYERS {
            return Err(crate::Error::InvalidModel("Too many layers"));
        }

        Ok(())
    }

    /// Estimate total model size in bytes
    pub fn estimate_size(&self) -> usize {
        let bytes_per_weight = match self.quant_type {
            QuantizationType::Int8 => 1,
            QuantizationType::Int4 => 1,   // packed: 2 weights per byte (see divisor)
            QuantizationType::Binary => 1, // packed: 8 weights per byte (see divisor)
            QuantizationType::Fixed16 => 2,
        };

        let divisor = match self.quant_type {
            QuantizationType::Int4 => 2,
            QuantizationType::Binary => 8,
            _ => 1,
        };

        // Embedding table
        let embed_size = (self.vocab_size * self.embed_dim * bytes_per_weight) / divisor;

        // Per-layer attention weights: wq, wk, wv, and wo, each [embed_dim, embed_dim]
        // (four matrices, matching LayerWeights::memory_size below)
        let attn_size = 4 * self.embed_dim * self.embed_dim * bytes_per_weight / divisor;
        // Per-layer FFN weights: w_up, w_gate, and w_down
        let ffn_size = 3 * self.embed_dim * self.hidden_dim * bytes_per_weight / divisor;
        let layer_size = attn_size + ffn_size;

        // Output projection
        let output_size = (self.vocab_size * self.embed_dim * bytes_per_weight) / divisor;

        embed_size + (layer_size * self.num_layers) + output_size
    }
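
    // Worked example, default Int8 config (vocab 256, embed 32, hidden 64,
    // 1 layer): embedding 256*32 = 8192 B, attention 4*32*32 = 4096 B,
    // FFN 3*32*64 = 6144 B, output 256*32 = 8192 B; total 26624 B (~26 KB),
    // well inside even the ~120KB ESP32-S2 budget noted in for_variant below.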

    /// Get recommended config for variant
    pub fn for_variant(variant: crate::Esp32Variant) -> Self {
        match variant {
            crate::Esp32Variant::Esp32 | crate::Esp32Variant::Esp32S3 => {
                // ~300KB available: use the larger model, still well within budget
                Self {
                    vocab_size: 256,
                    embed_dim: 64,
                    hidden_dim: 128,
                    num_layers: 2,
                    num_heads: 4,
                    max_seq_len: 32,
                    quant_type: QuantizationType::Int8,
                }
            }
            crate::Esp32Variant::Esp32S2 => {
                // ~120KB available: use the smaller model
                Self {
                    vocab_size: 128,
                    embed_dim: 32,
                    hidden_dim: 64,
                    num_layers: 1,
                    num_heads: 2,
                    max_seq_len: 16,
                    quant_type: QuantizationType::Int8,
                }
            }
            crate::Esp32Variant::Esp32C3 | crate::Esp32Variant::Esp32C6 => {
                // ~200KB available: mid-sized model
                Self {
                    vocab_size: 256,
                    embed_dim: 48,
                    hidden_dim: 96,
                    num_layers: 2,
                    num_heads: 3,
                    max_seq_len: 24,
                    quant_type: QuantizationType::Int8,
                }
            }
        }
    }
}

/// Layer weights for a single transformer layer
#[derive(Clone)]
pub struct LayerWeights {
    /// Query projection weights [embed_dim, embed_dim]
    pub wq: HVec<i8, MAX_LAYER_SIZE>,
    /// Key projection weights
    pub wk: HVec<i8, MAX_LAYER_SIZE>,
    /// Value projection weights
    pub wv: HVec<i8, MAX_LAYER_SIZE>,
    /// Output projection weights
    pub wo: HVec<i8, MAX_LAYER_SIZE>,

    /// FFN up projection [embed_dim, hidden_dim]
    pub w_up: HVec<i8, MAX_LAYER_SIZE>,
    /// FFN gate projection
    pub w_gate: HVec<i8, MAX_LAYER_SIZE>,
    /// FFN down projection [hidden_dim, embed_dim]
    pub w_down: HVec<i8, MAX_LAYER_SIZE>,

    /// Quantization params, one per weight matrix
    pub q_params: QuantParams,
    pub k_params: QuantParams,
    pub v_params: QuantParams,
    pub o_params: QuantParams,
    pub up_params: QuantParams,
    pub gate_params: QuantParams,
    pub down_params: QuantParams,
}

impl Default for LayerWeights {
    fn default() -> Self {
        Self {
            wq: HVec::new(),
            wk: HVec::new(),
            wv: HVec::new(),
            wo: HVec::new(),
            w_up: HVec::new(),
            w_gate: HVec::new(),
            w_down: HVec::new(),
            q_params: QuantParams::default(),
            k_params: QuantParams::default(),
            v_params: QuantParams::default(),
            o_params: QuantParams::default(),
            up_params: QuantParams::default(),
            gate_params: QuantParams::default(),
            down_params: QuantParams::default(),
        }
    }
}

impl LayerWeights {
    /// Initialize with random weights (for testing)
    pub fn random(config: &ModelConfig, seed: u32) -> crate::Result<Self> {
        let mut layer = Self::default();

        let embed_dim = config.embed_dim;
        let hidden_dim = config.hidden_dim;

        // Simple LCG random number generator (the classic ANSI C constants);
        // deterministic, which is all the test weights need
        let mut rng_state = seed;
        let mut next_rand = || {
            rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
            // Take 7 bits (0..=127), then shift into the range -64..=63
            (((rng_state >> 16) & 0x7F) as i16 - 64) as i8
        };

        // Attention projections: wq, wk, wv, wo, each [embed_dim, embed_dim]
        let qkv_size = embed_dim * embed_dim;
        for _ in 0..qkv_size {
            layer.wq.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
            layer.wk.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
            layer.wv.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
            layer.wo.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
        }

        // FFN projections
        let up_size = embed_dim * hidden_dim;
        for _ in 0..up_size {
            layer.w_up.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
            layer.w_gate.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
        }

        let down_size = hidden_dim * embed_dim;
        for _ in 0..down_size {
            layer.w_down.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
        }

        // Initialize quant params with reasonable defaults
        let scale = 1.0 / 64.0; // For weights in range [-64, 63]
        layer.q_params = QuantParams { scale, zero_point: 0.0, min_val: -1.0, max_val: 1.0 };
        layer.k_params = layer.q_params;
        layer.v_params = layer.q_params;
        layer.o_params = layer.q_params;
        layer.up_params = layer.q_params;
        layer.gate_params = layer.q_params;
        layer.down_params = layer.q_params;

        Ok(layer)
    }

    /// Memory size of this layer in bytes
    pub fn memory_size(&self) -> usize {
        self.wq.len() + self.wk.len() + self.wv.len() + self.wo.len()
            + self.w_up.len() + self.w_gate.len() + self.w_down.len()
    }
}

/// Complete tiny model
pub struct TinyModel {
    /// Model configuration
    pub config: ModelConfig,
    /// Embedding table [vocab_size, embed_dim]
    pub embedding_table: HVec<i8, MAX_EMBEDDING_SIZE>,
    /// Transformer layers
    pub layers: [LayerWeights; MAX_LAYERS],
    /// Output projection [embed_dim, vocab_size]
    pub output_proj: HVec<i8, MAX_EMBEDDING_SIZE>,
    /// Input quantization params
    pub input_params: QuantParams,
    /// Output quantization params
    pub output_params: QuantParams,
}

impl TinyModel {
    /// Create a new model with random weights
    pub fn new(config: ModelConfig) -> crate::Result<Self> {
        config.validate(crate::Esp32Variant::Esp32)?;

        let mut embedding_table = HVec::new();
        let mut output_proj = HVec::new();

        // Initialize embedding table
        let embed_size = config.vocab_size * config.embed_dim;
        let mut rng_state = 12345u32;
        let mut next_rand = || {
            rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
            // Take 8 bits (0..=255), then shift into the full i8 range -128..=127
            (((rng_state >> 16) & 0xFF) as i16 - 128) as i8
        };

        for _ in 0..embed_size {
            embedding_table.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
        }

        // Initialize output projection (same [vocab_size, embed_dim] element count)
        for _ in 0..embed_size {
            output_proj.push(next_rand()).map_err(|_| crate::Error::BufferOverflow)?;
        }

        // Initialize layers
        let mut layers: [LayerWeights; MAX_LAYERS] = Default::default();
        for i in 0..config.num_layers {
            layers[i] = LayerWeights::random(&config, (i * 1000) as u32)?;
        }

        Ok(Self {
            config,
            embedding_table,
            layers,
            output_proj,
            input_params: QuantParams::default(),
            output_params: QuantParams::default(),
        })
    }

    /// Total memory size of the model in bytes
    pub fn memory_size(&self) -> usize {
        let mut size = self.embedding_table.len();
        size += self.output_proj.len();
        for i in 0..self.config.num_layers {
            size += self.layers[i].memory_size();
        }
        size
    }
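
    // 32-byte header layout (little-endian), shared by from_bytes and to_bytes:
    //   bytes 0..4   magic b"RUVM"
    //   bytes 4..6   vocab_size  (u16)
    //   bytes 6..8   embed_dim   (u16)
    //   bytes 8..10  hidden_dim  (u16)
    //   byte  10     num_layers
    //   byte  11     num_heads
    //   byte  12     max_seq_len
    //   byte  13     quant_type (0=Int8, 1=Int4, 2=Binary, 3=Fixed16)
    //   bytes 14..32 zero padding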

    /// Load a model from bytes (e.g., from flash)
    pub fn from_bytes(data: &[u8]) -> crate::Result<Self> {
        // Parse header
        if data.len() < 32 {
            return Err(crate::Error::InvalidModel("Data too small"));
        }

        // Magic number check
        if &data[0..4] != b"RUVM" {
            return Err(crate::Error::InvalidModel("Invalid magic number"));
        }

        // Parse config from header
        let vocab_size = u16::from_le_bytes([data[4], data[5]]) as usize;
        let embed_dim = u16::from_le_bytes([data[6], data[7]]) as usize;
        let hidden_dim = u16::from_le_bytes([data[8], data[9]]) as usize;
        let num_layers = data[10] as usize;
        let num_heads = data[11] as usize;
        let max_seq_len = data[12] as usize;
        let quant_type = match data[13] {
            0 => QuantizationType::Int8,
            1 => QuantizationType::Int4,
            2 => QuantizationType::Binary,
            3 => QuantizationType::Fixed16,
            _ => return Err(crate::Error::InvalidModel("Unknown quantization type")),
        };

        let config = ModelConfig {
            vocab_size,
            embed_dim,
            hidden_dim,
            num_layers,
            num_heads,
            max_seq_len,
            quant_type,
        };

        config.validate(crate::Esp32Variant::Esp32)?;

        // For now, create random weights; a real implementation would parse them from `data`
        Self::new(config)
    }

    /// Export the model header to bytes (weights are not serialized yet)
    pub fn to_bytes(&self) -> HVec<u8, 256> {
        let mut header: HVec<u8, 256> = HVec::new();

        // Magic number
        let _ = header.extend_from_slice(b"RUVM");

        // Config
        let _ = header.extend_from_slice(&(self.config.vocab_size as u16).to_le_bytes());
        let _ = header.extend_from_slice(&(self.config.embed_dim as u16).to_le_bytes());
        let _ = header.extend_from_slice(&(self.config.hidden_dim as u16).to_le_bytes());
        let _ = header.push(self.config.num_layers as u8);
        let _ = header.push(self.config.num_heads as u8);
        let _ = header.push(self.config.max_seq_len as u8);
        let _ = header.push(match self.config.quant_type {
            QuantizationType::Int8 => 0,
            QuantizationType::Int4 => 1,
            QuantizationType::Binary => 2,
            QuantizationType::Fixed16 => 3,
        });

        // Padding to 32 bytes
        while header.len() < 32 {
            let _ = header.push(0);
        }

        header
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_default_config() {
        let config = ModelConfig::default();
        assert!(config.validate(crate::Esp32Variant::Esp32S2).is_ok());

        let size = config.estimate_size();
        println!("Default model size: {} bytes ({:.1} KB)", size, size as f32 / 1024.0);
        assert!(size < 50 * 1024); // < 50KB for testing
    }

    #[test]
    fn test_variant_configs() {
        for variant in [
            crate::Esp32Variant::Esp32,
            crate::Esp32Variant::Esp32S2,
            crate::Esp32Variant::Esp32S3,
            crate::Esp32Variant::Esp32C3,
            crate::Esp32Variant::Esp32C6,
        ] {
            let config = ModelConfig::for_variant(variant);
            assert!(config.validate(variant).is_ok());

            let size = config.estimate_size();
            println!("{:?}: {} bytes ({:.1} KB)", variant, size, size as f32 / 1024.0);
        }
    }
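
    // Illustrative sanity check: a randomly initialized layer should allocate
    // exactly the shapes documented on LayerWeights.
    #[test]
    fn test_layer_weight_shapes() {
        let config = ModelConfig::default();
        let layer = LayerWeights::random(&config, 42).unwrap();
        assert_eq!(layer.wq.len(), config.embed_dim * config.embed_dim);
        assert_eq!(layer.wo.len(), config.embed_dim * config.embed_dim);
        assert_eq!(layer.w_up.len(), config.embed_dim * config.hidden_dim);
        assert_eq!(layer.w_down.len(), config.hidden_dim * config.embed_dim);
        // 4 attention matrices + 3 FFN matrices, all INT8 (1 byte per weight)
        assert_eq!(layer.memory_size(), 4 * 32 * 32 + 3 * 32 * 64);
    }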

    #[test]
    fn test_model_creation() {
        let config = ModelConfig::default();
        let model = TinyModel::new(config).unwrap();

        let size = model.memory_size();
        println!("Actual model size: {} bytes ({:.1} KB)", size, size as f32 / 1024.0);
    }
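
    // Consistency check (illustrative): for Int8 models, estimate_size()
    // should agree exactly with the bytes TinyModel::new actually allocates.
    #[test]
    fn test_estimate_matches_actual() {
        let config = ModelConfig::default();
        let estimated = config.estimate_size();
        let model = TinyModel::new(config).unwrap();
        assert_eq!(model.memory_size(), estimated);
    }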

    #[test]
    fn test_serialization() {
        let config = ModelConfig::default();
        let model = TinyModel::new(config).unwrap();

        let header = model.to_bytes();
        assert_eq!(&header[0..4], b"RUVM");
    }
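
    // Round-trip sketch (illustrative): the 32-byte header emitted by to_bytes
    // should parse back into the same configuration via from_bytes.
    #[test]
    fn test_header_round_trip() {
        let model = TinyModel::new(ModelConfig::default()).unwrap();

        let header = model.to_bytes();
        let parsed = TinyModel::from_bytes(&header).unwrap();
        assert_eq!(parsed.config.vocab_size, model.config.vocab_size);
        assert_eq!(parsed.config.embed_dim, model.config.embed_dim);
        assert_eq!(parsed.config.hidden_dim, model.config.hidden_dim);
        assert_eq!(parsed.config.num_layers, model.config.num_layers);
        assert_eq!(parsed.config.num_heads, model.config.num_heads);
        assert_eq!(parsed.config.max_seq_len, model.config.max_seq_len);
    }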
}