//! Model Zoo - Pre-quantized Models for RuvLLM ESP32
//!
//! Ready-to-use language models optimized for ESP32 microcontrollers.
//!
//! # Available Models
//!
//! | Model | Size | RAM | Tokens/sec | Use Case |
//! |-------|------|-----|------------|----------|
//! | TinyStories | 8KB | 20KB | ~50 | Story generation |
//! | MicroChat | 16KB | 32KB | ~30 | Simple chatbot |
//! | NanoEmbed | 4KB | 8KB | ~100 | Embeddings only |
//! | TinyQA | 12KB | 24KB | ~40 | Question answering |
//! | BinaryEmbed | 2KB | 4KB | — | Binary embeddings (32x compression) |
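//!
//! # Example
//!
//! A minimal usage sketch. It assumes this module is in scope as `models`
//! and that a 40 KB RAM budget is available; both are illustrative:
//!
//! ```ignore
//! use models::{recommend_model, UseCase};
//!
//! // Pick a chat-capable model that fits in 40 KB of RAM.
//! if let Some(model) = recommend_model(UseCase::Chat, 40) {
//!     // "microchat-2m": 16KB of weights, 32KB of working RAM.
//!     let _ = (model.name, model.size_bytes, model.ram_bytes);
//! }
//! ```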
use heapless::Vec;
/// Model metadata
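///
/// All fields are plain integers or `&'static str`, so entries can live in
/// flash as `const`s. A hypothetical custom entry, purely to illustrate the
/// fields (not part of the zoo):
///
/// ```ignore
/// const MY_MODEL: ModelInfo = ModelInfo {
///     name: "custom-int4",      // hypothetical model
///     version: "0.1.0",
///     size_bytes: 6 * 1024,     // 6KB of packed weights
///     ram_bytes: 16 * 1024,     // 16KB working RAM
///     vocab_size: 256,
///     hidden_dim: 48,
///     num_layers: 2,
///     num_heads: 2,
///     max_seq_len: 64,
///     quant_bits: 4,            // INT4
///     description: "Hypothetical custom model",
/// };
/// ```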
#[derive(Clone)]
pub struct ModelInfo {
    /// Model name
    pub name: &'static str,
    /// Model version
    pub version: &'static str,
    /// Model size in bytes
    pub size_bytes: u32,
    /// Required RAM in bytes
    pub ram_bytes: u32,
    /// Vocabulary size
    pub vocab_size: u16,
    /// Hidden dimension
    pub hidden_dim: u16,
    /// Number of layers
    pub num_layers: u8,
    /// Number of attention heads
    pub num_heads: u8,
    /// Maximum sequence length
    pub max_seq_len: u16,
    /// Quantization bits (8 = INT8, 4 = INT4, 1 = binary)
    pub quant_bits: u8,
    /// Description
    pub description: &'static str,
}

/// Available pre-quantized models
pub const MODELS: &[ModelInfo] = &[
    ModelInfo {
        name: "tinystories-1m",
        version: "1.0.0",
        size_bytes: 8 * 1024,  // 8KB
        ram_bytes: 20 * 1024,  // 20KB
        vocab_size: 256,
        hidden_dim: 64,
        num_layers: 2,
        num_heads: 2,
        max_seq_len: 64,
        quant_bits: 8,
        description: "Tiny model for simple story generation",
    },
    ModelInfo {
        name: "microchat-2m",
        version: "1.0.0",
        size_bytes: 16 * 1024, // 16KB
        ram_bytes: 32 * 1024,  // 32KB
        vocab_size: 512,
        hidden_dim: 96,
        num_layers: 3,
        num_heads: 3,
        max_seq_len: 128,
        quant_bits: 8,
        description: "Simple chatbot for basic conversations",
    },
    ModelInfo {
        name: "nanoembed-500k",
        version: "1.0.0",
        size_bytes: 4 * 1024,  // 4KB
        ram_bytes: 8 * 1024,   // 8KB
        vocab_size: 256,
        hidden_dim: 32,
        num_layers: 1,
        num_heads: 1,
        max_seq_len: 32,
        quant_bits: 8,
        description: "Ultra-light embedding model for semantic search",
    },
    ModelInfo {
        name: "tinyqa-1.5m",
        version: "1.0.0",
        size_bytes: 12 * 1024, // 12KB
        ram_bytes: 24 * 1024,  // 24KB
        vocab_size: 384,
        hidden_dim: 80,
        num_layers: 2,
        num_heads: 2,
        max_seq_len: 96,
        quant_bits: 8,
        description: "Question-answering model for simple queries",
    },
    ModelInfo {
        name: "binary-embed-250k",
        version: "1.0.0",
        size_bytes: 2 * 1024,  // 2KB
        ram_bytes: 4 * 1024,   // 4KB
        vocab_size: 128,
        hidden_dim: 64,
        num_layers: 1,
        num_heads: 1,
        max_seq_len: 16,
        quant_bits: 1, // Binary quantization
        description: "Binary quantized embeddings (32x compression)",
    },
];

/// Model selection by use case
#[derive(Debug, Clone, Copy)]
pub enum UseCase {
    /// Story/text generation
    Generation,
    /// Conversational AI
    Chat,
    /// Semantic embeddings
    Embedding,
    /// Question answering
    QA,
    /// Minimum memory footprint
    MinMemory,
}

/// Get recommended model for use case
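///
/// Matching is by substring on the model name, in `MODELS` order, over the
/// candidates that fit `max_ram_kb`; `MinMemory` picks the smallest-RAM
/// candidate instead. A small sketch (mirrors `test_recommend_model` below):
///
/// ```ignore
/// // Only the two embedding models fit in 10 KB; MinMemory picks the smaller.
/// let m = recommend_model(UseCase::MinMemory, 10).unwrap();
/// assert_eq!(m.name, "binary-embed-250k");
/// ```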
pub fn recommend_model(use_case: UseCase, max_ram_kb: u32) -> Option<&'static ModelInfo> {
    let max_ram = max_ram_kb * 1024;
    // Keep only models whose working RAM fits the budget
    // (MODELS currently has 5 entries, so a capacity of 8 cannot overflow).
    let candidates: Vec<&ModelInfo, 8> = MODELS
        .iter()
        .filter(|m| m.ram_bytes <= max_ram)
        .collect();

    match use_case {
        UseCase::Generation => candidates.iter().find(|m| m.name.contains("stories")).copied(),
        UseCase::Chat => candidates.iter().find(|m| m.name.contains("chat")).copied(),
        UseCase::Embedding => candidates.iter().find(|m| m.name.contains("embed")).copied(),
        UseCase::QA => candidates.iter().find(|m| m.name.contains("qa")).copied(),
        UseCase::MinMemory => candidates.iter().min_by_key(|m| m.ram_bytes).copied(),
    }
}

/// Get model by name
pub fn get_model(name: &str) -> Option<&'static ModelInfo> {
    MODELS.iter().find(|m| m.name == name)
}

/// List all models
pub fn list_models() -> &'static [ModelInfo] {
    MODELS
}

/// Calculate tokens per second estimate for model on given chip
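///
/// The figure is a heuristic, not a benchmark: a per-chip base rate is scaled
/// down with layer count and doubled for binary quantization. A worked sketch:
///
/// ```ignore
/// // tinystories-1m (2 layers, INT8) on esp32s3: base 60 tokens/sec,
/// // scaled by 1.0 / (2.0 * 0.3 + 1.0) = 0.625, giving ~37 tokens/sec.
/// let m = get_model("tinystories-1m").unwrap();
/// let tps = estimate_performance(m, "esp32s3");
/// ```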
pub fn estimate_performance(model: &ModelInfo, chip: &str) -> u32 {
    // Per-chip base rate in tokens/sec
    let base_speed = match chip {
        "esp32s3" => 60, // SIMD acceleration
        "esp32" => 40,
        "esp32s2" => 35,
        "esp32c3" => 30,
        "esp32c6" => 35,
        _ => 30,
    };
    // Adjust for model complexity: deeper models are slower, and binary
    // quantization roughly doubles throughput
    let complexity_factor = 1.0 / (model.num_layers as f32 * 0.3 + 1.0);
    let quant_factor = if model.quant_bits == 1 { 2.0 } else { 1.0 };
    (base_speed as f32 * complexity_factor * quant_factor) as u32
}

/// Print model info table
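///
/// The rendered table is fixed-width text with box-drawing separators; a row
/// looks roughly like:
///
/// ```text
/// tinystories-1m       8KB   20KB INT8  Tiny model for simpl
/// ```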
pub fn print_model_table() -> heapless::String<1024> {
    let mut output = heapless::String::new();
    let _ = output.push_str("Available Models:\n");
    let _ = output.push_str("─────────────────────────────────────────────────\n");
    let _ = output.push_str("Name                Size    RAM Quant Use Case\n");
    let _ = output.push_str("─────────────────────────────────────────────────\n");
    for model in MODELS {
        // Description is truncated to 20 chars to keep rows narrow
        let _ = core::fmt::write(
            &mut output,
            format_args!(
                "{:<17} {:>4}KB {:>4}KB INT{:<2} {}\n",
                model.name,
                model.size_bytes / 1024,
                model.ram_bytes / 1024,
                model.quant_bits,
                model
                    .description
                    .chars()
                    .take(20)
                    .collect::<heapless::String<20>>()
            ),
        );
    }
    output
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_model_lookup() {
        let model = get_model("tinystories-1m");
        assert!(model.is_some());
        assert_eq!(model.unwrap().vocab_size, 256);
    }

    #[test]
    fn test_recommend_model() {
        let model = recommend_model(UseCase::MinMemory, 10);
        assert!(model.is_some());
        assert_eq!(model.unwrap().name, "binary-embed-250k");
    }

    #[test]
    fn test_performance_estimate() {
        let model = get_model("nanoembed-500k").unwrap();
        let speed = estimate_performance(model, "esp32s3");
        assert!(speed > 0);
    }
}