//! ESP32 Simulation Benchmarks
//!
//! Simulates ESP32 performance constraints to validate that the
//! implementation will work on actual hardware.

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::time::Duration;

// Import the ESP32 crate (compiled for the host for simulation)
#[path = "../src/lib.rs"]
mod ruvllm_esp32;

use ruvllm_esp32::attention::MicroAttention;
use ruvllm_esp32::model::ModelConfig;
use ruvllm_esp32::prelude::*;
use ruvllm_esp32::quantized::{matmul_int8, QuantParams, QuantizationType};

/// ESP32 clock speed in MHz
const ESP32_CLOCK_MHZ: u64 = 240;

/// Estimated cycles per INT8 multiply-accumulate on the ESP32
const CYCLES_PER_MAC: u64 = 4;

/// Estimate ESP32 execution time from an x86 measurement.
///
/// The ESP32 is roughly 10-20x slower than a modern x86 core for pure
/// compute, but INT8 operations are comparatively efficient, so we take
/// the larger of the cycle-model estimate and a 15x-scaled host
/// measurement. Kept for manual estimates; not called by the benchmarks
/// themselves.
#[allow(dead_code)]
fn estimate_esp32_time(x86_duration: Duration, mac_ops: u64) -> Duration {
    let estimated_cycles = mac_ops * CYCLES_PER_MAC;
    let esp32_seconds = estimated_cycles as f64 / (ESP32_CLOCK_MHZ as f64 * 1_000_000.0);
    Duration::from_secs_f64(esp32_seconds.max(x86_duration.as_secs_f64() * 15.0))
}

fn benchmark_matmul_int8(c: &mut Criterion) {
    let mut group = c.benchmark_group("INT8 MatMul");
    group.warm_up_time(Duration::from_millis(500));
    group.measurement_time(Duration::from_secs(3));

    // Test different sizes typical for ESP32 models
    for (out_dim, in_dim) in [(32, 32), (64, 64), (128, 64), (64, 128)] {
        // Deterministic pseudo-random values over the full i8 range.
        // The subtraction is done in i32 to avoid i8 overflow.
        let weights: Vec<i8> = (0..out_dim * in_dim)
            .map(|i| (((i * 17) % 256) as i32 - 128) as i8)
            .collect();
        let input: Vec<i8> = (0..in_dim)
            .map(|i| (((i * 13) % 256) as i32 - 128) as i8)
            .collect();
        let mut output = vec![0i32; out_dim];
        let params = QuantParams::default();
        let mac_ops = (out_dim * in_dim) as u64;

        group.bench_with_input(
            BenchmarkId::new("size", format!("{}x{}", out_dim, in_dim)),
            &(out_dim, in_dim),
            |b, _| {
                b.iter(|| {
                    matmul_int8(
                        black_box(&weights),
                        black_box(&params),
                        black_box(&input),
                        black_box(&params),
                        black_box(&mut output),
                        out_dim,
                        in_dim,
                    )
                })
            },
        );

        // Print the analytic ESP32 estimate (cycles / MHz = microseconds)
        println!(
            "  {}x{}: {} MAC ops, estimated ESP32 time: {:.1} us",
            out_dim,
            in_dim,
            mac_ops,
            mac_ops as f64 * CYCLES_PER_MAC as f64 / ESP32_CLOCK_MHZ as f64
        );
    }
    group.finish();
}

fn benchmark_attention(c: &mut Criterion) {
    let mut group = c.benchmark_group("Micro Attention");
    group.warm_up_time(Duration::from_millis(500));
    group.measurement_time(Duration::from_secs(3));

    for (embed_dim, num_heads, seq_len) in [(64, 4, 16), (64, 4, 32), (32, 2, 16)] {
        let head_dim = embed_dim / num_heads;
        let attn = MicroAttention::new(embed_dim, num_heads);

        let query: Vec<i8> = (0..head_dim).map(|i| (i * 7 % 128) as i8).collect();
        let keys: Vec<Vec<i8>> = (0..seq_len)
            .map(|s| (0..head_dim).map(|i| ((i + s) * 11 % 128) as i8).collect())
            .collect();
        let key_refs: Vec<&[i8]> = keys.iter().map(|k| k.as_slice()).collect();
        let mut scores = vec![0i32; seq_len];

        group.bench_with_input(
            BenchmarkId::new("config", format!("d{}_h{}_s{}", embed_dim, num_heads, seq_len)),
            &seq_len,
            |b, _| {
                b.iter(|| {
                    attn.compute_scores(
                        black_box(&query),
                        black_box(&key_refs),
                        black_box(&mut scores),
                    )
                })
            },
        );
    }
    group.finish();
}
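// A worked example of the cycle model above, using only constants defined in
// this file. For the 64x64 matmul benchmarked earlier:
//
//   64 * 64 = 4096 MACs
//   4096 MACs * 4 cycles/MAC = 16384 cycles
//   16384 cycles / 240 MHz ~= 68.3 us per matmul
//
// The sanity test below is an illustrative sketch checking that
// `estimate_esp32_time` falls back to the pure cycle model whenever the
// scaled host measurement is negligible.
#[cfg(test)]
mod cycle_model_tests {
    use super::*;

    #[test]
    fn cycle_model_dominates_tiny_host_measurements() {
        // 1 ns * 15 is far below the ~68 us cycle estimate, so the max()
        // inside estimate_esp32_time should pick the cycle model.
        let est = estimate_esp32_time(Duration::from_nanos(1), 64 * 64);
        let expected =
            (64 * 64 * CYCLES_PER_MAC) as f64 / (ESP32_CLOCK_MHZ as f64 * 1_000_000.0);
        assert!((est.as_secs_f64() - expected).abs() < 1e-8);
    }
}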
fn benchmark_full_forward(c: &mut Criterion) {
    let mut group = c.benchmark_group("Full Forward Pass");
    group.warm_up_time(Duration::from_millis(1000));
    group.measurement_time(Duration::from_secs(5));

    // Test configurations for different ESP32 variants
    let configs = [
        (
            "ESP32",
            ModelConfig {
                vocab_size: 256,
                embed_dim: 64,
                hidden_dim: 128,
                num_layers: 2,
                num_heads: 4,
                max_seq_len: 32,
                quant_type: QuantizationType::Int8,
            },
        ),
        (
            "ESP32-S2",
            ModelConfig {
                vocab_size: 128,
                embed_dim: 32,
                hidden_dim: 64,
                num_layers: 1,
                num_heads: 2,
                max_seq_len: 16,
                quant_type: QuantizationType::Int8,
            },
        ),
        (
            "ESP32-S3",
            ModelConfig {
                vocab_size: 512,
                embed_dim: 64,
                hidden_dim: 128,
                num_layers: 2,
                num_heads: 4,
                max_seq_len: 32,
                quant_type: QuantizationType::Int8,
            },
        ),
    ];

    for (variant, config) in configs {
        let model = TinyModel::new(config.clone()).unwrap();
        let mut engine = MicroEngine::new(model).unwrap();
        let model_size = config.estimate_size();

        group.bench_with_input(BenchmarkId::new("variant", variant), &variant, |b, _| {
            b.iter(|| {
                engine.reset();
                black_box(engine.forward_one(black_box(42)).unwrap())
            })
        });

        println!(
            "  {}: model size {} KB, embed_dim {}, layers {}",
            variant,
            model_size / 1024,
            config.embed_dim,
            config.num_layers
        );
    }
    group.finish();
}

fn benchmark_generation(c: &mut Criterion) {
    let mut group = c.benchmark_group("Token Generation");
    group.warm_up_time(Duration::from_millis(1000));
    group.measurement_time(Duration::from_secs(5));
    group.sample_size(20); // Fewer samples for this slower operation

    let config = ModelConfig::for_variant(Esp32Variant::Esp32);
    let model = TinyModel::new(config).unwrap();
    let mut engine = MicroEngine::new(model).unwrap();

    let prompt = [1u16, 2, 3, 4, 5];
    let gen_config = InferenceConfig {
        max_tokens: 10,
        greedy: true,
        ..Default::default()
    };

    group.bench_function("generate_10_tokens", |b| {
        b.iter(|| {
            engine.reset();
            black_box(
                engine
                    .generate(black_box(&prompt), black_box(&gen_config))
                    .unwrap(),
            )
        })
    });
    group.finish();
}

fn benchmark_memory_constraints(c: &mut Criterion) {
    let mut group = c.benchmark_group("Memory Validation");

    // Validate that models fit within ESP32 memory constraints
    for variant in [
        Esp32Variant::Esp32,
        Esp32Variant::Esp32S2,
        Esp32Variant::Esp32S3,
        Esp32Variant::Esp32C3,
        Esp32Variant::Esp32C6,
    ] {
        let config = ModelConfig::for_variant(variant);
        let model = TinyModel::new(config.clone()).unwrap();
        let engine = MicroEngine::new(model).unwrap();
        let usage = engine.memory_usage();
        let available = variant.max_model_ram();

        println!("  {:?}:", variant);
        println!("    Available RAM: {} KB", available / 1024);
        println!("    Model weights: {} KB", usage.model_weights / 1024);
        println!("    Activations:   {} KB", usage.activation_buffers / 1024);
        println!("    KV cache:      {} KB", usage.kv_cache / 1024);
        println!("    Total used:    {} KB", usage.total / 1024);
        // saturating_sub so an over-budget model reaches the assert below
        // instead of panicking on unsigned underflow
        println!(
            "    Headroom:      {} KB",
            available.saturating_sub(usage.total) / 1024
        );
        println!();

        assert!(
            usage.total <= available,
            "{:?} exceeds memory: {} > {}",
            variant,
            usage.total,
            available
        );
    }

    // Dummy benchmark to satisfy criterion
    group.bench_function("memory_check", |b| {
        b.iter(|| black_box(Esp32Variant::Esp32.max_model_ram()))
    });
    group.finish();
}
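// Back-of-envelope throughput sketch tying the per-MAC cycle model to a full
// forward pass. This is illustrative only: it assumes a standard transformer
// layer (four embed_dim x embed_dim attention projections plus a two-matmul
// FFN) and an output head over the vocabulary, which may not match
// TinyModel's actual structure. For the ESP32 config above (embed_dim 64,
// hidden_dim 128, 2 layers, vocab 256) it predicts roughly 730 tokens/sec.
#[allow(dead_code)]
fn estimate_tokens_per_sec(config: &ModelConfig) -> f64 {
    let d = config.embed_dim as u64;
    let h = config.hidden_dim as u64;
    let v = config.vocab_size as u64;
    // Q/K/V/O projections + up/down FFN matmuls, per layer
    let per_layer = 4 * d * d + 2 * d * h;
    // All layers plus the final vocabulary projection
    let mac_ops = config.num_layers as u64 * per_layer + v * d;
    let cycles_per_token = mac_ops * CYCLES_PER_MAC;
    ESP32_CLOCK_MHZ as f64 * 1_000_000.0 / cycles_per_token as f64
}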
fn benchmark_quantization(c: &mut Criterion) {
    let mut group = c.benchmark_group("Quantization");
    group.warm_up_time(Duration::from_millis(500));
    group.measurement_time(Duration::from_secs(3));

    use ruvllm_esp32::quantized::QuantizedTensor;

    // Test quantization of different sized tensors
    for size in [256, 1024, 4096] {
        // Evenly spaced values in [-1.0, 1.0)
        let data: Vec<f32> = (0..size)
            .map(|i| (i as f32 / size as f32) * 2.0 - 1.0)
            .collect();

        group.bench_with_input(BenchmarkId::new("int8", size), &size, |b, _| {
            b.iter(|| {
                QuantizedTensor::<16384>::from_f32(
                    black_box(&data),
                    &[size],
                    QuantizationType::Int8,
                )
                .unwrap()
            })
        });

        group.bench_with_input(BenchmarkId::new("int4", size), &size, |b, _| {
            b.iter(|| {
                QuantizedTensor::<16384>::from_f32(
                    black_box(&data),
                    &[size],
                    QuantizationType::Int4,
                )
                .unwrap()
            })
        });

        group.bench_with_input(BenchmarkId::new("binary", size), &size, |b, _| {
            b.iter(|| {
                QuantizedTensor::<16384>::from_f32(
                    black_box(&data),
                    &[size],
                    QuantizationType::Binary,
                )
                .unwrap()
            })
        });
    }
    group.finish();
}

criterion_group!(
    benches,
    benchmark_matmul_int8,
    benchmark_attention,
    benchmark_full_forward,
    benchmark_generation,
    benchmark_memory_constraints,
    benchmark_quantization,
);
criterion_main!(benches);
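// To run these benchmarks on the host (the bench target name is assumed;
// check the [[bench]] entry in Cargo.toml for the actual one):
//
//   cargo bench --bench esp32_simulation
//
// Note that benchmark_memory_constraints doubles as a validation pass: the
// run aborts if any variant's model does not fit in its RAM budget.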