Files
wifi-densepose/examples/ruvLLM/esp32/examples/medium_scale_demo.rs
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

234 lines
15 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! Medium Scale Federation Demo - 100 to 500 Chip Clusters
//!
//! Shows the "sweet spot" for ESP32 federation where you get:
//! - High efficiency (40-70%)
//! - Great throughput (50K-100K tokens/sec)
//! - Practical costs ($400-$2,000)
//! - Real model capabilities (Small to Base models)
use ruvllm_esp32::federation::{
MediumClusterConfig, ScaleComparison, MediumScaleAnalyzer,
ModelCategory, HardwareConfig, BusType,
MEDIUM_SCALE_MIN, MEDIUM_SCALE_MAX, MEDIUM_SCALE_OPTIMAL,
};
/// Interior width, in characters, of the double-line banner boxes.
const BANNER_WIDTH: usize = 71;

/// Builds one horizontal rule of a single-line box-drawing table.
///
/// Every column is drawn `width + 2` dashes wide so that it lines up with
/// cells rendered as `│ <cell padded to width> │`.
fn table_rule(left: char, joint: char, right: char, widths: &[usize]) -> String {
    let segments: Vec<String> = widths.iter().map(|w| "─".repeat(w + 2)).collect();
    format!("{}{}{}", left, segments.join(&joint.to_string()), right)
}

/// Builds a horizontal edge of a banner box using the given corner characters.
fn banner_rule(left: char, right: char) -> String {
    format!("{}{}{}", left, "═".repeat(BANNER_WIDTH), right)
}

/// Builds a banner line with `text` centered between the vertical edges.
fn banner_title(text: &str) -> String {
    format!("║{:^width$}║", text, width = BANNER_WIDTH)
}

/// Builds a banner line with `text` left-aligned between the vertical edges.
fn banner_line(text: &str) -> String {
    format!("║{:<width$}║", text, width = BANNER_WIDTH)
}

/// Prints the medium-scale federation report to stdout.
///
/// The report has seven sections: the rationale for the 100-500 chip range,
/// the standard configurations, a throughput comparison against smaller
/// clusters, model capabilities, hardware requirements, configuration
/// recommendations by throughput and by budget, and a closing summary.
///
/// Table borders are generated from the same column widths used to pad the
/// cells, so header rows, data rows and borders always line up.
fn main() {
    println!("{}", banner_rule('╔', '╗'));
    println!(
        "{}",
        banner_title("RuvLLM ESP32 - Medium Scale Federation (100-500 Chips)")
    );
    println!(
        "{}",
        banner_title("The Sweet Spot for Practical Distributed Inference")
    );
    println!("{}\n", banner_rule('╚', '╝'));

    // ============================================================
    // 1. Why 100-500 Chips is the Sweet Spot
    // ============================================================
    println!("═══ Why 100-500 Chips? ═══\n");
    println!(" The 100-500 chip range is optimal because:");
    println!(" • High efficiency (40-70%) - minimal wasted compute");
    println!(" • Communication overhead stays low (<50%)");
    println!(" • Cost-effective ($400-$2,000 total)");
    println!(" • Can run meaningful models (5M-100M parameters)");
    println!(" • Practical hardware: fits in 1-2 rack units");
    println!();

    // ============================================================
    // 2. Standard Configurations
    // ============================================================
    println!("═══ Standard Medium-Scale Configurations ═══\n");
    // Cell widths: chips, "clusters × chips/cluster", throughput,
    // efficiency (number plus '%'), cost, power.
    let config_cols: [usize; 6] = [7, 13, 14, 10, 8, 8];
    println!("{}", table_rule('┌', '┬', '┐', &config_cols));
    println!(
        "│ {:<7} │ {:<13} │ {:<14} │ {:<10} │ {:<8} │ {:<8} │",
        "Chips", "Topology", "Throughput", "Efficiency", "Cost", "Power",
    );
    println!(
        "│ {:<7} │ {:<13} │ {:<14} │ {:<10} │ {:<8} │ {:<8} │",
        "", "(clusters)", "(tok/sec)", "", "($)", "(W)",
    );
    println!("{}", table_rule('├', '┼', '┤', &config_cols));
    for config in MediumClusterConfig::standard_configs() {
        println!(
            "│ {:>7} │ {:>5} × {:>5} │ {:>14.0} │ {:>9.1}% │ {:>8.0} │ {:>8.1} │",
            config.total_chips,
            config.clusters,
            config.chips_per_cluster,
            config.expected_throughput,
            config.expected_efficiency * 100.0,
            config.cost_usd,
            config.power_watts,
        );
    }
    println!("{}\n", table_rule('└', '┴', '┘', &config_cols));

    // ============================================================
    // 3. Comparison vs Smaller Clusters
    // ============================================================
    println!("═══ Performance Comparison: Small vs Medium Clusters ═══\n");
    // Cell widths: configuration label, throughput, improvement (number plus 'x').
    let comparison_cols: [usize; 3] = [13, 13, 12];
    let key_sizes = [100, 256, 500];
    for chips in key_sizes {
        let comparison = ScaleComparison::analyze(chips);
        println!(" {} Chips vs Baselines:", chips);
        println!(" {}", table_rule('┌', '┬', '┐', &comparison_cols));
        println!(
            " │ {:<13} │ {:<13} │ {:<12} │",
            "Configuration", "Throughput", "Improvement",
        );
        println!(" {}", table_rule('├', '┼', '┤', &comparison_cols));
        println!(
            " │ {:<13} │ {:>13.0} │ {:>12} │",
            "1 chip", comparison.single_chip.throughput_tokens_sec, "(baseline)",
        );
        println!(
            " │ {:<13} │ {:>13.0} │ {:>11.1}x │",
            "5 chips",
            comparison.small_cluster.throughput_tokens_sec,
            comparison.small_cluster.throughput_tokens_sec
                / comparison.single_chip.throughput_tokens_sec,
        );
        println!(
            " │ {:<13} │ {:>13.0} │ {:>11.1}x │",
            format!("{} chips", chips),
            comparison.medium_cluster.throughput_tokens_sec,
            comparison.throughput_multiplier,
        );
        println!(" {}", table_rule('└', '┴', '┘', &comparison_cols));
        println!(" Cost per 1K tok/s: ${:.2}\n", comparison.cost_per_1k_tokens);
    }

    // ============================================================
    // 4. Model Capabilities at Each Scale
    // ============================================================
    println!("═══ What Models Can You Run? ═══\n");
    // Cell widths: chips, "min-max" parameter range, example models.
    let model_cols: [usize; 3] = [7, 11, 46];
    println!("{}", table_rule('┌', '┬', '┐', &model_cols));
    println!(
        "│ {:<7} │ {:<11} │ {:<46} │",
        "Chips", "Model Size", "Example Models",
    );
    println!("{}", table_rule('├', '┼', '┤', &model_cols));
    for chips in [100, 150, 200, 256, 300, 400, 500] {
        let category = ModelCategory::for_chip_count(chips);
        let (min_params, max_params) = category.param_range();
        println!(
            "│ {:>7} │ {:>5}-{:>5} │ {:46} │",
            chips,
            format_params(min_params),
            format_params(max_params),
            category.examples(),
        );
    }
    println!("{}\n", table_rule('└', '┴', '┘', &model_cols));

    // ============================================================
    // 5. Hardware Requirements
    // ============================================================
    println!("═══ Hardware Requirements for Deployment ═══\n");
    // Cell widths: chips, boards, chips per board, power, form factor.
    let hardware_cols: [usize; 5] = [7, 10, 8, 11, 25];
    println!("{}", table_rule('┌', '┬', '┐', &hardware_cols));
    println!(
        "│ {:<7} │ {:<10} │ {:<8} │ {:<11} │ {:<25} │",
        "Chips", "PCBs Req'd", "Chip/PCB", "Power (W)", "Form Factor",
    );
    println!("{}", table_rule('├', '┼', '┤', &hardware_cols));
    for chips in [100, 144, 256, 400, 500] {
        let hw = HardwareConfig::for_cluster(chips);
        println!(
            "│ {:>7} │ {:>10} │ {:>8} │ {:>11.0} │ {:25} │",
            chips,
            hw.num_boards,
            hw.chips_per_board,
            hw.power_supply_watts,
            hw.form_factor,
        );
    }
    println!("{}\n", table_rule('└', '┴', '┘', &hardware_cols));

    println!(" Communication Bus Options:");
    // Cell widths: bus label, formatted bandwidth, usage note.
    let bus_cols: [usize; 3] = [12, 11, 38];
    println!(" {}", table_rule('┌', '┬', '┐', &bus_cols));
    println!(
        " │ {:<12} │ {:<11} │ {:<38} │",
        "Bus Type", "Bandwidth", "Best For",
    );
    println!(" {}", table_rule('├', '┼', '┤', &bus_cols));
    let buses = [
        ("SPI", BusType::Spi, "Small clusters, simple wiring"),
        ("I2C", BusType::I2c, "Slow but many devices"),
        ("UART Mesh", BusType::Uart, "Medium clusters, flexible"),
        ("High-Speed", BusType::HighSpeed, "Large clusters, custom hardware"),
    ];
    for (label, bus, note) in buses {
        println!(
            " │ {:<12} │ {:>11} │ {:<38} │",
            label,
            format_bandwidth(bus.bandwidth_bytes_sec()),
            note,
        );
    }
    println!(" {}\n", table_rule('└', '┴', '┘', &bus_cols));

    // ============================================================
    // 6. Optimization: Find Best Config for Your Needs
    // ============================================================
    println!("═══ Find Your Optimal Configuration ═══\n");

    // By throughput target
    println!(" Target Throughput → Recommended Chips:");
    // Cell widths: target, chips, achieved throughput, cost ('$' plus number).
    let target_cols: [usize; 4] = [19, 7, 14, 8];
    println!(" {}", table_rule('┌', '┬', '┐', &target_cols));
    println!(
        " │ {:<19} │ {:<7} │ {:<14} │ {:<8} │",
        "Target (tok/sec)", "Chips", "Actual Output", "Cost",
    );
    println!(" {}", table_rule('├', '┼', '┤', &target_cols));
    for target in [50_000.0, 60_000.0, 70_000.0, 80_000.0] {
        match MediumScaleAnalyzer::optimize_for_throughput(target) {
            Some(config) => println!(
                " │ {:>19.0} │ {:>7} │ {:>14.0} │ ${:>7.0} │",
                target,
                config.total_chips,
                config.expected_throughput,
                config.cost_usd,
            ),
            // Keep the row visible when the analyzer returns no configuration,
            // rather than silently dropping the target from the table.
            None => println!(
                " │ {:>19.0} │ {:>7} │ {:>14} │ {:>8} │",
                target, "n/a", "n/a", "n/a",
            ),
        }
    }
    println!(" {}\n", table_rule('└', '┴', '┘', &target_cols));

    // By budget
    println!(" Budget → Maximum Configuration:");
    // Cell widths: budget ('$' plus number), chips, throughput,
    // efficiency (number plus '%').
    let budget_cols: [usize; 4] = [19, 7, 14, 10];
    println!(" {}", table_rule('┌', '┬', '┐', &budget_cols));
    println!(
        " │ {:<19} │ {:<7} │ {:<14} │ {:<10} │",
        "Budget ($)", "Chips", "Throughput", "Efficiency",
    );
    println!(" {}", table_rule('├', '┼', '┤', &budget_cols));
    for budget in [500.0, 1000.0, 1500.0, 2000.0] {
        let config = MediumScaleAnalyzer::optimize_for_budget(budget);
        println!(
            " │ ${:>18.0} │ {:>7} │ {:>14.0} │ {:>9.1}% │",
            budget,
            config.total_chips,
            config.expected_throughput,
            config.expected_efficiency * 100.0,
        );
    }
    println!(" {}\n", table_rule('└', '┴', '┘', &budget_cols));

    // ============================================================
    // 7. Summary: The Sweet Spot
    // ============================================================
    println!("{}", banner_rule('╔', '╗'));
    println!("{}", banner_title("MEDIUM SCALE SUMMARY"));
    println!("{}", banner_rule('╠', '╣'));
    let summary = [
        "",
        "  The 100-500 chip range is ideal for:",
        "",
        "  ✓ HOME/OFFICE: 100 chips ($400) = 53K tok/s, 70% efficient",
        "      - Runs Small models (5-20M params)",
        "      - Fits in single rack unit",
        "      - 50W power consumption",
        "",
        "  ✓ WORKSTATION: 256 chips ($1,024) = 88K tok/s, 55% efficient",
        "      - Runs Base models (20-100M params)",
        "      - 2U rack mount",
        "      - 130W power consumption",
        "",
        "  ✓ SERVER: 500 chips ($2,000) = 106K tok/s, 40% efficient",
        "      - Runs Large models (100M+ params)",
        "      - Full rack unit",
        "      - 250W power consumption",
        "",
        "  KEY INSIGHT: Beyond 500 chips, efficiency drops significantly.",
        "  For larger models, use multiple 256-500 chip clusters in parallel.",
        "",
    ];
    for line in &summary {
        println!("{}", banner_line(line));
    }
    println!("{}", banner_rule('╚', '╝'));
}
/// Formats a parameter count as a short human-readable string.
///
/// Counts below 1,000 are printed verbatim. Larger counts are scaled to
/// thousands (`K`), millions (`M`) or billions (`B`) and rounded to a whole
/// number, e.g. `5_000_000` becomes `"5M"`.
///
/// The unit is chosen after accounting for rounding: a value that would round
/// up to 1000 of one unit is shown in the next unit instead, so `999_999`
/// yields `"1M"` rather than `"1000K"`.
fn format_params(n: usize) -> String {
    const UNITS: [(f64, &str); 3] = [(1e3, "K"), (1e6, "M"), (1e9, "B")];

    if n < 1_000 {
        return n.to_string();
    }

    let value = n as f64;
    // Pick the smallest unit whose scaled value still rounds below 1000;
    // anything beyond the largest unit stays in billions.
    let (scale, suffix) = UNITS
        .iter()
        .copied()
        .find(|&(scale, _)| value / scale < 999.5)
        .unwrap_or((1e9, "B"));
    format!("{:.0}{}", value / scale, suffix)
}
/// Renders a bandwidth given in bytes per second as `B/s`, `KB/s` or `MB/s`.
///
/// Decimal (power-of-1000) units are used and the scaled value is truncated
/// by integer division, so `1_999` is shown as `"1 KB/s"`.
fn format_bandwidth(bps: usize) -> String {
    let (divisor, unit) = match bps {
        0..=999 => (1, "B/s"),
        1_000..=999_999 => (1_000, "KB/s"),
        _ => (1_000_000, "MB/s"),
    };
    format!("{} {}", bps / divisor, unit)
}