# RuvLLM ESP32 Cluster Configuration Example
# Copy to cluster.toml and edit ports for your setup

[cluster]
name = "ruvllm-home-cluster"
# Number of chips in the cluster. This example defines five
# [[chips.nodes]] entries, so keep the two in sync when adding or removing chips.
chips = 5
topology = "pipeline" # Options: pipeline, tensor, hybrid

# Communication settings
[cluster.network]
# NOTE(review): the baud rate is described as a UART setting while `protocol`
# below selects esp-now; presumably it only applies to a UART link — confirm.
baudrate = 921600 # UART between chips
protocol = "esp-now" # Options: esp-now, uart, spi
sync_interval_ms = 100

# Pipeline parallelism: each chip runs a different slice of the model's layers.
# 10-layer model spread over 5 chips = 2 layers per chip.
[chips]

# Master chip - runs layers 0-1, coordinates cluster
[[chips.nodes]]
id = 1
role = "master"
# Serial port for this chip; keep exactly one `port` line active for your OS.
port = "/dev/ttyUSB0" # Linux
# port = "/dev/cu.usbserial-0001" # macOS
# port = "COM3" # Windows
layers = [0, 1]
# NOTE(review): the ESP32 has 520 KB of internal SRAM, so the `_mb` suffix
# looks like a unit mismatch — confirm how the consumer interprets this value.
ram_mb = 520
features = ["coordinator", "rag-primary"]

# Worker chip 2 - runs layers 2-3
[[chips.nodes]]
id = 2
role = "worker"
port = "/dev/ttyUSB1"
layers = [2, 3]
ram_mb = 520

# Worker chip 3 - runs layers 4-5
[[chips.nodes]]
id = 3
role = "worker"
port = "/dev/ttyUSB2"
layers = [4, 5]
ram_mb = 520

# Worker chip 4 - runs layers 6-7, also tagged as the secondary RAG node
[[chips.nodes]]
id = 4
role = "worker"
port = "/dev/ttyUSB3"
layers = [6, 7]
ram_mb = 520
features = ["rag-secondary"]

# Worker chip 5 - runs layers 8-9, output projection
[[chips.nodes]]
id = 5
role = "worker"
port = "/dev/ttyUSB4"
layers = [8, 9]
ram_mb = 520
features = ["output-head"]

# Model configuration
[model]
name = "ruvllm-500k"
vocab_size = 1024
embed_dim = 128
# Matches the layers 0-9 assigned across the [[chips.nodes]] entries.
num_layers = 10
# 128 / 8 = 16 dims per head, assuming the usual even split of embed_dim.
num_heads = 8
max_seq_len = 64
quantization = "int8"

# RAG configuration (distributed across cluster)
[rag]
enabled = true
# 5 chips x 200 vectors_per_chip = 1000; update both values together.
total_vectors = 1000
vectors_per_chip = 200
# Same value as model.embed_dim in this example.
embedding_dim = 128
index_type = "hnsw"

# Speculative decoding (optional)
[speculative]
enabled = false
# NOTE(review): the chip lists presumably hold `id` values from
# [[chips.nodes]] — confirm against the consumer.
draft_chips = [1] # Which chips run draft model
verify_chips = [5] # Which chips verify
lookahead = 4 # Tokens to speculate