# RuvLLM ESP32 Cluster Configuration Example
# Copy to cluster.toml and edit ports for your setup

[cluster]
name = "ruvllm-home-cluster"
# Number of chips in the cluster; this example defines five
# [[chips.nodes]] entries to match.
chips = 5
topology = "pipeline"  # Options: pipeline, tensor, hybrid

# Communication settings
[cluster.network]
# UART baud rate between chips.
# NOTE(review): `protocol` below selects "esp-now" while this key is
# described as UART - confirm whether it is used when protocol is not "uart".
baudrate = 921_600
# Inter-chip transport. Options: esp-now, uart, spi
protocol = "esp-now"
# Sync interval in milliseconds.
sync_interval_ms = 100

# Pipeline parallelism: each chip runs a different, contiguous slice of layers.
# A 10-layer model split across 5 chips gives 2 layers per chip.
[chips]

# Master chip - runs layers 0-1, coordinates cluster
[[chips.nodes]]
id = 1
role = "master"
# Serial device for this chip. Keep exactly one `port` line active; the
# commented lines show typical device names on other operating systems.
port = "/dev/ttyUSB0"  # Linux
# port = "/dev/cu.usbserial-0001"  # macOS
# port = "COM3"  # Windows
# Zero-based model layer indices assigned to this chip.
layers = [0, 1]
# NOTE(review): the classic ESP32 has 520 KB of SRAM, so the `_mb` suffix
# looks inconsistent with the value - confirm the unit with the consumer.
ram_mb = 520
features = ["coordinator", "rag-primary"]

# Worker chip 2 - runs layers 2-3
[[chips.nodes]]
id = 2
role = "worker"
# Serial device for this chip; edit to match your setup.
port = "/dev/ttyUSB1"
# Zero-based model layer indices assigned to this chip.
layers = [2, 3]
ram_mb = 520

# Worker chip 3 - runs layers 4-5
[[chips.nodes]]
id = 3
role = "worker"
# Serial device for this chip; edit to match your setup.
port = "/dev/ttyUSB2"
# Zero-based model layer indices assigned to this chip.
layers = [4, 5]
ram_mb = 520

# Worker chip 4 - runs layers 6-7
[[chips.nodes]]
id = 4
role = "worker"
# Serial device for this chip; edit to match your setup.
port = "/dev/ttyUSB3"
# Zero-based model layer indices assigned to this chip.
layers = [6, 7]
ram_mb = 520
# Optional per-node feature list (nodes 2 and 3 omit it).
# NOTE(review): presumably pairs with "rag-primary" on the master - confirm.
features = ["rag-secondary"]

# Worker chip 5 - runs layers 8-9, output projection
[[chips.nodes]]
id = 5
role = "worker"
# Serial device for this chip; edit to match your setup.
port = "/dev/ttyUSB4"
# Zero-based model layer indices assigned to this chip (the last two of ten).
layers = [8, 9]
ram_mb = 520
# Optional per-node feature list (nodes 2 and 3 omit it).
features = ["output-head"]

# Model configuration
[model]
name = "ruvllm-500k"
vocab_size = 1024
# Same value as rag.embedding_dim (128) in this example.
embed_dim = 128
# The chips.nodes `layers` arrays in this example cover indices 0-9,
# i.e. all 10 layers.
num_layers = 10
num_heads = 8
max_seq_len = 64
quantization = "int8"

# RAG configuration (distributed across cluster)
[rag]
enabled = true
# In this example total_vectors = vectors_per_chip * cluster.chips
# (200 * 5 = 1000).
total_vectors = 1000
vectors_per_chip = 200
# Same value as model.embed_dim (128) in this example.
embedding_dim = 128
index_type = "hnsw"

# Speculative decoding (optional)
[speculative]
enabled = false
# NOTE(review): the chip numbers below appear to reference chips.nodes ids
# (1 is the master, 5 is the "output-head" node) - confirm with the consumer.
draft_chips = [1]      # Which chips run draft model
verify_chips = [5]     # Which chips verify
lookahead = 4          # Tokens to speculate