Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,176 @@
//! Federation Module for Multi-Chip Distributed Inference
//!
//! Supports:
//! - Pipeline parallelism (layers across chips)
//! - Tensor parallelism (attention heads across chips)
//! - Speculative decoding (draft/verify)
//! - SPI/I2C/UART/ESP-NOW communication
pub mod protocol;
pub mod pipeline;
pub mod speculative;
pub use protocol::{
ChipId, MessageType, MessageHeader, FederationMessage, CommStats,
MAX_ACTIVATION_SIZE, MAX_PAYLOAD_SIZE,
};
pub use pipeline::{
PipelineNode, PipelineConfig, PipelineRole, PipelineState, PipelineStats,
InFlightToken, calculate_pipeline_efficiency,
MAX_LAYERS_PER_CHIP, MAX_PIPELINE_DEPTH,
};
pub use speculative::{
SpeculativeDecoder, DraftVerifyConfig, DraftResult, VerifyResult, SpecStats,
MAX_DRAFT_TOKENS,
};
/// Maximum chips in federation
pub const MAX_FEDERATION_SIZE: usize = 8;
/// Federation mode
///
/// Strategy used to distribute inference work across the chips in the
/// federation (see the module docs for the parallelism schemes).
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FederationMode {
    /// Single chip, no inter-chip communication.
    Standalone,
    /// Layers split across chips; activations flow chip-to-chip.
    Pipeline,
    /// Attention heads split across chips.
    TensorParallel,
    /// Combined pipeline + tensor parallelism (see `estimate_speedup`).
    Hybrid,
    /// Draft/verify speculative decoding.
    Speculative,
    /// Mixture-of-experts routing across chips.
    MixtureOfExperts,
}
/// Communication bus type
///
/// Physical transport connecting the chips; each variant carries rough
/// bandwidth/latency figures used by the planning heuristics.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CommunicationBus {
    Spi,
    I2c,
    Uart,
    EspNow,
    Parallel,
}

impl CommunicationBus {
    /// Nominal sustained bandwidth of the link, in bytes per second.
    pub const fn bandwidth_bytes_per_sec(&self) -> usize {
        match *self {
            CommunicationBus::Parallel => 20_000_000,
            CommunicationBus::Spi => 10_000_000,
            CommunicationBus::Uart => 500_000,
            CommunicationBus::EspNow => 125_000,
            CommunicationBus::I2c => 100_000,
        }
    }

    /// Typical one-way message latency, in microseconds.
    pub const fn latency_us(&self) -> usize {
        match *self {
            CommunicationBus::Parallel => 5,
            CommunicationBus::Spi => 10,
            CommunicationBus::Uart => 20,
            CommunicationBus::I2c => 50,
            CommunicationBus::EspNow => 500,
        }
    }
}
/// Federation configuration
///
/// Describes this chip's place in the cluster and how work is split.
#[derive(Debug, Clone)]
pub struct FederationConfig {
    /// Total number of chips participating in the federation.
    pub num_chips: usize,
    /// This chip's identity on the bus.
    pub chip_id: ChipId,
    /// Parallelism strategy in use.
    pub mode: FederationMode,
    /// Physical transport connecting the chips.
    pub bus: CommunicationBus,
    /// Model layers assigned to this chip (pipeline mode).
    pub layers_per_chip: usize,
    /// Attention heads assigned to this chip (tensor-parallel mode).
    pub heads_per_chip: usize,
    /// Allow multiple tokens in flight through the pipeline.
    pub enable_pipelining: bool,
}
impl Default for FederationConfig {
    /// Defaults for a 5-chip SPI pipeline: this chip is id 0, two layers
    /// per chip, one head, pipelining enabled.
    fn default() -> Self {
        Self {
            num_chips: 5,
            chip_id: ChipId(0),
            mode: FederationMode::Pipeline,
            bus: CommunicationBus::Spi,
            layers_per_chip: 2,
            heads_per_chip: 1,
            enable_pipelining: true,
        }
    }
}
/// Calculate optimal federation config
///
/// Chooses pipeline parallelism when an even split of `model_size` fits in
/// `per_chip_ram`, otherwise falls back to tensor parallelism (splitting
/// attention heads instead of layers).
///
/// `num_chips == 0` is clamped to 1 instead of panicking on the
/// divide-by-zero the original code performed.
pub fn calculate_optimal_config(
    model_size: usize,
    num_layers: usize,
    num_heads: usize,
    num_chips: usize,
    per_chip_ram: usize,
) -> FederationConfig {
    // Defensive clamp: a zero-chip federation makes no sense and would
    // otherwise panic on the divisions below.
    let num_chips = num_chips.max(1);
    let model_per_chip = model_size / num_chips;
    if model_per_chip <= per_chip_ram {
        // Pipeline mode: spread layers across chips (ceiling division so
        // every layer is assigned); each chip keeps all heads.
        let layers_per_chip = (num_layers + num_chips - 1) / num_chips;
        FederationConfig {
            num_chips,
            chip_id: ChipId(0),
            mode: FederationMode::Pipeline,
            bus: CommunicationBus::Spi,
            layers_per_chip,
            heads_per_chip: num_heads,
            enable_pipelining: true,
        }
    } else {
        // Tensor-parallel mode: spread heads across chips; every chip holds
        // all layers, so pipelining is disabled.
        let heads_per_chip = (num_heads + num_chips - 1) / num_chips;
        FederationConfig {
            num_chips,
            chip_id: ChipId(0),
            mode: FederationMode::TensorParallel,
            bus: CommunicationBus::Spi,
            layers_per_chip: num_layers,
            heads_per_chip,
            enable_pipelining: false,
        }
    }
}
/// Federation speedup estimates
///
/// All factors are relative to a single standalone chip (1.0 = unchanged).
#[derive(Debug, Clone)]
pub struct FederationSpeedup {
    /// Tokens-per-second multiplier.
    pub throughput_multiplier: f32,
    /// Per-token latency improvement factor.
    pub latency_reduction: f32,
    /// Factor by which per-chip memory use shrinks.
    pub memory_per_chip_reduction: f32,
}
/// Estimate the speedup of `config` relative to a single standalone chip.
///
/// The efficiency factors (0.85 pipeline, 0.7 tensor-parallel, etc.) are
/// fixed heuristics, not measurements.
pub fn estimate_speedup(config: &FederationConfig) -> FederationSpeedup {
    let n = config.num_chips as f32;
    match config.mode {
        // Baseline: one chip, nothing changes.
        FederationMode::Standalone => FederationSpeedup {
            throughput_multiplier: 1.0,
            latency_reduction: 1.0,
            memory_per_chip_reduction: 1.0,
        },
        // Near-linear throughput; per-token latency slightly worse due to
        // inter-stage hops; memory divides evenly across stages.
        FederationMode::Pipeline => FederationSpeedup {
            throughput_multiplier: n * 0.85,
            latency_reduction: 1.0 / (1.0 + 0.1 * (n - 1.0)),
            memory_per_chip_reduction: n,
        },
        // Heads computed concurrently: both throughput and latency improve,
        // at higher communication cost (0.7 factor).
        FederationMode::TensorParallel => FederationSpeedup {
            throughput_multiplier: n * 0.7,
            latency_reduction: n * 0.7,
            memory_per_chip_reduction: n * 0.8,
        },
        // Mix of the two schemes above.
        FederationMode::Hybrid => FederationSpeedup {
            throughput_multiplier: n * 0.75,
            latency_reduction: (n / 2.0) * 0.8,
            memory_per_chip_reduction: n * 0.9,
        },
        // Draft/verify gives a chip-count-independent constant gain and no
        // memory savings (the model is replicated).
        FederationMode::Speculative => FederationSpeedup {
            throughput_multiplier: 2.5,
            latency_reduction: 2.0,
            memory_per_chip_reduction: 1.0,
        },
        // One expert per chip: high throughput, experts split memory.
        FederationMode::MixtureOfExperts => FederationSpeedup {
            throughput_multiplier: n * 0.9,
            latency_reduction: 1.5,
            memory_per_chip_reduction: n,
        },
    }
}

View File

@@ -0,0 +1,180 @@
//! Pipeline Parallelism for Multi-ESP32 Inference
use heapless::Vec as HVec;
use super::protocol::{ChipId, FederationMessage};
pub const MAX_LAYERS_PER_CHIP: usize = 4;
pub const MAX_PIPELINE_DEPTH: usize = 8;
/// Position of a chip within the pipeline chain.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PipelineRole { Head, Middle, Tail, Standalone }
/// Static description of one chip's slice of the layer pipeline.
#[derive(Debug, Clone)]
pub struct PipelineConfig {
    /// Total chips in the pipeline.
    pub num_chips: usize,
    /// This chip's 0-based position in the chain.
    pub position: usize,
    /// First (absolute) layer index handled by this chip.
    pub layer_start: usize,
    /// Number of consecutive layers handled by this chip.
    pub layer_count: usize,
    /// Layer count of the whole model.
    pub total_layers: usize,
    /// Activation vector width.
    pub embed_dim: usize,
    /// Tokens per micro-batch (`for_chip` always sets 1).
    pub micro_batch_size: usize,
}
impl PipelineConfig {
    /// Build the configuration for the chip at `chip_pos` in a
    /// `num_chips`-stage pipeline over `total_layers` layers.
    ///
    /// Layers are distributed with ceiling division, so earlier chips may
    /// carry one more layer than later ones. If `chip_pos`'s slot starts
    /// past the last layer (more chips than layer slots), `layer_count` is
    /// 0 — the original `total_layers - layer_start` underflowed and
    /// panicked in debug builds in that case.
    pub fn for_chip(chip_pos: usize, num_chips: usize, total_layers: usize, embed_dim: usize) -> Self {
        let layers_per_chip = (total_layers + num_chips - 1) / num_chips;
        let layer_start = chip_pos * layers_per_chip;
        // saturating_sub: layer_start can exceed total_layers when
        // num_chips * layers_per_chip > total_layers; such chips get 0 layers.
        let layer_count = layers_per_chip.min(total_layers.saturating_sub(layer_start));
        Self { num_chips, position: chip_pos, layer_start, layer_count, total_layers, embed_dim, micro_batch_size: 1 }
    }

    /// Role of this chip inside the pipeline topology.
    pub fn role(&self) -> PipelineRole {
        if self.num_chips == 1 { PipelineRole::Standalone }
        else if self.position == 0 { PipelineRole::Head }
        else if self.position == self.num_chips - 1 { PipelineRole::Tail }
        else { PipelineRole::Middle }
    }

    /// Upstream neighbour, `None` for the head chip.
    pub fn prev_chip(&self) -> Option<ChipId> {
        if self.position > 0 { Some(ChipId((self.position - 1) as u8)) } else { None }
    }

    /// Downstream neighbour, `None` for the tail chip.
    pub fn next_chip(&self) -> Option<ChipId> {
        if self.position + 1 < self.num_chips { Some(ChipId((self.position + 1) as u8)) } else { None }
    }
}
/// Scheduling state of a pipeline node.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PipelineState { WaitingInput, Processing, WaitingSend, Idle }
/// A token currently travelling through this chip's layer range.
#[derive(Debug, Clone)]
pub struct InFlightToken {
    /// Sequence position of the token.
    pub seq_pos: u16,
    /// Token id (0 for activations received mid-pipeline, where the id is unknown).
    pub token_id: u16,
    /// Next absolute layer index to run for this token.
    pub current_layer: u8,
    /// Quantized (i8) activation vector.
    pub activation: HVec<i8, 128>,
}
/// One chip's runtime state in the layer pipeline.
pub struct PipelineNode {
    /// Static layer-range assignment for this chip.
    config: PipelineConfig,
    /// Current scheduling state.
    state: PipelineState,
    /// Bus identity, derived from `config.position`.
    chip_id: ChipId,
    /// Monotonic counter assigning sequence positions to new tokens.
    seq_counter: u16,
    /// Tokens currently being processed on this chip.
    in_flight: HVec<InFlightToken, MAX_PIPELINE_DEPTH>,
    /// Tokens that finished this chip's layers, awaiting send/consumption.
    output_queue: HVec<InFlightToken, MAX_PIPELINE_DEPTH>,
    /// Id counter for barrier messages.
    barrier_counter: u16,
}
impl PipelineNode {
    /// Create a node for the given pipeline slot; the chip id is derived
    /// from the config's position.
    pub fn new(config: PipelineConfig) -> Self {
        Self {
            chip_id: ChipId(config.position as u8),
            config,
            state: PipelineState::Idle,
            seq_counter: 0,
            in_flight: HVec::new(),
            output_queue: HVec::new(),
            barrier_counter: 0,
        }
    }

    /// Current scheduling state of this node.
    pub fn state(&self) -> PipelineState { self.state }

    /// True if this chip runs the embedding stage (head of the pipeline).
    pub fn handles_embedding(&self) -> bool { matches!(self.config.role(), PipelineRole::Head | PipelineRole::Standalone) }

    /// True if this chip produces the final output (tail of the pipeline).
    pub fn handles_output(&self) -> bool { matches!(self.config.role(), PipelineRole::Tail | PipelineRole::Standalone) }

    /// Inject a fresh token at the head of the pipeline.
    ///
    /// # Errors
    /// `UnsupportedFeature` on non-head chips; `BufferOverflow` when the
    /// in-flight queue is full.
    pub fn start_token(&mut self, token_id: u16) -> crate::Result<()> {
        if !self.handles_embedding() { return Err(crate::Error::UnsupportedFeature("Not head chip")); }
        if self.in_flight.len() >= MAX_PIPELINE_DEPTH { return Err(crate::Error::BufferOverflow); }
        let token = InFlightToken { seq_pos: self.seq_counter, token_id, current_layer: 0, activation: HVec::new() };
        self.in_flight.push(token).map_err(|_| crate::Error::BufferOverflow)?;
        self.seq_counter += 1;
        self.state = PipelineState::Processing;
        Ok(())
    }

    /// Enqueue an activation received from the upstream chip.
    pub fn receive_activation(&mut self, msg: &FederationMessage) -> crate::Result<()> {
        let (layer_idx, position, data) = msg.get_activation_data()
            .ok_or(crate::Error::InvalidModel("Invalid activation"))?;
        let mut activation = HVec::new();
        for &d in data { activation.push(d as i8).map_err(|_| crate::Error::BufferOverflow)?; }
        let token = InFlightToken { seq_pos: position, token_id: 0, current_layer: layer_idx, activation };
        self.in_flight.push(token).map_err(|_| crate::Error::BufferOverflow)?;
        self.state = PipelineState::Processing;
        Ok(())
    }

    /// Run one layer of the oldest in-flight token.
    ///
    /// Returns `Ok(false)` when there is nothing to process. When the token
    /// finishes this chip's layer range it is moved (FIFO) to the output
    /// queue and the state becomes `WaitingSend`.
    pub fn process_step<F>(&mut self, mut layer_fn: F) -> crate::Result<bool>
    where F: FnMut(usize, &mut [i8]) -> crate::Result<()>
    {
        if self.in_flight.is_empty() {
            self.state = PipelineState::WaitingInput;
            return Ok(false);
        }
        let token = &mut self.in_flight[0];
        // saturating_sub guards against a message whose layer index is below
        // this chip's range: the original subtraction underflowed (panic in
        // debug, wrap in release); such a token is clamped to our first layer.
        let relative_layer = (token.current_layer as usize).saturating_sub(self.config.layer_start);
        if relative_layer < self.config.layer_count {
            let layer_idx = self.config.layer_start + relative_layer;
            layer_fn(layer_idx, &mut token.activation)?;
            token.current_layer += 1;
        }
        let next = token.current_layer as usize;
        if next >= self.config.layer_start + self.config.layer_count {
            // BUGFIX: the original `pop()`ed the *back* of `in_flight`,
            // completing the newest token although layers were run on the
            // oldest (index 0). Remove the front token we actually processed.
            let completed = self.in_flight.remove(0);
            self.output_queue.push(completed).map_err(|_| crate::Error::BufferOverflow)?;
            self.state = PipelineState::WaitingSend;
        }
        Ok(true)
    }

    /// Take the oldest completed token and wrap it in an activation message
    /// for the downstream chip.
    ///
    /// Returns `None` when the queue is empty or when there is no next chip
    /// (tail — use `get_final_output` there). BUGFIX: the original popped
    /// the token *before* checking for a next chip, silently dropping
    /// completed tokens on tail chips, and it emitted tokens in LIFO order.
    pub fn get_output(&mut self) -> Option<FederationMessage> {
        let next_chip = self.config.next_chip()?;
        if self.output_queue.is_empty() { return None; }
        let token = self.output_queue.remove(0);
        // Pass the activation slice directly; the original made a redundant
        // element-by-element copy into a second heapless Vec first.
        FederationMessage::activation(self.chip_id, next_chip, token.seq_pos, token.current_layer, token.seq_pos, &token.activation).ok()
    }

    /// Whether a finished token is waiting on the tail chip.
    pub fn has_final_output(&self) -> bool { self.handles_output() && !self.output_queue.is_empty() }

    /// Take the oldest finished activation on the tail chip (FIFO; the
    /// original popped LIFO, reordering outputs).
    pub fn get_final_output(&mut self) -> Option<HVec<i8, 128>> {
        if !self.handles_output() { return None; }
        if self.output_queue.is_empty() { return None; }
        Some(self.output_queue.remove(0).activation)
    }

    /// Snapshot of queue depths and progress counters.
    pub fn stats(&self) -> PipelineStats {
        PipelineStats {
            in_flight_count: self.in_flight.len(),
            output_queue_len: self.output_queue.len(),
            tokens_processed: self.seq_counter as usize,
            current_state: self.state,
        }
    }

    /// Build the next broadcast barrier message (increments the barrier id).
    pub fn create_barrier(&mut self) -> FederationMessage {
        self.barrier_counter += 1;
        FederationMessage::barrier(self.chip_id, self.barrier_counter)
    }
}
/// Snapshot of a node's queue depths and progress counters.
#[derive(Debug, Clone)]
pub struct PipelineStats {
    /// Tokens currently being processed on this chip.
    pub in_flight_count: usize,
    /// Completed tokens waiting to be sent or consumed.
    pub output_queue_len: usize,
    /// Tokens started on this chip so far (only head chips increment this).
    pub tokens_processed: usize,
    /// Node scheduling state at snapshot time.
    pub current_state: PipelineState,
}
/// Pipeline utilization in `[0, 1]`.
///
/// During ramp-up (`tokens <= num_chips`) only `1/num_chips` of the stages
/// are busy on average; for longer streams the `num_chips - 1` fill/drain
/// bubble amortizes away. Returns `0.0` for zero tokens — the original
/// computed `0.0 / 0.0` and returned NaN.
pub fn calculate_pipeline_efficiency(num_chips: usize, tokens: usize) -> f32 {
    if tokens == 0 {
        return 0.0;
    }
    if tokens <= num_chips {
        // Equivalent to 1 / num_chips; kept in ratio form for clarity.
        tokens as f32 / (num_chips as f32 * tokens as f32)
    } else {
        tokens as f32 / (tokens as f32 + (num_chips - 1) as f32)
    }
}

View File

@@ -0,0 +1,187 @@
//! Inter-Chip Communication Protocol
use heapless::Vec as HVec;
pub const MAX_ACTIVATION_SIZE: usize = 256;
pub const MAX_PAYLOAD_SIZE: usize = 512;
pub const PROTOCOL_VERSION: u8 = 1;
/// Logical identifier of a chip on the federation bus.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct ChipId(pub u8);

impl ChipId {
    /// Reserved id addressing every chip at once.
    pub const BROADCAST: ChipId = ChipId(0xFF);

    /// Whether this id is the broadcast address.
    pub fn is_broadcast(&self) -> bool { *self == Self::BROADCAST }
}
/// Wire-level message discriminator.
///
/// Ranges: 0x0x control, 0x1x tensor data, 0x2x embedding/logits/tokens,
/// 0x3x speculative decoding, 0x4x synchronization, 0xFF error.
#[derive(Debug, Clone, Copy, PartialEq)]
#[repr(u8)]
pub enum MessageType {
    Heartbeat = 0x00,
    Discovery = 0x01,
    Ready = 0x02,
    Activation = 0x10,
    KVCache = 0x11,
    Gradient = 0x12,
    EmbedRequest = 0x20,
    EmbedResponse = 0x21,
    Logits = 0x22,
    Token = 0x23,
    DraftTokens = 0x30,
    VerifyResult = 0x31,
    Barrier = 0x40,
    Ack = 0x41,
    Error = 0xFF,
}

impl From<u8> for MessageType {
    /// Decode a raw discriminator byte; unknown values map to `Error`.
    fn from(v: u8) -> Self {
        match v {
            0x00 => Self::Heartbeat,
            0x01 => Self::Discovery,
            0x02 => Self::Ready,
            0x10 => Self::Activation,
            0x11 => Self::KVCache,
            0x12 => Self::Gradient,
            0x20 => Self::EmbedRequest,
            0x21 => Self::EmbedResponse,
            0x22 => Self::Logits,
            0x23 => Self::Token,
            0x30 => Self::DraftTokens,
            0x31 => Self::VerifyResult,
            0x40 => Self::Barrier,
            0x41 => Self::Ack,
            _ => Self::Error,
        }
    }
}
/// Fixed 8-byte wire header preceding every message payload.
///
/// `repr(C, packed)` keeps the in-memory size at exactly 8 bytes;
/// multi-byte fields are serialized little-endian by `to_bytes`.
#[derive(Debug, Clone, Copy)]
#[repr(C, packed)]
pub struct MessageHeader {
    /// Protocol version (`PROTOCOL_VERSION`).
    pub version: u8,
    /// Raw `MessageType` discriminator.
    pub msg_type: u8,
    /// Sender chip id.
    pub src: u8,
    /// Destination chip id (0xFF = broadcast).
    pub dst: u8,
    /// Message sequence number.
    pub seq: u16,
    /// Payload length in bytes.
    pub payload_len: u16,
}
impl MessageHeader {
    /// Serialized size of the header in bytes.
    pub const SIZE: usize = 8;

    /// Assemble a header for an outgoing message.
    pub fn new(msg_type: MessageType, src: ChipId, dst: ChipId, seq: u16, payload_len: u16) -> Self {
        Self {
            version: PROTOCOL_VERSION,
            msg_type: msg_type as u8,
            src: src.0,
            dst: dst.0,
            seq,
            payload_len,
        }
    }

    /// Serialize to 8 bytes; `seq` and `payload_len` are little-endian.
    pub fn to_bytes(&self) -> [u8; 8] {
        let seq = self.seq.to_le_bytes();
        let len = self.payload_len.to_le_bytes();
        [self.version, self.msg_type, self.src, self.dst, seq[0], seq[1], len[0], len[1]]
    }

    /// Parse a header from the first 8 bytes of `b`; `None` if too short.
    pub fn from_bytes(b: &[u8]) -> Option<Self> {
        if b.len() < Self::SIZE { return None; }
        Some(Self {
            version: b[0],
            msg_type: b[1],
            src: b[2],
            dst: b[3],
            seq: u16::from_le_bytes([b[4], b[5]]),
            payload_len: u16::from_le_bytes([b[6], b[7]]),
        })
    }

    /// Wrapping-additive checksum over the serialized header bytes.
    pub fn checksum(&self) -> u8 {
        self.to_bytes().iter().fold(0u8, |acc, &b| acc.wrapping_add(b))
    }
}
/// A complete wire message: header, variable payload, trailing checksum.
#[derive(Debug, Clone)]
pub struct FederationMessage {
    /// Fixed 8-byte header.
    pub header: MessageHeader,
    /// Message-type-specific payload bytes.
    pub payload: HVec<u8, MAX_PAYLOAD_SIZE>,
    /// Wrapping-additive checksum over header bytes and payload.
    pub checksum: u8,
}
impl FederationMessage {
pub fn new(msg_type: MessageType, src: ChipId, dst: ChipId, seq: u16) -> Self {
Self {
header: MessageHeader::new(msg_type, src, dst, seq, 0),
payload: HVec::new(),
checksum: 0,
}
}
pub fn activation(src: ChipId, dst: ChipId, seq: u16, layer: u8, pos: u16, data: &[i8]) -> crate::Result<Self> {
let mut msg = Self::new(MessageType::Activation, src, dst, seq);
msg.payload.push(layer).map_err(|_| crate::Error::BufferOverflow)?;
msg.payload.push((pos & 0xFF) as u8).map_err(|_| crate::Error::BufferOverflow)?;
msg.payload.push((pos >> 8) as u8).map_err(|_| crate::Error::BufferOverflow)?;
for &d in data {
msg.payload.push(d as u8).map_err(|_| crate::Error::BufferOverflow)?;
}
msg.header.payload_len = msg.payload.len() as u16;
msg.update_checksum();
Ok(msg)
}
pub fn token(src: ChipId, dst: ChipId, seq: u16, token_id: u16) -> Self {
let mut msg = Self::new(MessageType::Token, src, dst, seq);
let _ = msg.payload.push((token_id & 0xFF) as u8);
let _ = msg.payload.push((token_id >> 8) as u8);
msg.header.payload_len = 2;
msg.update_checksum();
msg
}
pub fn draft_tokens(src: ChipId, dst: ChipId, seq: u16, tokens: &[u16]) -> crate::Result<Self> {
let mut msg = Self::new(MessageType::DraftTokens, src, dst, seq);
msg.payload.push(tokens.len() as u8).map_err(|_| crate::Error::BufferOverflow)?;
for &t in tokens {
msg.payload.push((t & 0xFF) as u8).map_err(|_| crate::Error::BufferOverflow)?;
msg.payload.push((t >> 8) as u8).map_err(|_| crate::Error::BufferOverflow)?;
}
msg.header.payload_len = msg.payload.len() as u16;
msg.update_checksum();
Ok(msg)
}
pub fn barrier(src: ChipId, barrier_id: u16) -> Self {
let mut msg = Self::new(MessageType::Barrier, src, ChipId::BROADCAST, 0);
let _ = msg.payload.push((barrier_id & 0xFF) as u8);
let _ = msg.payload.push((barrier_id >> 8) as u8);
msg.header.payload_len = 2;
msg.update_checksum();
msg
}
pub fn update_checksum(&mut self) {
let mut sum = self.header.checksum();
for &b in &self.payload { sum = sum.wrapping_add(b); }
self.checksum = sum;
}
pub fn verify_checksum(&self) -> bool {
let mut sum = self.header.checksum();
for &b in &self.payload { sum = sum.wrapping_add(b); }
sum == self.checksum
}
pub fn to_bytes(&self) -> HVec<u8, { MAX_PAYLOAD_SIZE + 16 }> {
let mut bytes = HVec::new();
for b in self.header.to_bytes() { let _ = bytes.push(b); }
for &b in &self.payload { let _ = bytes.push(b); }
let _ = bytes.push(self.checksum);
bytes
}
pub fn get_activation_data(&self) -> Option<(u8, u16, &[u8])> {
if self.header.msg_type != MessageType::Activation as u8 || self.payload.len() < 3 { return None; }
Some((self.payload[0], (self.payload[1] as u16) | ((self.payload[2] as u16) << 8), &self.payload[3..]))
}
pub fn get_token(&self) -> Option<u16> {
if self.header.msg_type != MessageType::Token as u8 || self.payload.len() < 2 { return None; }
Some((self.payload[0] as u16) | ((self.payload[1] as u16) << 8))
}
}
/// Link-level traffic and error counters.
#[derive(Debug, Default, Clone)]
pub struct CommStats {
    /// Messages transmitted.
    pub messages_sent: u32,
    /// Messages received.
    pub messages_received: u32,
    /// Bytes transmitted.
    pub bytes_sent: u32,
    /// Bytes received.
    pub bytes_received: u32,
    /// Checksum verification failures on receive.
    pub checksum_errors: u32,
    /// Receive operations that timed out.
    pub timeouts: u32,
}

View File

@@ -0,0 +1,146 @@
//! Speculative Decoding - Draft and Verify
use heapless::Vec as HVec;
use super::protocol::{ChipId, FederationMessage};
pub const MAX_DRAFT_TOKENS: usize = 8;
/// Configuration for draft/verify speculative decoding.
#[derive(Debug, Clone)]
pub struct DraftVerifyConfig {
    /// Number of tokens drafted per round.
    pub draft_length: usize,
    /// Accept a token when its verify prob >= draft prob * this factor.
    pub acceptance_threshold: f32,
    /// Chip that produces the drafts.
    pub draft_chip: ChipId,
    /// Chips that verify the drafted tokens.
    pub verify_chips: HVec<ChipId, 4>,
    /// Adapt `draft_length` to the observed acceptance rate.
    pub adaptive: bool,
}
impl Default for DraftVerifyConfig {
    /// Defaults: 4-token drafts, 0.9 acceptance factor, chip 0 drafts,
    /// no verify chips registered, adaptive length enabled.
    fn default() -> Self {
        Self { draft_length: 4, acceptance_threshold: 0.9, draft_chip: ChipId(0), verify_chips: HVec::new(), adaptive: true }
    }
}
impl DraftVerifyConfig {
    /// Preset for a 5-chip cluster: chip 0 drafts, chips 1-4 verify.
    pub fn for_five_chips() -> Self {
        let mut verify_chips = HVec::new();
        for id in 1u8..5 {
            // Capacity is 4 and exactly 4 ids are pushed, so this cannot fail.
            let _ = verify_chips.push(ChipId(id));
        }
        // All remaining fields match the `Default` values.
        Self { verify_chips, ..Self::default() }
    }
}
/// Tokens proposed by the draft chip for one speculation round.
#[derive(Debug, Clone)]
pub struct DraftResult {
    /// Drafted token ids, in sequence order.
    pub tokens: HVec<u16, MAX_DRAFT_TOKENS>,
    /// Draft-model probability per token, fixed-point 0-255.
    pub probs: HVec<u8, MAX_DRAFT_TOKENS>,
    /// Sequence position of the first drafted token.
    pub start_pos: u16,
}
/// Outcome of verifying one draft.
#[derive(Debug, Clone)]
pub struct VerifyResult {
    /// Length of the accepted prefix of the draft.
    pub accepted_count: usize,
    /// Replacement token for the first rejected position, if any.
    pub correction: Option<u16>,
    /// Verifier probability (0-255) for each draft token examined.
    pub verify_probs: HVec<u8, MAX_DRAFT_TOKENS>,
}
/// Per-chip state machine for draft/verify speculative decoding.
pub struct SpeculativeDecoder {
    /// Static draft/verify settings.
    config: DraftVerifyConfig,
    /// True when this chip is the configured draft chip.
    is_draft_chip: bool,
    /// Exponential moving average of the per-round acceptance fraction.
    acceptance_rate: f32,
    /// Draft awaiting its verification result.
    pending_draft: Option<DraftResult>,
    /// Cumulative draft/verify counters.
    stats: SpecStats,
}
impl SpeculativeDecoder {
    /// Create a decoder for `chip_id`; the chip matching
    /// `config.draft_chip` takes the drafting role.
    pub fn new(config: DraftVerifyConfig, chip_id: ChipId) -> Self {
        let is_draft = chip_id == config.draft_chip;
        Self {
            config,
            is_draft_chip: is_draft,
            // Optimistic prior; refined by an EMA as verifications arrive.
            acceptance_rate: 0.9,
            pending_draft: None,
            stats: SpecStats::default(),
        }
    }

    /// Whether this chip is the drafting chip.
    pub fn is_drafter(&self) -> bool { self.is_draft_chip }

    /// Broadcast a draft to the verify chips and remember it until the
    /// verification result arrives.
    ///
    /// # Errors
    /// `UnsupportedFeature` on non-draft chips; propagates `BufferOverflow`
    /// from message construction.
    pub fn submit_draft(&mut self, draft: DraftResult) -> crate::Result<FederationMessage> {
        if !self.is_draft_chip { return Err(crate::Error::UnsupportedFeature("Not draft chip")); }
        // Pass the token slice directly; the original cloned it into a
        // second heapless Vec for no benefit.
        let msg = FederationMessage::draft_tokens(self.config.draft_chip, ChipId::BROADCAST, draft.start_pos, &draft.tokens)?;
        self.pending_draft = Some(draft);
        self.stats.drafts_sent += 1;
        Ok(msg)
    }

    /// Verify drafted tokens against this chip's model.
    ///
    /// `get_prob(pos, token)` returns the verifier's probability (0-255)
    /// for `token` at position `pos`. Tokens are accepted in order until
    /// one falls below `acceptance_threshold` times its draft probability;
    /// the first rejection produces a correction token and stops.
    pub fn verify_draft<F>(&mut self, draft: &DraftResult, mut get_prob: F) -> VerifyResult
    where F: FnMut(u16, u16) -> u8
    {
        let mut accepted = 0;
        let mut correction = None;
        let mut verify_probs = HVec::new();
        for (i, &token) in draft.tokens.iter().enumerate() {
            let pos = draft.start_pos + i as u16;
            let verify_prob = get_prob(pos, token);
            let _ = verify_probs.push(verify_prob);
            // Missing draft prob defaults to 128 (~0.5 in 0-255 fixed point).
            let draft_prob = draft.probs.get(i).copied().unwrap_or(128);
            let threshold = (draft_prob as f32 * self.config.acceptance_threshold) as u8;
            if verify_prob >= threshold {
                accepted += 1;
            } else {
                // NOTE(review): placeholder correction — proper resampling
                // from the verifier distribution should replace this.
                correction = Some(token.wrapping_add(1));
                break;
            }
        }
        VerifyResult { accepted_count: accepted, correction, verify_probs }
    }

    /// Apply a verification result to the pending draft, returning the
    /// tokens to commit (accepted prefix plus optional correction) and
    /// updating the acceptance-rate EMA.
    pub fn process_verification(&mut self, result: &VerifyResult) -> HVec<u16, MAX_DRAFT_TOKENS> {
        let mut accepted_tokens = HVec::new();
        if let Some(ref draft) = self.pending_draft {
            for &token in draft.tokens.iter().take(result.accepted_count) {
                let _ = accepted_tokens.push(token);
            }
            if let Some(correct) = result.correction {
                let _ = accepted_tokens.push(correct);
            }
            self.stats.tokens_accepted += result.accepted_count;
            // saturating_sub: guard against a result claiming more accepted
            // tokens than were drafted (would underflow-panic in debug).
            self.stats.tokens_rejected += draft.tokens.len().saturating_sub(result.accepted_count);
            // BUGFIX: an empty draft previously divided by zero, turning the
            // acceptance EMA into NaN permanently. Skip the update instead.
            if !draft.tokens.is_empty() {
                let rate = result.accepted_count as f32 / draft.tokens.len() as f32;
                self.acceptance_rate = 0.9 * self.acceptance_rate + 0.1 * rate;
            }
        }
        self.pending_draft = None;
        accepted_tokens
    }

    /// Draft length to use next round; grows when acceptance is excellent,
    /// shrinks when it is poor.
    pub fn adaptive_draft_length(&self) -> usize {
        if !self.config.adaptive { return self.config.draft_length; }
        if self.acceptance_rate > 0.95 { (self.config.draft_length + 2).min(MAX_DRAFT_TOKENS) }
        else if self.acceptance_rate > 0.8 { self.config.draft_length }
        // saturating_sub: a configured draft_length of 0 would underflow in
        // the original `draft_length - 1`.
        else if self.acceptance_rate > 0.5 { self.config.draft_length.saturating_sub(1).max(1) }
        else { 1 }
    }

    /// Rough tokens-per-step speedup estimate (1.2 = verify-pass overhead).
    pub fn estimated_speedup(&self) -> f32 {
        let avg = self.acceptance_rate * self.adaptive_draft_length() as f32;
        avg / 1.2
    }

    /// Cumulative draft/verify counters.
    pub fn stats(&self) -> &SpecStats { &self.stats }
}
/// Cumulative speculative-decoding counters.
#[derive(Debug, Default, Clone)]
pub struct SpecStats {
    /// Draft batches broadcast so far.
    pub drafts_sent: usize,
    /// Drafted tokens the verifiers accepted.
    pub tokens_accepted: usize,
    /// Drafted tokens the verifiers rejected.
    pub tokens_rejected: usize,
}

impl SpecStats {
    /// Fraction of drafted tokens that were accepted; `0.0` before any
    /// verification has happened.
    pub fn acceptance_rate(&self) -> f32 {
        match self.tokens_accepted + self.tokens_rejected {
            0 => 0.0,
            total => self.tokens_accepted as f32 / total as f32,
        }
    }
}