//! Benchmarks for 256-tile parallel tick //! //! ADR-014 Performance Target: < 1ms for 256-tile parallel tick //! //! The cognitum-gate-kernel provides 256 WASM tiles, each maintaining //! a local graph shard with E-value accumulation and witness fragments. use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; // ============================================================================ // Tile Types (Simulated, matching cognitum-gate-kernel structure) // ============================================================================ /// Maximum delta buffer per tile pub const MAX_DELTA_BUFFER: usize = 64; /// Number of tiles in fabric pub const NUM_TILES: usize = 256; /// Maximum vertices per shard pub const MAX_SHARD_VERTICES: usize = 256; /// Maximum edges per shard pub const MAX_SHARD_EDGES: usize = 1024; /// Delta operation type #[derive(Clone, Copy)] pub enum DeltaType { EdgeAdd, EdgeRemove, Observation, WeightUpdate, } /// Delta (change event) for tile #[derive(Clone, Copy)] pub struct Delta { pub delta_type: DeltaType, pub source: u16, pub target: u16, pub weight: u16, pub payload: u32, } impl Delta { pub fn edge_add(src: u16, tgt: u16, weight: u16) -> Self { Self { delta_type: DeltaType::EdgeAdd, source: src, target: tgt, weight, payload: 0, } } pub fn observation(vertex: u16, positive: bool) -> Self { Self { delta_type: DeltaType::Observation, source: vertex, target: 0, weight: 0, payload: positive as u32, } } } /// Compact vertex state #[derive(Clone, Copy, Default)] pub struct VertexState { pub degree: u8, pub component_id: u8, pub active: bool, pub energy_contrib: f32, } impl VertexState { pub fn is_active(&self) -> bool { self.active } } /// Compact edge #[derive(Clone, Copy, Default)] pub struct CompactEdge { pub source: u16, pub target: u16, pub weight: u16, pub active: bool, } impl CompactEdge { pub fn is_active(&self) -> bool { self.active } } /// Compact graph for single tile pub struct CompactGraph { pub vertices: [VertexState; MAX_SHARD_VERTICES], pub edges: [CompactEdge; MAX_SHARD_EDGES], pub edge_count: usize, pub vertex_count: usize, pub component_count: u8, } impl CompactGraph { pub fn new() -> Self { Self { vertices: [VertexState::default(); MAX_SHARD_VERTICES], edges: [CompactEdge::default(); MAX_SHARD_EDGES], edge_count: 0, vertex_count: 0, component_count: 0, } } pub fn add_edge(&mut self, src: u16, tgt: u16, weight: u16) -> bool { if self.edge_count >= MAX_SHARD_EDGES { return false; } // Activate vertices self.vertices[src as usize].active = true; self.vertices[src as usize].degree += 1; self.vertices[tgt as usize].active = true; self.vertices[tgt as usize].degree += 1; // Add edge self.edges[self.edge_count] = CompactEdge { source: src, target: tgt, weight, active: true, }; self.edge_count += 1; true } pub fn recompute_components(&mut self) { // Simple union-find simulation let mut parent = [0u8; MAX_SHARD_VERTICES]; for i in 0..MAX_SHARD_VERTICES { parent[i] = i as u8; } // Union edges for edge in &self.edges[..self.edge_count] { if edge.active { let s = edge.source as usize; let t = edge.target as usize; parent[s] = parent[t]; } } // Count unique components let mut seen = [false; MAX_SHARD_VERTICES]; let mut count = 0u8; for i in 0..MAX_SHARD_VERTICES { if self.vertices[i].active && !seen[parent[i] as usize] { seen[parent[i] as usize] = true; count += 1; } } self.component_count = count; } pub fn compute_total_energy(&self) -> f32 { let mut energy = 0.0f32; for edge in &self.edges[..self.edge_count] { if edge.active { // Simplified: weight as energy contribution energy += edge.weight as f32 / 100.0; } } energy } } /// E-value accumulator (log-space evidence) pub struct EvidenceAccumulator { /// Log e-value (fixed-point: value / 65536 = log2(e-value)) pub log_e_values: Vec, pub hypothesis_count: usize, } impl EvidenceAccumulator { pub fn new(capacity: usize) -> Self { Self { log_e_values: vec![0; capacity], hypothesis_count: 0, } } pub fn add_hypothesis(&mut self) -> usize { let idx = self.hypothesis_count; if idx < self.log_e_values.len() { self.hypothesis_count += 1; } idx } #[inline] pub fn update(&mut self, idx: usize, log_lr: i32) { if idx < self.hypothesis_count { self.log_e_values[idx] = self.log_e_values[idx].saturating_add(log_lr); } } pub fn global_log_e(&self) -> i64 { self.log_e_values[..self.hypothesis_count] .iter() .map(|&v| v as i64) .sum() } } /// Tile report (output of tick) #[derive(Clone, Copy)] pub struct TileReport { pub tile_id: u8, pub tick: u32, pub connected: bool, pub component_count: u8, pub log_e_value: i64, pub energy: f32, pub witness_hash: u64, } impl TileReport { pub fn new(tile_id: u8) -> Self { Self { tile_id, tick: 0, connected: true, component_count: 1, log_e_value: 0, energy: 0.0, witness_hash: 0, } } } /// Single tile state pub struct TileState { pub tile_id: u8, pub graph: CompactGraph, pub evidence: EvidenceAccumulator, pub delta_buffer: Vec, pub tick_count: u32, } impl TileState { pub fn new(tile_id: u8) -> Self { Self { tile_id, graph: CompactGraph::new(), evidence: EvidenceAccumulator::new(64), delta_buffer: Vec::with_capacity(MAX_DELTA_BUFFER), tick_count: 0, } } pub fn ingest_delta(&mut self, delta: &Delta) -> bool { if self.delta_buffer.len() >= MAX_DELTA_BUFFER { return false; } self.delta_buffer.push(*delta); true } pub fn tick(&mut self, tick_number: u32) -> TileReport { // Process pending deltas for delta in self.delta_buffer.drain(..) { match delta.delta_type { DeltaType::EdgeAdd => { self.graph .add_edge(delta.source, delta.target, delta.weight); } DeltaType::Observation => { // Update evidence accumulator let log_lr = if delta.payload != 0 { 65536 } else { -65536 }; if self.evidence.hypothesis_count > 0 { self.evidence.update(0, log_lr); } } _ => {} } } // Recompute components if needed self.graph.recompute_components(); // Compute energy let energy = self.graph.compute_total_energy(); // Build report self.tick_count = tick_number; TileReport { tile_id: self.tile_id, tick: tick_number, connected: self.graph.component_count <= 1, component_count: self.graph.component_count, log_e_value: self.evidence.global_log_e(), energy, witness_hash: self.compute_witness_hash(), } } fn compute_witness_hash(&self) -> u64 { let mut hash = self.tile_id as u64; hash = hash.wrapping_mul(0x517cc1b727220a95); hash ^= self.tick_count as u64; hash = hash.wrapping_mul(0x517cc1b727220a95); hash ^= self.graph.edge_count as u64; hash } pub fn reset(&mut self) { self.graph = CompactGraph::new(); self.delta_buffer.clear(); self.tick_count = 0; } } /// 256-tile coherence fabric pub struct CoherenceFabric { pub tiles: Vec, } impl CoherenceFabric { pub fn new() -> Self { Self { tiles: (0..NUM_TILES).map(|i| TileState::new(i as u8)).collect(), } } /// Execute tick on all tiles sequentially pub fn tick_sequential(&mut self, tick_number: u32) -> Vec { self.tiles.iter_mut().map(|t| t.tick(tick_number)).collect() } /// Aggregate reports into global coherence pub fn aggregate_reports(reports: &[TileReport]) -> FabricReport { let total_energy: f32 = reports.iter().map(|r| r.energy).sum(); let total_log_e: i64 = reports.iter().map(|r| r.log_e_value).sum(); let all_connected = reports.iter().all(|r| r.connected); // Compute global witness hash let mut global_hash = 0u64; for r in reports { global_hash = global_hash.wrapping_mul(0x517cc1b727220a95); global_hash ^= r.witness_hash; } FabricReport { tick: reports.first().map(|r| r.tick).unwrap_or(0), total_energy, total_log_e, all_connected, global_witness_hash: global_hash, } } /// Distribute delta to appropriate tile pub fn distribute_delta(&mut self, node_id: u64, delta: &Delta) { let tile_id = (node_id % NUM_TILES as u64) as usize; self.tiles[tile_id].ingest_delta(delta); } } /// Aggregated fabric report pub struct FabricReport { pub tick: u32, pub total_energy: f32, pub total_log_e: i64, pub all_connected: bool, pub global_witness_hash: u64, } // ============================================================================ // Benchmarks // ============================================================================ /// Benchmark single tile tick fn bench_single_tile_tick(c: &mut Criterion) { let mut group = c.benchmark_group("tile_single_tick"); group.throughput(Throughput::Elements(1)); // Empty tick let mut tile = TileState::new(0); group.bench_function("empty", |b| b.iter(|| black_box(tile.tick(black_box(1))))); // Tick with small graph let mut tile = TileState::new(0); for i in 0..20u16 { tile.ingest_delta(&Delta::edge_add(i, i + 1, 100)); } tile.tick(0); group.bench_function("small_graph_20_edges", |b| { b.iter(|| black_box(tile.tick(black_box(1)))) }); // Tick with pending deltas group.bench_function("with_10_deltas", |b| { b.iter_batched( || { let mut t = TileState::new(0); for i in 0..10u16 { t.ingest_delta(&Delta::edge_add(i, i + 1, 100)); } t }, |mut t| black_box(t.tick(1)), criterion::BatchSize::SmallInput, ) }); // Tick with full delta buffer group.bench_function("with_64_deltas", |b| { b.iter_batched( || { let mut t = TileState::new(0); for i in 0..MAX_DELTA_BUFFER as u16 { t.ingest_delta(&Delta::edge_add(i % 200, (i + 1) % 200, 100)); } t }, |mut t| black_box(t.tick(1)), criterion::BatchSize::SmallInput, ) }); group.finish(); } /// Benchmark 256-tile parallel tick (sequential baseline) fn bench_256_tile_tick_sequential(c: &mut Criterion) { let mut group = c.benchmark_group("tile_256_sequential"); group.throughput(Throughput::Elements(NUM_TILES as u64)); // Empty fabric let mut fabric = CoherenceFabric::new(); group.bench_function("empty_fabric", |b| { b.iter(|| black_box(fabric.tick_sequential(black_box(1)))) }); // Fabric with some data per tile let mut fabric = CoherenceFabric::new(); for i in 0..NUM_TILES { for j in 0..10u16 { fabric.tiles[i].ingest_delta(&Delta::edge_add(j, j + 1, 100)); } fabric.tiles[i].tick(0); } group.bench_function("populated_10_edges_per_tile", |b| { b.iter(|| black_box(fabric.tick_sequential(black_box(1)))) }); group.finish(); } /// Benchmark report aggregation fn bench_report_aggregation(c: &mut Criterion) { let mut group = c.benchmark_group("tile_report_aggregation"); group.throughput(Throughput::Elements(NUM_TILES as u64)); // Generate 256 reports let reports: Vec = (0..NUM_TILES) .map(|i| TileReport { tile_id: i as u8, tick: 1, connected: i % 10 != 0, component_count: (i % 5) as u8 + 1, log_e_value: (i as i64) * 1000 - 128000, energy: (i as f32) * 0.1, witness_hash: i as u64 * 0x517cc1b727220a95, }) .collect(); group.bench_function("aggregate_256_reports", |b| { b.iter(|| black_box(CoherenceFabric::aggregate_reports(black_box(&reports)))) }); group.finish(); } /// Benchmark delta distribution fn bench_delta_distribution(c: &mut Criterion) { let mut group = c.benchmark_group("tile_delta_distribution"); let mut fabric = CoherenceFabric::new(); // Single delta let delta = Delta::edge_add(0, 1, 100); group.bench_function("distribute_single", |b| { b.iter(|| fabric.distribute_delta(black_box(12345), black_box(&delta))) }); // Batch distribution for batch_size in [100, 1000, 10000] { let deltas: Vec<(u64, Delta)> = (0..batch_size) .map(|i| { ( i as u64, Delta::edge_add((i % 200) as u16, ((i + 1) % 200) as u16, 100), ) }) .collect(); group.throughput(Throughput::Elements(batch_size as u64)); group.bench_with_input( BenchmarkId::new("distribute_batch", batch_size), &deltas, |b, deltas| { b.iter(|| { for (node_id, delta) in deltas { fabric.distribute_delta(*node_id, delta); } }) }, ); } group.finish(); } /// Benchmark evidence accumulator fn bench_evidence_accumulator(c: &mut Criterion) { let mut group = c.benchmark_group("tile_evidence"); let mut acc = EvidenceAccumulator::new(64); for _ in 0..16 { acc.add_hypothesis(); } // Single update group.bench_function("update_single", |b| { b.iter(|| acc.update(black_box(5), black_box(65536))) }); // Global e-value computation group.bench_function("global_log_e_16_hyp", |b| { b.iter(|| black_box(acc.global_log_e())) }); // 64 hypotheses let mut acc64 = EvidenceAccumulator::new(64); for _ in 0..64 { acc64.add_hypothesis(); } for i in 0..64 { acc64.log_e_values[i] = (i as i32 - 32) * 1000; } group.bench_function("global_log_e_64_hyp", |b| { b.iter(|| black_box(acc64.global_log_e())) }); group.finish(); } /// Benchmark component recomputation fn bench_component_recompute(c: &mut Criterion) { let mut group = c.benchmark_group("tile_component_recompute"); for edge_count in [50, 200, 500, 1000] { let mut graph = CompactGraph::new(); for i in 0..edge_count.min(MAX_SHARD_EDGES) { let src = (i % 200) as u16; let tgt = ((i + 1) % 200) as u16; if src != tgt { graph.add_edge(src, tgt, 100); } } group.bench_with_input( BenchmarkId::new("recompute", edge_count), &edge_count, |b, _| { b.iter(|| { graph.recompute_components(); black_box(graph.component_count) }) }, ); } group.finish(); } /// Benchmark full tick + aggregate cycle fn bench_full_cycle(c: &mut Criterion) { let mut group = c.benchmark_group("tile_full_cycle"); group.sample_size(50); // Populate fabric let mut fabric = CoherenceFabric::new(); for i in 0..NUM_TILES { for j in 0..50u16 { fabric.tiles[i].ingest_delta(&Delta::edge_add(j, (j + 1) % 200, 100)); } fabric.tiles[i].tick(0); } group.bench_function("tick_and_aggregate_256_tiles", |b| { let mut tick = 1u32; b.iter(|| { let reports = fabric.tick_sequential(tick); let fabric_report = CoherenceFabric::aggregate_reports(&reports); tick += 1; black_box(fabric_report) }) }); group.finish(); } /// Benchmark memory access patterns fn bench_memory_patterns(c: &mut Criterion) { let mut group = c.benchmark_group("tile_memory"); // Sequential tile access let fabric = CoherenceFabric::new(); group.bench_function("sequential_tile_scan", |b| { b.iter(|| { let mut total = 0usize; for tile in &fabric.tiles { total += tile.graph.edge_count; } black_box(total) }) }); // Strided tile access group.bench_function("strided_tile_scan", |b| { let stride = 7; b.iter(|| { let mut total = 0usize; let mut idx = 0; for _ in 0..NUM_TILES { total += fabric.tiles[idx % NUM_TILES].graph.edge_count; idx += stride; } black_box(total) }) }); group.finish(); } criterion_group!( benches, bench_single_tile_tick, bench_256_tile_tick_sequential, bench_report_aggregation, bench_delta_distribution, bench_evidence_accumulator, bench_component_recompute, bench_full_cycle, bench_memory_patterns, ); criterion_main!(benches);