Files
wifi-densepose/vendor/ruvector/crates/prime-radiant/benches/tile_bench.rs

664 lines
18 KiB
Rust

//! Benchmarks for 256-tile parallel tick
//!
//! ADR-014 Performance Target: < 1ms for 256-tile parallel tick
//!
//! The cognitum-gate-kernel provides 256 WASM tiles, each maintaining
//! a local graph shard with E-value accumulation and witness fragments.
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
// ============================================================================
// Tile Types (Simulated, matching cognitum-gate-kernel structure)
// ============================================================================
/// Maximum number of pending deltas a tile buffers between ticks.
///
/// `TileState::ingest_delta` rejects further deltas once this many are queued.
pub const MAX_DELTA_BUFFER: usize = 64;
/// Number of tiles in the fabric.
///
/// `CoherenceFabric::new` narrows tile indices to `u8` ids, so a value above
/// 256 would produce duplicate ids.
pub const NUM_TILES: usize = 256;
/// Maximum vertices per shard; the length of `CompactGraph::vertices`.
pub const MAX_SHARD_VERTICES: usize = 256;
/// Maximum edges per shard; the length of `CompactGraph::edges`.
pub const MAX_SHARD_EDGES: usize = 1024;
/// Kind of change carried by a `Delta`.
///
/// In this simulation `TileState::tick` acts only on `EdgeAdd` and
/// `Observation`; the other variants are accepted into the buffer but ignored
/// when the tick drains it.
#[derive(Clone, Copy)]
pub enum DeltaType {
    /// Add an edge from `source` to `target` with the delta's `weight`.
    EdgeAdd,
    /// Edge removal request; currently ignored by `TileState::tick`.
    EdgeRemove,
    /// Evidence observation; a non-zero `payload` counts as positive.
    Observation,
    /// Weight update request; currently ignored by `TileState::tick`.
    WeightUpdate,
}
/// A single change event queued for a tile.
///
/// Which fields are meaningful depends on `delta_type`; the constructors
/// below zero the fields their variant does not use.
#[derive(Clone, Copy)]
pub struct Delta {
    /// What kind of change this is.
    pub delta_type: DeltaType,
    /// Source vertex of an edge, or the observed vertex for observations.
    pub source: u16,
    /// Target vertex of an edge (0 for observations).
    pub target: u16,
    /// Edge weight (0 for observations).
    pub weight: u16,
    /// Variant-specific payload; for observations, 1 = positive and 0 = negative.
    pub payload: u32,
}

impl Delta {
    /// Builds an `EdgeAdd` delta connecting `src` to `tgt` with `weight`.
    pub fn edge_add(src: u16, tgt: u16, weight: u16) -> Self {
        let delta_type = DeltaType::EdgeAdd;
        Delta {
            delta_type,
            source: src,
            target: tgt,
            weight,
            payload: 0,
        }
    }

    /// Builds an `Observation` delta for `vertex`.
    ///
    /// The polarity is stored in `payload`: 1 when `positive`, otherwise 0.
    pub fn observation(vertex: u16, positive: bool) -> Self {
        let payload: u32 = if positive { 1 } else { 0 };
        Delta {
            delta_type: DeltaType::Observation,
            source: vertex,
            target: 0,
            weight: 0,
            payload,
        }
    }
}
/// Per-vertex bookkeeping stored inline in a graph shard.
#[derive(Clone, Copy, Default)]
pub struct VertexState {
    /// Edge-endpoint counter; `CompactGraph::add_edge` bumps it once per endpoint.
    pub degree: u8,
    /// Component label. NOTE(review): no code visible here assigns it — confirm its producer.
    pub component_id: u8,
    /// Whether the vertex has been touched by at least one edge insertion.
    pub active: bool,
    /// Energy contribution slot. NOTE(review): not written by the visible code — confirm usage.
    pub energy_contrib: f32,
}

impl VertexState {
    /// Returns the `active` flag.
    pub fn is_active(&self) -> bool {
        let VertexState { active, .. } = *self;
        active
    }
}
/// Fixed-size edge record stored inline in a graph shard.
#[derive(Clone, Copy, Default)]
pub struct CompactEdge {
    /// Source vertex id.
    pub source: u16,
    /// Target vertex id.
    pub target: u16,
    /// Edge weight; `CompactGraph::compute_total_energy` divides it by 100.
    pub weight: u16,
    /// Whether the edge takes part in component and energy computations.
    pub active: bool,
}

impl CompactEdge {
    /// Returns the `active` flag.
    pub fn is_active(&self) -> bool {
        let CompactEdge { active, .. } = *self;
        active
    }
}
/// Compact, fixed-capacity graph shard owned by a single tile.
///
/// Storage is two inline arrays, so the structure never allocates. All fields
/// are public; the methods below therefore tolerate an `edge_count` or edge
/// endpoints that were set to out-of-range values from outside.
pub struct CompactGraph {
    /// Per-vertex state, indexed by vertex id.
    pub vertices: [VertexState; MAX_SHARD_VERTICES],
    /// Edge storage; only the first `edge_count` entries are in use.
    pub edges: [CompactEdge; MAX_SHARD_EDGES],
    /// Number of edges stored in `edges`.
    pub edge_count: usize,
    /// Vertex counter. NOTE(review): no method of this type updates it, so it
    /// stays at its initial 0 — confirm whether callers expect it maintained.
    pub vertex_count: usize,
    /// Number of connected components among active vertices as of the last
    /// `recompute_components` call (saturates at `u8::MAX`).
    pub component_count: u8,
}

impl CompactGraph {
    /// Creates an empty graph: no edges, every vertex inactive.
    pub fn new() -> Self {
        Self {
            vertices: [VertexState::default(); MAX_SHARD_VERTICES],
            edges: [CompactEdge::default(); MAX_SHARD_EDGES],
            edge_count: 0,
            vertex_count: 0,
            component_count: 0,
        }
    }

    /// Appends an active edge `src -> tgt` and marks both endpoints active.
    ///
    /// Returns `false` and leaves the graph untouched when the edge array is
    /// full or when either endpoint is not a valid vertex id
    /// (`>= MAX_SHARD_VERTICES`); returns `true` otherwise.
    ///
    /// Each endpoint's `degree` is incremented with saturation, so a vertex
    /// with more than 255 incident endpoints reports 255 instead of
    /// overflowing.
    pub fn add_edge(&mut self, src: u16, tgt: u16, weight: u16) -> bool {
        let (s, t) = (usize::from(src), usize::from(tgt));
        if self.edge_count >= MAX_SHARD_EDGES
            || s >= MAX_SHARD_VERTICES
            || t >= MAX_SHARD_VERTICES
        {
            return false;
        }

        // A self-loop visits the same vertex twice, matching the usual
        // "a loop adds two to the degree" convention.
        for endpoint in [s, t] {
            let vertex = &mut self.vertices[endpoint];
            vertex.active = true;
            vertex.degree = vertex.degree.saturating_add(1);
        }

        self.edges[self.edge_count] = CompactEdge {
            source: src,
            target: tgt,
            weight,
            active: true,
        };
        self.edge_count += 1;
        true
    }

    /// Recomputes `component_count` with a union-find over the active edges.
    ///
    /// Only active vertices are counted. Edges whose endpoints are outside
    /// the vertex array are skipped, and the result saturates at `u8::MAX`.
    pub fn recompute_components(&mut self) {
        // parent[v] is v's parent in the disjoint-set forest; a root points
        // at itself. `u16` matches the vertex id type carried by edges, so
        // the cast below cannot truncate any id an edge can reference.
        let mut parent = [0u16; MAX_SHARD_VERTICES];
        for (vertex, slot) in parent.iter_mut().enumerate() {
            *slot = vertex as u16;
        }

        // Union the endpoints of every active edge. Linking root to root
        // (rather than vertex to vertex) is what keeps earlier unions intact.
        for edge in self.edges.iter().take(self.edge_count) {
            if !edge.active {
                continue;
            }
            let (s, t) = (usize::from(edge.source), usize::from(edge.target));
            if s >= MAX_SHARD_VERTICES || t >= MAX_SHARD_VERTICES {
                continue;
            }
            let root_s = Self::find_root(&mut parent, s);
            let root_t = Self::find_root(&mut parent, t);
            if root_s != root_t {
                parent[root_s] = root_t as u16;
            }
        }

        // Count distinct roots among active vertices.
        let mut seen = [false; MAX_SHARD_VERTICES];
        let mut count = 0usize;
        for vertex in 0..MAX_SHARD_VERTICES {
            if !self.vertices[vertex].active {
                continue;
            }
            let root = Self::find_root(&mut parent, vertex);
            if !seen[root] {
                seen[root] = true;
                count += 1;
            }
        }
        self.component_count = count.min(usize::from(u8::MAX)) as u8;
    }

    /// Returns the representative of `vertex`'s set, shortening the path
    /// behind it (path halving) along the way.
    fn find_root(parent: &mut [u16], mut vertex: usize) -> usize {
        loop {
            let up = usize::from(parent[vertex]);
            if up == vertex {
                return vertex;
            }
            let grand = parent[up];
            parent[vertex] = grand;
            vertex = usize::from(grand);
        }
    }

    /// Sums `weight / 100` over all active stored edges.
    pub fn compute_total_energy(&self) -> f32 {
        self.edges
            .iter()
            .take(self.edge_count)
            .filter(|edge| edge.active)
            // Simplified model: the weight itself is the energy contribution.
            .fold(0.0f32, |energy, edge| energy + f32::from(edge.weight) / 100.0)
    }
}
/// Accumulates evidence per hypothesis as log-space e-values.
pub struct EvidenceAccumulator {
    /// Log e-values in fixed point (value / 65536 = log2(e-value)), one slot
    /// per hypothesis; the vector length is the accumulator's capacity.
    pub log_e_values: Vec<i32>,
    /// Number of slots currently registered as hypotheses.
    pub hypothesis_count: usize,
}

impl EvidenceAccumulator {
    /// Creates an accumulator with `capacity` zeroed slots and no hypotheses.
    pub fn new(capacity: usize) -> Self {
        let mut log_e_values = Vec::with_capacity(capacity);
        log_e_values.resize(capacity, 0i32);
        EvidenceAccumulator {
            log_e_values,
            hypothesis_count: 0,
        }
    }

    /// Registers the next hypothesis and returns its slot index.
    ///
    /// When every slot is already taken nothing is registered and the
    /// returned index equals the capacity, i.e. it is not a usable slot.
    pub fn add_hypothesis(&mut self) -> usize {
        let slot = self.hypothesis_count;
        let has_room = slot < self.log_e_values.len();
        if has_room {
            self.hypothesis_count = slot + 1;
        }
        slot
    }

    /// Adds `log_lr` to hypothesis `idx` with saturation.
    ///
    /// Indices that do not refer to a registered hypothesis are ignored.
    #[inline]
    pub fn update(&mut self, idx: usize, log_lr: i32) {
        if idx >= self.hypothesis_count {
            return;
        }
        let entry = &mut self.log_e_values[idx];
        *entry = entry.saturating_add(log_lr);
    }

    /// Returns the sum of the log e-values of all registered hypotheses,
    /// widened to `i64`.
    pub fn global_log_e(&self) -> i64 {
        let registered = &self.log_e_values[..self.hypothesis_count];
        let mut total = 0i64;
        for &value in registered {
            total += i64::from(value);
        }
        total
    }
}
/// Snapshot a tile emits at the end of a tick.
#[derive(Clone, Copy)]
pub struct TileReport {
    /// Id of the reporting tile.
    pub tile_id: u8,
    /// Tick number the report belongs to.
    pub tick: u32,
    /// Whether the tile's graph has at most one component.
    pub connected: bool,
    /// Component count of the tile's graph.
    pub component_count: u8,
    /// Summed log e-value of the tile's hypotheses.
    pub log_e_value: i64,
    /// Total energy of the tile's graph.
    pub energy: f32,
    /// Hash mixing tile id, tick and edge count.
    pub witness_hash: u64,
}

impl TileReport {
    /// Creates the baseline report for `tile_id`: tick 0, connected with a
    /// single component, and zero evidence, energy and witness hash.
    pub fn new(tile_id: u8) -> Self {
        let (tick, log_e_value, witness_hash) = (0u32, 0i64, 0u64);
        TileReport {
            tile_id,
            tick,
            connected: true,
            component_count: 1,
            log_e_value,
            energy: 0.0,
            witness_hash,
        }
    }
}
/// Single tile state
pub struct TileState {
pub tile_id: u8,
pub graph: CompactGraph,
pub evidence: EvidenceAccumulator,
pub delta_buffer: Vec<Delta>,
pub tick_count: u32,
}
impl TileState {
pub fn new(tile_id: u8) -> Self {
Self {
tile_id,
graph: CompactGraph::new(),
evidence: EvidenceAccumulator::new(64),
delta_buffer: Vec::with_capacity(MAX_DELTA_BUFFER),
tick_count: 0,
}
}
pub fn ingest_delta(&mut self, delta: &Delta) -> bool {
if self.delta_buffer.len() >= MAX_DELTA_BUFFER {
return false;
}
self.delta_buffer.push(*delta);
true
}
pub fn tick(&mut self, tick_number: u32) -> TileReport {
// Process pending deltas
for delta in self.delta_buffer.drain(..) {
match delta.delta_type {
DeltaType::EdgeAdd => {
self.graph
.add_edge(delta.source, delta.target, delta.weight);
}
DeltaType::Observation => {
// Update evidence accumulator
let log_lr = if delta.payload != 0 { 65536 } else { -65536 };
if self.evidence.hypothesis_count > 0 {
self.evidence.update(0, log_lr);
}
}
_ => {}
}
}
// Recompute components if needed
self.graph.recompute_components();
// Compute energy
let energy = self.graph.compute_total_energy();
// Build report
self.tick_count = tick_number;
TileReport {
tile_id: self.tile_id,
tick: tick_number,
connected: self.graph.component_count <= 1,
component_count: self.graph.component_count,
log_e_value: self.evidence.global_log_e(),
energy,
witness_hash: self.compute_witness_hash(),
}
}
fn compute_witness_hash(&self) -> u64 {
let mut hash = self.tile_id as u64;
hash = hash.wrapping_mul(0x517cc1b727220a95);
hash ^= self.tick_count as u64;
hash = hash.wrapping_mul(0x517cc1b727220a95);
hash ^= self.graph.edge_count as u64;
hash
}
pub fn reset(&mut self) {
self.graph = CompactGraph::new();
self.delta_buffer.clear();
self.tick_count = 0;
}
}
/// The full set of `NUM_TILES` tiles plus helpers to drive and summarise them.
pub struct CoherenceFabric {
    /// Tiles in id order; the tile at index `i` was created with id `i as u8`.
    pub tiles: Vec<TileState>,
}

impl CoherenceFabric {
    /// Creates a fabric of `NUM_TILES` empty tiles.
    pub fn new() -> Self {
        let mut tiles = Vec::with_capacity(NUM_TILES);
        for index in 0..NUM_TILES {
            tiles.push(TileState::new(index as u8));
        }
        CoherenceFabric { tiles }
    }

    /// Ticks every tile in index order and returns their reports in that order.
    pub fn tick_sequential(&mut self, tick_number: u32) -> Vec<TileReport> {
        let mut reports = Vec::with_capacity(self.tiles.len());
        for tile in &mut self.tiles {
            reports.push(tile.tick(tick_number));
        }
        reports
    }

    /// Folds per-tile reports into a single fabric-level report.
    ///
    /// Energy and log e-values are summed, connectivity is the conjunction
    /// over all tiles, the witness hashes are chained in slice order, and the
    /// tick is taken from the first report (0 for an empty slice).
    pub fn aggregate_reports(reports: &[TileReport]) -> FabricReport {
        const MIX: u64 = 0x517cc1b727220a95;
        let global_witness_hash = reports
            .iter()
            .fold(0u64, |acc, report| acc.wrapping_mul(MIX) ^ report.witness_hash);
        let tick = match reports.first() {
            Some(head) => head.tick,
            None => 0,
        };
        FabricReport {
            tick,
            total_energy: reports.iter().map(|report| report.energy).sum(),
            total_log_e: reports.iter().map(|report| report.log_e_value).sum(),
            all_connected: !reports.iter().any(|report| !report.connected),
            global_witness_hash,
        }
    }

    /// Routes `delta` to tile `node_id % NUM_TILES`.
    ///
    /// The delta is dropped silently if that tile's buffer is full.
    pub fn distribute_delta(&mut self, node_id: u64, delta: &Delta) {
        let slot = (node_id % NUM_TILES as u64) as usize;
        let _accepted = self.tiles[slot].ingest_delta(delta);
    }
}
/// Fabric-wide summary produced by `CoherenceFabric::aggregate_reports`.
pub struct FabricReport {
    /// Tick of the first aggregated tile report, or 0 when there were none.
    pub tick: u32,
    /// Sum of the tiles' energies.
    pub total_energy: f32,
    /// Sum of the tiles' log e-values.
    pub total_log_e: i64,
    /// True when every aggregated tile reported itself connected.
    pub all_connected: bool,
    /// Tile witness hashes chained together in report order.
    pub global_witness_hash: u64,
}
// ============================================================================
// Benchmarks
// ============================================================================
/// Benchmark single tile tick
fn bench_single_tile_tick(c: &mut Criterion) {
let mut group = c.benchmark_group("tile_single_tick");
group.throughput(Throughput::Elements(1));
// Empty tick
let mut tile = TileState::new(0);
group.bench_function("empty", |b| b.iter(|| black_box(tile.tick(black_box(1)))));
// Tick with small graph
let mut tile = TileState::new(0);
for i in 0..20u16 {
tile.ingest_delta(&Delta::edge_add(i, i + 1, 100));
}
tile.tick(0);
group.bench_function("small_graph_20_edges", |b| {
b.iter(|| black_box(tile.tick(black_box(1))))
});
// Tick with pending deltas
group.bench_function("with_10_deltas", |b| {
b.iter_batched(
|| {
let mut t = TileState::new(0);
for i in 0..10u16 {
t.ingest_delta(&Delta::edge_add(i, i + 1, 100));
}
t
},
|mut t| black_box(t.tick(1)),
criterion::BatchSize::SmallInput,
)
});
// Tick with full delta buffer
group.bench_function("with_64_deltas", |b| {
b.iter_batched(
|| {
let mut t = TileState::new(0);
for i in 0..MAX_DELTA_BUFFER as u16 {
t.ingest_delta(&Delta::edge_add(i % 200, (i + 1) % 200, 100));
}
t
},
|mut t| black_box(t.tick(1)),
criterion::BatchSize::SmallInput,
)
});
group.finish();
}
/// Benchmarks ticking all `NUM_TILES` tiles one after another — the
/// sequential baseline for the 256-tile tick target.
fn bench_256_tile_tick_sequential(c: &mut Criterion) {
    let mut group = c.benchmark_group("tile_256_sequential");
    group.throughput(Throughput::Elements(NUM_TILES as u64));

    // Fabric whose tiles hold no edges and no pending deltas.
    let mut empty_fabric = CoherenceFabric::new();
    group.bench_function("empty_fabric", |b| {
        b.iter(|| black_box(empty_fabric.tick_sequential(black_box(1))))
    });

    // Fabric whose tiles each carry a 10-edge chain, applied by a warm-up
    // tick before measurement starts.
    let mut populated_fabric = CoherenceFabric::new();
    for tile in populated_fabric.tiles.iter_mut() {
        (0..10u16).for_each(|j| {
            tile.ingest_delta(&Delta::edge_add(j, j + 1, 100));
        });
        tile.tick(0);
    }
    group.bench_function("populated_10_edges_per_tile", |b| {
        b.iter(|| black_box(populated_fabric.tick_sequential(black_box(1))))
    });

    group.finish();
}
/// Benchmarks `CoherenceFabric::aggregate_reports` over `NUM_TILES`
/// synthetic tile reports.
///
/// The reports are generated once, outside the measured closure, with a mix
/// of connected/disconnected tiles and positive/negative log e-values.
fn bench_report_aggregation(c: &mut Criterion) {
    let mut group = c.benchmark_group("tile_report_aggregation");
    group.throughput(Throughput::Elements(NUM_TILES as u64));
    // Generate 256 reports
    let reports: Vec<TileReport> = (0..NUM_TILES)
        .map(|i| TileReport {
            tile_id: i as u8,
            tick: 1,
            connected: i % 10 != 0,
            component_count: (i % 5) as u8 + 1,
            log_e_value: (i as i64) * 1000 - 128000,
            energy: (i as f32) * 0.1,
            // The product exceeds `u64::MAX` from i = 4 onwards. Wrapping is
            // the intended hash-style mixing; a plain `*` would panic in any
            // build with overflow checks enabled.
            witness_hash: (i as u64).wrapping_mul(0x517cc1b727220a95),
        })
        .collect();
    group.bench_function("aggregate_256_reports", |b| {
        b.iter(|| black_box(CoherenceFabric::aggregate_reports(black_box(&reports))))
    });
    group.finish();
}
/// Benchmark delta distribution
fn bench_delta_distribution(c: &mut Criterion) {
let mut group = c.benchmark_group("tile_delta_distribution");
let mut fabric = CoherenceFabric::new();
// Single delta
let delta = Delta::edge_add(0, 1, 100);
group.bench_function("distribute_single", |b| {
b.iter(|| fabric.distribute_delta(black_box(12345), black_box(&delta)))
});
// Batch distribution
for batch_size in [100, 1000, 10000] {
let deltas: Vec<(u64, Delta)> = (0..batch_size)
.map(|i| {
(
i as u64,
Delta::edge_add((i % 200) as u16, ((i + 1) % 200) as u16, 100),
)
})
.collect();
group.throughput(Throughput::Elements(batch_size as u64));
group.bench_with_input(
BenchmarkId::new("distribute_batch", batch_size),
&deltas,
|b, deltas| {
b.iter(|| {
for (node_id, delta) in deltas {
fabric.distribute_delta(*node_id, delta);
}
})
},
);
}
group.finish();
}
/// Benchmark evidence accumulator
fn bench_evidence_accumulator(c: &mut Criterion) {
let mut group = c.benchmark_group("tile_evidence");
let mut acc = EvidenceAccumulator::new(64);
for _ in 0..16 {
acc.add_hypothesis();
}
// Single update
group.bench_function("update_single", |b| {
b.iter(|| acc.update(black_box(5), black_box(65536)))
});
// Global e-value computation
group.bench_function("global_log_e_16_hyp", |b| {
b.iter(|| black_box(acc.global_log_e()))
});
// 64 hypotheses
let mut acc64 = EvidenceAccumulator::new(64);
for _ in 0..64 {
acc64.add_hypothesis();
}
for i in 0..64 {
acc64.log_e_values[i] = (i as i32 - 32) * 1000;
}
group.bench_function("global_log_e_64_hyp", |b| {
b.iter(|| black_box(acc64.global_log_e()))
});
group.finish();
}
/// Benchmarks `CompactGraph::recompute_components` on ring-shaped graphs
/// over 200 vertices with 50, 200, 500 and 1000 edges.
fn bench_component_recompute(c: &mut Criterion) {
    let mut group = c.benchmark_group("tile_component_recompute");

    for edge_count in [50usize, 200, 500, 1000] {
        // Edge i joins vertex i to vertex i + 1, both wrapped at 200.
        let endpoints = (0..edge_count.min(MAX_SHARD_EDGES))
            .map(|i| ((i % 200) as u16, ((i + 1) % 200) as u16))
            .filter(|(src, tgt)| src != tgt);

        let mut graph = CompactGraph::new();
        for (src, tgt) in endpoints {
            graph.add_edge(src, tgt, 100);
        }

        let id = BenchmarkId::new("recompute", edge_count);
        group.bench_with_input(id, &edge_count, |b, _| {
            b.iter(|| {
                graph.recompute_components();
                black_box(graph.component_count)
            })
        });
    }

    group.finish();
}
/// Benchmarks one complete cycle: tick every tile sequentially, then fold
/// the reports into a fabric report.
fn bench_full_cycle(c: &mut Criterion) {
    let mut group = c.benchmark_group("tile_full_cycle");
    group.sample_size(50);

    // Give every tile a 50-edge graph, applied by a warm-up tick.
    let mut fabric = CoherenceFabric::new();
    for tile in fabric.tiles.iter_mut() {
        for j in 0..50u16 {
            tile.ingest_delta(&Delta::edge_add(j, (j + 1) % 200, 100));
        }
        tile.tick(0);
    }

    group.bench_function("tick_and_aggregate_256_tiles", |b| {
        // Advances by one per measured iteration; starts again at 1 whenever
        // this outer closure is invoked.
        let mut next_tick = 1u32;
        b.iter(|| {
            let tile_reports = fabric.tick_sequential(next_tick);
            let summary = CoherenceFabric::aggregate_reports(&tile_reports);
            next_tick += 1;
            black_box(summary)
        })
    });

    group.finish();
}
/// Benchmarks reading one field (`graph.edge_count`) from every tile, once
/// in index order and once with a stride of 7 tiles.
fn bench_memory_patterns(c: &mut Criterion) {
    let mut group = c.benchmark_group("tile_memory");
    let fabric = CoherenceFabric::new();

    // Visit tiles 0, 1, 2, ... in order.
    group.bench_function("sequential_tile_scan", |b| {
        b.iter(|| {
            let total: usize = fabric.tiles.iter().map(|tile| tile.graph.edge_count).sum();
            black_box(total)
        })
    });

    // Visit tiles 0, 7, 14, ... wrapping modulo NUM_TILES, NUM_TILES reads in total.
    group.bench_function("strided_tile_scan", |b| {
        let stride = 7usize;
        b.iter(|| {
            let total: usize = (0..NUM_TILES)
                .map(|step| fabric.tiles[(step * stride) % NUM_TILES].graph.edge_count)
                .sum();
            black_box(total)
        })
    });

    group.finish();
}
// Collects every benchmark function of this file into the `benches` group;
// they run in the order listed.
criterion_group!(
    benches,
    bench_single_tile_tick,
    bench_256_tile_tick_sequential,
    bench_report_aggregation,
    bench_delta_distribution,
    bench_evidence_accumulator,
    bench_component_recompute,
    bench_full_cycle,
    bench_memory_patterns,
);
// Generates the benchmark binary's entry point for the `benches` group.
criterion_main!(benches);