//! Benchmarks for 256-tile parallel tick
//!
//! ADR-014 Performance Target: < 1ms for 256-tile parallel tick
//!
//! The cognitum-gate-kernel provides 256 WASM tiles, each maintaining
//! a local graph shard with E-value accumulation and witness fragments.

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};

// ============================================================================
// Tile Types (Simulated, matching cognitum-gate-kernel structure)
// ============================================================================

/// Capacity of a tile's pending-delta buffer; `TileState::ingest_delta`
/// refuses further deltas once this many are queued.
pub const MAX_DELTA_BUFFER: usize = 64;

/// Number of tiles that make up the fabric.
pub const NUM_TILES: usize = 256;

/// Upper bound on the vertices stored in one tile's graph shard.
pub const MAX_SHARD_VERTICES: usize = 256;

/// Upper bound on the edges stored in one tile's graph shard.
pub const MAX_SHARD_EDGES: usize = 1024;

/// Kind of change carried by a `Delta`.
///
/// `TileState::tick` acts on `EdgeAdd` and `Observation`; the remaining
/// variants are accepted into the buffer but ignored by the simulated tile.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DeltaType {
    /// Add an edge between the delta's `source` and `target` vertices.
    EdgeAdd,
    /// Edge-removal request (not processed by the simulated tick).
    EdgeRemove,
    /// Evidence observation; a non-zero `payload` counts as positive.
    Observation,
    /// Weight-change request (not processed by the simulated tick).
    WeightUpdate,
}

/// Delta (change event) for tile
|
|
#[derive(Clone, Copy)]
|
|
pub struct Delta {
|
|
pub delta_type: DeltaType,
|
|
pub source: u16,
|
|
pub target: u16,
|
|
pub weight: u16,
|
|
pub payload: u32,
|
|
}
|
|
|
|
impl Delta {
|
|
pub fn edge_add(src: u16, tgt: u16, weight: u16) -> Self {
|
|
Self {
|
|
delta_type: DeltaType::EdgeAdd,
|
|
source: src,
|
|
target: tgt,
|
|
weight,
|
|
payload: 0,
|
|
}
|
|
}
|
|
|
|
pub fn observation(vertex: u16, positive: bool) -> Self {
|
|
Self {
|
|
delta_type: DeltaType::Observation,
|
|
source: vertex,
|
|
target: 0,
|
|
weight: 0,
|
|
payload: positive as u32,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Compact per-vertex bookkeeping stored inside a `CompactGraph`.
///
/// The default value is an inactive vertex with every field zeroed.
#[derive(Debug, Clone, Copy, Default, PartialEq)]
pub struct VertexState {
    /// Number of edge endpoints attached to this vertex.
    pub degree: u8,
    /// Component label; not written by the simulated graph in this file.
    pub component_id: u8,
    /// Whether any edge has touched this vertex yet.
    pub active: bool,
    /// Energy contribution; not written by the simulated graph in this file.
    pub energy_contrib: f32,
}

impl VertexState {
    /// Returns `true` once the vertex has been activated.
    pub fn is_active(&self) -> bool {
        self.active
    }
}

/// Compact edge record stored inside a `CompactGraph`.
///
/// The default value is an inactive edge with every field zeroed.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct CompactEdge {
    /// Source vertex id.
    pub source: u16,
    /// Target vertex id.
    pub target: u16,
    /// Edge weight.
    pub weight: u16,
    /// Whether this slot holds a live edge.
    pub active: bool,
}

impl CompactEdge {
    /// Returns `true` when the slot holds a live edge.
    pub fn is_active(&self) -> bool {
        self.active
    }
}

/// Compact graph for single tile
|
|
pub struct CompactGraph {
|
|
pub vertices: [VertexState; MAX_SHARD_VERTICES],
|
|
pub edges: [CompactEdge; MAX_SHARD_EDGES],
|
|
pub edge_count: usize,
|
|
pub vertex_count: usize,
|
|
pub component_count: u8,
|
|
}
|
|
|
|
impl CompactGraph {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
vertices: [VertexState::default(); MAX_SHARD_VERTICES],
|
|
edges: [CompactEdge::default(); MAX_SHARD_EDGES],
|
|
edge_count: 0,
|
|
vertex_count: 0,
|
|
component_count: 0,
|
|
}
|
|
}
|
|
|
|
pub fn add_edge(&mut self, src: u16, tgt: u16, weight: u16) -> bool {
|
|
if self.edge_count >= MAX_SHARD_EDGES {
|
|
return false;
|
|
}
|
|
|
|
// Activate vertices
|
|
self.vertices[src as usize].active = true;
|
|
self.vertices[src as usize].degree += 1;
|
|
self.vertices[tgt as usize].active = true;
|
|
self.vertices[tgt as usize].degree += 1;
|
|
|
|
// Add edge
|
|
self.edges[self.edge_count] = CompactEdge {
|
|
source: src,
|
|
target: tgt,
|
|
weight,
|
|
active: true,
|
|
};
|
|
self.edge_count += 1;
|
|
|
|
true
|
|
}
|
|
|
|
pub fn recompute_components(&mut self) {
|
|
// Simple union-find simulation
|
|
let mut parent = [0u8; MAX_SHARD_VERTICES];
|
|
for i in 0..MAX_SHARD_VERTICES {
|
|
parent[i] = i as u8;
|
|
}
|
|
|
|
// Union edges
|
|
for edge in &self.edges[..self.edge_count] {
|
|
if edge.active {
|
|
let s = edge.source as usize;
|
|
let t = edge.target as usize;
|
|
parent[s] = parent[t];
|
|
}
|
|
}
|
|
|
|
// Count unique components
|
|
let mut seen = [false; MAX_SHARD_VERTICES];
|
|
let mut count = 0u8;
|
|
for i in 0..MAX_SHARD_VERTICES {
|
|
if self.vertices[i].active && !seen[parent[i] as usize] {
|
|
seen[parent[i] as usize] = true;
|
|
count += 1;
|
|
}
|
|
}
|
|
self.component_count = count;
|
|
}
|
|
|
|
pub fn compute_total_energy(&self) -> f32 {
|
|
let mut energy = 0.0f32;
|
|
for edge in &self.edges[..self.edge_count] {
|
|
if edge.active {
|
|
// Simplified: weight as energy contribution
|
|
energy += edge.weight as f32 / 100.0;
|
|
}
|
|
}
|
|
energy
|
|
}
|
|
}
|
|
|
|
/// Accumulates evidence per hypothesis as log-space e-values.
pub struct EvidenceAccumulator {
    /// Log e-value per slot (fixed-point: value / 65536 = log2(e-value)).
    pub log_e_values: Vec<i32>,
    /// Number of leading slots in `log_e_values` that are in use.
    pub hypothesis_count: usize,
}

impl EvidenceAccumulator {
    /// Creates an accumulator with `capacity` zeroed slots and no registered
    /// hypotheses.
    pub fn new(capacity: usize) -> Self {
        let log_e_values = vec![0; capacity];
        Self {
            log_e_values,
            hypothesis_count: 0,
        }
    }

    /// Registers the next hypothesis and returns its slot index.
    ///
    /// When every slot is already taken nothing is registered, and the
    /// returned index equals the current `hypothesis_count`, which `update`
    /// treats as out of range.
    pub fn add_hypothesis(&mut self) -> usize {
        let slot = self.hypothesis_count;
        if slot >= self.log_e_values.len() {
            return slot;
        }
        self.hypothesis_count = slot + 1;
        slot
    }

    /// Adds `log_lr` to hypothesis `idx` with saturating arithmetic; indices
    /// at or beyond `hypothesis_count` are ignored.
    #[inline]
    pub fn update(&mut self, idx: usize, log_lr: i32) {
        if idx >= self.hypothesis_count {
            return;
        }
        let value = &mut self.log_e_values[idx];
        *value = value.saturating_add(log_lr);
    }

    /// Returns the sum of the log e-values of all registered hypotheses,
    /// widened to `i64`.
    pub fn global_log_e(&self) -> i64 {
        let registered = &self.log_e_values[..self.hypothesis_count];
        registered.iter().copied().map(i64::from).sum()
    }
}

/// Summary emitted by one tile at the end of a tick.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct TileReport {
    /// Id of the tile that produced the report.
    pub tile_id: u8,
    /// Tick number the report belongs to.
    pub tick: u32,
    /// Whether the tile's shard has at most one connected component.
    pub connected: bool,
    /// Number of connected components in the tile's shard.
    pub component_count: u8,
    /// Sum of the tile's log e-values.
    pub log_e_value: i64,
    /// Total energy of the tile's shard.
    pub energy: f32,
    /// Hash identifying the tile's state for this tick.
    pub witness_hash: u64,
}

impl TileReport {
    /// Creates a baseline report for `tile_id`: tick 0, connected with a
    /// single component, and zero evidence, energy and witness hash.
    pub fn new(tile_id: u8) -> Self {
        Self {
            tile_id,
            tick: 0,
            connected: true,
            component_count: 1,
            log_e_value: 0,
            energy: 0.0,
            witness_hash: 0,
        }
    }
}

/// Single tile state
|
|
pub struct TileState {
|
|
pub tile_id: u8,
|
|
pub graph: CompactGraph,
|
|
pub evidence: EvidenceAccumulator,
|
|
pub delta_buffer: Vec<Delta>,
|
|
pub tick_count: u32,
|
|
}
|
|
|
|
impl TileState {
|
|
pub fn new(tile_id: u8) -> Self {
|
|
Self {
|
|
tile_id,
|
|
graph: CompactGraph::new(),
|
|
evidence: EvidenceAccumulator::new(64),
|
|
delta_buffer: Vec::with_capacity(MAX_DELTA_BUFFER),
|
|
tick_count: 0,
|
|
}
|
|
}
|
|
|
|
pub fn ingest_delta(&mut self, delta: &Delta) -> bool {
|
|
if self.delta_buffer.len() >= MAX_DELTA_BUFFER {
|
|
return false;
|
|
}
|
|
self.delta_buffer.push(*delta);
|
|
true
|
|
}
|
|
|
|
pub fn tick(&mut self, tick_number: u32) -> TileReport {
|
|
// Process pending deltas
|
|
for delta in self.delta_buffer.drain(..) {
|
|
match delta.delta_type {
|
|
DeltaType::EdgeAdd => {
|
|
self.graph
|
|
.add_edge(delta.source, delta.target, delta.weight);
|
|
}
|
|
DeltaType::Observation => {
|
|
// Update evidence accumulator
|
|
let log_lr = if delta.payload != 0 { 65536 } else { -65536 };
|
|
if self.evidence.hypothesis_count > 0 {
|
|
self.evidence.update(0, log_lr);
|
|
}
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
// Recompute components if needed
|
|
self.graph.recompute_components();
|
|
|
|
// Compute energy
|
|
let energy = self.graph.compute_total_energy();
|
|
|
|
// Build report
|
|
self.tick_count = tick_number;
|
|
TileReport {
|
|
tile_id: self.tile_id,
|
|
tick: tick_number,
|
|
connected: self.graph.component_count <= 1,
|
|
component_count: self.graph.component_count,
|
|
log_e_value: self.evidence.global_log_e(),
|
|
energy,
|
|
witness_hash: self.compute_witness_hash(),
|
|
}
|
|
}
|
|
|
|
fn compute_witness_hash(&self) -> u64 {
|
|
let mut hash = self.tile_id as u64;
|
|
hash = hash.wrapping_mul(0x517cc1b727220a95);
|
|
hash ^= self.tick_count as u64;
|
|
hash = hash.wrapping_mul(0x517cc1b727220a95);
|
|
hash ^= self.graph.edge_count as u64;
|
|
hash
|
|
}
|
|
|
|
pub fn reset(&mut self) {
|
|
self.graph = CompactGraph::new();
|
|
self.delta_buffer.clear();
|
|
self.tick_count = 0;
|
|
}
|
|
}
|
|
|
|
/// 256-tile coherence fabric
|
|
pub struct CoherenceFabric {
|
|
pub tiles: Vec<TileState>,
|
|
}
|
|
|
|
impl CoherenceFabric {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
tiles: (0..NUM_TILES).map(|i| TileState::new(i as u8)).collect(),
|
|
}
|
|
}
|
|
|
|
/// Execute tick on all tiles sequentially
|
|
pub fn tick_sequential(&mut self, tick_number: u32) -> Vec<TileReport> {
|
|
self.tiles.iter_mut().map(|t| t.tick(tick_number)).collect()
|
|
}
|
|
|
|
/// Aggregate reports into global coherence
|
|
pub fn aggregate_reports(reports: &[TileReport]) -> FabricReport {
|
|
let total_energy: f32 = reports.iter().map(|r| r.energy).sum();
|
|
let total_log_e: i64 = reports.iter().map(|r| r.log_e_value).sum();
|
|
let all_connected = reports.iter().all(|r| r.connected);
|
|
|
|
// Compute global witness hash
|
|
let mut global_hash = 0u64;
|
|
for r in reports {
|
|
global_hash = global_hash.wrapping_mul(0x517cc1b727220a95);
|
|
global_hash ^= r.witness_hash;
|
|
}
|
|
|
|
FabricReport {
|
|
tick: reports.first().map(|r| r.tick).unwrap_or(0),
|
|
total_energy,
|
|
total_log_e,
|
|
all_connected,
|
|
global_witness_hash: global_hash,
|
|
}
|
|
}
|
|
|
|
/// Distribute delta to appropriate tile
|
|
pub fn distribute_delta(&mut self, node_id: u64, delta: &Delta) {
|
|
let tile_id = (node_id % NUM_TILES as u64) as usize;
|
|
self.tiles[tile_id].ingest_delta(delta);
|
|
}
|
|
}
|
|
|
|
/// Fabric-wide summary produced by `CoherenceFabric::aggregate_reports`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct FabricReport {
    /// Tick of the first aggregated tile report (0 if there were none).
    pub tick: u32,
    /// Sum of the tiles' energies.
    pub total_energy: f32,
    /// Sum of the tiles' log e-values.
    pub total_log_e: i64,
    /// `true` when every tile reported itself connected.
    pub all_connected: bool,
    /// Hash chained over the tiles' witness hashes.
    pub global_witness_hash: u64,
}

// ============================================================================
// Benchmarks
// ============================================================================

/// Benchmark single tile tick
|
|
fn bench_single_tile_tick(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("tile_single_tick");
|
|
group.throughput(Throughput::Elements(1));
|
|
|
|
// Empty tick
|
|
let mut tile = TileState::new(0);
|
|
group.bench_function("empty", |b| b.iter(|| black_box(tile.tick(black_box(1)))));
|
|
|
|
// Tick with small graph
|
|
let mut tile = TileState::new(0);
|
|
for i in 0..20u16 {
|
|
tile.ingest_delta(&Delta::edge_add(i, i + 1, 100));
|
|
}
|
|
tile.tick(0);
|
|
|
|
group.bench_function("small_graph_20_edges", |b| {
|
|
b.iter(|| black_box(tile.tick(black_box(1))))
|
|
});
|
|
|
|
// Tick with pending deltas
|
|
group.bench_function("with_10_deltas", |b| {
|
|
b.iter_batched(
|
|
|| {
|
|
let mut t = TileState::new(0);
|
|
for i in 0..10u16 {
|
|
t.ingest_delta(&Delta::edge_add(i, i + 1, 100));
|
|
}
|
|
t
|
|
},
|
|
|mut t| black_box(t.tick(1)),
|
|
criterion::BatchSize::SmallInput,
|
|
)
|
|
});
|
|
|
|
// Tick with full delta buffer
|
|
group.bench_function("with_64_deltas", |b| {
|
|
b.iter_batched(
|
|
|| {
|
|
let mut t = TileState::new(0);
|
|
for i in 0..MAX_DELTA_BUFFER as u16 {
|
|
t.ingest_delta(&Delta::edge_add(i % 200, (i + 1) % 200, 100));
|
|
}
|
|
t
|
|
},
|
|
|mut t| black_box(t.tick(1)),
|
|
criterion::BatchSize::SmallInput,
|
|
)
|
|
});
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark 256-tile parallel tick (sequential baseline)
|
|
fn bench_256_tile_tick_sequential(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("tile_256_sequential");
|
|
group.throughput(Throughput::Elements(NUM_TILES as u64));
|
|
|
|
// Empty fabric
|
|
let mut fabric = CoherenceFabric::new();
|
|
group.bench_function("empty_fabric", |b| {
|
|
b.iter(|| black_box(fabric.tick_sequential(black_box(1))))
|
|
});
|
|
|
|
// Fabric with some data per tile
|
|
let mut fabric = CoherenceFabric::new();
|
|
for i in 0..NUM_TILES {
|
|
for j in 0..10u16 {
|
|
fabric.tiles[i].ingest_delta(&Delta::edge_add(j, j + 1, 100));
|
|
}
|
|
fabric.tiles[i].tick(0);
|
|
}
|
|
|
|
group.bench_function("populated_10_edges_per_tile", |b| {
|
|
b.iter(|| black_box(fabric.tick_sequential(black_box(1))))
|
|
});
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark report aggregation
|
|
fn bench_report_aggregation(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("tile_report_aggregation");
|
|
group.throughput(Throughput::Elements(NUM_TILES as u64));
|
|
|
|
// Generate 256 reports
|
|
let reports: Vec<TileReport> = (0..NUM_TILES)
|
|
.map(|i| TileReport {
|
|
tile_id: i as u8,
|
|
tick: 1,
|
|
connected: i % 10 != 0,
|
|
component_count: (i % 5) as u8 + 1,
|
|
log_e_value: (i as i64) * 1000 - 128000,
|
|
energy: (i as f32) * 0.1,
|
|
witness_hash: i as u64 * 0x517cc1b727220a95,
|
|
})
|
|
.collect();
|
|
|
|
group.bench_function("aggregate_256_reports", |b| {
|
|
b.iter(|| black_box(CoherenceFabric::aggregate_reports(black_box(&reports))))
|
|
});
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark delta distribution
|
|
fn bench_delta_distribution(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("tile_delta_distribution");
|
|
|
|
let mut fabric = CoherenceFabric::new();
|
|
|
|
// Single delta
|
|
let delta = Delta::edge_add(0, 1, 100);
|
|
group.bench_function("distribute_single", |b| {
|
|
b.iter(|| fabric.distribute_delta(black_box(12345), black_box(&delta)))
|
|
});
|
|
|
|
// Batch distribution
|
|
for batch_size in [100, 1000, 10000] {
|
|
let deltas: Vec<(u64, Delta)> = (0..batch_size)
|
|
.map(|i| {
|
|
(
|
|
i as u64,
|
|
Delta::edge_add((i % 200) as u16, ((i + 1) % 200) as u16, 100),
|
|
)
|
|
})
|
|
.collect();
|
|
|
|
group.throughput(Throughput::Elements(batch_size as u64));
|
|
group.bench_with_input(
|
|
BenchmarkId::new("distribute_batch", batch_size),
|
|
&deltas,
|
|
|b, deltas| {
|
|
b.iter(|| {
|
|
for (node_id, delta) in deltas {
|
|
fabric.distribute_delta(*node_id, delta);
|
|
}
|
|
})
|
|
},
|
|
);
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark evidence accumulator
|
|
fn bench_evidence_accumulator(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("tile_evidence");
|
|
|
|
let mut acc = EvidenceAccumulator::new(64);
|
|
for _ in 0..16 {
|
|
acc.add_hypothesis();
|
|
}
|
|
|
|
// Single update
|
|
group.bench_function("update_single", |b| {
|
|
b.iter(|| acc.update(black_box(5), black_box(65536)))
|
|
});
|
|
|
|
// Global e-value computation
|
|
group.bench_function("global_log_e_16_hyp", |b| {
|
|
b.iter(|| black_box(acc.global_log_e()))
|
|
});
|
|
|
|
// 64 hypotheses
|
|
let mut acc64 = EvidenceAccumulator::new(64);
|
|
for _ in 0..64 {
|
|
acc64.add_hypothesis();
|
|
}
|
|
for i in 0..64 {
|
|
acc64.log_e_values[i] = (i as i32 - 32) * 1000;
|
|
}
|
|
|
|
group.bench_function("global_log_e_64_hyp", |b| {
|
|
b.iter(|| black_box(acc64.global_log_e()))
|
|
});
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark component recomputation
|
|
fn bench_component_recompute(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("tile_component_recompute");
|
|
|
|
for edge_count in [50, 200, 500, 1000] {
|
|
let mut graph = CompactGraph::new();
|
|
for i in 0..edge_count.min(MAX_SHARD_EDGES) {
|
|
let src = (i % 200) as u16;
|
|
let tgt = ((i + 1) % 200) as u16;
|
|
if src != tgt {
|
|
graph.add_edge(src, tgt, 100);
|
|
}
|
|
}
|
|
|
|
group.bench_with_input(
|
|
BenchmarkId::new("recompute", edge_count),
|
|
&edge_count,
|
|
|b, _| {
|
|
b.iter(|| {
|
|
graph.recompute_components();
|
|
black_box(graph.component_count)
|
|
})
|
|
},
|
|
);
|
|
}
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark full tick + aggregate cycle
|
|
fn bench_full_cycle(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("tile_full_cycle");
|
|
group.sample_size(50);
|
|
|
|
// Populate fabric
|
|
let mut fabric = CoherenceFabric::new();
|
|
for i in 0..NUM_TILES {
|
|
for j in 0..50u16 {
|
|
fabric.tiles[i].ingest_delta(&Delta::edge_add(j, (j + 1) % 200, 100));
|
|
}
|
|
fabric.tiles[i].tick(0);
|
|
}
|
|
|
|
group.bench_function("tick_and_aggregate_256_tiles", |b| {
|
|
let mut tick = 1u32;
|
|
b.iter(|| {
|
|
let reports = fabric.tick_sequential(tick);
|
|
let fabric_report = CoherenceFabric::aggregate_reports(&reports);
|
|
tick += 1;
|
|
black_box(fabric_report)
|
|
})
|
|
});
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark memory access patterns
|
|
fn bench_memory_patterns(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("tile_memory");
|
|
|
|
// Sequential tile access
|
|
let fabric = CoherenceFabric::new();
|
|
group.bench_function("sequential_tile_scan", |b| {
|
|
b.iter(|| {
|
|
let mut total = 0usize;
|
|
for tile in &fabric.tiles {
|
|
total += tile.graph.edge_count;
|
|
}
|
|
black_box(total)
|
|
})
|
|
});
|
|
|
|
// Strided tile access
|
|
group.bench_function("strided_tile_scan", |b| {
|
|
let stride = 7;
|
|
b.iter(|| {
|
|
let mut total = 0usize;
|
|
let mut idx = 0;
|
|
for _ in 0..NUM_TILES {
|
|
total += fabric.tiles[idx % NUM_TILES].graph.edge_count;
|
|
idx += stride;
|
|
}
|
|
black_box(total)
|
|
})
|
|
});
|
|
|
|
group.finish();
|
|
}
|
|
|
|
criterion_group!(
|
|
benches,
|
|
bench_single_tile_tick,
|
|
bench_256_tile_tick_sequential,
|
|
bench_report_aggregation,
|
|
bench_delta_distribution,
|
|
bench_evidence_accumulator,
|
|
bench_component_recompute,
|
|
bench_full_cycle,
|
|
bench_memory_patterns,
|
|
);
|
|
|
|
criterion_main!(benches);
|