Files
wifi-densepose/vendor/ruvector/crates/prime-radiant/benches/coherence_benchmarks.rs

1064 lines
32 KiB
Rust

//! Comprehensive Coherence Engine Benchmarks
//!
//! This benchmark suite covers the core coherence computation primitives
//! across varying dimensions, graph sizes, and topologies.
//!
//! ## Performance Targets (ADR-014)
//! - Residual computation: < 1us per edge
//! - Energy computation: < 10ms for 10K nodes
//! - Incremental update: < 100us for single node
//!
//! ## Benchmark Categories
//! 1. Coherence Core - residual, energy, incremental
//! 2. Restriction Maps - identity, diagonal, dense, sparse
//! 3. Scaling Tests - nodes, edges, dimensions
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::collections::HashMap;
// ============================================================================
// BENCHMARK TYPES
// ============================================================================
/// Affine restriction map `y = A x + b` backed by a row-major coefficient matrix.
#[derive(Clone)]
pub struct RestrictionMap {
    /// Row-major coefficients; the constructors fill `output_dim * input_dim` entries.
    pub matrix: Vec<f32>,
    /// Additive offset; the constructors fill one entry per output component.
    pub bias: Vec<f32>,
    /// Number of input components read per matrix row.
    pub input_dim: usize,
    /// Number of output components (matrix rows).
    pub output_dim: usize,
    /// Tag recording which constructor produced this map.
    pub map_type: MapType,
}

/// Structural category of a [`RestrictionMap`].
#[derive(Clone, Copy, Debug)]
pub enum MapType {
    Identity,
    Diagonal,
    Dense,
    Sparse { density: f32 },
}

impl RestrictionMap {
    /// Hash `key` with a fresh `DefaultHasher` and reduce the digest modulo `modulus`.
    fn hashed_mod<K: std::hash::Hash>(key: K, modulus: u64) -> u64 {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::Hasher;

        let mut state = DefaultHasher::new();
        key.hash(&mut state);
        state.finish() % modulus
    }

    /// Add `A * input` onto `output`, one row at a time, in column order.
    ///
    /// `output` must already hold the starting value for each row (the bias).
    #[inline]
    fn accumulate_rows(&self, input: &[f32], output: &mut [f32]) {
        for row in 0..self.output_dim {
            let start = row * self.input_dim;
            let coeffs = &self.matrix[start..start + self.input_dim];
            let x = &input[..self.input_dim];
            output[row] = coeffs
                .iter()
                .zip(x)
                .fold(output[row], |acc, (&a, &v)| acc + a * v);
        }
    }

    /// Build the `dim x dim` identity map with a zero bias.
    pub fn identity(dim: usize) -> Self {
        let mut matrix = vec![0.0f32; dim * dim];
        // In row-major order the diagonal entries are `dim + 1` slots apart.
        matrix.iter_mut().step_by(dim + 1).for_each(|d| *d = 1.0);

        Self {
            matrix,
            bias: vec![0.0; dim],
            input_dim: dim,
            output_dim: dim,
            map_type: MapType::Identity,
        }
    }

    /// Build a `dim x dim` diagonal (per-component scaling) map with a zero bias.
    ///
    /// Each diagonal entry is derived from a hash of `(seed, index, "diag")`
    /// and lies in `[0, 2)`.
    pub fn diagonal(dim: usize, seed: u64) -> Self {
        let mut matrix = vec![0.0f32; dim * dim];
        for (i, slot) in matrix.iter_mut().step_by(dim + 1).enumerate() {
            *slot = Self::hashed_mod((seed, i, "diag"), 1000) as f32 / 500.0;
        }

        Self {
            matrix,
            bias: vec![0.0; dim],
            input_dim: dim,
            output_dim: dim,
            map_type: MapType::Diagonal,
        }
    }

    /// Build a dense pseudo-random map from `input_dim` to `output_dim` components.
    ///
    /// Coefficients lie in `[-0.5, 0.5)` and bias entries in `[0, 0.1)`; both
    /// are derived from hashes that include `seed`.
    pub fn dense(input_dim: usize, output_dim: usize, seed: u64) -> Self {
        let matrix: Vec<f32> = (0..output_dim * input_dim)
            .map(|i| Self::hashed_mod((seed, i), 1000) as f32 / 1000.0 - 0.5)
            .collect();
        let bias: Vec<f32> = (0..output_dim)
            .map(|i| Self::hashed_mod((seed, i, "bias"), 100) as f32 / 1000.0)
            .collect();

        Self {
            matrix,
            bias,
            input_dim,
            output_dim,
            map_type: MapType::Dense,
        }
    }

    /// Build a map whose entries are non-zero with probability roughly `density`.
    ///
    /// The storage is still a full dense matrix; entries that are not selected
    /// stay at `0.0`. The bias is zero.
    pub fn sparse(input_dim: usize, output_dim: usize, density: f32, seed: u64) -> Self {
        let density_threshold = (density * 1000.0) as u64;
        let matrix: Vec<f32> = (0..output_dim * input_dim)
            .map(|i| {
                if Self::hashed_mod((seed, i, "sparse"), 1000) < density_threshold {
                    Self::hashed_mod((seed, i, "val"), 1000) as f32 / 1000.0 - 0.5
                } else {
                    0.0
                }
            })
            .collect();

        Self {
            matrix,
            bias: vec![0.0; output_dim],
            input_dim,
            output_dim,
            map_type: MapType::Sparse { density },
        }
    }

    /// Evaluate `y = A x + b`, returning a freshly allocated output vector.
    #[inline]
    pub fn apply(&self, input: &[f32]) -> Vec<f32> {
        debug_assert_eq!(input.len(), self.input_dim);
        let mut output = self.bias.clone();
        self.accumulate_rows(input, &mut output);
        output
    }

    /// Evaluate `y = A x + b` into a caller-provided buffer (no allocation).
    ///
    /// `output` must have the same length as the bias vector.
    #[inline]
    pub fn apply_into(&self, input: &[f32], output: &mut [f32]) {
        debug_assert_eq!(input.len(), self.input_dim);
        debug_assert_eq!(output.len(), self.output_dim);
        output.copy_from_slice(&self.bias);
        self.accumulate_rows(input, output);
    }

    /// Fast path for identity maps: copies `input` straight into `output`.
    #[inline]
    pub fn apply_identity_into(&self, input: &[f32], output: &mut [f32]) {
        debug_assert!(matches!(self.map_type, MapType::Identity));
        output.copy_from_slice(input);
    }

    /// Fast path for diagonal maps: reads only the diagonal coefficients.
    #[inline]
    pub fn apply_diagonal_into(&self, input: &[f32], output: &mut [f32]) {
        debug_assert!(matches!(self.map_type, MapType::Diagonal));
        let n = self.input_dim;
        // Entry (k, k) lives at offset k * n + k == k * (n + 1).
        let stride = n + 1;
        for k in 0..n {
            output[k] = self.matrix[k * stride] * input[k] + self.bias[k];
        }
    }
}
/// Node in sheaf graph
///
/// Pairs a numeric identifier with the state vector stored at that node.
#[derive(Clone)]
pub struct SheafNode {
/// Identifier of this node.
pub id: u64,
/// State vector attached to this node.
pub state: Vec<f32>,
}
/// Weighted edge carrying one restriction map per endpoint.
#[derive(Clone)]
pub struct SheafEdge {
    /// Identifier of this edge.
    pub id: u64,
    /// Identifier of the source node.
    pub source: u64,
    /// Identifier of the target node.
    pub target: u64,
    /// Multiplier applied to the squared residual norm.
    pub weight: f32,
    /// Map applied to the source node's state.
    pub rho_source: RestrictionMap,
    /// Map applied to the target node's state.
    pub rho_target: RestrictionMap,
}

impl SheafEdge {
    /// Write the residual `rho_source(source) - rho_target(target)` into `residual`.
    ///
    /// `source_buf` and `target_buf` are scratch space for the two projected
    /// states; all buffers are supplied by the caller so nothing is allocated.
    #[inline]
    pub fn residual_into(
        &self,
        source_state: &[f32],
        target_state: &[f32],
        source_buf: &mut [f32],
        target_buf: &mut [f32],
        residual: &mut [f32],
    ) {
        self.rho_source.apply_into(source_state, source_buf);
        self.rho_target.apply_into(target_state, target_buf);

        // Slice both projections to the residual length up front; a scratch
        // buffer that is too short panics here rather than being truncated.
        let len = residual.len();
        let projected_source = &source_buf[..len];
        let projected_target = &target_buf[..len];
        for ((out, &s), &t) in residual
            .iter_mut()
            .zip(projected_source)
            .zip(projected_target)
        {
            *out = s - t;
        }
    }

    /// Return the weighted residual energy `w_e * |r_e|^2` for this edge.
    ///
    /// The residual itself is never materialised; only its squared norm is
    /// accumulated from the two scratch buffers.
    #[inline]
    pub fn weighted_residual_energy_into(
        &self,
        source: &[f32],
        target: &[f32],
        source_buf: &mut [f32],
        target_buf: &mut [f32],
    ) -> f32 {
        self.rho_source.apply_into(source, source_buf);
        self.rho_target.apply_into(target, target_buf);

        let projected_target = &target_buf[..source_buf.len()];
        let norm_sq = source_buf
            .iter()
            .zip(projected_target)
            .fold(0.0f32, |acc, (&s, &t)| {
                let diff = s - t;
                acc + diff * diff
            });
        self.weight * norm_sq
    }
}
/// Full sheaf graph for coherence computation
///
/// Nodes are looked up by id; every edge owns its own pair of restriction maps.
pub struct SheafGraph {
    /// Nodes keyed by their id.
    pub nodes: HashMap<u64, SheafNode>,
    /// All edges, in generation order.
    pub edges: Vec<SheafEdge>,
    /// Length of each node's state vector.
    pub state_dim: usize,
    /// Length of the scratch buffers used when evaluating edge energies.
    pub edge_dim: usize,
}

impl SheafGraph {
    /// Generate a pseudo-random graph with identity restriction maps.
    ///
    /// Equivalent to [`SheafGraph::with_restriction_type`] with
    /// `edge_dim == state_dim` and [`MapType::Identity`].
    pub fn random(num_nodes: usize, avg_degree: usize, state_dim: usize, seed: u64) -> Self {
        Self::with_restriction_type(
            num_nodes,
            avg_degree,
            state_dim,
            state_dim,
            MapType::Identity,
            seed,
        )
    }

    /// Generate a pseudo-random graph whose edges all use maps of `map_type`.
    ///
    /// * Node states have `state_dim` components in `[-0.5, 0.5)`, derived
    ///   from hashes of `(seed, node id, component)`.
    /// * `(num_nodes * avg_degree) / 2` candidate edges are drawn; candidates
    ///   whose endpoints coincide are skipped, so the final edge count can be
    ///   lower and edge ids can have gaps.
    /// * `Dense` and `Sparse` maps project `state_dim -> edge_dim`.
    ///   `Identity` and `Diagonal` maps are always `state_dim x state_dim`, so
    ///   for those types `edge_dim` must equal `state_dim`; otherwise the
    ///   energy methods panic on a buffer length mismatch.
    pub fn with_restriction_type(
        num_nodes: usize,
        avg_degree: usize,
        state_dim: usize,
        edge_dim: usize,
        map_type: MapType,
        seed: u64,
    ) -> Self {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let nodes: HashMap<u64, SheafNode> = (0..num_nodes as u64)
            .map(|id| {
                let state: Vec<f32> = (0..state_dim)
                    .map(|i| {
                        let mut hasher = DefaultHasher::new();
                        (seed, id, i).hash(&mut hasher);
                        (hasher.finish() % 1000) as f32 / 1000.0 - 0.5
                    })
                    .collect();
                (id, SheafNode { id, state })
            })
            .collect();

        // Single place that knows how to build a map of the requested kind.
        let make_map = |map_seed: u64| match map_type {
            MapType::Identity => RestrictionMap::identity(state_dim),
            MapType::Diagonal => RestrictionMap::diagonal(state_dim, map_seed),
            MapType::Dense => RestrictionMap::dense(state_dim, edge_dim, map_seed),
            MapType::Sparse { density } => {
                RestrictionMap::sparse(state_dim, edge_dim, density, map_seed)
            }
        };

        let num_edges = (num_nodes * avg_degree) / 2;
        let mut edges = Vec::with_capacity(num_edges);
        for i in 0..num_edges {
            // `num_edges > 0` implies `num_nodes > 0`, so the modulo is safe.
            let endpoint = |role: &str| {
                let mut h = DefaultHasher::new();
                (seed, i, role).hash(&mut h);
                h.finish() % num_nodes as u64
            };
            let source = endpoint("source");
            let target = endpoint("target");
            if source == target {
                // Self-loops are skipped.
                continue;
            }

            // Wrapping arithmetic keeps large seeds from overflowing.
            let source_seed = seed.wrapping_add(i as u64);
            let target_seed = source_seed.wrapping_add(1000);
            edges.push(SheafEdge {
                id: i as u64,
                source,
                target,
                weight: 1.0,
                rho_source: make_map(source_seed),
                rho_target: make_map(target_seed),
            });
        }

        Self {
            nodes,
            edges,
            state_dim,
            edge_dim,
        }
    }

    /// Compute global coherence energy (sequential)
    ///
    /// Sums the weighted residual energy of every edge, reusing two scratch
    /// buffers of length `edge_dim`. Panics if an edge references a node id
    /// that is not present in `nodes`.
    pub fn compute_total_energy(&self) -> f32 {
        let mut source_buf = vec![0.0f32; self.edge_dim];
        let mut target_buf = vec![0.0f32; self.edge_dim];
        let mut total = 0.0f32;
        for edge in &self.edges {
            let source_state = &self.nodes[&edge.source].state;
            let target_state = &self.nodes[&edge.target].state;
            total += edge.weighted_residual_energy_into(
                source_state,
                target_state,
                &mut source_buf,
                &mut target_buf,
            );
        }
        total
    }

    /// Compute energy with per-edge tracking
    ///
    /// Returns `(total, per_edge)` where `per_edge[k]` is the weighted residual
    /// energy of `edges[k]` and `total` is the sum of those values.
    pub fn compute_energy_with_edges(&self) -> (f32, Vec<f32>) {
        let mut source_buf = vec![0.0f32; self.edge_dim];
        let mut target_buf = vec![0.0f32; self.edge_dim];
        let edge_energies: Vec<f32> = self
            .edges
            .iter()
            .map(|edge| {
                let source_state = &self.nodes[&edge.source].state;
                let target_state = &self.nodes[&edge.target].state;
                edge.weighted_residual_energy_into(
                    source_state,
                    target_state,
                    &mut source_buf,
                    &mut target_buf,
                )
            })
            .collect();
        let total: f32 = edge_energies.iter().sum();
        (total, edge_energies)
    }
}
// ============================================================================
// HELPER FUNCTIONS
// ============================================================================
/// Build a deterministic pseudo-random state vector of length `dim`.
///
/// Component `i` is derived from a hash of `(seed, i)` and lies in `[-0.5, 0.5)`.
fn generate_state(dim: usize, seed: u64) -> Vec<f32> {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = Vec::with_capacity(dim);
    for component in 0..dim {
        let mut hasher = DefaultHasher::new();
        (seed, component).hash(&mut hasher);
        let bucket = hasher.finish() % 1000;
        state.push(bucket as f32 / 1000.0 - 0.5);
    }
    state
}
/// Squared Euclidean norm, written as a plain map-and-sum over the slice.
#[inline]
fn norm_sq_naive(v: &[f32]) -> f32 {
    let squares = v.iter().map(|&component| component * component);
    squares.sum::<f32>()
}
/// Squared Euclidean norm using four independent accumulator lanes.
///
/// Full groups of four feed one lane each; the lanes are then added left to
/// right and any leftover elements are folded in afterwards.
#[inline]
fn norm_sq_unrolled(v: &[f32]) -> f32 {
    let mut lanes = [0.0f32; 4];
    let mut quads = v.chunks_exact(4);
    for quad in quads.by_ref() {
        for (lane, &x) in lanes.iter_mut().zip(quad) {
            *lane += x * x;
        }
    }

    let head = lanes[0] + lanes[1] + lanes[2] + lanes[3];
    quads
        .remainder()
        .iter()
        .fold(head, |sum, &x| sum + x * x)
}
// ============================================================================
// COHERENCE CORE BENCHMARKS
// ============================================================================
/// Time a single-edge residual evaluation for several state dimensions.
///
/// Both endpoints use identity restriction maps and all scratch buffers are
/// allocated once per dimension, outside the timed closure.
fn bench_residual_computation(c: &mut Criterion) {
    let mut group = c.benchmark_group("coherence_residual");
    group.throughput(Throughput::Elements(1));

    // ADR-014 target dimensions: 64, 256, 1024
    for dim in [64, 256, 1024] {
        let edge = SheafEdge {
            id: 0,
            source: 0,
            target: 1,
            weight: 1.0,
            rho_source: RestrictionMap::identity(dim),
            rho_target: RestrictionMap::identity(dim),
        };
        let source_state = generate_state(dim, 42);
        let target_state = generate_state(dim, 123);

        let mut projected_source = vec![0.0f32; dim];
        let mut projected_target = vec![0.0f32; dim];
        let mut residual = vec![0.0f32; dim];

        let id = BenchmarkId::new("dim", dim);
        group.bench_with_input(id, &dim, |bencher, _| {
            bencher.iter(|| {
                edge.residual_into(
                    black_box(&source_state),
                    black_box(&target_state),
                    &mut projected_source,
                    &mut projected_target,
                    &mut residual,
                );
                black_box(residual[0])
            })
        });
    }

    group.finish();
}
/// Time the sequential whole-graph energy sum for increasing node counts.
///
/// Each case pairs a node count with the Criterion sample size to use for it;
/// throughput is reported in edges.
fn bench_energy_computation(c: &mut Criterion) {
    // ADR-014 targets: 100, 1K, 10K, 100K nodes
    const CASES: [(usize, usize); 4] = [(100, 100), (1_000, 50), (10_000, 20), (100_000, 10)];

    let mut group = c.benchmark_group("coherence_energy");
    for &(num_nodes, sample_size) in CASES.iter() {
        let graph = SheafGraph::random(num_nodes, 4, 64, 42);
        let edge_count = graph.edges.len() as u64;

        group.sample_size(sample_size);
        group.throughput(Throughput::Elements(edge_count));
        group.bench_with_input(
            BenchmarkId::new("nodes", num_nodes),
            &num_nodes,
            |bencher, _| bencher.iter(|| black_box(graph.compute_total_energy())),
        );
    }
    group.finish();
}
/// Benchmark incremental single node update
fn bench_incremental_update(c: &mut Criterion) {
let mut group = c.benchmark_group("coherence_incremental");
// Simulated incremental update tracking
struct IncrementalTracker {
graph: SheafGraph,
node_to_edges: HashMap<u64, Vec<usize>>,
edge_energies: Vec<f32>,
total_energy: f32,
}
impl IncrementalTracker {
fn new(graph: SheafGraph) -> Self {
let mut node_to_edges: HashMap<u64, Vec<usize>> = HashMap::new();
for (idx, edge) in graph.edges.iter().enumerate() {
node_to_edges.entry(edge.source).or_default().push(idx);
node_to_edges.entry(edge.target).or_default().push(idx);
}
let (total_energy, edge_energies) = graph.compute_energy_with_edges();
Self {
graph,
node_to_edges,
edge_energies,
total_energy,
}
}
fn update_node(&mut self, node_id: u64, new_state: Vec<f32>) {
if let Some(node) = self.graph.nodes.get_mut(&node_id) {
node.state = new_state;
}
let affected = self
.node_to_edges
.get(&node_id)
.cloned()
.unwrap_or_default();
let mut source_buf = vec![0.0f32; self.graph.edge_dim];
let mut target_buf = vec![0.0f32; self.graph.edge_dim];
for &edge_idx in &affected {
let edge = &self.graph.edges[edge_idx];
let source_state = &self.graph.nodes[&edge.source].state;
let target_state = &self.graph.nodes[&edge.target].state;
let old_energy = self.edge_energies[edge_idx];
let new_energy = edge.weighted_residual_energy_into(
source_state,
target_state,
&mut source_buf,
&mut target_buf,
);
self.total_energy += new_energy - old_energy;
self.edge_energies[edge_idx] = new_energy;
}
}
}
// ADR-014 target: <100us for single node update
for num_nodes in [1_000, 10_000, 100_000] {
let graph = SheafGraph::random(num_nodes, 4, 64, 42);
let mut tracker = IncrementalTracker::new(graph);
let node_id = (num_nodes / 2) as u64;
let sample_size = if num_nodes > 50_000 { 20 } else { 100 };
group.sample_size(sample_size);
group.throughput(Throughput::Elements(1));
group.bench_with_input(
BenchmarkId::new("single_node", num_nodes),
&num_nodes,
|b, _| {
b.iter(|| {
let new_state = generate_state(64, rand::random());
tracker.update_node(black_box(node_id), new_state);
black_box(tracker.total_energy)
})
},
);
}
group.finish();
}
/// Time applying a single restriction map of each kind to a 64-component input.
///
/// Identity and diagonal maps use their dedicated fast paths; the dense and
/// sparse variants all go through the general `apply_into`.
fn bench_restriction_map_apply(c: &mut Criterion) {
    let mut group = c.benchmark_group("coherence_restriction_map");
    group.throughput(Throughput::Elements(1));

    let dim = 64;
    let input = generate_state(dim, 42);

    // Identity map (fast path)
    {
        let identity = RestrictionMap::identity(dim);
        let mut out = vec![0.0f32; dim];
        group.bench_function("identity", |bencher| {
            bencher.iter(|| {
                identity.apply_identity_into(black_box(&input), &mut out);
                black_box(out[0])
            })
        });
    }

    // Diagonal map (fast path)
    {
        let diagonal = RestrictionMap::diagonal(dim, 42);
        let mut out = vec![0.0f32; dim];
        group.bench_function("diagonal", |bencher| {
            bencher.iter(|| {
                diagonal.apply_diagonal_into(black_box(&input), &mut out);
                black_box(out[0])
            })
        });
    }

    // Everything below shares the general matrix-vector path, in this order:
    // dense 64x64, dense projection 64 -> 32, sparse 10%, sparse 30%.
    let general_cases = [
        ("dense_64x64", RestrictionMap::dense(dim, dim, 42)),
        ("dense_64x32", RestrictionMap::dense(64, 32, 42)),
        ("sparse_10pct", RestrictionMap::sparse(dim, dim, 0.1, 42)),
        ("sparse_30pct", RestrictionMap::sparse(dim, dim, 0.3, 42)),
    ];
    for (name, rho) in &general_cases {
        // The output buffer must match the map's output dimension.
        let mut out = vec![0.0f32; rho.output_dim];
        group.bench_function(*name, |bencher| {
            bencher.iter(|| {
                rho.apply_into(black_box(&input), &mut out);
                black_box(out[0])
            })
        });
    }

    group.finish();
}
// ============================================================================
// SCALING BENCHMARKS
// ============================================================================
/// Measure how whole-graph energy time grows with the number of nodes.
///
/// Average degree (4) and state dimension (64) are held fixed; throughput is
/// reported in edges.
fn bench_scaling_nodes(c: &mut Criterion) {
    let mut group = c.benchmark_group("scaling_nodes");

    for num_nodes in [100usize, 500, 1000, 2000, 5000, 10000] {
        let graph = SheafGraph::random(num_nodes, 4, 64, 42);

        // Fewer samples for the largest graph.
        group.sample_size(if num_nodes > 5000 { 20 } else { 50 });
        group.throughput(Throughput::Elements(graph.edges.len() as u64));

        let id = BenchmarkId::new("energy", num_nodes);
        group.bench_with_input(id, &num_nodes, |bencher, _| {
            bencher.iter(|| black_box(graph.compute_total_energy()))
        });
    }

    group.finish();
}
/// Measure how whole-graph energy time grows with the average node degree.
///
/// Node count (1000) and state dimension (64) are held fixed; throughput is
/// reported in edges.
fn bench_scaling_edges(c: &mut Criterion) {
    let mut group = c.benchmark_group("scaling_edges");
    let num_nodes = 1000;

    for avg_degree in [2usize, 4, 8, 16, 32, 64] {
        let graph = SheafGraph::random(num_nodes, avg_degree, 64, 42);
        let edge_count = graph.edges.len() as u64;
        group.throughput(Throughput::Elements(edge_count));

        let id = BenchmarkId::new("avg_degree", avg_degree);
        group.bench_with_input(id, &avg_degree, |bencher, _| {
            bencher.iter(|| black_box(graph.compute_total_energy()))
        });
    }

    group.finish();
}
/// Measure how whole-graph energy time grows with the state vector dimension.
///
/// Node count (1000) and average degree (4) are held fixed; throughput is
/// reported in edges.
fn bench_scaling_dimension(c: &mut Criterion) {
    let mut group = c.benchmark_group("scaling_dimension");
    let num_nodes = 1000;

    for dim in [16usize, 32, 64, 128, 256, 512, 1024] {
        let graph = SheafGraph::random(num_nodes, 4, dim, 42);

        // Fewer samples for the largest dimension.
        group.sample_size(if dim > 512 { 20 } else { 50 });
        group.throughput(Throughput::Elements(graph.edges.len() as u64));

        let id = BenchmarkId::new("state_dim", dim);
        group.bench_with_input(id, &dim, |bencher, _| {
            bencher.iter(|| black_box(graph.compute_total_energy()))
        });
    }

    group.finish();
}
/// Benchmark with different restriction map types
fn bench_restriction_map_types(c: &mut Criterion) {
let mut group = c.benchmark_group("restriction_map_types");
let num_nodes = 1000;
let state_dim = 64;
// Identity maps
{
let graph = SheafGraph::with_restriction_type(
num_nodes,
4,
state_dim,
state_dim,
MapType::Identity,
42,
);
group.throughput(Throughput::Elements(graph.edges.len() as u64));
group.bench_function("identity", |b| {
b.iter(|| black_box(graph.compute_total_energy()))
});
}
// Diagonal maps
{
let graph = SheafGraph::with_restriction_type(
num_nodes,
4,
state_dim,
state_dim,
MapType::Diagonal,
42,
);
group.bench_function("diagonal", |b| {
b.iter(|| black_box(graph.compute_total_energy()))
});
}
// Dense maps
{
let graph = SheafGraph::with_restriction_type(
num_nodes,
4,
state_dim,
state_dim,
MapType::Dense,
42,
);
group.bench_function("dense", |b| {
b.iter(|| black_box(graph.compute_total_energy()))
});
}
// Dense projection (64 -> 32)
{
let graph =
SheafGraph::with_restriction_type(num_nodes, 4, state_dim, 32, MapType::Dense, 42);
group.bench_function("dense_projection", |b| {
b.iter(|| black_box(graph.compute_total_energy()))
});
}
// Sparse 10%
{
let graph = SheafGraph::with_restriction_type(
num_nodes,
4,
state_dim,
state_dim,
MapType::Sparse { density: 0.1 },
42,
);
group.bench_function("sparse_10pct", |b| {
b.iter(|| black_box(graph.compute_total_energy()))
});
}
group.finish();
}
// ============================================================================
// NORM COMPUTATION BENCHMARKS
// ============================================================================
/// Compare three ways of computing a squared norm at several vector lengths.
///
/// Variants: the map-and-sum helper, the four-lane unrolled helper, and an
/// inline `fold`. Throughput is reported in vector elements.
fn bench_norm_computation(c: &mut Criterion) {
    let mut group = c.benchmark_group("norm_computation");

    for dim in [64, 256, 1024] {
        let values = generate_state(dim, 42);
        group.throughput(Throughput::Elements(dim as u64));

        group.bench_with_input(BenchmarkId::new("naive", dim), &dim, |bencher, _| {
            bencher.iter(|| black_box(norm_sq_naive(black_box(&values))))
        });

        group.bench_with_input(BenchmarkId::new("unrolled", dim), &dim, |bencher, _| {
            bencher.iter(|| black_box(norm_sq_unrolled(black_box(&values))))
        });

        // Iterator-based (auto-vectorization friendly)
        group.bench_with_input(BenchmarkId::new("iter_fold", dim), &dim, |bencher, _| {
            bencher.iter(|| {
                let total: f32 = black_box(&values)
                    .iter()
                    .fold(0.0, |running, &x| running + x * x);
                black_box(total)
            })
        });
    }

    group.finish();
}
// ============================================================================
// BATCH PROCESSING BENCHMARKS
// ============================================================================
/// Time the energy of a chain of edges under two buffer strategies.
///
/// Edge `i` connects state `i` to state `i + 1`. The `sequential` variant
/// shares one pair of scratch buffers per pass; `per_edge_buffers` allocates
/// a fresh pair for every edge.
fn bench_batch_residual(c: &mut Criterion) {
    let mut group = c.benchmark_group("batch_residual");
    let dim = 64;

    for batch_size in [10, 100, 1000] {
        let edges: Vec<SheafEdge> = (0..batch_size)
            .map(|i| SheafEdge {
                id: i as u64,
                source: i as u64,
                target: (i + 1) as u64,
                weight: 1.0,
                rho_source: RestrictionMap::identity(dim),
                rho_target: RestrictionMap::identity(dim),
            })
            .collect();
        // One more state than edges, so every edge has both endpoints.
        let states: Vec<Vec<f32>> = (0..batch_size + 1)
            .map(|i| generate_state(dim, i as u64))
            .collect();

        group.throughput(Throughput::Elements(batch_size as u64));

        // Sequential processing with shared scratch buffers
        group.bench_with_input(
            BenchmarkId::new("sequential", batch_size),
            &batch_size,
            |bencher, _| {
                bencher.iter(|| {
                    let mut source_buf = vec![0.0f32; dim];
                    let mut target_buf = vec![0.0f32; dim];
                    // `windows(2)` yields the (state[i], state[i + 1]) pair for edge i.
                    let total = edges.iter().zip(states.windows(2)).fold(
                        0.0f32,
                        |running, (edge, pair)| {
                            running
                                + edge.weighted_residual_energy_into(
                                    &pair[0],
                                    &pair[1],
                                    &mut source_buf,
                                    &mut target_buf,
                                )
                        },
                    );
                    black_box(total)
                })
            },
        );

        // Separate buffer per edge (more allocations but parallelizable)
        group.bench_with_input(
            BenchmarkId::new("per_edge_buffers", batch_size),
            &batch_size,
            |bencher, _| {
                bencher.iter(|| {
                    let total: f32 = edges
                        .iter()
                        .zip(states.windows(2))
                        .map(|(edge, pair)| {
                            let mut source_buf = vec![0.0f32; dim];
                            let mut target_buf = vec![0.0f32; dim];
                            edge.weighted_residual_energy_into(
                                &pair[0],
                                &pair[1],
                                &mut source_buf,
                                &mut target_buf,
                            )
                        })
                        .sum();
                    black_box(total)
                })
            },
        );
    }

    group.finish();
}
/// Benchmark memory access patterns
///
/// Compares whole-graph energy time on a chain graph (edge `i` joins node `i`
/// to node `i + 1`) against a pseudo-random graph over the same node count.
/// Throughput is set separately for each graph because their edge counts
/// differ.
fn bench_memory_patterns(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_patterns");
    let num_nodes = 10000;
    let dim = 64;

    // Chain graph (sequential access)
    {
        let nodes: HashMap<u64, SheafNode> = (0..num_nodes as u64)
            .map(|id| {
                (
                    id,
                    SheafNode {
                        id,
                        state: generate_state(dim, id),
                    },
                )
            })
            .collect();
        let edges: Vec<SheafEdge> = (0..num_nodes - 1)
            .map(|i| SheafEdge {
                id: i as u64,
                source: i as u64,
                target: (i + 1) as u64,
                weight: 1.0,
                rho_source: RestrictionMap::identity(dim),
                rho_target: RestrictionMap::identity(dim),
            })
            .collect();
        let graph = SheafGraph {
            nodes,
            edges,
            state_dim: dim,
            edge_dim: dim,
        };
        group.throughput(Throughput::Elements(graph.edges.len() as u64));
        group.bench_function("sequential_access", |b| {
            b.iter(|| black_box(graph.compute_total_energy()))
        });
    }

    // Random graph (random access)
    {
        let graph = SheafGraph::random(num_nodes, 4, dim, 42);
        // The random graph has a different edge count than the chain, so the
        // throughput must be reset or the reported elements/s would be wrong.
        group.throughput(Throughput::Elements(graph.edges.len() as u64));
        group.bench_function("random_access", |b| {
            b.iter(|| black_box(graph.compute_total_energy()))
        });
    }

    group.finish();
}
// ============================================================================
// CRITERION CONFIGURATION
// ============================================================================
// Core coherence primitives: residual, whole-graph energy, incremental
// update, and single restriction-map application.
criterion_group!(
coherence_core,
bench_residual_computation,
bench_energy_computation,
bench_incremental_update,
bench_restriction_map_apply,
);
// Scaling behaviour over node count, edge density, state dimension, and
// restriction map type.
criterion_group!(
scaling_tests,
bench_scaling_nodes,
bench_scaling_edges,
bench_scaling_dimension,
bench_restriction_map_types,
);
// Micro-level comparisons: norm variants, batch buffer strategies, and
// memory access patterns.
criterion_group!(
optimization_tests,
bench_norm_computation,
bench_batch_residual,
bench_memory_patterns,
);
// Registers all three groups with the benchmark entry point.
criterion_main!(coherence_core, scaling_tests, optimization_tests);