Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
269
crates/ruvector-mincut-gated-transformer/benches/gate.rs
Normal file
269
crates/ruvector-mincut-gated-transformer/benches/gate.rs
Normal file
@@ -0,0 +1,269 @@
|
||||
//! Gate overhead benchmarks.
|
||||
//!
|
||||
//! Measures the cost of gate evaluation separate from inference.
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||
use ruvector_mincut_gated_transformer::{
|
||||
gate::GateController, spike::SpikeScheduler, GatePacket, GatePolicy, SpikePacket,
|
||||
};
|
||||
|
||||
fn bench_gate_evaluation(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("gate_evaluation");
|
||||
|
||||
let policy = GatePolicy::default();
|
||||
let controller = GateController::new(policy);
|
||||
|
||||
// Allow case
|
||||
let gate_allow = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 5,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
group.bench_function("allow", |b| {
|
||||
b.iter(|| {
|
||||
let decision = controller.evaluate(black_box(&gate_allow), None);
|
||||
black_box(decision)
|
||||
})
|
||||
});
|
||||
|
||||
// ReduceScope case
|
||||
let gate_reduce = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 30, // Triggers boundary spike
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
group.bench_function("reduce_scope", |b| {
|
||||
b.iter(|| {
|
||||
let decision = controller.evaluate(black_box(&gate_reduce), None);
|
||||
black_box(decision)
|
||||
})
|
||||
});
|
||||
|
||||
// FlushKv case
|
||||
let gate_flush = GatePacket {
|
||||
lambda: 30,
|
||||
lambda_prev: 100, // 70% drop
|
||||
boundary_edges: 5,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
group.bench_function("flush_kv", |b| {
|
||||
b.iter(|| {
|
||||
let decision = controller.evaluate(black_box(&gate_flush), None);
|
||||
black_box(decision)
|
||||
})
|
||||
});
|
||||
|
||||
// QuarantineUpdates case
|
||||
let gate_quarantine = GatePacket {
|
||||
lambda: 10, // Below min
|
||||
lambda_prev: 100,
|
||||
boundary_edges: 5,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
group.bench_function("quarantine", |b| {
|
||||
b.iter(|| {
|
||||
let decision = controller.evaluate(black_box(&gate_quarantine), None);
|
||||
black_box(decision)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_gate_with_spikes(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("gate_with_spikes");
|
||||
|
||||
let policy = GatePolicy::default();
|
||||
let controller = GateController::new(policy);
|
||||
|
||||
let gate = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 5,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
// Active spike
|
||||
let spike_active = SpikePacket {
|
||||
fired: 1,
|
||||
rate_q15: 10000,
|
||||
novelty_q15: 15000,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
group.bench_function("with_active_spike", |b| {
|
||||
b.iter(|| {
|
||||
let decision = controller.evaluate(black_box(&gate), Some(black_box(&spike_active)));
|
||||
black_box(decision)
|
||||
})
|
||||
});
|
||||
|
||||
// Inactive spike
|
||||
let spike_inactive = SpikePacket {
|
||||
fired: 0,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
group.bench_function("with_inactive_spike", |b| {
|
||||
b.iter(|| {
|
||||
let decision = controller.evaluate(black_box(&gate), Some(black_box(&spike_inactive)));
|
||||
black_box(decision)
|
||||
})
|
||||
});
|
||||
|
||||
// Storm spike
|
||||
let spike_storm = SpikePacket {
|
||||
fired: 1,
|
||||
rate_q15: 30000, // Very high
|
||||
novelty_q15: 5000,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
group.bench_function("with_storm_spike", |b| {
|
||||
b.iter(|| {
|
||||
let decision = controller.evaluate(black_box(&gate), Some(black_box(&spike_storm)));
|
||||
black_box(decision)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_spike_scheduler(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("spike_scheduler");
|
||||
|
||||
let scheduler = SpikeScheduler::new();
|
||||
|
||||
// Active spike
|
||||
let spike_active = SpikePacket {
|
||||
fired: 1,
|
||||
rate_q15: 10000,
|
||||
novelty_q15: 15000,
|
||||
top_len: 8,
|
||||
top_idx: [1, 5, 10, 15, 20, 25, 30, 35, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
top_w_q15: [
|
||||
16384, 8192, 4096, 2048, 1024, 512, 256, 128, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
],
|
||||
flags: SpikePacket::FLAG_SPARSE_MASK,
|
||||
};
|
||||
|
||||
group.bench_function("evaluate_active", |b| {
|
||||
b.iter(|| {
|
||||
let decision = scheduler.evaluate(black_box(&spike_active));
|
||||
black_box(decision)
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("build_sparse_mask", |b| {
|
||||
b.iter(|| {
|
||||
let mask = scheduler.build_sparse_mask(black_box(&spike_active), 64);
|
||||
black_box(mask)
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("get_weighted_positions", |b| {
|
||||
b.iter(|| {
|
||||
let positions = scheduler.get_weighted_positions(black_box(&spike_active));
|
||||
black_box(positions)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_policy_variants(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("policy_variants");
|
||||
|
||||
let gate = GatePacket {
|
||||
lambda: 50,
|
||||
lambda_prev: 80,
|
||||
boundary_edges: 15,
|
||||
boundary_concentration_q15: 15000,
|
||||
partition_count: 8,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
// Default policy
|
||||
let default_controller = GateController::new(GatePolicy::default());
|
||||
|
||||
group.bench_function("default_policy", |b| {
|
||||
b.iter(|| {
|
||||
let decision = default_controller.evaluate(black_box(&gate), None);
|
||||
black_box(decision)
|
||||
})
|
||||
});
|
||||
|
||||
// Conservative policy
|
||||
let conservative_controller = GateController::new(GatePolicy::conservative());
|
||||
|
||||
group.bench_function("conservative_policy", |b| {
|
||||
b.iter(|| {
|
||||
let decision = conservative_controller.evaluate(black_box(&gate), None);
|
||||
black_box(decision)
|
||||
})
|
||||
});
|
||||
|
||||
// Permissive policy
|
||||
let permissive_controller = GateController::new(GatePolicy::permissive());
|
||||
|
||||
group.bench_function("permissive_policy", |b| {
|
||||
b.iter(|| {
|
||||
let decision = permissive_controller.evaluate(black_box(&gate), None);
|
||||
black_box(decision)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_drop_ratio_calculation(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("drop_ratio");
|
||||
|
||||
for drop_percent in [10, 25, 50, 75].iter() {
|
||||
let gate = GatePacket {
|
||||
lambda: 100 - drop_percent,
|
||||
lambda_prev: 100,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(drop_percent),
|
||||
drop_percent,
|
||||
|b, _| {
|
||||
b.iter(|| {
|
||||
let ratio = black_box(&gate).drop_ratio_q15();
|
||||
black_box(ratio)
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
bench_gate_evaluation,
|
||||
bench_gate_with_spikes,
|
||||
bench_spike_scheduler,
|
||||
bench_policy_variants,
|
||||
bench_drop_ratio_calculation,
|
||||
);
|
||||
|
||||
criterion_main!(benches);
|
||||
569
crates/ruvector-mincut-gated-transformer/benches/kernel.rs
Normal file
569
crates/ruvector-mincut-gated-transformer/benches/kernel.rs
Normal file
@@ -0,0 +1,569 @@
|
||||
//! Kernel benchmarks for low-level operations.
|
||||
//!
|
||||
//! Tests GEMM, INT4 quantization, arena allocation, and SIMD operations.
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||
use ruvector_mincut_gated_transformer::arena::{calculate_arena_size, WeightArena};
|
||||
use ruvector_mincut_gated_transformer::kernel::{
|
||||
compute_gflops, dequantize_int4_to_f32, int4_gemm, int4_gemv, layer_norm, pack_int4, qgemm_i8,
|
||||
qgemm_i8_simd, quantize_f32_to_int4, rms_norm, unpack_int4, BenchStats, Int4Weights, Timer,
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// INT8 GEMM Benchmarks
|
||||
// ============================================================================
|
||||
|
||||
fn bench_qgemm_i8_sizes(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("qgemm_i8");
|
||||
|
||||
for size in [64, 128, 256].iter() {
|
||||
let m = *size;
|
||||
let n = *size;
|
||||
let k = *size;
|
||||
|
||||
let a: Vec<i8> = (0..m * k).map(|i| ((i as i16 % 256 - 128) as i8)).collect();
|
||||
let b: Vec<i8> = (0..n * k).map(|i| ((i as i16 % 256 - 128) as i8)).collect();
|
||||
let b_scales: Vec<f32> = vec![1.0 / 128.0; n];
|
||||
|
||||
let ops = 2 * m * n * k;
|
||||
group.throughput(Throughput::Elements(ops as u64));
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("scalar", size), size, |bench, _| {
|
||||
let mut c_out = vec![0i32; m * n];
|
||||
bench.iter(|| {
|
||||
qgemm_i8(m, n, k, &a, 1.0 / 128.0, &b, &b_scales, None, &mut c_out);
|
||||
black_box(c_out[0])
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("simd", size), size, |bench, _| {
|
||||
let mut c_out = vec![0i32; m * n];
|
||||
bench.iter(|| {
|
||||
qgemm_i8_simd(m, n, k, &a, 1.0 / 128.0, &b, &b_scales, None, &mut c_out);
|
||||
black_box(c_out[0])
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_qgemv(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("qgemv");
|
||||
|
||||
for size in [128, 256, 512].iter() {
|
||||
let n = *size;
|
||||
let k = *size;
|
||||
|
||||
let a: Vec<i8> = (0..k).map(|i| ((i as i16 % 256 - 128) as i8)).collect();
|
||||
let b: Vec<i8> = (0..n * k).map(|i| ((i as i16 % 256 - 128) as i8)).collect();
|
||||
let b_scales: Vec<f32> = vec![1.0 / 128.0; n];
|
||||
|
||||
group.throughput(Throughput::Elements((2 * n * k) as u64));
|
||||
|
||||
group.bench_with_input(BenchmarkId::from_parameter(size), size, |bench, _| {
|
||||
let mut c_out = vec![0i32; n];
|
||||
bench.iter(|| {
|
||||
qgemm_i8_simd(1, n, k, &a, 1.0 / 128.0, &b, &b_scales, None, &mut c_out);
|
||||
black_box(c_out[0])
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// INT4 Quantization Benchmarks
|
||||
// ============================================================================
|
||||
|
||||
fn bench_int4_pack_unpack(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("int4_pack_unpack");
|
||||
|
||||
group.bench_function("pack_single", |b| {
|
||||
b.iter(|| {
|
||||
let packed = pack_int4(black_box(5), black_box(-3));
|
||||
black_box(packed)
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("unpack_single", |b| {
|
||||
let packed = pack_int4(5, -3);
|
||||
b.iter(|| {
|
||||
let (v0, v1) = unpack_int4(black_box(packed));
|
||||
black_box((v0, v1))
|
||||
})
|
||||
});
|
||||
|
||||
// Bulk operations
|
||||
for count in [256, 1024, 4096].iter() {
|
||||
let values: Vec<f32> = (0..*count)
|
||||
.map(|i| (i as f32 - *count as f32 / 2.0) / 100.0)
|
||||
.collect();
|
||||
group.throughput(Throughput::Elements(*count as u64));
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("quantize", count), count, |bench, cnt| {
|
||||
let mut packed = vec![0u8; (*cnt + 1) / 2];
|
||||
bench.iter(|| {
|
||||
let scale = quantize_f32_to_int4(&values, &mut packed);
|
||||
black_box(scale)
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("dequantize", count),
|
||||
count,
|
||||
|bench, cnt| {
|
||||
let mut packed = vec![0u8; (*cnt + 1) / 2];
|
||||
let scale = quantize_f32_to_int4(&values, &mut packed);
|
||||
let mut output = vec![0.0f32; *cnt];
|
||||
bench.iter(|| {
|
||||
dequantize_int4_to_f32(&packed, scale, *cnt, &mut output);
|
||||
black_box(output[0])
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_int4_weights(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("int4_weights");
|
||||
|
||||
for (rows, cols) in [(256, 256), (512, 512), (768, 768)].iter() {
|
||||
let weights: Vec<f32> = (0..rows * cols)
|
||||
.map(|i| ((i % 200) as f32 - 100.0) / 100.0)
|
||||
.collect();
|
||||
|
||||
group.throughput(Throughput::Bytes((*rows * *cols * 4) as u64));
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("from_f32", format!("{}x{}", rows, cols)),
|
||||
&(*rows, *cols),
|
||||
|bench, (r, c)| {
|
||||
bench.iter(|| {
|
||||
let int4_w = Int4Weights::from_f32(&weights, *r, *c);
|
||||
black_box(int4_w.memory_bytes())
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_int4_gemv(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("int4_gemv");
|
||||
|
||||
for size in [256, 512, 768].iter() {
|
||||
let n = *size;
|
||||
let k = *size;
|
||||
|
||||
let weights: Vec<f32> = (0..n * k)
|
||||
.map(|i| ((i % 200) as f32 - 100.0) / 100.0)
|
||||
.collect();
|
||||
let int4_w = Int4Weights::from_f32(&weights, n, k);
|
||||
let x: Vec<f32> = (0..k).map(|i| (i as f32) / k as f32).collect();
|
||||
|
||||
let ops = 2 * n * k;
|
||||
group.throughput(Throughput::Elements(ops as u64));
|
||||
|
||||
group.bench_with_input(BenchmarkId::from_parameter(size), size, |bench, sz| {
|
||||
let mut y = vec![0.0f32; *sz];
|
||||
bench.iter(|| {
|
||||
int4_gemv(&int4_w, &x, 1.0, &mut y);
|
||||
black_box(y[0])
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_int4_gemm(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("int4_gemm");
|
||||
|
||||
for (m, n, k) in [(32, 256, 256), (64, 512, 512)].iter() {
|
||||
let weights: Vec<f32> = (0..n * k)
|
||||
.map(|i| ((i % 200) as f32 - 100.0) / 100.0)
|
||||
.collect();
|
||||
let int4_w = Int4Weights::from_f32(&weights, *n, *k);
|
||||
let a: Vec<f32> = (0..m * k).map(|i| (i as f32) / (m * k) as f32).collect();
|
||||
|
||||
let ops = 2 * m * n * k;
|
||||
group.throughput(Throughput::Elements(ops as u64));
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(format!("{}x{}x{}", m, n, k)),
|
||||
&(*m, *n, *k),
|
||||
|bench, (batch, nn, _)| {
|
||||
let mut c_out = vec![0.0f32; *batch * *nn];
|
||||
bench.iter(|| {
|
||||
int4_gemm(&int4_w, &a, 1.0, *batch, &mut c_out);
|
||||
black_box(c_out[0])
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_int4_memory_comparison(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("int4_vs_int8_memory");
|
||||
|
||||
for size in [256, 512].iter() {
|
||||
let n = *size;
|
||||
let k = *size;
|
||||
let total_weights = n * k;
|
||||
|
||||
// INT8 baseline
|
||||
let weights_i8: Vec<i8> = (0..total_weights)
|
||||
.map(|i| (i as i16 % 256 - 128) as i8)
|
||||
.collect();
|
||||
let b_scales: Vec<f32> = vec![1.0 / 128.0; n];
|
||||
let x_i8: Vec<i8> = (0..k).map(|i| (i as i16 % 256 - 128) as i8).collect();
|
||||
|
||||
// INT4
|
||||
let weights_f32: Vec<f32> = (0..total_weights)
|
||||
.map(|i| ((i % 200) as f32 - 100.0) / 100.0)
|
||||
.collect();
|
||||
let int4_w = Int4Weights::from_f32(&weights_f32, n, k);
|
||||
let x_f32: Vec<f32> = (0..k).map(|i| i as f32 / k as f32).collect();
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("int8_gemv", size), size, |bench, sz| {
|
||||
let mut y_i8 = vec![0i32; *sz];
|
||||
bench.iter(|| {
|
||||
qgemm_i8_simd(
|
||||
1,
|
||||
n,
|
||||
k,
|
||||
&x_i8,
|
||||
1.0 / 128.0,
|
||||
&weights_i8,
|
||||
&b_scales,
|
||||
None,
|
||||
&mut y_i8,
|
||||
);
|
||||
black_box(y_i8[0])
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("int4_gemv", size), size, |bench, sz| {
|
||||
let mut y_f32 = vec![0.0f32; *sz];
|
||||
bench.iter(|| {
|
||||
int4_gemv(&int4_w, &x_f32, 1.0, &mut y_f32);
|
||||
black_box(y_f32[0])
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Normalization Benchmarks
|
||||
// ============================================================================
|
||||
|
||||
fn bench_layer_norm(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("layer_norm");
|
||||
|
||||
for size in [128, 256, 512, 768].iter() {
|
||||
let input: Vec<f32> = (0..*size)
|
||||
.map(|i| (i as f32 - *size as f32 / 2.0) / 100.0)
|
||||
.collect();
|
||||
let gamma: Vec<f32> = vec![1.0f32; *size];
|
||||
let beta: Vec<f32> = vec![0.0f32; *size];
|
||||
|
||||
group.throughput(Throughput::Elements(*size as u64));
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("layer_norm", size), size, |bench, sz| {
|
||||
let mut output = vec![0.0f32; *sz];
|
||||
bench.iter(|| {
|
||||
layer_norm(&input, &gamma, &beta, 1e-5, &mut output);
|
||||
black_box(output[0])
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("rms_norm", size), size, |bench, sz| {
|
||||
let mut output = vec![0.0f32; *sz];
|
||||
bench.iter(|| {
|
||||
rms_norm(&input, &gamma, 1e-5, &mut output);
|
||||
black_box(output[0])
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Arena Allocator Benchmarks
|
||||
// ============================================================================
|
||||
|
||||
fn bench_arena_allocation(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("arena_alloc");
|
||||
|
||||
for size_kb in [64, 256, 1024].iter() {
|
||||
let size = size_kb * 1024;
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("create", format!("{}KB", size_kb)),
|
||||
&size,
|
||||
|bench, sz| {
|
||||
bench.iter(|| {
|
||||
let arena = WeightArena::new(black_box(*sz));
|
||||
black_box(arena.capacity())
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
// Allocation patterns
|
||||
let arena_size = 1024 * 1024; // 1MB
|
||||
|
||||
group.bench_function("alloc_i8_1024", |b| {
|
||||
b.iter(|| {
|
||||
let mut arena = WeightArena::new(arena_size);
|
||||
for _ in 0..100 {
|
||||
let _ = arena.alloc_i8(black_box(1024));
|
||||
}
|
||||
black_box(arena.offset())
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("alloc_f32_256", |b| {
|
||||
b.iter(|| {
|
||||
let mut arena = WeightArena::new(arena_size);
|
||||
for _ in 0..100 {
|
||||
let _ = arena.alloc_f32(black_box(256));
|
||||
}
|
||||
black_box(arena.offset())
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("alloc_mixed", |b| {
|
||||
b.iter(|| {
|
||||
let mut arena = WeightArena::new(arena_size);
|
||||
for i in 0..50 {
|
||||
if i % 2 == 0 {
|
||||
let _ = arena.alloc_i8(black_box(1024));
|
||||
} else {
|
||||
let _ = arena.alloc_f32(black_box(256));
|
||||
}
|
||||
}
|
||||
black_box(arena.offset())
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("reset_reuse", |b| {
|
||||
let mut arena = WeightArena::new(arena_size);
|
||||
b.iter(|| {
|
||||
arena.reset();
|
||||
for _ in 0..100 {
|
||||
let _ = arena.alloc_i8(black_box(1024));
|
||||
}
|
||||
black_box(arena.offset())
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_arena_size_calculation(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("arena_size_calc");
|
||||
|
||||
for (layers, hidden) in [(4, 256), (12, 768), (24, 1024)].iter() {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(format!("{}L_{}H", layers, hidden)),
|
||||
&(*layers, *hidden),
|
||||
|bench, (l, h)| {
|
||||
bench.iter(|| {
|
||||
let size = calculate_arena_size(black_box(*l), black_box(*h), 4, 8);
|
||||
black_box(size)
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Timer/Stats Utilities Benchmarks
|
||||
// ============================================================================
|
||||
|
||||
fn bench_timer_overhead(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("timer_overhead");
|
||||
|
||||
group.bench_function("timer_start_stop", |b| {
|
||||
b.iter(|| {
|
||||
let mut timer = Timer::new();
|
||||
timer.start();
|
||||
timer.stop();
|
||||
black_box(timer.elapsed_ns())
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("bench_stats_record", |b| {
|
||||
let mut stats = BenchStats::new(1000);
|
||||
b.iter(|| {
|
||||
stats.add_sample(black_box(100));
|
||||
black_box(stats.min_ns())
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("compute_gflops", |b| {
|
||||
b.iter(|| {
|
||||
let gflops = compute_gflops(black_box(2_000_000_000), black_box(1_000_000));
|
||||
black_box(gflops)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Combined Workload Benchmarks
|
||||
// ============================================================================
|
||||
|
||||
fn bench_transformer_layer_simulation(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("layer_simulation");
|
||||
|
||||
let hidden = 256;
|
||||
let ffn_hidden = hidden * 4;
|
||||
|
||||
let q_weights: Vec<i8> = (0..hidden * hidden)
|
||||
.map(|i| ((i as i16 % 256 - 128) as i8))
|
||||
.collect();
|
||||
let k_weights: Vec<i8> = (0..hidden * hidden)
|
||||
.map(|i| ((i as i16 % 256 - 128) as i8))
|
||||
.collect();
|
||||
let v_weights: Vec<i8> = (0..hidden * hidden)
|
||||
.map(|i| ((i as i16 % 256 - 128) as i8))
|
||||
.collect();
|
||||
let ffn_up: Vec<i8> = (0..hidden * ffn_hidden)
|
||||
.map(|i| ((i as i16 % 256 - 128) as i8))
|
||||
.collect();
|
||||
let ffn_down: Vec<i8> = (0..ffn_hidden * hidden)
|
||||
.map(|i| ((i as i16 % 256 - 128) as i8))
|
||||
.collect();
|
||||
|
||||
let q_scales: Vec<f32> = vec![1.0 / 128.0; hidden];
|
||||
let k_scales: Vec<f32> = vec![1.0 / 128.0; hidden];
|
||||
let v_scales: Vec<f32> = vec![1.0 / 128.0; hidden];
|
||||
let ffn_up_scales: Vec<f32> = vec![1.0 / 128.0; ffn_hidden];
|
||||
let ffn_down_scales: Vec<f32> = vec![1.0 / 128.0; hidden];
|
||||
|
||||
let input: Vec<i8> = (0..hidden)
|
||||
.map(|i| ((i as i16 % 256 - 128) as i8))
|
||||
.collect();
|
||||
|
||||
group.bench_function("qkv_projection", |b| {
|
||||
let mut q_out = vec![0i32; hidden];
|
||||
let mut k_out = vec![0i32; hidden];
|
||||
let mut v_out = vec![0i32; hidden];
|
||||
b.iter(|| {
|
||||
qgemm_i8_simd(
|
||||
1,
|
||||
hidden,
|
||||
hidden,
|
||||
&input,
|
||||
1.0 / 128.0,
|
||||
&q_weights,
|
||||
&q_scales,
|
||||
None,
|
||||
&mut q_out,
|
||||
);
|
||||
qgemm_i8_simd(
|
||||
1,
|
||||
hidden,
|
||||
hidden,
|
||||
&input,
|
||||
1.0 / 128.0,
|
||||
&k_weights,
|
||||
&k_scales,
|
||||
None,
|
||||
&mut k_out,
|
||||
);
|
||||
qgemm_i8_simd(
|
||||
1,
|
||||
hidden,
|
||||
hidden,
|
||||
&input,
|
||||
1.0 / 128.0,
|
||||
&v_weights,
|
||||
&v_scales,
|
||||
None,
|
||||
&mut v_out,
|
||||
);
|
||||
black_box((q_out[0], k_out[0], v_out[0]))
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("ffn_forward", |b| {
|
||||
let mut ffn_mid = vec![0i32; ffn_hidden];
|
||||
let mut out = vec![0i32; hidden];
|
||||
b.iter(|| {
|
||||
qgemm_i8_simd(
|
||||
1,
|
||||
ffn_hidden,
|
||||
hidden,
|
||||
&input,
|
||||
1.0 / 128.0,
|
||||
&ffn_up,
|
||||
&ffn_up_scales,
|
||||
None,
|
||||
&mut ffn_mid,
|
||||
);
|
||||
let ffn_mid_i8: Vec<i8> = ffn_mid
|
||||
.iter()
|
||||
.map(|&x| (x >> 8).clamp(-128, 127) as i8)
|
||||
.collect();
|
||||
qgemm_i8_simd(
|
||||
1,
|
||||
hidden,
|
||||
ffn_hidden,
|
||||
&ffn_mid_i8,
|
||||
1.0 / 128.0,
|
||||
&ffn_down,
|
||||
&ffn_down_scales,
|
||||
None,
|
||||
&mut out,
|
||||
);
|
||||
black_box(out[0])
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("layer_norm_overhead", |b| {
|
||||
let input_f32: Vec<f32> = input.iter().map(|&x| x as f32 / 128.0).collect();
|
||||
let gamma_f32: Vec<f32> = vec![1.0f32; hidden];
|
||||
let beta_f32: Vec<f32> = vec![0.0f32; hidden];
|
||||
let mut output = vec![0.0f32; hidden];
|
||||
b.iter(|| {
|
||||
layer_norm(&input_f32, &gamma_f32, &beta_f32, 1e-5, &mut output);
|
||||
black_box(output[0])
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
bench_qgemm_i8_sizes,
|
||||
bench_qgemv,
|
||||
bench_int4_pack_unpack,
|
||||
bench_int4_weights,
|
||||
bench_int4_gemv,
|
||||
bench_int4_gemm,
|
||||
bench_int4_memory_comparison,
|
||||
bench_layer_norm,
|
||||
bench_arena_allocation,
|
||||
bench_arena_size_calculation,
|
||||
bench_timer_overhead,
|
||||
bench_transformer_layer_simulation,
|
||||
);
|
||||
|
||||
criterion_main!(benches);
|
||||
607
crates/ruvector-mincut-gated-transformer/benches/latency.rs
Normal file
607
crates/ruvector-mincut-gated-transformer/benches/latency.rs
Normal file
@@ -0,0 +1,607 @@
|
||||
//! Latency benchmarks for mincut gated transformer.
|
||||
//!
|
||||
//! Tests inference latency across different tiers and configurations.
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||
use ruvector_mincut_gated_transformer::{
|
||||
GatePacket, GatePolicy, InferInput, InferOutput, MincutGatedTransformer, QuantizedWeights,
|
||||
SpikePacket, TransformerConfig,
|
||||
};
|
||||
|
||||
fn create_transformer(config: TransformerConfig) -> MincutGatedTransformer {
|
||||
let policy = GatePolicy::default();
|
||||
let weights = QuantizedWeights::empty(&config);
|
||||
MincutGatedTransformer::new(config, policy, weights).unwrap()
|
||||
}
|
||||
|
||||
fn bench_tier0_inference(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("tier0_inference");
|
||||
|
||||
for seq_len in [16, 32, 64].iter() {
|
||||
let mut config = TransformerConfig::baseline();
|
||||
config.seq_len_max = *seq_len;
|
||||
config.seq_len_degraded = seq_len / 2;
|
||||
config.seq_len_safe = seq_len / 8;
|
||||
let mut transformer = create_transformer(config.clone());
|
||||
|
||||
let gate = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 5,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
let tokens: Vec<u32> = (0..*seq_len as u32).collect();
|
||||
let input = InferInput::from_tokens(&tokens, gate);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
|
||||
group.bench_with_input(BenchmarkId::from_parameter(seq_len), seq_len, |b, _| {
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_tier1_degraded(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("tier1_degraded");
|
||||
|
||||
let config = TransformerConfig::baseline();
|
||||
let mut transformer = create_transformer(config.clone());
|
||||
|
||||
// Gate packet that triggers tier 1 (ReduceScope)
|
||||
let gate = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 30, // Above default max of 20
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
let tokens: Vec<u32> = (0..64).collect();
|
||||
let input = InferInput::from_tokens(&tokens, gate);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
|
||||
group.bench_function("baseline_64_degraded", |b| {
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_tier2_safe(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("tier2_safe");
|
||||
|
||||
let config = TransformerConfig::baseline();
|
||||
let mut transformer = create_transformer(config.clone());
|
||||
|
||||
// Gate packet that triggers tier 2 (FreezeWrites via force flag)
|
||||
let gate = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 5,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: GatePacket::FLAG_FORCE_SAFE,
|
||||
};
|
||||
|
||||
let tokens: Vec<u32> = (0..64).collect();
|
||||
let input = InferInput::from_tokens(&tokens, gate);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
|
||||
group.bench_function("baseline_64_safe", |b| {
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_tier3_skip(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("tier3_skip");
|
||||
|
||||
let config = TransformerConfig::baseline();
|
||||
let mut transformer = create_transformer(config.clone());
|
||||
|
||||
// Gate packet that triggers skip
|
||||
let gate = GatePacket {
|
||||
lambda: 100,
|
||||
flags: GatePacket::FLAG_SKIP,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let tokens: Vec<u32> = (0..64).collect();
|
||||
let input = InferInput::from_tokens(&tokens, gate);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
|
||||
group.bench_function("baseline_64_skip", |b| {
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_spike_inactive_skip(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("spike_inactive");
|
||||
|
||||
let config = TransformerConfig::baseline();
|
||||
let mut transformer = create_transformer(config.clone());
|
||||
|
||||
let gate = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let spike = SpikePacket {
|
||||
fired: 0, // Not fired - should skip
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let tokens: Vec<u32> = (0..64).collect();
|
||||
let input = InferInput::from_tokens(&tokens, gate).with_spikes(spike);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
|
||||
group.bench_function("baseline_64_spike_inactive", |b| {
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_window_sizes(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("window_sweep");
|
||||
|
||||
for window in [4, 8, 16, 32].iter() {
|
||||
let mut config = TransformerConfig::baseline();
|
||||
config.window_normal = *window;
|
||||
config.window_degraded = window / 2;
|
||||
let mut transformer = create_transformer(config.clone());
|
||||
|
||||
let gate = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 5,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let tokens: Vec<u32> = (0..64).collect();
|
||||
let input = InferInput::from_tokens(&tokens, gate);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
|
||||
group.bench_with_input(BenchmarkId::from_parameter(window), window, |b, _| {
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_micro_config(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("micro_config");
|
||||
|
||||
let config = TransformerConfig::micro();
|
||||
let mut transformer = create_transformer(config.clone());
|
||||
|
||||
let gate = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 5,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let tokens: Vec<u32> = (0..32).collect();
|
||||
let input = InferInput::from_tokens(&tokens, gate);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
|
||||
group.bench_function("micro_32", |b| {
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_mod_routing_overhead(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("mod_routing");
|
||||
|
||||
let config = TransformerConfig::baseline();
|
||||
let mut transformer = create_transformer(config.clone());
|
||||
|
||||
let tokens: Vec<u32> = (0..64).collect();
|
||||
|
||||
// Baseline without routing overhead
|
||||
let gate_normal = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 5,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
group.bench_function("no_routing_overhead", |b| {
|
||||
let input = InferInput::from_tokens(&tokens, gate_normal);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
// With routing overhead (boundary spike)
|
||||
let gate_routing = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 30,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
group.bench_function("with_routing_overhead", |b| {
|
||||
let input = InferInput::from_tokens(&tokens, gate_routing);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_early_exit_speedup(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("early_exit");
|
||||
|
||||
let config = TransformerConfig::baseline();
|
||||
let mut transformer = create_transformer(config.clone());
|
||||
|
||||
let tokens: Vec<u32> = (0..64).collect();
|
||||
|
||||
// Full execution
|
||||
let gate_full = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 5,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
group.bench_function("full_execution", |b| {
|
||||
let input = InferInput::from_tokens(&tokens, gate_full);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
// Early exit (tier 1)
|
||||
let gate_exit = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 30,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
group.bench_function("early_exit_tier1", |b| {
|
||||
let input = InferInput::from_tokens(&tokens, gate_exit);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
// Minimal execution (tier 2)
|
||||
let gate_minimal = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 5,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: GatePacket::FLAG_FORCE_SAFE,
|
||||
};
|
||||
|
||||
group.bench_function("minimal_tier2", |b| {
|
||||
let input = InferInput::from_tokens(&tokens, gate_minimal);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_sparse_vs_dense_attention(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("sparse_attention");
|
||||
|
||||
let config = TransformerConfig::baseline();
|
||||
let mut transformer = create_transformer(config.clone());
|
||||
|
||||
let tokens: Vec<u32> = (0..64).collect();
|
||||
|
||||
// Dense attention (normal window)
|
||||
let gate_dense = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 5,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
group.bench_function("dense_attention", |b| {
|
||||
let input = InferInput::from_tokens(&tokens, gate_dense);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
// Sparse attention (reduced scope)
|
||||
let gate_sparse = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 30,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
group.bench_function("sparse_attention", |b| {
|
||||
let input = InferInput::from_tokens(&tokens, gate_sparse);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_spike_vs_standard_attention(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("spike_attention");
|
||||
|
||||
let config = TransformerConfig::baseline();
|
||||
let mut transformer = create_transformer(config.clone());
|
||||
|
||||
let tokens: Vec<u32> = (0..64).collect();
|
||||
let gate = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 95,
|
||||
boundary_edges: 5,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
// Standard (no spikes)
|
||||
group.bench_function("standard_no_spikes", |b| {
|
||||
let input = InferInput::from_tokens(&tokens, gate);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
// With active spikes
|
||||
let spike_active = SpikePacket {
|
||||
fired: 1,
|
||||
rate_q15: 20000,
|
||||
novelty_q15: 15000,
|
||||
top_len: 8,
|
||||
top_idx: [2, 8, 14, 20, 26, 32, 38, 44, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
top_w_q15: [14336; 16],
|
||||
flags: SpikePacket::FLAG_SPARSE_MASK,
|
||||
};
|
||||
|
||||
group.bench_function("with_active_spikes", |b| {
|
||||
let input = InferInput::from_tokens(&tokens, gate).with_spikes(spike_active);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
// With inactive spikes (skip path)
|
||||
let spike_inactive = SpikePacket {
|
||||
fired: 0,
|
||||
rate_q15: 500,
|
||||
novelty_q15: 500,
|
||||
top_len: 0,
|
||||
top_idx: [0; 16],
|
||||
top_w_q15: [0; 16],
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
group.bench_function("inactive_spikes_skip", |b| {
|
||||
let input = InferInput::from_tokens(&tokens, gate).with_spikes(spike_inactive);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_lambda_drop_patterns(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("lambda_patterns");
|
||||
|
||||
let config = TransformerConfig::baseline();
|
||||
let mut transformer = create_transformer(config.clone());
|
||||
|
||||
let tokens: Vec<u32> = (0..64).collect();
|
||||
|
||||
// Stable lambda
|
||||
let gate_stable = GatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 98,
|
||||
boundary_edges: 5,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
group.bench_function("stable_lambda", |b| {
|
||||
let input = InferInput::from_tokens(&tokens, gate_stable);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
// Fast lambda drop
|
||||
let gate_drop = GatePacket {
|
||||
lambda: 40,
|
||||
lambda_prev: 100,
|
||||
boundary_edges: 5,
|
||||
boundary_concentration_q15: 8192,
|
||||
partition_count: 3,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
group.bench_function("fast_lambda_drop", |b| {
|
||||
let input = InferInput::from_tokens(&tokens, gate_drop);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_policy_variants(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("policy_comparison");
|
||||
|
||||
let config = TransformerConfig::baseline();
|
||||
let tokens: Vec<u32> = (0..64).collect();
|
||||
|
||||
let gate = GatePacket {
|
||||
lambda: 45,
|
||||
lambda_prev: 50,
|
||||
boundary_edges: 12,
|
||||
boundary_concentration_q15: 15000,
|
||||
partition_count: 6,
|
||||
flags: 0,
|
||||
};
|
||||
|
||||
// Default policy
|
||||
group.bench_function("default_policy", |b| {
|
||||
let policy = GatePolicy::default();
|
||||
let weights = QuantizedWeights::empty(&config);
|
||||
let mut transformer = MincutGatedTransformer::new(config.clone(), policy, weights).unwrap();
|
||||
let input = InferInput::from_tokens(&tokens, gate);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
// Conservative policy
|
||||
group.bench_function("conservative_policy", |b| {
|
||||
let policy = GatePolicy::conservative();
|
||||
let weights = QuantizedWeights::empty(&config);
|
||||
let mut transformer = MincutGatedTransformer::new(config.clone(), policy, weights).unwrap();
|
||||
let input = InferInput::from_tokens(&tokens, gate);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
// Permissive policy
|
||||
group.bench_function("permissive_policy", |b| {
|
||||
let policy = GatePolicy::permissive();
|
||||
let weights = QuantizedWeights::empty(&config);
|
||||
let mut transformer = MincutGatedTransformer::new(config.clone(), policy, weights).unwrap();
|
||||
let input = InferInput::from_tokens(&tokens, gate);
|
||||
let mut logits = vec![0i32; config.logits as usize];
|
||||
b.iter(|| {
|
||||
let mut output = InferOutput::new(&mut logits);
|
||||
transformer.infer(black_box(&input), &mut output).unwrap();
|
||||
black_box(output.witness)
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(
    benches,
    bench_tier0_inference,
    bench_tier1_degraded,
    bench_tier2_safe,
    bench_tier3_skip,
    bench_spike_inactive_skip,
    bench_window_sizes,
    bench_micro_config,
    bench_mod_routing_overhead,
    bench_early_exit_speedup,
    bench_sparse_vs_dense_attention,
    bench_spike_vs_standard_attention,
    bench_lambda_drop_patterns,
    bench_policy_variants,
);
|
||||
|
||||
criterion_main!(benches);
|
||||
Reference in New Issue
Block a user