Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
329
crates/ruvector-attention/benches/attention_bench.rs
Normal file
329
crates/ruvector-attention/benches/attention_bench.rs
Normal file
@@ -0,0 +1,329 @@
|
||||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||
use ruvector_attention::{
|
||||
attention::ScaledDotProductAttention,
|
||||
graph::{
|
||||
DualSpaceAttention, DualSpaceConfig, EdgeFeaturedAttention, EdgeFeaturedConfig, GraphRoPE,
|
||||
RoPEConfig,
|
||||
},
|
||||
hyperbolic::{HyperbolicAttention, HyperbolicAttentionConfig},
|
||||
moe::{MoEAttention, MoEConfig},
|
||||
sparse::{FlashAttention, LinearAttention, LocalGlobalAttention},
|
||||
training::{Adam, InfoNCELoss, Loss, Optimizer},
|
||||
traits::Attention,
|
||||
};
|
||||
|
||||
fn bench_scaled_dot_product(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("scaled_dot_product");
|
||||
|
||||
for dim in [64, 128, 256, 512] {
|
||||
let attention = ScaledDotProductAttention::new(dim);
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("dim", dim), &dim, |b, &dim| {
|
||||
let query = vec![0.5; dim];
|
||||
let keys: Vec<Vec<f32>> = (0..100)
|
||||
.map(|i| vec![(i as f32 * 0.01) % 1.0; dim])
|
||||
.collect();
|
||||
let values: Vec<Vec<f32>> = (0..100)
|
||||
.map(|i| vec![(i as f32 * 0.02) % 1.0; dim])
|
||||
.collect();
|
||||
let keys_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
|
||||
let values_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();
|
||||
|
||||
b.iter(|| black_box(attention.compute(&query, &keys_refs, &values_refs).unwrap()));
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_flash_attention(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("flash_attention");
|
||||
|
||||
for seq_len in [64, 256, 512, 1024] {
|
||||
let dim = 256;
|
||||
let attention = FlashAttention::new(dim, 64);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("seq_len", seq_len),
|
||||
&seq_len,
|
||||
|b, &seq_len| {
|
||||
let query = vec![0.5; dim];
|
||||
let keys: Vec<Vec<f32>> = (0..seq_len)
|
||||
.map(|i| vec![(i as f32 * 0.01) % 1.0; dim])
|
||||
.collect();
|
||||
let values: Vec<Vec<f32>> = (0..seq_len)
|
||||
.map(|i| vec![(i as f32 * 0.02) % 1.0; dim])
|
||||
.collect();
|
||||
let keys_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
|
||||
let values_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();
|
||||
|
||||
b.iter(|| black_box(attention.compute(&query, &keys_refs, &values_refs).unwrap()));
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_linear_attention(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("linear_attention");
|
||||
|
||||
for seq_len in [256, 512, 1024, 2048] {
|
||||
let dim = 256;
|
||||
let attention = LinearAttention::new(dim, 64);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("seq_len", seq_len),
|
||||
&seq_len,
|
||||
|b, &seq_len| {
|
||||
let query = vec![0.5; dim];
|
||||
let keys: Vec<Vec<f32>> = (0..seq_len)
|
||||
.map(|i| vec![(i as f32 * 0.01) % 1.0; dim])
|
||||
.collect();
|
||||
let values: Vec<Vec<f32>> = (0..seq_len)
|
||||
.map(|i| vec![(i as f32 * 0.02) % 1.0; dim])
|
||||
.collect();
|
||||
let keys_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
|
||||
let values_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();
|
||||
|
||||
b.iter(|| black_box(attention.compute(&query, &keys_refs, &values_refs).unwrap()));
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_local_global_attention(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("local_global_attention");
|
||||
|
||||
for window_size in [16, 32, 64, 128] {
|
||||
let dim = 256;
|
||||
let attention = LocalGlobalAttention::new(dim, window_size, 4);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("window", window_size),
|
||||
&window_size,
|
||||
|b, _| {
|
||||
let query = vec![0.5; dim];
|
||||
let keys: Vec<Vec<f32>> = (0..512)
|
||||
.map(|i| vec![(i as f32 * 0.01) % 1.0; dim])
|
||||
.collect();
|
||||
let values: Vec<Vec<f32>> = (0..512)
|
||||
.map(|i| vec![(i as f32 * 0.02) % 1.0; dim])
|
||||
.collect();
|
||||
let keys_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
|
||||
let values_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();
|
||||
|
||||
b.iter(|| black_box(attention.compute(&query, &keys_refs, &values_refs).unwrap()));
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_moe_attention(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("moe_attention");
|
||||
|
||||
for num_experts in [2, 4, 8] {
|
||||
let config = MoEConfig::builder()
|
||||
.dim(256)
|
||||
.num_experts(num_experts)
|
||||
.top_k(2)
|
||||
.build();
|
||||
let attention = MoEAttention::new(config);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("experts", num_experts),
|
||||
&num_experts,
|
||||
|b, _| {
|
||||
let query = vec![0.5; 256];
|
||||
let keys: Vec<Vec<f32>> = (0..100)
|
||||
.map(|i| vec![(i as f32 * 0.01) % 1.0; 256])
|
||||
.collect();
|
||||
let values: Vec<Vec<f32>> = (0..100)
|
||||
.map(|i| vec![(i as f32 * 0.02) % 1.0; 256])
|
||||
.collect();
|
||||
let keys_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
|
||||
let values_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();
|
||||
|
||||
b.iter(|| black_box(attention.compute(&query, &keys_refs, &values_refs).unwrap()));
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_hyperbolic_attention(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("hyperbolic_attention");
|
||||
|
||||
for dim in [64, 128, 256] {
|
||||
let config = HyperbolicAttentionConfig {
|
||||
dim,
|
||||
curvature: -1.0,
|
||||
..Default::default()
|
||||
};
|
||||
let attention = HyperbolicAttention::new(config);
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("dim", dim), &dim, |b, &dim| {
|
||||
let query = vec![0.1; dim];
|
||||
let keys: Vec<Vec<f32>> = (0..100)
|
||||
.map(|i| vec![(i as f32 * 0.001) % 0.5; dim])
|
||||
.collect();
|
||||
let values: Vec<Vec<f32>> = (0..100)
|
||||
.map(|i| vec![(i as f32 * 0.002) % 0.5; dim])
|
||||
.collect();
|
||||
let keys_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
|
||||
let values_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();
|
||||
|
||||
b.iter(|| black_box(attention.compute(&query, &keys_refs, &values_refs).unwrap()));
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_edge_featured_attention(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("edge_featured_attention");
|
||||
|
||||
for num_heads in [1, 2, 4, 8] {
|
||||
let config = EdgeFeaturedConfig::builder()
|
||||
.node_dim(256)
|
||||
.edge_dim(32)
|
||||
.num_heads(num_heads)
|
||||
.build();
|
||||
let attention = EdgeFeaturedAttention::new(config);
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("heads", num_heads), &num_heads, |b, _| {
|
||||
let query = vec![0.5; 256];
|
||||
let keys: Vec<Vec<f32>> = (0..64)
|
||||
.map(|i| vec![(i as f32 * 0.01) % 1.0; 256])
|
||||
.collect();
|
||||
let values: Vec<Vec<f32>> = (0..64)
|
||||
.map(|i| vec![(i as f32 * 0.02) % 1.0; 256])
|
||||
.collect();
|
||||
let keys_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
|
||||
let values_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();
|
||||
|
||||
b.iter(|| black_box(attention.compute(&query, &keys_refs, &values_refs).unwrap()));
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_graph_rope(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("graph_rope");
|
||||
|
||||
for dim in [64, 128, 256] {
|
||||
let config = RoPEConfig::builder().dim(dim).max_position(1024).build();
|
||||
let attention = GraphRoPE::new(config);
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("dim", dim), &dim, |b, &dim| {
|
||||
let query = vec![0.5; dim];
|
||||
let keys: Vec<Vec<f32>> = (0..256)
|
||||
.map(|i| vec![(i as f32 * 0.01) % 1.0; dim])
|
||||
.collect();
|
||||
let values: Vec<Vec<f32>> = (0..256)
|
||||
.map(|i| vec![(i as f32 * 0.02) % 1.0; dim])
|
||||
.collect();
|
||||
let keys_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
|
||||
let values_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();
|
||||
|
||||
b.iter(|| black_box(attention.compute(&query, &keys_refs, &values_refs).unwrap()));
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_dual_space_attention(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("dual_space_attention");
|
||||
|
||||
for dim in [64, 128, 256] {
|
||||
let config = DualSpaceConfig::builder()
|
||||
.dim(dim)
|
||||
.euclidean_weight(0.5)
|
||||
.hyperbolic_weight(0.5)
|
||||
.build();
|
||||
let attention = DualSpaceAttention::new(config);
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("dim", dim), &dim, |b, &dim| {
|
||||
let query = vec![0.1; dim];
|
||||
let keys: Vec<Vec<f32>> = (0..100)
|
||||
.map(|i| vec![(i as f32 * 0.001) % 0.3; dim])
|
||||
.collect();
|
||||
let values: Vec<Vec<f32>> = (0..100)
|
||||
.map(|i| vec![(i as f32 * 0.002) % 0.3; dim])
|
||||
.collect();
|
||||
let keys_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
|
||||
let values_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();
|
||||
|
||||
b.iter(|| black_box(attention.compute(&query, &keys_refs, &values_refs).unwrap()));
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_infonce_loss(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("infonce_loss");
|
||||
|
||||
for num_negatives in [10, 50, 100, 200] {
|
||||
let loss = InfoNCELoss::new(0.07);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("negatives", num_negatives),
|
||||
&num_negatives,
|
||||
|b, &num_neg| {
|
||||
let anchor = vec![0.5; 128];
|
||||
let positive = vec![0.6; 128];
|
||||
let negatives: Vec<Vec<f32>> = (0..num_neg)
|
||||
.map(|i| vec![(i as f32 * 0.01) % 1.0; 128])
|
||||
.collect();
|
||||
let neg_refs: Vec<&[f32]> = negatives.iter().map(|n| n.as_slice()).collect();
|
||||
|
||||
b.iter(|| black_box(loss.compute(&anchor, &positive, &neg_refs)));
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_adam_optimizer(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("adam_optimizer");
|
||||
|
||||
for dim in [128, 256, 512, 1024] {
|
||||
group.bench_with_input(BenchmarkId::new("dim", dim), &dim, |b, &dim| {
|
||||
let mut optimizer = Adam::new(dim, 0.001);
|
||||
let mut params = vec![0.5; dim];
|
||||
let gradients = vec![0.01; dim];
|
||||
|
||||
b.iter(|| {
|
||||
optimizer.step(&mut params, &gradients);
|
||||
black_box(¶ms)
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
bench_scaled_dot_product,
|
||||
bench_flash_attention,
|
||||
bench_linear_attention,
|
||||
bench_local_global_attention,
|
||||
bench_moe_attention,
|
||||
bench_hyperbolic_attention,
|
||||
bench_edge_featured_attention,
|
||||
bench_graph_rope,
|
||||
bench_dual_space_attention,
|
||||
bench_infonce_loss,
|
||||
bench_adam_optimizer,
|
||||
);
|
||||
criterion_main!(benches);
|
||||
303
crates/ruvector-attention/benches/attention_benchmarks.rs
Normal file
303
crates/ruvector-attention/benches/attention_benchmarks.rs
Normal file
@@ -0,0 +1,303 @@
|
||||
//! Benchmarks for ruvector-attention
|
||||
//!
|
||||
//! Run with: cargo bench -p ruvector-attention
|
||||
|
||||
use std::time::Instant;
|
||||
|
||||
use ruvector_attention::{
|
||||
attention::ScaledDotProductAttention,
|
||||
graph::{
|
||||
DualSpaceAttention, DualSpaceConfig, EdgeFeaturedAttention, EdgeFeaturedConfig, GraphRoPE,
|
||||
RoPEConfig,
|
||||
},
|
||||
hyperbolic::{HyperbolicAttention, HyperbolicAttentionConfig},
|
||||
moe::{MoEAttention, MoEConfig},
|
||||
sparse::{FlashAttention, LinearAttention, LocalGlobalAttention},
|
||||
training::{Adam, InfoNCELoss, Loss, Optimizer},
|
||||
traits::Attention,
|
||||
};
|
||||
|
||||
fn main() {
|
||||
println!("=== ruvector-attention Benchmarks ===\n");
|
||||
|
||||
// Configuration
|
||||
let dim = 256;
|
||||
let seq_len = 512;
|
||||
let iterations = 100;
|
||||
|
||||
// Generate test data
|
||||
let query = vec![0.5f32; dim];
|
||||
let keys: Vec<Vec<f32>> = (0..seq_len)
|
||||
.map(|i| vec![(i as f32 * 0.01) % 1.0; dim])
|
||||
.collect();
|
||||
let values: Vec<Vec<f32>> = (0..seq_len)
|
||||
.map(|i| vec![(i as f32 * 0.02) % 1.0; dim])
|
||||
.collect();
|
||||
let keys_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
|
||||
let values_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();
|
||||
|
||||
println!("Configuration:");
|
||||
println!(" Dimension: {}", dim);
|
||||
println!(" Sequence Length: {}", seq_len);
|
||||
println!(" Iterations: {}", iterations);
|
||||
println!();
|
||||
|
||||
// 1. Scaled Dot-Product Attention
|
||||
{
|
||||
let attention = ScaledDotProductAttention::new(dim);
|
||||
let start = Instant::now();
|
||||
for _ in 0..iterations {
|
||||
let _ = attention.compute(&query, &keys_refs, &values_refs).unwrap();
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
let avg_us = elapsed.as_micros() as f64 / iterations as f64;
|
||||
println!("Scaled Dot-Product Attention:");
|
||||
println!(" Total: {:?}", elapsed);
|
||||
println!(" Per iteration: {:.2} µs", avg_us);
|
||||
println!(" Throughput: {:.0} ops/sec", 1_000_000.0 / avg_us);
|
||||
println!();
|
||||
}
|
||||
|
||||
// 2. Flash Attention
|
||||
{
|
||||
let attention = FlashAttention::new(dim, 64);
|
||||
let start = Instant::now();
|
||||
for _ in 0..iterations {
|
||||
let _ = attention.compute(&query, &keys_refs, &values_refs).unwrap();
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
let avg_us = elapsed.as_micros() as f64 / iterations as f64;
|
||||
println!("Flash Attention (block_size=64):");
|
||||
println!(" Total: {:?}", elapsed);
|
||||
println!(" Per iteration: {:.2} µs", avg_us);
|
||||
println!(" Throughput: {:.0} ops/sec", 1_000_000.0 / avg_us);
|
||||
println!();
|
||||
}
|
||||
|
||||
// 3. Linear Attention
|
||||
{
|
||||
let attention = LinearAttention::new(dim, 64);
|
||||
let start = Instant::now();
|
||||
for _ in 0..iterations {
|
||||
let _ = attention.compute(&query, &keys_refs, &values_refs).unwrap();
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
let avg_us = elapsed.as_micros() as f64 / iterations as f64;
|
||||
println!("Linear Attention (num_features=64):");
|
||||
println!(" Total: {:?}", elapsed);
|
||||
println!(" Per iteration: {:.2} µs", avg_us);
|
||||
println!(" Throughput: {:.0} ops/sec", 1_000_000.0 / avg_us);
|
||||
println!();
|
||||
}
|
||||
|
||||
// 4. Local-Global Attention
|
||||
{
|
||||
let attention = LocalGlobalAttention::new(dim, 32, 4);
|
||||
let start = Instant::now();
|
||||
for _ in 0..iterations {
|
||||
let _ = attention.compute(&query, &keys_refs, &values_refs).unwrap();
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
let avg_us = elapsed.as_micros() as f64 / iterations as f64;
|
||||
println!("Local-Global Attention (window=32, global=4):");
|
||||
println!(" Total: {:?}", elapsed);
|
||||
println!(" Per iteration: {:.2} µs", avg_us);
|
||||
println!(" Throughput: {:.0} ops/sec", 1_000_000.0 / avg_us);
|
||||
println!();
|
||||
}
|
||||
|
||||
// 5. MoE Attention
|
||||
{
|
||||
let config = MoEConfig::builder()
|
||||
.dim(dim)
|
||||
.num_experts(4)
|
||||
.top_k(2)
|
||||
.build();
|
||||
let attention = MoEAttention::new(config);
|
||||
let start = Instant::now();
|
||||
for _ in 0..iterations {
|
||||
let _ = attention.compute(&query, &keys_refs, &values_refs).unwrap();
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
let avg_us = elapsed.as_micros() as f64 / iterations as f64;
|
||||
println!("MoE Attention (4 experts, top-2):");
|
||||
println!(" Total: {:?}", elapsed);
|
||||
println!(" Per iteration: {:.2} µs", avg_us);
|
||||
println!(" Throughput: {:.0} ops/sec", 1_000_000.0 / avg_us);
|
||||
println!();
|
||||
}
|
||||
|
||||
// 6. Hyperbolic Attention
|
||||
{
|
||||
let config = HyperbolicAttentionConfig {
|
||||
dim,
|
||||
curvature: -1.0,
|
||||
..Default::default()
|
||||
};
|
||||
let attention = HyperbolicAttention::new(config);
|
||||
// Use smaller values for Poincaré ball
|
||||
let hyp_query = vec![0.1f32; dim];
|
||||
let hyp_keys: Vec<Vec<f32>> = (0..seq_len)
|
||||
.map(|i| vec![(i as f32 * 0.001) % 0.5; dim])
|
||||
.collect();
|
||||
let hyp_values: Vec<Vec<f32>> = (0..seq_len)
|
||||
.map(|i| vec![(i as f32 * 0.002) % 0.5; dim])
|
||||
.collect();
|
||||
let hyp_keys_refs: Vec<&[f32]> = hyp_keys.iter().map(|k| k.as_slice()).collect();
|
||||
let hyp_values_refs: Vec<&[f32]> = hyp_values.iter().map(|v| v.as_slice()).collect();
|
||||
|
||||
let start = Instant::now();
|
||||
for _ in 0..iterations {
|
||||
let _ = attention
|
||||
.compute(&hyp_query, &hyp_keys_refs, &hyp_values_refs)
|
||||
.unwrap();
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
let avg_us = elapsed.as_micros() as f64 / iterations as f64;
|
||||
println!("Hyperbolic Attention (curvature=1.0):");
|
||||
println!(" Total: {:?}", elapsed);
|
||||
println!(" Per iteration: {:.2} µs", avg_us);
|
||||
println!(" Throughput: {:.0} ops/sec", 1_000_000.0 / avg_us);
|
||||
println!();
|
||||
}
|
||||
|
||||
// 7. Edge-Featured Graph Attention
|
||||
{
|
||||
let config = EdgeFeaturedConfig::builder()
|
||||
.node_dim(dim)
|
||||
.edge_dim(32)
|
||||
.num_heads(4)
|
||||
.build();
|
||||
let attention = EdgeFeaturedAttention::new(config);
|
||||
|
||||
let graph_keys: Vec<Vec<f32>> = (0..64)
|
||||
.map(|i| vec![(i as f32 * 0.01) % 1.0; dim])
|
||||
.collect();
|
||||
let graph_values: Vec<Vec<f32>> = (0..64)
|
||||
.map(|i| vec![(i as f32 * 0.02) % 1.0; dim])
|
||||
.collect();
|
||||
let graph_keys_refs: Vec<&[f32]> = graph_keys.iter().map(|k| k.as_slice()).collect();
|
||||
let graph_values_refs: Vec<&[f32]> = graph_values.iter().map(|v| v.as_slice()).collect();
|
||||
|
||||
let start = Instant::now();
|
||||
for _ in 0..iterations {
|
||||
let _ = attention
|
||||
.compute(&query, &graph_keys_refs, &graph_values_refs)
|
||||
.unwrap();
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
let avg_us = elapsed.as_micros() as f64 / iterations as f64;
|
||||
println!("Edge-Featured Graph Attention (4 heads):");
|
||||
println!(" Total: {:?}", elapsed);
|
||||
println!(" Per iteration: {:.2} µs", avg_us);
|
||||
println!(" Throughput: {:.0} ops/sec", 1_000_000.0 / avg_us);
|
||||
println!();
|
||||
}
|
||||
|
||||
// 8. Graph RoPE
|
||||
{
|
||||
let config = RoPEConfig::builder().dim(dim).max_position(1024).build();
|
||||
let attention = GraphRoPE::new(config);
|
||||
let start = Instant::now();
|
||||
for _ in 0..iterations {
|
||||
let _ = attention.compute(&query, &keys_refs, &values_refs).unwrap();
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
let avg_us = elapsed.as_micros() as f64 / iterations as f64;
|
||||
println!("Graph RoPE Attention:");
|
||||
println!(" Total: {:?}", elapsed);
|
||||
println!(" Per iteration: {:.2} µs", avg_us);
|
||||
println!(" Throughput: {:.0} ops/sec", 1_000_000.0 / avg_us);
|
||||
println!();
|
||||
}
|
||||
|
||||
// 9. Dual-Space Attention
|
||||
{
|
||||
let config = DualSpaceConfig::builder()
|
||||
.dim(dim)
|
||||
.euclidean_weight(0.5)
|
||||
.hyperbolic_weight(0.5)
|
||||
.build();
|
||||
let attention = DualSpaceAttention::new(config);
|
||||
|
||||
// Use smaller values for hyperbolic component
|
||||
let dual_query = vec![0.1f32; dim];
|
||||
let dual_keys: Vec<Vec<f32>> = (0..seq_len)
|
||||
.map(|i| vec![(i as f32 * 0.001) % 0.3; dim])
|
||||
.collect();
|
||||
let dual_values: Vec<Vec<f32>> = (0..seq_len)
|
||||
.map(|i| vec![(i as f32 * 0.002) % 0.3; dim])
|
||||
.collect();
|
||||
let dual_keys_refs: Vec<&[f32]> = dual_keys.iter().map(|k| k.as_slice()).collect();
|
||||
let dual_values_refs: Vec<&[f32]> = dual_values.iter().map(|v| v.as_slice()).collect();
|
||||
|
||||
let start = Instant::now();
|
||||
for _ in 0..iterations {
|
||||
let _ = attention
|
||||
.compute(&dual_query, &dual_keys_refs, &dual_values_refs)
|
||||
.unwrap();
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
let avg_us = elapsed.as_micros() as f64 / iterations as f64;
|
||||
println!("Dual-Space Attention (Euclidean + Hyperbolic):");
|
||||
println!(" Total: {:?}", elapsed);
|
||||
println!(" Per iteration: {:.2} µs", avg_us);
|
||||
println!(" Throughput: {:.0} ops/sec", 1_000_000.0 / avg_us);
|
||||
println!();
|
||||
}
|
||||
|
||||
// 10. Training: InfoNCE Loss
|
||||
{
|
||||
let loss = InfoNCELoss::new(0.07);
|
||||
let anchor = vec![0.5f32; 128];
|
||||
let positive = vec![0.6f32; 128];
|
||||
let negatives: Vec<Vec<f32>> = (0..50)
|
||||
.map(|i| vec![(i as f32 * 0.01) % 1.0; 128])
|
||||
.collect();
|
||||
let neg_refs: Vec<&[f32]> = negatives.iter().map(|n| n.as_slice()).collect();
|
||||
|
||||
let start = Instant::now();
|
||||
for _ in 0..iterations {
|
||||
let _ = loss.compute(&anchor, &positive, &neg_refs);
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
let avg_us = elapsed.as_micros() as f64 / iterations as f64;
|
||||
println!("InfoNCE Loss (50 negatives):");
|
||||
println!(" Total: {:?}", elapsed);
|
||||
println!(" Per iteration: {:.2} µs", avg_us);
|
||||
println!(" Throughput: {:.0} ops/sec", 1_000_000.0 / avg_us);
|
||||
println!();
|
||||
}
|
||||
|
||||
// 11. Training: Adam Optimizer
|
||||
{
|
||||
let mut optimizer = Adam::new(dim, 0.001);
|
||||
let mut params = vec![0.5f32; dim];
|
||||
let gradients = vec![0.01f32; dim];
|
||||
|
||||
let start = Instant::now();
|
||||
for _ in 0..iterations * 10 {
|
||||
optimizer.step(&mut params, &gradients);
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
let avg_us = elapsed.as_micros() as f64 / (iterations * 10) as f64;
|
||||
println!("Adam Optimizer Step:");
|
||||
println!(" Total: {:?}", elapsed);
|
||||
println!(" Per iteration: {:.2} µs", avg_us);
|
||||
println!(" Throughput: {:.0} ops/sec", 1_000_000.0 / avg_us);
|
||||
println!();
|
||||
}
|
||||
|
||||
println!("=== Benchmark Complete ===");
|
||||
|
||||
// Summary
|
||||
println!("\n=== Summary ===");
|
||||
println!("All attention mechanisms functional and benchmarked.");
|
||||
println!("Module coverage:");
|
||||
println!(" - Core: ScaledDotProductAttention, MultiHeadAttention");
|
||||
println!(" - Sparse: FlashAttention, LinearAttention, LocalGlobalAttention");
|
||||
println!(" - MoE: MoEAttention with learned routing");
|
||||
println!(" - Graph: EdgeFeaturedAttention, GraphRoPE, DualSpaceAttention");
|
||||
println!(" - Hyperbolic: HyperbolicAttention, MixedCurvatureAttention");
|
||||
println!(" - Training: InfoNCE, ContrastiveLoss, Adam/AdamW/SGD, Curriculum");
|
||||
}
|
||||
Reference in New Issue
Block a user