Files
wifi-densepose/examples/ruvLLM/benches/pipeline.rs
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

125 lines
3.7 KiB
Rust

//! Pipeline benchmarks for RuvLLM
//!
//! Benchmarks the complete request-to-response pipeline.
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use ruvllm::{Config, Request, RuvLLM};
use tokio::runtime::Runtime;
/// Benchmarks a single fixed query through the full pipeline.
///
/// One `RuvLLM` instance is built before measurement starts (embedding
/// dimension 128, router hidden dimension 32, learning disabled). The
/// `query_simple` benchmark then repeatedly awaits
/// `query("What is Rust?")` on a Tokio runtime.
///
/// # Panics
///
/// Panics if any `unwrap` fails: runtime creation, config build, model
/// construction, or the query itself.
fn benchmark_query(c: &mut Criterion) {
    let runtime = Runtime::new().unwrap();

    let settings = Config::builder()
        .embedding_dim(128)
        .router_hidden_dim(32)
        .learning_enabled(false)
        .build()
        .unwrap();

    // `RuvLLM::new` yields a future, so drive it to completion here, outside
    // the timed closure.
    let engine = runtime.block_on(RuvLLM::new(settings)).unwrap();

    c.bench_function("query_simple", |bencher| {
        bencher.to_async(&runtime).iter(|| async {
            let response = engine.query("What is Rust?").await.unwrap();
            // Hand the result to `black_box` so the call cannot be optimized away.
            black_box(response)
        })
    });
}
/// Benchmarks query latency for prompts of different lengths.
///
/// Registers the `query_by_length` group with one benchmark per entry in a
/// fixed table of `(label, prompt)` pairs: `short`, `medium`, and `long`.
/// All entries run against the same `RuvLLM` instance (embedding dimension
/// 128, router hidden dimension 32, learning disabled).
///
/// # Panics
///
/// Panics if any `unwrap` fails: runtime creation, config build, model
/// construction, or a query.
fn benchmark_query_lengths(c: &mut Criterion) {
    let rt = Runtime::new().unwrap();
    let config = Config::builder()
        .embedding_dim(128)
        .router_hidden_dim(32)
        .learning_enabled(false)
        .build()
        .unwrap();
    let llm = rt.block_on(RuvLLM::new(config)).unwrap();

    // Fixed-size table: a plain array avoids the needless heap allocation of
    // `vec![...]` and matches the array iteration used by the concurrency
    // benchmark in this file.
    let queries = [
        ("short", "Hi"),
        ("medium", "What is machine learning and how does it work?"),
        ("long", "Please explain in detail how neural networks process information, including concepts like forward propagation, backpropagation, gradient descent, and the role of activation functions in learning complex patterns from data."),
    ];

    let mut group = c.benchmark_group("query_by_length");
    for (name, query) in queries {
        // The label becomes the benchmark parameter; the prompt is the input.
        group.bench_with_input(BenchmarkId::from_parameter(name), &query, |b, query| {
            b.to_async(&rt)
                .iter(|| async { black_box(llm.query(*query).await.unwrap()) })
        });
    }
    group.finish();
}
fn benchmark_concurrent_queries(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
let config = Config::builder()
.embedding_dim(128)
.router_hidden_dim(32)
.learning_enabled(false)
.build()
.unwrap();
let llm = std::sync::Arc::new(rt.block_on(RuvLLM::new(config)).unwrap());
let mut group = c.benchmark_group("concurrent_queries");
for concurrency in [1, 2, 4, 8] {
group.bench_with_input(
BenchmarkId::from_parameter(concurrency),
&concurrency,
|b, &concurrency| {
b.to_async(&rt).iter(|| async {
let mut handles = Vec::new();
for _ in 0..concurrency {
let llm_clone = llm.clone();
handles.push(tokio::spawn(async move {
llm_clone.query("Test query").await.unwrap()
}));
}
for handle in handles {
black_box(handle.await.unwrap());
}
})
},
);
}
group.finish();
}
/// Benchmarks a three-turn conversation within one session.
///
/// Registers the `session_multi_turn` benchmark. Every iteration creates a
/// new session via `new_session` and sends three prompts through
/// `query_session` in order: "First question", "Follow up", and
/// "Another follow up". Session creation is inside the timed closure.
///
/// # Panics
///
/// Panics if any `unwrap` fails: runtime creation, config build, model
/// construction, or a session query.
fn benchmark_session(c: &mut Criterion) {
    let runtime = Runtime::new().unwrap();

    let settings = Config::builder()
        .embedding_dim(128)
        .router_hidden_dim(32)
        .learning_enabled(false)
        .build()
        .unwrap();

    let engine = runtime.block_on(RuvLLM::new(settings)).unwrap();

    c.bench_function("session_multi_turn", |bencher| {
        bencher.to_async(&runtime).iter(|| async {
            let conversation = engine.new_session();

            // Turns are awaited one after another against the same session.
            for prompt in ["First question", "Follow up", "Another follow up"] {
                let reply = engine.query_session(&conversation, prompt).await.unwrap();
                black_box(reply);
            }
        })
    });
}
// Collect the four pipeline benchmark functions defined in this file into a
// single Criterion group named `benches`.
criterion_group!(
benches,
benchmark_query,
benchmark_query_lengths,
benchmark_concurrent_queries,
benchmark_session,
);
// Pass the `benches` group to Criterion's `criterion_main!` macro, which
// serves as the entry point for this bench target.
criterion_main!(benches);