Files
numa/benches/throughput.rs
Razvan Dimescu 962b400f4c perf: optimize DNS query hot path (#15)
* perf: optimize hot path — RwLock, inline filtering, pre-allocated strings

- Mutex → RwLock for cache, blocklist, and overrides (concurrent read access)
- Make cache.lookup() and overrides.lookup() take &self (read-only)
- Eliminate 3 Vec allocations per DnsPacket::write() via inline filtering
- Pre-allocate domain strings with capacity 64 in parse path
- Add criterion micro-benchmarks (hot_path + throughput)
- Add bench README documenting both benchmark suites

Measured improvement: ~14% faster parsing, ~9% pipeline throughput,
round-trip cached 733ns → 698ns (~2.3M queries/sec).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* chore: simplify benchmark code after review

- Remove redundant DnsHeader::new() (already set by DnsPacket::new())
- Remove unused DnsHeader import
- Change simulate_cached_pipeline to take &DnsCache (lookup is &self now)
- Remove unnecessary mut on cache in cache_lookup_miss bench

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 02:01:08 +02:00

95 lines
3.0 KiB
Rust

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::net::Ipv4Addr;
use numa::buffer::BytePacketBuffer;
use numa::header::ResultCode;
use numa::packet::DnsPacket;
use numa::question::{DnsQuestion, QueryType};
use numa::record::DnsRecord;
fn make_query_wire(domain: &str) -> Vec<u8> {
let mut q = DnsPacket::new();
q.header.id = 0xABCD;
q.header.recursion_desired = true;
q.questions
.push(DnsQuestion::new(domain.to_string(), QueryType::A));
let mut buf = BytePacketBuffer::new();
q.write(&mut buf).unwrap();
buf.filled().to_vec()
}
fn make_response(domain: &str) -> DnsPacket {
let mut pkt = DnsPacket::new();
pkt.header.id = 0xABCD;
pkt.header.response = true;
pkt.header.recursion_desired = true;
pkt.header.recursion_available = true;
pkt.header.rescode = ResultCode::NOERROR;
pkt.questions
.push(DnsQuestion::new(domain.to_string(), QueryType::A));
pkt.answers.push(DnsRecord::A {
domain: domain.to_string(),
addr: Ipv4Addr::new(93, 184, 216, 34),
ttl: 300,
});
pkt
}
/// Simulates the complete cached query pipeline (sans network I/O):
/// parse → cache lookup → TTL adjust → serialize response
fn simulate_cached_pipeline(query_wire: &[u8], cache: &numa::cache::DnsCache) -> usize {
let mut buf = BytePacketBuffer::from_bytes(query_wire);
let query = DnsPacket::from_buffer(&mut buf).unwrap();
let q = &query.questions[0];
let mut resp = cache.lookup(&q.name, q.qtype).unwrap();
resp.header.id = query.header.id;
let mut resp_buf = BytePacketBuffer::new();
resp.write(&mut resp_buf).unwrap();
resp_buf.pos()
}
fn bench_pipeline_throughput(c: &mut Criterion) {
let domains: Vec<String> = (0..100)
.map(|i| format!("domain-{i}.example.com"))
.collect();
let mut cache = numa::cache::DnsCache::new(10_000, 60, 86400);
for d in &domains {
cache.insert(d, QueryType::A, &make_response(d));
}
let query_wires: Vec<Vec<u8>> = domains.iter().map(|d| make_query_wire(d)).collect();
let mut group = c.benchmark_group("pipeline_throughput");
for count in [1, 10, 100] {
group.throughput(Throughput::Elements(count));
group.bench_with_input(BenchmarkId::from_parameter(count), &count, |b, &count| {
let mut idx = 0usize;
b.iter(|| {
for _ in 0..count {
let wire = &query_wires[idx % query_wires.len()];
simulate_cached_pipeline(wire, &cache);
idx += 1;
}
});
});
}
group.finish();
}
/// Measures the overhead of BytePacketBuffer allocation + zero-init
fn bench_buffer_alloc(c: &mut Criterion) {
c.bench_function("buffer_alloc", |b| {
b.iter(|| {
let buf = BytePacketBuffer::new();
criterion::black_box(buf.pos());
})
});
}
criterion_group!(benches, bench_pipeline_throughput, bench_buffer_alloc,);
criterion_main!(benches);