perf: optimize hot path — RwLock, inline filtering, pre-allocated strings
- Mutex → RwLock for cache, blocklist, and overrides (concurrent read access)
- Make cache.lookup() and overrides.lookup() take &self (read-only)
- Eliminate 3 Vec allocations per DnsPacket::write() via inline filtering
- Pre-allocate domain strings with capacity 64 in parse path
- Add criterion micro-benchmarks (hot_path + throughput)
- Add bench README documenting both benchmark suites

Measured improvement: ~14% faster parsing, ~9% pipeline throughput, round-trip cached 733ns → 698ns (~2.3M queries/sec).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
186
benches/hot_path.rs
Normal file
186
benches/hot_path.rs
Normal file
@@ -0,0 +1,186 @@
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use std::net::Ipv4Addr;
|
||||
|
||||
use numa::buffer::BytePacketBuffer;
|
||||
use numa::cache::DnsCache;
|
||||
use numa::header::{DnsHeader, ResultCode};
|
||||
use numa::packet::DnsPacket;
|
||||
use numa::question::{DnsQuestion, QueryType};
|
||||
use numa::record::DnsRecord;
|
||||
|
||||
fn make_response(domain: &str) -> DnsPacket {
|
||||
let mut pkt = DnsPacket::new();
|
||||
pkt.header = DnsHeader::new();
|
||||
pkt.header.id = 0x1234;
|
||||
pkt.header.response = true;
|
||||
pkt.header.recursion_desired = true;
|
||||
pkt.header.recursion_available = true;
|
||||
pkt.header.rescode = ResultCode::NOERROR;
|
||||
pkt.questions
|
||||
.push(DnsQuestion::new(domain.to_string(), QueryType::A));
|
||||
pkt.answers.push(DnsRecord::A {
|
||||
domain: domain.to_string(),
|
||||
addr: Ipv4Addr::new(93, 184, 216, 34),
|
||||
ttl: 300,
|
||||
});
|
||||
// Typical response includes authority + additional records
|
||||
pkt.authorities.push(DnsRecord::NS {
|
||||
domain: domain.to_string(),
|
||||
host: format!("ns1.{domain}"),
|
||||
ttl: 172800,
|
||||
});
|
||||
pkt.authorities.push(DnsRecord::NS {
|
||||
domain: domain.to_string(),
|
||||
host: format!("ns2.{domain}"),
|
||||
ttl: 172800,
|
||||
});
|
||||
pkt.resources.push(DnsRecord::A {
|
||||
domain: format!("ns1.{domain}"),
|
||||
addr: Ipv4Addr::new(198, 51, 100, 1),
|
||||
ttl: 172800,
|
||||
});
|
||||
pkt
|
||||
}
|
||||
|
||||
fn to_wire(pkt: &DnsPacket) -> Vec<u8> {
|
||||
let mut buf = BytePacketBuffer::new();
|
||||
pkt.write(&mut buf).unwrap();
|
||||
buf.filled().to_vec()
|
||||
}
|
||||
|
||||
fn bench_buffer_parse(c: &mut Criterion) {
|
||||
let pkt = make_response("example.com");
|
||||
let wire = to_wire(&pkt);
|
||||
|
||||
c.bench_function("buffer_parse", |b| {
|
||||
b.iter(|| {
|
||||
let mut buf = BytePacketBuffer::from_bytes(black_box(&wire));
|
||||
DnsPacket::from_buffer(&mut buf).unwrap()
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_buffer_serialize(c: &mut Criterion) {
|
||||
let pkt = make_response("example.com");
|
||||
|
||||
c.bench_function("buffer_serialize", |b| {
|
||||
b.iter(|| {
|
||||
let mut buf = BytePacketBuffer::new();
|
||||
black_box(&pkt).write(&mut buf).unwrap();
|
||||
black_box(buf.pos());
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_packet_clone(c: &mut Criterion) {
|
||||
let pkt = make_response("example.com");
|
||||
|
||||
c.bench_function("packet_clone", |b| b.iter(|| black_box(&pkt).clone()));
|
||||
}
|
||||
|
||||
fn bench_cache_lookup_hit(c: &mut Criterion) {
|
||||
let mut cache = DnsCache::new(10_000, 60, 86400);
|
||||
let pkt = make_response("example.com");
|
||||
cache.insert("example.com", QueryType::A, &pkt);
|
||||
|
||||
c.bench_function("cache_lookup_hit", |b| {
|
||||
b.iter(|| {
|
||||
cache
|
||||
.lookup(black_box("example.com"), QueryType::A)
|
||||
.unwrap()
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_cache_lookup_miss(c: &mut Criterion) {
|
||||
let mut cache = DnsCache::new(10_000, 60, 86400);
|
||||
|
||||
c.bench_function("cache_lookup_miss", |b| {
|
||||
b.iter(|| cache.lookup(black_box("nonexistent.com"), QueryType::A))
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_cache_insert(c: &mut Criterion) {
|
||||
let pkt = make_response("example.com");
|
||||
|
||||
c.bench_function("cache_insert", |b| {
|
||||
let mut cache = DnsCache::new(10_000, 60, 86400);
|
||||
let mut i = 0u64;
|
||||
b.iter(|| {
|
||||
let domain = format!("bench-{i}.example.com");
|
||||
cache.insert(&domain, QueryType::A, black_box(&pkt));
|
||||
i += 1;
|
||||
// Reset cache periodically to avoid filling up
|
||||
if i % 5000 == 0 {
|
||||
cache.clear();
|
||||
}
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_round_trip(c: &mut Criterion) {
|
||||
// Simulates the cached hot path: parse query → cache hit → serialize response
|
||||
let query_pkt = {
|
||||
let mut q = DnsPacket::new();
|
||||
q.header.id = 0xABCD;
|
||||
q.header.recursion_desired = true;
|
||||
q.questions
|
||||
.push(DnsQuestion::new("example.com".to_string(), QueryType::A));
|
||||
q
|
||||
};
|
||||
let query_wire = to_wire(&query_pkt);
|
||||
|
||||
let response = make_response("example.com");
|
||||
let mut cache = DnsCache::new(10_000, 60, 86400);
|
||||
cache.insert("example.com", QueryType::A, &response);
|
||||
|
||||
c.bench_function("round_trip_cached", |b| {
|
||||
b.iter(|| {
|
||||
// 1. Parse incoming query
|
||||
let mut buf = BytePacketBuffer::from_bytes(black_box(&query_wire));
|
||||
let query = DnsPacket::from_buffer(&mut buf).unwrap();
|
||||
let qname = &query.questions[0].name;
|
||||
let qtype = query.questions[0].qtype;
|
||||
|
||||
// 2. Cache lookup
|
||||
let mut resp = cache.lookup(qname, qtype).unwrap();
|
||||
resp.header.id = query.header.id;
|
||||
|
||||
// 3. Serialize response
|
||||
let mut resp_buf = BytePacketBuffer::new();
|
||||
resp.write(&mut resp_buf).unwrap();
|
||||
black_box(resp_buf.pos());
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_cache_populated_lookup(c: &mut Criterion) {
|
||||
// Benchmark with a realistically populated cache (1000 entries)
|
||||
let mut cache = DnsCache::new(10_000, 60, 86400);
|
||||
for i in 0..1000 {
|
||||
let domain = format!("domain-{i}.example.com");
|
||||
let pkt = make_response(&domain);
|
||||
cache.insert(&domain, QueryType::A, &pkt);
|
||||
}
|
||||
|
||||
c.bench_function("cache_lookup_hit_populated", |b| {
|
||||
b.iter(|| {
|
||||
cache
|
||||
.lookup(black_box("domain-500.example.com"), QueryType::A)
|
||||
.unwrap()
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
bench_buffer_parse,
|
||||
bench_buffer_serialize,
|
||||
bench_packet_clone,
|
||||
bench_cache_lookup_hit,
|
||||
bench_cache_lookup_miss,
|
||||
bench_cache_insert,
|
||||
bench_round_trip,
|
||||
bench_cache_populated_lookup,
|
||||
);
|
||||
criterion_main!(benches);
|
||||
94
benches/throughput.rs
Normal file
94
benches/throughput.rs
Normal file
@@ -0,0 +1,94 @@
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||
use std::net::Ipv4Addr;
|
||||
|
||||
use numa::buffer::BytePacketBuffer;
|
||||
use numa::header::ResultCode;
|
||||
use numa::packet::DnsPacket;
|
||||
use numa::question::{DnsQuestion, QueryType};
|
||||
use numa::record::DnsRecord;
|
||||
|
||||
fn make_query_wire(domain: &str) -> Vec<u8> {
|
||||
let mut q = DnsPacket::new();
|
||||
q.header.id = 0xABCD;
|
||||
q.header.recursion_desired = true;
|
||||
q.questions
|
||||
.push(DnsQuestion::new(domain.to_string(), QueryType::A));
|
||||
let mut buf = BytePacketBuffer::new();
|
||||
q.write(&mut buf).unwrap();
|
||||
buf.filled().to_vec()
|
||||
}
|
||||
|
||||
fn make_response(domain: &str) -> DnsPacket {
|
||||
let mut pkt = DnsPacket::new();
|
||||
pkt.header.id = 0xABCD;
|
||||
pkt.header.response = true;
|
||||
pkt.header.recursion_desired = true;
|
||||
pkt.header.recursion_available = true;
|
||||
pkt.header.rescode = ResultCode::NOERROR;
|
||||
pkt.questions
|
||||
.push(DnsQuestion::new(domain.to_string(), QueryType::A));
|
||||
pkt.answers.push(DnsRecord::A {
|
||||
domain: domain.to_string(),
|
||||
addr: Ipv4Addr::new(93, 184, 216, 34),
|
||||
ttl: 300,
|
||||
});
|
||||
pkt
|
||||
}
|
||||
|
||||
/// Simulates the complete cached query pipeline (sans network I/O):
|
||||
/// parse → cache lookup → TTL adjust → serialize response
|
||||
fn simulate_cached_pipeline(query_wire: &[u8], cache: &mut numa::cache::DnsCache) -> usize {
|
||||
let mut buf = BytePacketBuffer::from_bytes(query_wire);
|
||||
let query = DnsPacket::from_buffer(&mut buf).unwrap();
|
||||
let q = &query.questions[0];
|
||||
|
||||
let mut resp = cache.lookup(&q.name, q.qtype).unwrap();
|
||||
resp.header.id = query.header.id;
|
||||
|
||||
let mut resp_buf = BytePacketBuffer::new();
|
||||
resp.write(&mut resp_buf).unwrap();
|
||||
resp_buf.pos()
|
||||
}
|
||||
|
||||
fn bench_pipeline_throughput(c: &mut Criterion) {
|
||||
let domains: Vec<String> = (0..100)
|
||||
.map(|i| format!("domain-{i}.example.com"))
|
||||
.collect();
|
||||
|
||||
let mut cache = numa::cache::DnsCache::new(10_000, 60, 86400);
|
||||
for d in &domains {
|
||||
cache.insert(d, QueryType::A, &make_response(d));
|
||||
}
|
||||
|
||||
let query_wires: Vec<Vec<u8>> = domains.iter().map(|d| make_query_wire(d)).collect();
|
||||
|
||||
let mut group = c.benchmark_group("pipeline_throughput");
|
||||
|
||||
for count in [1, 10, 100] {
|
||||
group.throughput(Throughput::Elements(count));
|
||||
group.bench_with_input(BenchmarkId::from_parameter(count), &count, |b, &count| {
|
||||
let mut idx = 0usize;
|
||||
b.iter(|| {
|
||||
for _ in 0..count {
|
||||
let wire = &query_wires[idx % query_wires.len()];
|
||||
simulate_cached_pipeline(wire, &mut cache);
|
||||
idx += 1;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Measures the overhead of BytePacketBuffer allocation + zero-init
|
||||
fn bench_buffer_alloc(c: &mut Criterion) {
|
||||
c.bench_function("buffer_alloc", |b| {
|
||||
b.iter(|| {
|
||||
let buf = BytePacketBuffer::new();
|
||||
criterion::black_box(buf.pos());
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_pipeline_throughput, bench_buffer_alloc,);
|
||||
criterion_main!(benches);
|
||||
Reference in New Issue
Block a user