feat: DoT client, recursive optimization, bench refactor

- Add DoT forwarding client (tls://IP#hostname upstream config)
- Recursive: cache NS delegations, serve-stale (RFC 8767), parallel
  NS queries on cold, no TCP fallback on individual UDP timeouts,
  400ms NS/TCP timeout (down from 800/1500ms)
- Reduce recursive p99 from 2367ms to 402ms (vs Unbound's 148ms)
- Refactor benchmark suite: generic compare_two engine, delete
  one-off diagnostics (1969 → 750 lines)
- Code cleanup: forward_query delegates to _raw, Option<String>
  for tls_name, saturating_sub for ns_idx
This commit is contained in:
Razvan Dimescu
2026-04-12 06:22:42 +03:00
parent 7efac85836
commit 5d9a3a809b
5 changed files with 754 additions and 1400 deletions

View File

@@ -15,8 +15,8 @@ use crate::srtt::SrttCache;
const MAX_REFERRAL_DEPTH: u8 = 10;
const MAX_CNAME_DEPTH: u8 = 8;
const NS_QUERY_TIMEOUT: Duration = Duration::from_millis(800);
const TCP_TIMEOUT: Duration = Duration::from_millis(1500);
const NS_QUERY_TIMEOUT: Duration = Duration::from_millis(400);
const TCP_TIMEOUT: Duration = Duration::from_millis(400);
const UDP_FAIL_THRESHOLD: u8 = 3;
static QUERY_ID: AtomicU16 = AtomicU16::new(1);
@@ -213,11 +213,13 @@ pub(crate) fn resolve_iterative<'a>(
ns_addrs[ns_idx], q_type, q_name, current_zone, referral_depth
);
let response = match send_query_hedged(q_name, q_type, &ns_addrs[ns_idx..], srtt).await {
let response = match send_query_hedged(q_name, q_type, &ns_addrs[ns_idx..], srtt).await
{
Ok(r) => r,
Err(e) => {
debug!("recursive: NS query failed: {}", e);
ns_idx += 2; // both tried, skip past them
let remaining = ns_addrs.len().saturating_sub(ns_idx);
ns_idx += remaining.min(2);
continue;
}
};
@@ -660,7 +662,10 @@ async fn send_query_hedged(
}
match (a_err.take(), b_err.take()) {
(Some(e), Some(_)) => return Err(e),
(a, b) => { a_err = a; b_err = b; }
(a, b) => {
a_err = a;
b_err = b;
}
}
}
} else {
@@ -739,9 +744,13 @@ async fn send_query(
"send_query: {} consecutive UDP failures — switching to TCP-first",
fails
);
// Now that UDP is disabled, retry this query via TCP
return tcp_with_srtt(&query, server, srtt, start).await;
}
debug!("send_query: UDP failed for {}: {}, trying TCP", server, e);
tcp_with_srtt(&query, server, srtt, start).await
// UDP works in general (priming succeeded) but this server timed out.
// Don't waste another 400ms on TCP — the server is unreachable.
srtt.write().unwrap().record_failure(server.ip());
Err(e)
}
}
}
@@ -1021,10 +1030,10 @@ mod tests {
}
/// TCP-only server returns authoritative answer directly.
/// Verifies: UDP fails → TCP fallback → resolves.
/// Verifies: when UDP is disabled, TCP-first resolves.
#[tokio::test]
async fn tcp_fallback_resolves_when_udp_blocked() {
UDP_DISABLED.store(false, Ordering::Relaxed);
UDP_DISABLED.store(true, Ordering::Relaxed);
UDP_FAILURES.store(0, Ordering::Release);
let server_addr = spawn_tcp_dns_server(|query| {
@@ -1107,7 +1116,7 @@ mod tests {
#[tokio::test]
async fn tcp_fallback_handles_nxdomain() {
UDP_DISABLED.store(false, Ordering::Relaxed);
UDP_DISABLED.store(true, Ordering::Relaxed);
UDP_FAILURES.store(0, Ordering::Release);
let server_addr = spawn_tcp_dns_server(|query| {