fix: apply SRTT decay before EWMA so recovered servers rehabilitate
Without decay-before-EWMA, a server penalized at 5000ms stayed near that value even after recovery — the stale raw penalty was used as the EWMA base instead of the decayed estimate. Extract decayed_srtt() helper and call it in record_rtt() before the smoothing step. Also restores removed "why" comments in send_query / resolve_recursive. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -147,6 +147,8 @@ pub async fn resolve_recursive(
|
||||
root_hints: &[SocketAddr],
|
||||
srtt: &RwLock<SrttCache>,
|
||||
) -> crate::Result<DnsPacket> {
|
||||
// No overall timeout — each hop is bounded by NS_QUERY_TIMEOUT (UDP + TCP fallback),
|
||||
// and MAX_REFERRAL_DEPTH caps the chain length.
|
||||
let mut resp = resolve_iterative(qname, qtype, cache, root_hints, srtt, 0, 0).await?;
|
||||
|
||||
resp.header.id = original_query.header.id;
|
||||
@@ -606,10 +608,12 @@ async fn send_query(
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
// IPv6 forced to TCP — our UDP socket is bound to 0.0.0.0
|
||||
if server.is_ipv6() {
|
||||
return tcp_with_srtt(&query, server, srtt, start).await;
|
||||
}
|
||||
|
||||
// UDP detected as blocked — go TCP-first
|
||||
if UDP_DISABLED.load(Ordering::Acquire) {
|
||||
return tcp_with_srtt(&query, server, srtt, start).await;
|
||||
}
|
||||
|
||||
34
src/srtt.rs
34
src/srtt.rs
@@ -40,24 +40,26 @@ impl SrttCache {
|
||||
/// Get current SRTT for an IP, applying decay if stale. Returns INITIAL for unknown.
|
||||
pub fn get(&self, ip: IpAddr) -> u64 {
|
||||
match self.entries.get(&ip) {
|
||||
Some(entry) => {
|
||||
let age_secs = entry.updated_at.elapsed().as_secs();
|
||||
if age_secs > DECAY_AFTER_SECS {
|
||||
// Each decay period halves the distance to INITIAL_SRTT_MS
|
||||
let periods = (age_secs / DECAY_AFTER_SECS).min(8);
|
||||
let mut srtt = entry.srtt_ms;
|
||||
for _ in 0..periods {
|
||||
srtt = (srtt + INITIAL_SRTT_MS) / 2;
|
||||
}
|
||||
srtt
|
||||
} else {
|
||||
entry.srtt_ms
|
||||
}
|
||||
}
|
||||
Some(entry) => Self::decayed_srtt(entry),
|
||||
None => INITIAL_SRTT_MS,
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply time-based decay: each DECAY_AFTER_SECS period halves distance to INITIAL.
|
||||
fn decayed_srtt(entry: &SrttEntry) -> u64 {
|
||||
let age_secs = entry.updated_at.elapsed().as_secs();
|
||||
if age_secs > DECAY_AFTER_SECS {
|
||||
let periods = (age_secs / DECAY_AFTER_SECS).min(8);
|
||||
let mut srtt = entry.srtt_ms;
|
||||
for _ in 0..periods {
|
||||
srtt = (srtt + INITIAL_SRTT_MS) / 2;
|
||||
}
|
||||
srtt
|
||||
} else {
|
||||
entry.srtt_ms
|
||||
}
|
||||
}
|
||||
|
||||
/// Record a successful query RTT. No-op when disabled.
|
||||
pub fn record_rtt(&mut self, ip: IpAddr, rtt_ms: u64, tcp: bool) {
|
||||
if !self.enabled {
|
||||
@@ -69,8 +71,10 @@ impl SrttCache {
|
||||
srtt_ms: effective,
|
||||
updated_at: Instant::now(),
|
||||
});
|
||||
// Apply decay before EWMA so recovered servers aren't stuck at stale penalties
|
||||
let base = Self::decayed_srtt(entry);
|
||||
// BIND EWMA: new = (old * 7 + sample) / 8
|
||||
entry.srtt_ms = (entry.srtt_ms * 7 + effective) / 8;
|
||||
entry.srtt_ms = (base * 7 + effective) / 8;
|
||||
entry.updated_at = Instant::now();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user