fix: apply SRTT decay before EWMA so recovered servers rehabilitate
Without decay-before-EWMA, a server penalized at 5000ms stayed near that value even after it recovered: record_rtt() used the stale raw penalty as the EWMA base instead of the time-decayed estimate. This commit extracts a decayed_srtt() helper (now shared with get()) and calls it in record_rtt() before the smoothing step. It also restores the previously removed "why" comments in send_query / resolve_recursive. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -147,6 +147,8 @@ pub async fn resolve_recursive(
|
|||||||
root_hints: &[SocketAddr],
|
root_hints: &[SocketAddr],
|
||||||
srtt: &RwLock<SrttCache>,
|
srtt: &RwLock<SrttCache>,
|
||||||
) -> crate::Result<DnsPacket> {
|
) -> crate::Result<DnsPacket> {
|
||||||
|
// No overall timeout — each hop is bounded by NS_QUERY_TIMEOUT (UDP + TCP fallback),
|
||||||
|
// and MAX_REFERRAL_DEPTH caps the chain length.
|
||||||
let mut resp = resolve_iterative(qname, qtype, cache, root_hints, srtt, 0, 0).await?;
|
let mut resp = resolve_iterative(qname, qtype, cache, root_hints, srtt, 0, 0).await?;
|
||||||
|
|
||||||
resp.header.id = original_query.header.id;
|
resp.header.id = original_query.header.id;
|
||||||
@@ -606,10 +608,12 @@ async fn send_query(
|
|||||||
|
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
|
// IPv6 forced to TCP — our UDP socket is bound to 0.0.0.0
|
||||||
if server.is_ipv6() {
|
if server.is_ipv6() {
|
||||||
return tcp_with_srtt(&query, server, srtt, start).await;
|
return tcp_with_srtt(&query, server, srtt, start).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UDP detected as blocked — go TCP-first
|
||||||
if UDP_DISABLED.load(Ordering::Acquire) {
|
if UDP_DISABLED.load(Ordering::Acquire) {
|
||||||
return tcp_with_srtt(&query, server, srtt, start).await;
|
return tcp_with_srtt(&query, server, srtt, start).await;
|
||||||
}
|
}
|
||||||
|
|||||||
16
src/srtt.rs
16
src/srtt.rs
@@ -40,10 +40,15 @@ impl SrttCache {
|
|||||||
/// Get current SRTT for an IP, applying decay if stale. Returns INITIAL for unknown.
|
/// Get current SRTT for an IP, applying decay if stale. Returns INITIAL for unknown.
|
||||||
pub fn get(&self, ip: IpAddr) -> u64 {
|
pub fn get(&self, ip: IpAddr) -> u64 {
|
||||||
match self.entries.get(&ip) {
|
match self.entries.get(&ip) {
|
||||||
Some(entry) => {
|
Some(entry) => Self::decayed_srtt(entry),
|
||||||
|
None => INITIAL_SRTT_MS,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Apply time-based decay: each DECAY_AFTER_SECS period halves distance to INITIAL.
|
||||||
|
fn decayed_srtt(entry: &SrttEntry) -> u64 {
|
||||||
let age_secs = entry.updated_at.elapsed().as_secs();
|
let age_secs = entry.updated_at.elapsed().as_secs();
|
||||||
if age_secs > DECAY_AFTER_SECS {
|
if age_secs > DECAY_AFTER_SECS {
|
||||||
// Each decay period halves the distance to INITIAL_SRTT_MS
|
|
||||||
let periods = (age_secs / DECAY_AFTER_SECS).min(8);
|
let periods = (age_secs / DECAY_AFTER_SECS).min(8);
|
||||||
let mut srtt = entry.srtt_ms;
|
let mut srtt = entry.srtt_ms;
|
||||||
for _ in 0..periods {
|
for _ in 0..periods {
|
||||||
@@ -54,9 +59,6 @@ impl SrttCache {
|
|||||||
entry.srtt_ms
|
entry.srtt_ms
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => INITIAL_SRTT_MS,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Record a successful query RTT. No-op when disabled.
|
/// Record a successful query RTT. No-op when disabled.
|
||||||
pub fn record_rtt(&mut self, ip: IpAddr, rtt_ms: u64, tcp: bool) {
|
pub fn record_rtt(&mut self, ip: IpAddr, rtt_ms: u64, tcp: bool) {
|
||||||
@@ -69,8 +71,10 @@ impl SrttCache {
|
|||||||
srtt_ms: effective,
|
srtt_ms: effective,
|
||||||
updated_at: Instant::now(),
|
updated_at: Instant::now(),
|
||||||
});
|
});
|
||||||
|
// Apply decay before EWMA so recovered servers aren't stuck at stale penalties
|
||||||
|
let base = Self::decayed_srtt(entry);
|
||||||
// BIND EWMA: new = (old * 7 + sample) / 8
|
// BIND EWMA: new = (old * 7 + sample) / 8
|
||||||
entry.srtt_ms = (entry.srtt_ms * 7 + effective) / 8;
|
entry.srtt_ms = (base * 7 + effective) / 8;
|
||||||
entry.updated_at = Instant::now();
|
entry.updated_at = Instant::now();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user