From 239938dc077aa89a58e5d95899474f871b171fba Mon Sep 17 00:00:00 2001
From: Razvan Dimescu
Date: Sat, 28 Mar 2026 23:16:18 +0200
Subject: [PATCH] fix: apply SRTT decay before EWMA so recovered servers rehabilitate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Without decay-before-EWMA, a server penalized at 5000ms stayed near
that value even after recovery — the stale raw penalty was used as the
EWMA base instead of the decayed estimate.

Extract decayed_srtt() helper and call it in record_rtt() before the
smoothing step. Also restores removed "why" comments in send_query /
resolve_recursive.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 src/recursive.rs |  4 ++++
 src/srtt.rs      | 34 +++++++++++++++++++---------------
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/src/recursive.rs b/src/recursive.rs
index 325e2f3..645ba13 100644
--- a/src/recursive.rs
+++ b/src/recursive.rs
@@ -147,6 +147,8 @@ pub async fn resolve_recursive(
     root_hints: &[SocketAddr],
     srtt: &RwLock,
 ) -> crate::Result {
+    // No overall timeout — each hop is bounded by NS_QUERY_TIMEOUT (UDP + TCP fallback),
+    // and MAX_REFERRAL_DEPTH caps the chain length.
     let mut resp = resolve_iterative(qname, qtype, cache, root_hints, srtt, 0, 0).await?;
 
     resp.header.id = original_query.header.id;
@@ -606,10 +608,12 @@ async fn send_query(
 
     let start = Instant::now();
 
+    // IPv6 forced to TCP — our UDP socket is bound to 0.0.0.0
     if server.is_ipv6() {
         return tcp_with_srtt(&query, server, srtt, start).await;
     }
 
+    // UDP detected as blocked — go TCP-first
     if UDP_DISABLED.load(Ordering::Acquire) {
         return tcp_with_srtt(&query, server, srtt, start).await;
     }
diff --git a/src/srtt.rs b/src/srtt.rs
index 53327ec..3fe4694 100644
--- a/src/srtt.rs
+++ b/src/srtt.rs
@@ -40,24 +40,26 @@ impl SrttCache {
     /// Get current SRTT for an IP, applying decay if stale. Returns INITIAL for unknown.
     pub fn get(&self, ip: IpAddr) -> u64 {
         match self.entries.get(&ip) {
-            Some(entry) => {
-                let age_secs = entry.updated_at.elapsed().as_secs();
-                if age_secs > DECAY_AFTER_SECS {
-                    // Each decay period halves the distance to INITIAL_SRTT_MS
-                    let periods = (age_secs / DECAY_AFTER_SECS).min(8);
-                    let mut srtt = entry.srtt_ms;
-                    for _ in 0..periods {
-                        srtt = (srtt + INITIAL_SRTT_MS) / 2;
-                    }
-                    srtt
-                } else {
-                    entry.srtt_ms
-                }
-            }
+            Some(entry) => Self::decayed_srtt(entry),
             None => INITIAL_SRTT_MS,
         }
     }
 
+    /// Apply time-based decay: each DECAY_AFTER_SECS period halves distance to INITIAL.
+    fn decayed_srtt(entry: &SrttEntry) -> u64 {
+        let age_secs = entry.updated_at.elapsed().as_secs();
+        if age_secs > DECAY_AFTER_SECS {
+            let periods = (age_secs / DECAY_AFTER_SECS).min(8);
+            let mut srtt = entry.srtt_ms;
+            for _ in 0..periods {
+                srtt = (srtt + INITIAL_SRTT_MS) / 2;
+            }
+            srtt
+        } else {
+            entry.srtt_ms
+        }
+    }
+
     /// Record a successful query RTT. No-op when disabled.
     pub fn record_rtt(&mut self, ip: IpAddr, rtt_ms: u64, tcp: bool) {
         if !self.enabled {
@@ -69,8 +71,10 @@ impl SrttCache {
             srtt_ms: effective,
             updated_at: Instant::now(),
         });
+        // Apply decay before EWMA so recovered servers aren't stuck at stale penalties
+        let base = Self::decayed_srtt(entry);
         // BIND EWMA: new = (old * 7 + sample) / 8
-        entry.srtt_ms = (entry.srtt_ms * 7 + effective) / 8;
+        entry.srtt_ms = (base * 7 + effective) / 8;
         entry.updated_at = Instant::now();
     }
 