From cf128c19af0cc2b747398ae4fd853e7150078edb Mon Sep 17 00:00:00 2001
From: Razvan Dimescu
Date: Mon, 20 Apr 2026 15:44:09 +0300
Subject: [PATCH] feat(odoh): bootstrap-IP overrides + zero hedge for ODoH (post-deploy fixes)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two issues surfaced from running mode = "odoh" against the live Hetzner
relay as system DNS:

1. **Bootstrap deadlock.** The reqwest HTTPS client resolves the relay
   and target hostnames via system DNS. When numa is itself the system
   resolver, the ODoH client loops trying to resolve through itself.
   Adds optional `relay_ip` and `target_ip` to `[upstream]`, plumbed
   into reqwest's `resolve()` so the HTTPS client bypasses system DNS
   for those two hostnames. TLS still validates against the URL
   hostname, so a stale IP fails loudly rather than silently MITM'ing.

2. **2x relay load.** Default `hedge_ms = 10` triggers a duplicate
   in-flight query for every request. Useful for UDP/DoH/DoT (rescues
   tail latency cheaply); wasteful for ODoH (doubles HPKE seal/unseal,
   doubles sealed-byte footprint a passive observer can correlate, no
   latency win — relay hop dominates either way). Force-zero in
   oblivious mode regardless of configured hedge_ms.

Validated end-to-end against odoh-relay.numa.rs → Cloudflare: 3 digs
produced 3 forwarded_ok on the relay (was 6 before the hedge fix),
upstream_transport.odoh ticks correctly.
--- src/config.rs | 123 ++++++++++++++++++++++++++++++++++++++++++++++++- src/forward.rs | 21 ++++++++- src/serve.rs | 13 ++++-- 3 files changed, 149 insertions(+), 8 deletions(-) diff --git a/src/config.rs b/src/config.rs index 2d2f1ba..1205e37 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; -use std::net::Ipv4Addr; -use std::net::Ipv6Addr; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; +use std::time::Duration; use serde::Deserialize; @@ -146,6 +146,19 @@ impl UpstreamMode { UpstreamMode::Odoh => "odoh", } } + + /// Hedging duplicates the in-flight query against the same upstream to + /// rescue tail latency. Beneficial for UDP/DoH/DoT (cheap retransmit / + /// h2 stream multiplexing). For ODoH it doubles the relay's HPKE + /// seal/unseal load and the sealed-byte footprint a passive observer + /// can correlate, with no latency win — the relay hop dominates either + /// way. Force-zero in oblivious mode regardless of `hedge_ms`. + pub fn hedge_delay(self, hedge_ms: u64) -> Duration { + match self { + UpstreamMode::Odoh => Duration::ZERO, + _ => Duration::from_millis(hedge_ms), + } + } } #[derive(Deserialize)] @@ -182,6 +195,16 @@ pub struct UpstreamConfig { /// a user who configured ODoH rarely wants a silent non-oblivious path. #[serde(default)] pub strict: Option, + + /// Bootstrap IP for the relay host, used when numa is its own system + /// resolver (otherwise the ODoH HTTPS client loops resolving through + /// itself). TLS still validates the cert against `relay`'s hostname. + #[serde(default)] + pub relay_ip: Option, + + /// Same as `relay_ip` but for the target host. 
+ #[serde(default)] + pub target_ip: Option, } impl Default for UpstreamConfig { @@ -199,6 +222,8 @@ impl Default for UpstreamConfig { relay: None, target: None, strict: None, + relay_ip: None, + target_ip: None, } } } @@ -208,9 +233,12 @@ impl Default for UpstreamConfig { #[derive(Debug)] pub struct OdohUpstream { pub relay_url: String, + pub relay_host: String, pub target_host: String, pub target_path: String, pub strict: bool, + pub relay_bootstrap: Option, + pub target_bootstrap: Option, } impl UpstreamConfig { @@ -246,6 +274,10 @@ impl UpstreamConfig { .into()); } + let relay_host = relay_url + .host_str() + .ok_or("upstream.relay has no host")? + .to_string(); let target_host = target_url .host_str() .ok_or("upstream.target has no host")? @@ -256,11 +288,17 @@ impl UpstreamConfig { target_url.path().to_string() }; + let relay_port = relay_url.port_or_known_default().unwrap_or(443); + let target_port = target_url.port_or_known_default().unwrap_or(443); + Ok(OdohUpstream { relay_url: relay.to_string(), + relay_host, target_host, target_path, strict: self.strict.unwrap_or(true), + relay_bootstrap: self.relay_ip.map(|ip| SocketAddr::new(ip, relay_port)), + target_bootstrap: self.target_ip.map(|ip| SocketAddr::new(ip, target_port)), }) } } @@ -817,6 +855,87 @@ target = "https://odoh.cloudflare-dns.com/dns-query" assert!(err.contains("upstream.relay"), "got: {err}"); } + #[test] + fn odoh_bootstrap_ips_parse_into_socket_addrs() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +relay_ip = "178.104.229.30" +target_ip = "104.16.249.249" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let odoh = config.upstream.odoh_upstream().unwrap(); + assert_eq!(odoh.relay_host, "odoh-relay.numa.rs"); + assert_eq!( + odoh.relay_bootstrap.unwrap().to_string(), + "178.104.229.30:443" + ); + assert_eq!( + odoh.target_bootstrap.unwrap().to_string(), + "104.16.249.249:443" + ); + 
} + + #[test] + fn odoh_bootstrap_ips_optional() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let odoh = config.upstream.odoh_upstream().unwrap(); + assert!(odoh.relay_bootstrap.is_none()); + assert!(odoh.target_bootstrap.is_none()); + } + + #[test] + fn odoh_bootstrap_ip_rejects_garbage() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +relay_ip = "not-an-ip" +"#; + let err = toml::from_str::(toml).err().unwrap().to_string(); + assert!(err.contains("relay_ip"), "got: {err}"); + } + + #[test] + fn odoh_bootstrap_uses_url_port_when_non_default() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs:8443/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +relay_ip = "178.104.229.30" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let odoh = config.upstream.odoh_upstream().unwrap(); + assert_eq!( + odoh.relay_bootstrap.unwrap().to_string(), + "178.104.229.30:8443" + ); + } + + #[test] + fn hedge_delay_zeroed_for_odoh_mode() { + assert_eq!( + UpstreamMode::Odoh.hedge_delay(50), + Duration::ZERO, + "ODoH mode must zero hedge regardless of configured hedge_ms" + ); + assert_eq!( + UpstreamMode::Forward.hedge_delay(50), + Duration::from_millis(50), + "non-ODoH modes honour configured hedge_ms" + ); + } + #[test] fn odoh_missing_target_rejected() { let toml = r#" diff --git a/src/forward.rs b/src/forward.rs index bb91fcf..530f1ed 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -176,6 +176,25 @@ pub fn build_https_client() -> reqwest::Client { /// and benefit from a larger pool so warm connections survive concurrent /// fan-out. 
pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::Client { + https_client_builder(pool_max_idle_per_host) + .build() + .unwrap_or_default() +} + +/// HTTPS client for the ODoH upstream, with bootstrap-IP overrides applied +/// so relay/target hostname resolution can bypass system DNS. +pub fn build_odoh_client(odoh: &crate::config::OdohUpstream) -> reqwest::Client { + let mut builder = https_client_builder(1); + if let Some(addr) = odoh.relay_bootstrap { + builder = builder.resolve(&odoh.relay_host, addr); + } + if let Some(addr) = odoh.target_bootstrap { + builder = builder.resolve(&odoh.target_host, addr); + } + builder.build().unwrap_or_default() +} + +fn https_client_builder(pool_max_idle_per_host: usize) -> reqwest::ClientBuilder { reqwest::Client::builder() .use_rustls_tls() .http2_initial_stream_window_size(65_535) @@ -185,8 +204,6 @@ pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::C .http2_keep_alive_timeout(Duration::from_secs(10)) .pool_idle_timeout(Duration::from_secs(300)) .pool_max_idle_per_host(pool_max_idle_per_host) - .build() - .unwrap_or_default() } fn build_dot_connector() -> Result { diff --git a/src/serve.rs b/src/serve.rs index 2037857..9b4b587 100644 --- a/src/serve.rs +++ b/src/serve.rs @@ -17,7 +17,9 @@ use crate::buffer::BytePacketBuffer; use crate::cache::DnsCache; use crate::config::{build_zone_map, load_config, ConfigLoad}; use crate::ctx::{handle_query, ServerCtx}; -use crate::forward::{build_https_client, parse_upstream_list, Upstream, UpstreamPool}; +use crate::forward::{ + build_https_client, build_odoh_client, parse_upstream_list, Upstream, UpstreamPool, +}; use crate::odoh::OdohConfigCache; use crate::override_store::OverrideStore; use crate::query_log::QueryLog; @@ -94,8 +96,11 @@ pub async fn run(config_path: String) -> crate::Result<()> { } crate::config::UpstreamMode::Odoh => { let odoh = config.upstream.odoh_upstream()?; - let client = build_https_client(); - let 
target_config = Arc::new(OdohConfigCache::new(odoh.target_host, client.clone())); + let client = build_odoh_client(&odoh); + let target_config = Arc::new(OdohConfigCache::new( + odoh.target_host.clone(), + client.clone(), + )); let primary = vec![Upstream::Odoh { relay_url: odoh.relay_url, target_path: odoh.target_path, @@ -222,7 +227,7 @@ pub async fn run(config_path: String) -> crate::Result<()> { upstream_port: config.upstream.port, lan_ip: Mutex::new(crate::lan::detect_lan_ip().unwrap_or(std::net::Ipv4Addr::LOCALHOST)), timeout: Duration::from_millis(config.upstream.timeout_ms), - hedge_delay: Duration::from_millis(config.upstream.hedge_ms), + hedge_delay: resolved_mode.hedge_delay(config.upstream.hedge_ms), proxy_tld_suffix: if config.proxy.tld.is_empty() { String::new() } else {