feat(odoh): bootstrap-IP overrides + zero hedge for ODoH (post-deploy fixes)
Two issues surfaced from running mode = "odoh" against the live Hetzner relay as system DNS:

1. **Bootstrap deadlock.** The reqwest HTTPS client resolves the relay and target hostnames via system DNS. When numa is itself the system resolver, the ODoH client loops trying to resolve through itself. This commit adds optional `relay_ip` and `target_ip` to `[upstream]`, plumbed into reqwest's `resolve()` so the HTTPS client bypasses system DNS for those two hostnames. TLS still validates against the URL hostname, so a stale IP fails loudly rather than silently MITM'ing.

2. **2x relay load.** The default `hedge_ms = 10` triggers a duplicate in-flight query for every request. That is useful for UDP/DoH/DoT (it rescues tail latency cheaply) but wasteful for ODoH: it doubles HPKE seal/unseal, doubles the sealed-byte footprint a passive observer can correlate, and gives no latency win, because the relay hop dominates either way. The hedge delay is now forced to zero in oblivious mode regardless of the configured hedge_ms.

Validated end-to-end against odoh-relay.numa.rs → Cloudflare: 3 digs produced 3 forwarded_ok on the relay (was 6 before the hedge fix), and upstream_transport.odoh ticks correctly.
This commit is contained in:
123
src/config.rs
123
src/config.rs
@@ -1,7 +1,7 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::net::Ipv4Addr;
|
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
|
||||||
use std::net::Ipv6Addr;
|
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
|
||||||
@@ -146,6 +146,19 @@ impl UpstreamMode {
|
|||||||
UpstreamMode::Odoh => "odoh",
|
UpstreamMode::Odoh => "odoh",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Hedging duplicates the in-flight query against the same upstream to
|
||||||
|
/// rescue tail latency. Beneficial for UDP/DoH/DoT (cheap retransmit /
|
||||||
|
/// h2 stream multiplexing). For ODoH it doubles the relay's HPKE
|
||||||
|
/// seal/unseal load and the sealed-byte footprint a passive observer
|
||||||
|
/// can correlate, with no latency win — the relay hop dominates either
|
||||||
|
/// way. Force-zero in oblivious mode regardless of `hedge_ms`.
|
||||||
|
pub fn hedge_delay(self, hedge_ms: u64) -> Duration {
|
||||||
|
match self {
|
||||||
|
UpstreamMode::Odoh => Duration::ZERO,
|
||||||
|
_ => Duration::from_millis(hedge_ms),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
@@ -182,6 +195,16 @@ pub struct UpstreamConfig {
|
|||||||
/// a user who configured ODoH rarely wants a silent non-oblivious path.
|
/// a user who configured ODoH rarely wants a silent non-oblivious path.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub strict: Option<bool>,
|
pub strict: Option<bool>,
|
||||||
|
|
||||||
|
/// Bootstrap IP for the relay host, used when numa is its own system
|
||||||
|
/// resolver (otherwise the ODoH HTTPS client loops resolving through
|
||||||
|
/// itself). TLS still validates the cert against `relay`'s hostname.
|
||||||
|
#[serde(default)]
|
||||||
|
pub relay_ip: Option<IpAddr>,
|
||||||
|
|
||||||
|
/// Same as `relay_ip` but for the target host.
|
||||||
|
#[serde(default)]
|
||||||
|
pub target_ip: Option<IpAddr>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for UpstreamConfig {
|
impl Default for UpstreamConfig {
|
||||||
@@ -199,6 +222,8 @@ impl Default for UpstreamConfig {
|
|||||||
relay: None,
|
relay: None,
|
||||||
target: None,
|
target: None,
|
||||||
strict: None,
|
strict: None,
|
||||||
|
relay_ip: None,
|
||||||
|
target_ip: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -208,9 +233,12 @@ impl Default for UpstreamConfig {
|
|||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct OdohUpstream {
|
pub struct OdohUpstream {
|
||||||
pub relay_url: String,
|
pub relay_url: String,
|
||||||
|
pub relay_host: String,
|
||||||
pub target_host: String,
|
pub target_host: String,
|
||||||
pub target_path: String,
|
pub target_path: String,
|
||||||
pub strict: bool,
|
pub strict: bool,
|
||||||
|
pub relay_bootstrap: Option<SocketAddr>,
|
||||||
|
pub target_bootstrap: Option<SocketAddr>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UpstreamConfig {
|
impl UpstreamConfig {
|
||||||
@@ -246,6 +274,10 @@ impl UpstreamConfig {
|
|||||||
.into());
|
.into());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let relay_host = relay_url
|
||||||
|
.host_str()
|
||||||
|
.ok_or("upstream.relay has no host")?
|
||||||
|
.to_string();
|
||||||
let target_host = target_url
|
let target_host = target_url
|
||||||
.host_str()
|
.host_str()
|
||||||
.ok_or("upstream.target has no host")?
|
.ok_or("upstream.target has no host")?
|
||||||
@@ -256,11 +288,17 @@ impl UpstreamConfig {
|
|||||||
target_url.path().to_string()
|
target_url.path().to_string()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let relay_port = relay_url.port_or_known_default().unwrap_or(443);
|
||||||
|
let target_port = target_url.port_or_known_default().unwrap_or(443);
|
||||||
|
|
||||||
Ok(OdohUpstream {
|
Ok(OdohUpstream {
|
||||||
relay_url: relay.to_string(),
|
relay_url: relay.to_string(),
|
||||||
|
relay_host,
|
||||||
target_host,
|
target_host,
|
||||||
target_path,
|
target_path,
|
||||||
strict: self.strict.unwrap_or(true),
|
strict: self.strict.unwrap_or(true),
|
||||||
|
relay_bootstrap: self.relay_ip.map(|ip| SocketAddr::new(ip, relay_port)),
|
||||||
|
target_bootstrap: self.target_ip.map(|ip| SocketAddr::new(ip, target_port)),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -817,6 +855,87 @@ target = "https://odoh.cloudflare-dns.com/dns-query"
|
|||||||
assert!(err.contains("upstream.relay"), "got: {err}");
|
assert!(err.contains("upstream.relay"), "got: {err}");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// With both `relay_ip` and `target_ip` set and no explicit port in either
/// URL, each bootstrap address pairs the configured IP with port 443.
#[test]
fn odoh_bootstrap_ips_parse_into_socket_addrs() {
    let raw = r#"
[upstream]
mode = "odoh"
relay = "https://odoh-relay.numa.rs/relay"
target = "https://odoh.cloudflare-dns.com/dns-query"
relay_ip = "178.104.229.30"
target_ip = "104.16.249.249"
"#;
    let parsed: Config = toml::from_str(raw).unwrap();
    let upstream = parsed.upstream.odoh_upstream().unwrap();

    // The relay hostname is carried separately from the full relay URL.
    assert_eq!(upstream.relay_host, "odoh-relay.numa.rs");

    let relay_addr = upstream.relay_bootstrap.unwrap();
    assert_eq!(relay_addr.to_string(), "178.104.229.30:443");

    let target_addr = upstream.target_bootstrap.unwrap();
    assert_eq!(target_addr.to_string(), "104.16.249.249:443");
}
|
||||||
|
|
||||||
|
/// Leaving `relay_ip` / `target_ip` out of the config is valid and yields no
/// bootstrap addresses.
#[test]
fn odoh_bootstrap_ips_optional() {
    let raw = r#"
[upstream]
mode = "odoh"
relay = "https://odoh-relay.numa.rs/relay"
target = "https://odoh.cloudflare-dns.com/dns-query"
"#;
    let parsed: Config = toml::from_str(raw).unwrap();
    let upstream = parsed.upstream.odoh_upstream().unwrap();

    let relay_addr = upstream.relay_bootstrap;
    assert!(relay_addr.is_none());
    let target_addr = upstream.target_bootstrap;
    assert!(target_addr.is_none());
}
|
||||||
|
|
||||||
|
/// A `relay_ip` that is not an IP address must fail at deserialization, and
/// the error text must name the offending key.
#[test]
fn odoh_bootstrap_ip_rejects_garbage() {
    let raw = r#"
[upstream]
mode = "odoh"
relay = "https://odoh-relay.numa.rs/relay"
target = "https://odoh.cloudflare-dns.com/dns-query"
relay_ip = "not-an-ip"
"#;
    // `.err().unwrap()` rather than `unwrap_err()`: the latter would need
    // `Config: Debug`.
    let failure = toml::from_str::<Config>(raw).err();
    let err = failure.unwrap().to_string();
    assert!(err.contains("relay_ip"), "got: {err}");
}
|
||||||
|
|
||||||
|
/// When the relay URL carries an explicit port, the bootstrap address uses
/// that port instead of the HTTPS default.
#[test]
fn odoh_bootstrap_uses_url_port_when_non_default() {
    let raw = r#"
[upstream]
mode = "odoh"
relay = "https://odoh-relay.numa.rs:8443/relay"
target = "https://odoh.cloudflare-dns.com/dns-query"
relay_ip = "178.104.229.30"
"#;
    let parsed: Config = toml::from_str(raw).unwrap();
    let upstream = parsed.upstream.odoh_upstream().unwrap();

    let relay_addr = upstream.relay_bootstrap.unwrap();
    assert_eq!(relay_addr.to_string(), "178.104.229.30:8443");
}
|
||||||
|
|
||||||
|
/// `hedge_delay` ignores the configured value in ODoH mode and passes it
/// through unchanged for a non-ODoH mode.
#[test]
fn hedge_delay_zeroed_for_odoh_mode() {
    let configured_ms = 50;

    let odoh_delay = UpstreamMode::Odoh.hedge_delay(configured_ms);
    assert_eq!(
        odoh_delay,
        Duration::ZERO,
        "ODoH mode must zero hedge regardless of configured hedge_ms"
    );

    let forward_delay = UpstreamMode::Forward.hedge_delay(configured_ms);
    assert_eq!(
        forward_delay,
        Duration::from_millis(configured_ms),
        "non-ODoH modes honour configured hedge_ms"
    );
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn odoh_missing_target_rejected() {
|
fn odoh_missing_target_rejected() {
|
||||||
let toml = r#"
|
let toml = r#"
|
||||||
|
|||||||
@@ -176,6 +176,25 @@ pub fn build_https_client() -> reqwest::Client {
|
|||||||
/// and benefit from a larger pool so warm connections survive concurrent
|
/// and benefit from a larger pool so warm connections survive concurrent
|
||||||
/// fan-out.
|
/// fan-out.
|
||||||
pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::Client {
|
pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::Client {
|
||||||
|
https_client_builder(pool_max_idle_per_host)
|
||||||
|
.build()
|
||||||
|
.unwrap_or_default()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// HTTPS client for the ODoH upstream, with bootstrap-IP overrides applied
|
||||||
|
/// so relay/target hostname resolution can bypass system DNS.
|
||||||
|
pub fn build_odoh_client(odoh: &crate::config::OdohUpstream) -> reqwest::Client {
|
||||||
|
let mut builder = https_client_builder(1);
|
||||||
|
if let Some(addr) = odoh.relay_bootstrap {
|
||||||
|
builder = builder.resolve(&odoh.relay_host, addr);
|
||||||
|
}
|
||||||
|
if let Some(addr) = odoh.target_bootstrap {
|
||||||
|
builder = builder.resolve(&odoh.target_host, addr);
|
||||||
|
}
|
||||||
|
builder.build().unwrap_or_default()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn https_client_builder(pool_max_idle_per_host: usize) -> reqwest::ClientBuilder {
|
||||||
reqwest::Client::builder()
|
reqwest::Client::builder()
|
||||||
.use_rustls_tls()
|
.use_rustls_tls()
|
||||||
.http2_initial_stream_window_size(65_535)
|
.http2_initial_stream_window_size(65_535)
|
||||||
@@ -185,8 +204,6 @@ pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::C
|
|||||||
.http2_keep_alive_timeout(Duration::from_secs(10))
|
.http2_keep_alive_timeout(Duration::from_secs(10))
|
||||||
.pool_idle_timeout(Duration::from_secs(300))
|
.pool_idle_timeout(Duration::from_secs(300))
|
||||||
.pool_max_idle_per_host(pool_max_idle_per_host)
|
.pool_max_idle_per_host(pool_max_idle_per_host)
|
||||||
.build()
|
|
||||||
.unwrap_or_default()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_dot_connector() -> Result<tokio_rustls::TlsConnector> {
|
fn build_dot_connector() -> Result<tokio_rustls::TlsConnector> {
|
||||||
|
|||||||
13
src/serve.rs
13
src/serve.rs
@@ -17,7 +17,9 @@ use crate::buffer::BytePacketBuffer;
|
|||||||
use crate::cache::DnsCache;
|
use crate::cache::DnsCache;
|
||||||
use crate::config::{build_zone_map, load_config, ConfigLoad};
|
use crate::config::{build_zone_map, load_config, ConfigLoad};
|
||||||
use crate::ctx::{handle_query, ServerCtx};
|
use crate::ctx::{handle_query, ServerCtx};
|
||||||
use crate::forward::{build_https_client, parse_upstream_list, Upstream, UpstreamPool};
|
use crate::forward::{
|
||||||
|
build_https_client, build_odoh_client, parse_upstream_list, Upstream, UpstreamPool,
|
||||||
|
};
|
||||||
use crate::odoh::OdohConfigCache;
|
use crate::odoh::OdohConfigCache;
|
||||||
use crate::override_store::OverrideStore;
|
use crate::override_store::OverrideStore;
|
||||||
use crate::query_log::QueryLog;
|
use crate::query_log::QueryLog;
|
||||||
@@ -94,8 +96,11 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
|||||||
}
|
}
|
||||||
crate::config::UpstreamMode::Odoh => {
|
crate::config::UpstreamMode::Odoh => {
|
||||||
let odoh = config.upstream.odoh_upstream()?;
|
let odoh = config.upstream.odoh_upstream()?;
|
||||||
let client = build_https_client();
|
let client = build_odoh_client(&odoh);
|
||||||
let target_config = Arc::new(OdohConfigCache::new(odoh.target_host, client.clone()));
|
let target_config = Arc::new(OdohConfigCache::new(
|
||||||
|
odoh.target_host.clone(),
|
||||||
|
client.clone(),
|
||||||
|
));
|
||||||
let primary = vec![Upstream::Odoh {
|
let primary = vec![Upstream::Odoh {
|
||||||
relay_url: odoh.relay_url,
|
relay_url: odoh.relay_url,
|
||||||
target_path: odoh.target_path,
|
target_path: odoh.target_path,
|
||||||
@@ -222,7 +227,7 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
|||||||
upstream_port: config.upstream.port,
|
upstream_port: config.upstream.port,
|
||||||
lan_ip: Mutex::new(crate::lan::detect_lan_ip().unwrap_or(std::net::Ipv4Addr::LOCALHOST)),
|
lan_ip: Mutex::new(crate::lan::detect_lan_ip().unwrap_or(std::net::Ipv4Addr::LOCALHOST)),
|
||||||
timeout: Duration::from_millis(config.upstream.timeout_ms),
|
timeout: Duration::from_millis(config.upstream.timeout_ms),
|
||||||
hedge_delay: Duration::from_millis(config.upstream.hedge_ms),
|
hedge_delay: resolved_mode.hedge_delay(config.upstream.hedge_ms),
|
||||||
proxy_tld_suffix: if config.proxy.tld.is_empty() {
|
proxy_tld_suffix: if config.proxy.tld.is_empty() {
|
||||||
String::new()
|
String::new()
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
Reference in New Issue
Block a user