Compare commits
5 Commits
v0.14.1
...
fix/self-r
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
25ebdb311f | ||
|
|
51cce0347b | ||
|
|
459395203d | ||
|
|
10469e96bd | ||
|
|
31adc31c9b |
@@ -383,7 +383,7 @@ fn run_default(rt: &tokio::runtime::Runtime) {
|
|||||||
|
|
||||||
/// Library-to-library: Numa forward_query_raw vs Hickory resolver.lookup.
|
/// Library-to-library: Numa forward_query_raw vs Hickory resolver.lookup.
|
||||||
fn run_direct(rt: &tokio::runtime::Runtime) {
|
fn run_direct(rt: &tokio::runtime::Runtime) {
|
||||||
let upstream = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse");
|
let upstream = numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse");
|
||||||
let resolver = rt.block_on(build_hickory_resolver());
|
let resolver = rt.block_on(build_hickory_resolver());
|
||||||
let timeout = Duration::from_secs(10);
|
let timeout = Duration::from_secs(10);
|
||||||
|
|
||||||
@@ -609,9 +609,11 @@ fn run_hedge_multi(rt: &tokio::runtime::Runtime, iterations: usize) {
|
|||||||
DOMAINS.len()
|
DOMAINS.len()
|
||||||
);
|
);
|
||||||
|
|
||||||
let primary = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse");
|
let primary = numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse");
|
||||||
let primary_dual = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse");
|
let primary_dual =
|
||||||
let secondary_dual = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse");
|
numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse");
|
||||||
|
let secondary_dual =
|
||||||
|
numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse");
|
||||||
let resolver = rt.block_on(build_hickory_resolver());
|
let resolver = rt.block_on(build_hickory_resolver());
|
||||||
|
|
||||||
println!("Warming up...");
|
println!("Warming up...");
|
||||||
@@ -810,7 +812,7 @@ fn run_diag(rt: &tokio::runtime::Runtime) {
|
|||||||
fn run_diag_clients(rt: &tokio::runtime::Runtime) {
|
fn run_diag_clients(rt: &tokio::runtime::Runtime) {
|
||||||
println!("Client diagnostic: reqwest vs Hickory (20 queries to {DOH_UPSTREAM})\n");
|
println!("Client diagnostic: reqwest vs Hickory (20 queries to {DOH_UPSTREAM})\n");
|
||||||
|
|
||||||
let upstream = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse");
|
let upstream = numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse");
|
||||||
let resolver = rt.block_on(build_hickory_resolver());
|
let resolver = rt.block_on(build_hickory_resolver());
|
||||||
let timeout = Duration::from_secs(10);
|
let timeout = Duration::from_secs(10);
|
||||||
|
|
||||||
|
|||||||
@@ -357,12 +357,17 @@ mod tests {
|
|||||||
|
|
||||||
const RETRY_DELAYS_SECS: &[u64] = &[2, 10, 30];
|
const RETRY_DELAYS_SECS: &[u64] = &[2, 10, 30];
|
||||||
|
|
||||||
pub async fn download_blocklists(lists: &[String]) -> Vec<(String, String)> {
|
pub async fn download_blocklists(
|
||||||
let client = reqwest::Client::builder()
|
lists: &[String],
|
||||||
|
resolver: Option<std::sync::Arc<crate::bootstrap_resolver::NumaResolver>>,
|
||||||
|
) -> Vec<(String, String)> {
|
||||||
|
let mut builder = reqwest::Client::builder()
|
||||||
.timeout(Duration::from_secs(30))
|
.timeout(Duration::from_secs(30))
|
||||||
.gzip(true)
|
.gzip(true);
|
||||||
.build()
|
if let Some(r) = resolver {
|
||||||
.unwrap_or_default();
|
builder = builder.dns_resolver(r);
|
||||||
|
}
|
||||||
|
let client = builder.build().unwrap_or_default();
|
||||||
|
|
||||||
let fetches = lists.iter().map(|url| {
|
let fetches = lists.iter().map(|url| {
|
||||||
let client = &client;
|
let client = &client;
|
||||||
|
|||||||
235
src/bootstrap_resolver.rs
Normal file
235
src/bootstrap_resolver.rs
Normal file
@@ -0,0 +1,235 @@
|
|||||||
|
//! `reqwest` DNS resolver used by numa-originated HTTPS (DoH upstream, ODoH
|
||||||
|
//! relay/target, blocklist CDN). When numa is its own system resolver
|
||||||
|
//! (`/etc/resolv.conf → 127.0.0.1`, HAOS add-on, Pi-hole-style container),
|
||||||
|
//! the default `getaddrinfo` path loops back through numa before numa can
|
||||||
|
//! answer — a chicken-and-egg that deadlocks cold boot. See issue #122 and
|
||||||
|
//! `docs/implementation/bootstrap-resolver.md`.
|
||||||
|
//!
|
||||||
|
//! Resolution order per hostname:
|
||||||
|
//! 1. Per-hostname overrides (e.g. ODoH `relay_ip` / `target_ip`) → return
|
||||||
|
//! immediately, no DNS query. Preserves ODoH's "zero plain-DNS leak"
|
||||||
|
//! property for configured endpoints.
|
||||||
|
//! 2. Otherwise, query A + AAAA in parallel via UDP to IP-literal bootstrap
|
||||||
|
//! servers, with TCP fallback on UDP timeout (for networks that block
|
||||||
|
//! outbound UDP:53 — see memory: `project_network_udp_hostile.md`).
|
||||||
|
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use log::{debug, info, warn};
|
||||||
|
use reqwest::dns::{Addrs, Name, Resolve, Resolving};
|
||||||
|
|
||||||
|
use crate::forward::{forward_tcp, forward_udp};
|
||||||
|
use crate::packet::DnsPacket;
|
||||||
|
use crate::question::QueryType;
|
||||||
|
use crate::record::DnsRecord;
|
||||||
|
|
||||||
|
const UDP_TIMEOUT: Duration = Duration::from_millis(800);
|
||||||
|
const TCP_TIMEOUT: Duration = Duration::from_millis(1500);
|
||||||
|
const DEFAULT_BOOTSTRAP: &[SocketAddr] = &[
|
||||||
|
SocketAddr::new(IpAddr::V4(Ipv4Addr::new(9, 9, 9, 9)), 53),
|
||||||
|
SocketAddr::new(IpAddr::V4(Ipv4Addr::new(1, 1, 1, 1)), 53),
|
||||||
|
];
|
||||||
|
|
||||||
|
pub struct NumaResolver {
|
||||||
|
bootstrap: Vec<SocketAddr>,
|
||||||
|
overrides: BTreeMap<String, Vec<IpAddr>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NumaResolver {
|
||||||
|
/// Build a resolver from the configured `upstream.fallback` list and any
|
||||||
|
/// per-hostname overrides (e.g. ODoH's `relay_ip`/`target_ip`).
|
||||||
|
///
|
||||||
|
/// `fallback` entries are filtered to IP literals only — hostnames would
|
||||||
|
/// re-introduce the self-loop inside the resolver itself. Empty or
|
||||||
|
/// unusable fallback yields the hardcoded default (Quad9 + Cloudflare).
|
||||||
|
pub fn new(fallback: &[String], overrides: BTreeMap<String, Vec<IpAddr>>) -> Self {
|
||||||
|
let mut bootstrap: Vec<SocketAddr> = Vec::with_capacity(fallback.len());
|
||||||
|
for entry in fallback {
|
||||||
|
match crate::forward::parse_upstream_addr(entry, 53) {
|
||||||
|
Ok(addr) => bootstrap.push(addr),
|
||||||
|
Err(_) => {
|
||||||
|
warn!(
|
||||||
|
"bootstrap_resolver: skipping non-IP fallback '{}' \
|
||||||
|
(hostnames would re-enter the self-loop)",
|
||||||
|
entry
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let source = if bootstrap.is_empty() {
|
||||||
|
bootstrap = DEFAULT_BOOTSTRAP.to_vec();
|
||||||
|
"default (no IP-literal in upstream.fallback)"
|
||||||
|
} else {
|
||||||
|
"upstream.fallback"
|
||||||
|
};
|
||||||
|
let ips: Vec<String> = bootstrap.iter().map(|s| s.ip().to_string()).collect();
|
||||||
|
info!(
|
||||||
|
"bootstrap resolver: {} via {} — used for numa-originated HTTPS hostname resolution",
|
||||||
|
ips.join(", "),
|
||||||
|
source
|
||||||
|
);
|
||||||
|
if !overrides.is_empty() {
|
||||||
|
let pairs: Vec<String> = overrides
|
||||||
|
.iter()
|
||||||
|
.flat_map(|(host, addrs)| addrs.iter().map(move |ip| format!("{}={}", host, ip)))
|
||||||
|
.collect();
|
||||||
|
info!(
|
||||||
|
"bootstrap resolver: host overrides (skip DNS, connect direct): {}",
|
||||||
|
pairs.join(", ")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Self {
|
||||||
|
bootstrap,
|
||||||
|
overrides,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub fn bootstrap(&self) -> &[SocketAddr] {
|
||||||
|
&self.bootstrap
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Resolve for NumaResolver {
|
||||||
|
fn resolve(&self, name: Name) -> Resolving {
|
||||||
|
let hostname = name.as_str().to_string();
|
||||||
|
|
||||||
|
if let Some(ips) = self.overrides.get(&hostname) {
|
||||||
|
let addrs: Vec<SocketAddr> = ips.iter().map(|ip| SocketAddr::new(*ip, 0)).collect();
|
||||||
|
debug!(
|
||||||
|
"bootstrap_resolver: override hit for {} → {:?}",
|
||||||
|
hostname, ips
|
||||||
|
);
|
||||||
|
return Box::pin(async move { Ok(Box::new(addrs.into_iter()) as Addrs) });
|
||||||
|
}
|
||||||
|
|
||||||
|
let bootstrap = self.bootstrap.clone();
|
||||||
|
Box::pin(async move {
|
||||||
|
let addrs = resolve_via_bootstrap(&hostname, &bootstrap).await?;
|
||||||
|
debug!(
|
||||||
|
"bootstrap_resolver: resolved {} → {} addr(s)",
|
||||||
|
hostname,
|
||||||
|
addrs.len()
|
||||||
|
);
|
||||||
|
Ok(Box::new(addrs.into_iter()) as Addrs)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn resolve_via_bootstrap(
|
||||||
|
hostname: &str,
|
||||||
|
bootstrap: &[SocketAddr],
|
||||||
|
) -> Result<Vec<SocketAddr>, Box<dyn std::error::Error + Send + Sync>> {
|
||||||
|
let mut last_err: Option<String> = None;
|
||||||
|
for &server in bootstrap {
|
||||||
|
let q_a = DnsPacket::query(0xBEEF, hostname, QueryType::A);
|
||||||
|
let q_aaaa = DnsPacket::query(0xBEF0, hostname, QueryType::AAAA);
|
||||||
|
let (a_res, aaaa_res) = tokio::join!(
|
||||||
|
query_with_tcp_fallback(&q_a, server),
|
||||||
|
query_with_tcp_fallback(&q_aaaa, server),
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut out = Vec::new();
|
||||||
|
match a_res {
|
||||||
|
Ok(pkt) => extract_addrs(&pkt, &mut out),
|
||||||
|
Err(e) => last_err = Some(format!("{} A failed: {}", server, e)),
|
||||||
|
}
|
||||||
|
match aaaa_res {
|
||||||
|
Ok(pkt) => extract_addrs(&pkt, &mut out),
|
||||||
|
// AAAA is optional — many hosts return NXDOMAIN/empty. Don't
|
||||||
|
// treat as the primary error if A succeeded.
|
||||||
|
Err(e) => debug!("bootstrap {} AAAA for {} failed: {}", server, hostname, e),
|
||||||
|
}
|
||||||
|
if !out.is_empty() {
|
||||||
|
return Ok(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(last_err
|
||||||
|
.unwrap_or_else(|| "no bootstrap servers reachable".into())
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn query_with_tcp_fallback(
|
||||||
|
query: &DnsPacket,
|
||||||
|
server: SocketAddr,
|
||||||
|
) -> crate::Result<DnsPacket> {
|
||||||
|
match forward_udp(query, server, UDP_TIMEOUT).await {
|
||||||
|
Ok(pkt) => Ok(pkt),
|
||||||
|
Err(e) => {
|
||||||
|
debug!(
|
||||||
|
"bootstrap UDP {} failed ({}), falling back to TCP",
|
||||||
|
server, e
|
||||||
|
);
|
||||||
|
forward_tcp(query, server, TCP_TIMEOUT).await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_addrs(pkt: &DnsPacket, out: &mut Vec<SocketAddr>) {
|
||||||
|
for r in &pkt.answers {
|
||||||
|
match r {
|
||||||
|
DnsRecord::A { addr, .. } => out.push(SocketAddr::new(IpAddr::V4(*addr), 0)),
|
||||||
|
DnsRecord::AAAA { addr, .. } => out.push(SocketAddr::new(IpAddr::V6(*addr), 0)),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use std::net::{Ipv4Addr, Ipv6Addr};
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn empty_fallback_uses_defaults() {
|
||||||
|
let r = NumaResolver::new(&[], BTreeMap::new());
|
||||||
|
let got: Vec<String> = r.bootstrap().iter().map(|s| s.to_string()).collect();
|
||||||
|
assert_eq!(got, vec!["9.9.9.9:53", "1.1.1.1:53"]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn fallback_accepts_ip_literals_only() {
|
||||||
|
let fallback = vec![
|
||||||
|
"9.9.9.9".to_string(),
|
||||||
|
"dns.quad9.net".to_string(),
|
||||||
|
"1.1.1.1:5353".to_string(),
|
||||||
|
];
|
||||||
|
let r = NumaResolver::new(&fallback, BTreeMap::new());
|
||||||
|
let got: Vec<String> = r.bootstrap().iter().map(|s| s.to_string()).collect();
|
||||||
|
assert_eq!(got, vec!["9.9.9.9:53", "1.1.1.1:5353"]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn override_returns_configured_ips_without_dns() {
|
||||||
|
let mut overrides = BTreeMap::new();
|
||||||
|
overrides.insert(
|
||||||
|
"odoh-relay.example".to_string(),
|
||||||
|
vec![IpAddr::V4(Ipv4Addr::new(178, 104, 229, 30))],
|
||||||
|
);
|
||||||
|
let r = NumaResolver::new(&[], overrides);
|
||||||
|
let name: Name = "odoh-relay.example".parse().unwrap();
|
||||||
|
let fut = r.resolve(name);
|
||||||
|
let res = futures::executor::block_on(fut).unwrap();
|
||||||
|
let addrs: Vec<_> = res.collect();
|
||||||
|
assert_eq!(addrs.len(), 1);
|
||||||
|
assert_eq!(addrs[0].ip(), IpAddr::V4(Ipv4Addr::new(178, 104, 229, 30)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn override_supports_multiple_ips_including_ipv6() {
|
||||||
|
let mut overrides = BTreeMap::new();
|
||||||
|
overrides.insert(
|
||||||
|
"dual.example".to_string(),
|
||||||
|
vec![
|
||||||
|
IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)),
|
||||||
|
IpAddr::V6(Ipv6Addr::LOCALHOST),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
let r = NumaResolver::new(&[], overrides);
|
||||||
|
let res = futures::executor::block_on(r.resolve("dual.example".parse().unwrap())).unwrap();
|
||||||
|
let addrs: Vec<_> = res.collect();
|
||||||
|
assert_eq!(addrs.len(), 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -56,7 +56,7 @@ impl ForwardingRuleConfig {
|
|||||||
}
|
}
|
||||||
let mut primary = Vec::with_capacity(self.upstream.len());
|
let mut primary = Vec::with_capacity(self.upstream.len());
|
||||||
for s in &self.upstream {
|
for s in &self.upstream {
|
||||||
let u = crate::forward::parse_upstream(s, 53)
|
let u = crate::forward::parse_upstream(s, 53, None)
|
||||||
.map_err(|e| format!("forwarding rule for upstream '{}': {}", s, e))?;
|
.map_err(|e| format!("forwarding rule for upstream '{}': {}", s, e))?;
|
||||||
primary.push(u);
|
primary.push(u);
|
||||||
}
|
}
|
||||||
@@ -241,6 +241,26 @@ pub struct OdohUpstream {
|
|||||||
pub target_bootstrap: Option<SocketAddr>,
|
pub target_bootstrap: Option<SocketAddr>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl OdohUpstream {
|
||||||
|
/// Per-host IP overrides for the bootstrap resolver, lifted from
|
||||||
|
/// `relay_ip`/`target_ip`. Keeps the "zero plain-DNS leak for ODoH
|
||||||
|
/// endpoints" property when numa is its own system resolver.
|
||||||
|
pub fn host_ip_overrides(&self) -> std::collections::BTreeMap<String, Vec<std::net::IpAddr>> {
|
||||||
|
let mut out = std::collections::BTreeMap::new();
|
||||||
|
if let Some(addr) = self.relay_bootstrap {
|
||||||
|
out.entry(self.relay_host.clone())
|
||||||
|
.or_insert_with(Vec::new)
|
||||||
|
.push(addr.ip());
|
||||||
|
}
|
||||||
|
if let Some(addr) = self.target_bootstrap {
|
||||||
|
out.entry(self.target_host.clone())
|
||||||
|
.or_insert_with(Vec::new)
|
||||||
|
.push(addr.ip());
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl UpstreamConfig {
|
impl UpstreamConfig {
|
||||||
/// Validate and extract ODoH-specific fields. Called during `load_config`
|
/// Validate and extract ODoH-specific fields. Called during `load_config`
|
||||||
/// so misconfigured ODoH fails fast at startup, the same care we take
|
/// so misconfigured ODoH fails fast at startup, the same care we take
|
||||||
|
|||||||
177
src/ctx.rs
177
src/ctx.rs
@@ -209,106 +209,68 @@ pub async fn resolve_query(
|
|||||||
{
|
{
|
||||||
// Conditional forwarding takes priority over recursive mode
|
// Conditional forwarding takes priority over recursive mode
|
||||||
// (e.g. Tailscale .ts.net, VPC private zones)
|
// (e.g. Tailscale .ts.net, VPC private zones)
|
||||||
upstream_transport = pool.preferred().map(|u| u.transport());
|
let key = (qname.clone(), qtype);
|
||||||
match forward_with_failover_raw(
|
let (resp, path, err) =
|
||||||
raw_wire,
|
resolve_coalesced(&ctx.inflight, key, &query, QueryPath::Forwarded, || async {
|
||||||
pool,
|
let wire = forward_with_failover_raw(
|
||||||
&ctx.srtt,
|
raw_wire,
|
||||||
ctx.timeout,
|
pool,
|
||||||
ctx.hedge_delay,
|
&ctx.srtt,
|
||||||
)
|
ctx.timeout,
|
||||||
.await
|
ctx.hedge_delay,
|
||||||
{
|
|
||||||
Ok(resp_wire) => match cache_and_parse(ctx, &qname, qtype, &resp_wire) {
|
|
||||||
Ok(resp) => (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate),
|
|
||||||
Err(e) => {
|
|
||||||
error!("{} | {:?} {} | PARSE ERROR | {}", src_addr, qtype, qname, e);
|
|
||||||
(
|
|
||||||
DnsPacket::response_from(&query, ResultCode::SERVFAIL),
|
|
||||||
QueryPath::UpstreamError,
|
|
||||||
DnssecStatus::Indeterminate,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
},
|
|
||||||
Err(e) => {
|
|
||||||
error!(
|
|
||||||
"{} | {:?} {} | FORWARD ERROR | {}",
|
|
||||||
src_addr, qtype, qname, e
|
|
||||||
);
|
|
||||||
(
|
|
||||||
DnsPacket::response_from(&query, ResultCode::SERVFAIL),
|
|
||||||
QueryPath::UpstreamError,
|
|
||||||
DnssecStatus::Indeterminate,
|
|
||||||
)
|
)
|
||||||
}
|
.await?;
|
||||||
|
cache_and_parse(ctx, &qname, qtype, &wire)
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
log_coalesced_outcome(src_addr, qtype, &qname, path, err.as_deref(), "FORWARD");
|
||||||
|
if path == QueryPath::Forwarded {
|
||||||
|
upstream_transport = pool.preferred().map(|u| u.transport());
|
||||||
}
|
}
|
||||||
|
(resp, path, DnssecStatus::Indeterminate)
|
||||||
} else if ctx.upstream_mode == UpstreamMode::Recursive {
|
} else if ctx.upstream_mode == UpstreamMode::Recursive {
|
||||||
// Recursive resolution makes UDP hops to roots/TLDs/auths;
|
// Recursive resolution makes UDP hops to roots/TLDs/auths;
|
||||||
// tag as Udp so the dashboard can aggregate plaintext-wire
|
// tag as Udp so the dashboard can aggregate plaintext-wire
|
||||||
// egress honestly. Only mark on success — errors stay None.
|
// egress honestly. Only mark on success — errors stay None.
|
||||||
let key = (qname.clone(), qtype);
|
let key = (qname.clone(), qtype);
|
||||||
let (resp, path, err) = resolve_coalesced(&ctx.inflight, key, &query, || {
|
let (resp, path, err) =
|
||||||
crate::recursive::resolve_recursive(
|
resolve_coalesced(&ctx.inflight, key, &query, QueryPath::Recursive, || {
|
||||||
&qname,
|
crate::recursive::resolve_recursive(
|
||||||
qtype,
|
&qname,
|
||||||
&ctx.cache,
|
qtype,
|
||||||
&query,
|
&ctx.cache,
|
||||||
&ctx.root_hints,
|
&query,
|
||||||
&ctx.srtt,
|
&ctx.root_hints,
|
||||||
)
|
&ctx.srtt,
|
||||||
})
|
)
|
||||||
.await;
|
})
|
||||||
if path == QueryPath::Coalesced {
|
.await;
|
||||||
debug!("{} | {:?} {} | COALESCED", src_addr, qtype, qname);
|
log_coalesced_outcome(src_addr, qtype, &qname, path, err.as_deref(), "RECURSIVE");
|
||||||
} else if path == QueryPath::UpstreamError {
|
if path == QueryPath::Recursive {
|
||||||
error!(
|
|
||||||
"{} | {:?} {} | RECURSIVE ERROR | {}",
|
|
||||||
src_addr,
|
|
||||||
qtype,
|
|
||||||
qname,
|
|
||||||
err.as_deref().unwrap_or("leader failed")
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
upstream_transport = Some(crate::stats::UpstreamTransport::Udp);
|
upstream_transport = Some(crate::stats::UpstreamTransport::Udp);
|
||||||
}
|
}
|
||||||
(resp, path, DnssecStatus::Indeterminate)
|
(resp, path, DnssecStatus::Indeterminate)
|
||||||
} else {
|
} else {
|
||||||
let pool = ctx.upstream_pool.lock().unwrap().clone();
|
let pool = ctx.upstream_pool.lock().unwrap().clone();
|
||||||
match forward_with_failover_raw(
|
let key = (qname.clone(), qtype);
|
||||||
raw_wire,
|
let (resp, path, err) =
|
||||||
&pool,
|
resolve_coalesced(&ctx.inflight, key, &query, QueryPath::Upstream, || async {
|
||||||
&ctx.srtt,
|
let wire = forward_with_failover_raw(
|
||||||
ctx.timeout,
|
raw_wire,
|
||||||
ctx.hedge_delay,
|
&pool,
|
||||||
)
|
&ctx.srtt,
|
||||||
.await
|
ctx.timeout,
|
||||||
{
|
ctx.hedge_delay,
|
||||||
Ok(resp_wire) => match cache_and_parse(ctx, &qname, qtype, &resp_wire) {
|
|
||||||
Ok(resp) => {
|
|
||||||
upstream_transport = pool.preferred().map(|u| u.transport());
|
|
||||||
(resp, QueryPath::Upstream, DnssecStatus::Indeterminate)
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
error!("{} | {:?} {} | PARSE ERROR | {}", src_addr, qtype, qname, e);
|
|
||||||
(
|
|
||||||
DnsPacket::response_from(&query, ResultCode::SERVFAIL),
|
|
||||||
QueryPath::UpstreamError,
|
|
||||||
DnssecStatus::Indeterminate,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
},
|
|
||||||
Err(e) => {
|
|
||||||
error!(
|
|
||||||
"{} | {:?} {} | UPSTREAM ERROR | {}",
|
|
||||||
src_addr, qtype, qname, e
|
|
||||||
);
|
|
||||||
(
|
|
||||||
DnsPacket::response_from(&query, ResultCode::SERVFAIL),
|
|
||||||
QueryPath::UpstreamError,
|
|
||||||
DnssecStatus::Indeterminate,
|
|
||||||
)
|
)
|
||||||
}
|
.await?;
|
||||||
|
cache_and_parse(ctx, &qname, qtype, &wire)
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
log_coalesced_outcome(src_addr, qtype, &qname, path, err.as_deref(), "UPSTREAM");
|
||||||
|
if path == QueryPath::Upstream {
|
||||||
|
upstream_transport = pool.preferred().map(|u| u.transport());
|
||||||
}
|
}
|
||||||
|
(resp, path, DnssecStatus::Indeterminate)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -611,11 +573,15 @@ fn acquire_inflight(inflight: &Mutex<InflightMap>, key: (String, QueryType)) ->
|
|||||||
|
|
||||||
/// Run a resolve function with in-flight coalescing. Multiple concurrent calls
|
/// Run a resolve function with in-flight coalescing. Multiple concurrent calls
|
||||||
/// for the same key share a single resolution — the first caller (leader)
|
/// for the same key share a single resolution — the first caller (leader)
|
||||||
/// executes `resolve_fn`, and followers wait for the broadcast result.
|
/// executes `resolve_fn`, and followers wait for the broadcast result. The
|
||||||
|
/// leader's successful path is tagged with `leader_path` so callers that
|
||||||
|
/// share this helper (recursive, forwarded-rule, forward-upstream) keep their
|
||||||
|
/// own observability without duplicating the inflight map.
|
||||||
async fn resolve_coalesced<F, Fut>(
|
async fn resolve_coalesced<F, Fut>(
|
||||||
inflight: &Mutex<InflightMap>,
|
inflight: &Mutex<InflightMap>,
|
||||||
key: (String, QueryType),
|
key: (String, QueryType),
|
||||||
query: &DnsPacket,
|
query: &DnsPacket,
|
||||||
|
leader_path: QueryPath,
|
||||||
resolve_fn: F,
|
resolve_fn: F,
|
||||||
) -> (DnsPacket, QueryPath, Option<String>)
|
) -> (DnsPacket, QueryPath, Option<String>)
|
||||||
where
|
where
|
||||||
@@ -644,7 +610,7 @@ where
|
|||||||
match result {
|
match result {
|
||||||
Ok(resp) => {
|
Ok(resp) => {
|
||||||
let _ = tx.send(Some(resp.clone()));
|
let _ = tx.send(Some(resp.clone()));
|
||||||
(resp, QueryPath::Recursive, None)
|
(resp, leader_path, None)
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
let _ = tx.send(None);
|
let _ = tx.send(None);
|
||||||
@@ -671,6 +637,33 @@ impl Drop for InflightGuard<'_> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emit the log lines shared by the three upstream branches (Forwarded,
|
||||||
|
/// Recursive, Upstream) after `resolve_coalesced` returns. Leader-success
|
||||||
|
/// and transport-tagging stay at the call site since they diverge per
|
||||||
|
/// branch, but the Coalesced debug and UpstreamError error are identical
|
||||||
|
/// except for the label.
|
||||||
|
fn log_coalesced_outcome(
|
||||||
|
src_addr: SocketAddr,
|
||||||
|
qtype: QueryType,
|
||||||
|
qname: &str,
|
||||||
|
path: QueryPath,
|
||||||
|
err: Option<&str>,
|
||||||
|
label: &str,
|
||||||
|
) {
|
||||||
|
match path {
|
||||||
|
QueryPath::Coalesced => debug!("{} | {:?} {} | COALESCED", src_addr, qtype, qname),
|
||||||
|
QueryPath::UpstreamError => error!(
|
||||||
|
"{} | {:?} {} | {} ERROR | {}",
|
||||||
|
src_addr,
|
||||||
|
qtype,
|
||||||
|
qname,
|
||||||
|
label,
|
||||||
|
err.unwrap_or("leader failed")
|
||||||
|
),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn special_use_response(query: &DnsPacket, qname: &str, qtype: QueryType) -> DnsPacket {
|
fn special_use_response(query: &DnsPacket, qname: &str, qtype: QueryType) -> DnsPacket {
|
||||||
use std::net::{Ipv4Addr, Ipv6Addr};
|
use std::net::{Ipv4Addr, Ipv6Addr};
|
||||||
if qname == "ipv4only.arpa" {
|
if qname == "ipv4only.arpa" {
|
||||||
@@ -909,7 +902,7 @@ mod tests {
|
|||||||
let key = ("coalesce.test".to_string(), QueryType::A);
|
let key = ("coalesce.test".to_string(), QueryType::A);
|
||||||
let query = DnsPacket::query(100 + i, "coalesce.test", QueryType::A);
|
let query = DnsPacket::query(100 + i, "coalesce.test", QueryType::A);
|
||||||
handles.push(tokio::spawn(async move {
|
handles.push(tokio::spawn(async move {
|
||||||
resolve_coalesced(&inf, key, &query, || async {
|
resolve_coalesced(&inf, key, &query, QueryPath::Recursive, || async {
|
||||||
count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||||
Ok(mock_response("coalesce.test"))
|
Ok(mock_response("coalesce.test"))
|
||||||
@@ -953,6 +946,7 @@ mod tests {
|
|||||||
&inf1,
|
&inf1,
|
||||||
("same.domain".to_string(), QueryType::A),
|
("same.domain".to_string(), QueryType::A),
|
||||||
&query_a,
|
&query_a,
|
||||||
|
QueryPath::Recursive,
|
||||||
|| async {
|
|| async {
|
||||||
count1.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
count1.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||||
@@ -966,6 +960,7 @@ mod tests {
|
|||||||
&inf2,
|
&inf2,
|
||||||
("same.domain".to_string(), QueryType::AAAA),
|
("same.domain".to_string(), QueryType::AAAA),
|
||||||
&query_aaaa,
|
&query_aaaa,
|
||||||
|
QueryPath::Recursive,
|
||||||
|| async {
|
|| async {
|
||||||
count2.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
count2.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||||
@@ -995,6 +990,7 @@ mod tests {
|
|||||||
&inflight,
|
&inflight,
|
||||||
("will-fail.test".to_string(), QueryType::A),
|
("will-fail.test".to_string(), QueryType::A),
|
||||||
&query,
|
&query,
|
||||||
|
QueryPath::Recursive,
|
||||||
|| async { Err::<DnsPacket, _>("upstream timeout".into()) },
|
|| async { Err::<DnsPacket, _>("upstream timeout".into()) },
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
@@ -1016,6 +1012,7 @@ mod tests {
|
|||||||
&inf,
|
&inf,
|
||||||
("fail.test".to_string(), QueryType::A),
|
("fail.test".to_string(), QueryType::A),
|
||||||
&query,
|
&query,
|
||||||
|
QueryPath::Recursive,
|
||||||
|| async {
|
|| async {
|
||||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||||
Err::<DnsPacket, _>("upstream error".into())
|
Err::<DnsPacket, _>("upstream error".into())
|
||||||
@@ -1056,6 +1053,7 @@ mod tests {
|
|||||||
&inflight,
|
&inflight,
|
||||||
("question.test".to_string(), QueryType::A),
|
("question.test".to_string(), QueryType::A),
|
||||||
&query,
|
&query,
|
||||||
|
QueryPath::Recursive,
|
||||||
|| async { Err::<DnsPacket, _>("fail".into()) },
|
|| async { Err::<DnsPacket, _>("fail".into()) },
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
@@ -1080,6 +1078,7 @@ mod tests {
|
|||||||
&inflight,
|
&inflight,
|
||||||
("err-msg.test".to_string(), QueryType::A),
|
("err-msg.test".to_string(), QueryType::A),
|
||||||
&query,
|
&query,
|
||||||
|
QueryPath::Recursive,
|
||||||
|| async { Err::<DnsPacket, _>("connection refused by upstream".into()) },
|
|| async { Err::<DnsPacket, _>("connection refused by upstream".into()) },
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|||||||
@@ -113,10 +113,7 @@ impl fmt::Display for Upstream {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn parse_upstream_addr(
|
pub fn parse_upstream_addr(s: &str, default_port: u16) -> std::result::Result<SocketAddr, String> {
|
||||||
s: &str,
|
|
||||||
default_port: u16,
|
|
||||||
) -> std::result::Result<SocketAddr, String> {
|
|
||||||
// Try full socket addr first: "1.2.3.4:5353" or "[::1]:5353"
|
// Try full socket addr first: "1.2.3.4:5353" or "[::1]:5353"
|
||||||
if let Ok(addr) = s.parse::<SocketAddr>() {
|
if let Ok(addr) = s.parse::<SocketAddr>() {
|
||||||
return Ok(addr);
|
return Ok(addr);
|
||||||
@@ -129,19 +126,28 @@ pub(crate) fn parse_upstream_addr(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a slice of upstream address strings into `Upstream` values, failing
|
/// Parse a slice of upstream address strings into `Upstream` values, failing
|
||||||
/// on the first invalid entry.
|
/// on the first invalid entry. DoH entries use `resolver` (when provided) as
|
||||||
pub fn parse_upstream_list(addrs: &[String], default_port: u16) -> Result<Vec<Upstream>> {
|
/// their hostname resolver.
|
||||||
|
pub fn parse_upstream_list(
|
||||||
|
addrs: &[String],
|
||||||
|
default_port: u16,
|
||||||
|
resolver: Option<Arc<crate::bootstrap_resolver::NumaResolver>>,
|
||||||
|
) -> Result<Vec<Upstream>> {
|
||||||
addrs
|
addrs
|
||||||
.iter()
|
.iter()
|
||||||
.map(|s| parse_upstream(s, default_port))
|
.map(|s| parse_upstream(s, default_port, resolver.clone()))
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_upstream(s: &str, default_port: u16) -> Result<Upstream> {
|
pub fn parse_upstream(
|
||||||
|
s: &str,
|
||||||
|
default_port: u16,
|
||||||
|
resolver: Option<Arc<crate::bootstrap_resolver::NumaResolver>>,
|
||||||
|
) -> Result<Upstream> {
|
||||||
if s.starts_with("https://") {
|
if s.starts_with("https://") {
|
||||||
return Ok(Upstream::Doh {
|
return Ok(Upstream::Doh {
|
||||||
url: s.to_string(),
|
url: s.to_string(),
|
||||||
client: build_https_client(),
|
client: build_https_client_with_resolver(1, resolver),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
// tls://IP:PORT#hostname or tls://IP#hostname (default port 853)
|
// tls://IP:PORT#hostname or tls://IP#hostname (default port 853)
|
||||||
@@ -163,12 +169,16 @@ pub fn parse_upstream(s: &str, default_port: u16) -> Result<Upstream> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// HTTP/2 client tuned for DoH/ODoH: small windows for low latency, long-lived
|
/// HTTP/2 client tuned for DoH/ODoH: small windows for low latency, long-lived
|
||||||
/// keep-alive. Shared by the DoH upstream and the ODoH config-fetcher +
|
/// keep-alive. Pool defaults to one idle conn per host — good for resolvers
|
||||||
/// seal/open path. Pool defaults to one idle conn per host — good for
|
/// that talk to a single upstream; relays that fan out to many targets
|
||||||
/// resolvers that talk to a single upstream; relays that fan out to many
|
/// should use [`build_https_client_with_pool`].
|
||||||
/// targets should use [`build_https_client_with_pool`].
|
///
|
||||||
|
/// Uses the system resolver. Callers running inside `serve::run` pass the
|
||||||
|
/// shared [`crate::bootstrap_resolver::NumaResolver`] via
|
||||||
|
/// [`build_https_client_with_resolver`] to avoid the self-loop documented
|
||||||
|
/// in `docs/implementation/bootstrap-resolver.md`.
|
||||||
pub fn build_https_client() -> reqwest::Client {
|
pub fn build_https_client() -> reqwest::Client {
|
||||||
build_https_client_with_pool(1)
|
build_https_client_with_resolver(1, None)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Same shape as [`build_https_client`], but caller picks
|
/// Same shape as [`build_https_client`], but caller picks
|
||||||
@@ -176,20 +186,18 @@ pub fn build_https_client() -> reqwest::Client {
|
|||||||
/// and benefit from a larger pool so warm connections survive concurrent
|
/// and benefit from a larger pool so warm connections survive concurrent
|
||||||
/// fan-out.
|
/// fan-out.
|
||||||
pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::Client {
|
pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::Client {
|
||||||
https_client_builder(pool_max_idle_per_host)
|
build_https_client_with_resolver(pool_max_idle_per_host, None)
|
||||||
.build()
|
|
||||||
.unwrap_or_default()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// HTTPS client for the ODoH upstream, with bootstrap-IP overrides applied
|
/// [`build_https_client`] with an optional custom DNS resolver. Numa wires
|
||||||
/// so relay/target hostname resolution can bypass system DNS.
|
/// [`crate::bootstrap_resolver::NumaResolver`] here.
|
||||||
pub fn build_odoh_client(odoh: &crate::config::OdohUpstream) -> reqwest::Client {
|
pub fn build_https_client_with_resolver(
|
||||||
let mut builder = https_client_builder(1);
|
pool_max_idle_per_host: usize,
|
||||||
if let Some(addr) = odoh.relay_bootstrap {
|
resolver: Option<Arc<crate::bootstrap_resolver::NumaResolver>>,
|
||||||
builder = builder.resolve(&odoh.relay_host, addr);
|
) -> reqwest::Client {
|
||||||
}
|
let mut builder = https_client_builder(pool_max_idle_per_host);
|
||||||
if let Some(addr) = odoh.target_bootstrap {
|
if let Some(r) = resolver {
|
||||||
builder = builder.resolve(&odoh.target_host, addr);
|
builder = builder.dns_resolver(r);
|
||||||
}
|
}
|
||||||
builder.build().unwrap_or_default()
|
builder.build().unwrap_or_default()
|
||||||
}
|
}
|
||||||
@@ -553,6 +561,9 @@ async fn forward_doh_raw(
|
|||||||
|
|
||||||
/// Send a lightweight keepalive query to a DoH upstream to prevent
|
/// Send a lightweight keepalive query to a DoH upstream to prevent
|
||||||
/// the HTTP/2 + TLS connection from going idle and being torn down.
|
/// the HTTP/2 + TLS connection from going idle and being torn down.
|
||||||
|
/// The first call doubles as a startup warm-up: bootstrap-resolver failures
|
||||||
|
/// (unreachable Quad9/Cloudflare defaults, misconfigured hostname upstream)
|
||||||
|
/// surface here rather than on the first client query.
|
||||||
pub async fn keepalive_doh(upstream: &Upstream) {
|
pub async fn keepalive_doh(upstream: &Upstream) {
|
||||||
if let Upstream::Doh { url, client } = upstream {
|
if let Upstream::Doh { url, client } = upstream {
|
||||||
// Query for . NS — minimal, always succeeds, response is small
|
// Query for . NS — minimal, always succeeds, response is small
|
||||||
@@ -565,7 +576,9 @@ pub async fn keepalive_doh(upstream: &Upstream) {
|
|||||||
0x00, 0x02, // type NS
|
0x00, 0x02, // type NS
|
||||||
0x00, 0x01, // class IN
|
0x00, 0x01, // class IN
|
||||||
];
|
];
|
||||||
let _ = forward_doh_raw(wire, url, client, Duration::from_secs(5)).await;
|
if let Err(e) = forward_doh_raw(wire, url, client, Duration::from_secs(5)).await {
|
||||||
|
log::warn!("DoH keepalive to {} failed: {}", url, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
pub mod api;
|
pub mod api;
|
||||||
pub mod blocklist;
|
pub mod blocklist;
|
||||||
|
pub mod bootstrap_resolver;
|
||||||
pub mod buffer;
|
pub mod buffer;
|
||||||
pub mod cache;
|
pub mod cache;
|
||||||
pub mod config;
|
pub mod config;
|
||||||
|
|||||||
55
src/serve.rs
55
src/serve.rs
@@ -13,12 +13,13 @@ use log::{error, info};
|
|||||||
use tokio::net::UdpSocket;
|
use tokio::net::UdpSocket;
|
||||||
|
|
||||||
use crate::blocklist::{download_blocklists, parse_blocklist, BlocklistStore};
|
use crate::blocklist::{download_blocklists, parse_blocklist, BlocklistStore};
|
||||||
|
use crate::bootstrap_resolver::NumaResolver;
|
||||||
use crate::buffer::BytePacketBuffer;
|
use crate::buffer::BytePacketBuffer;
|
||||||
use crate::cache::DnsCache;
|
use crate::cache::DnsCache;
|
||||||
use crate::config::{build_zone_map, load_config, ConfigLoad};
|
use crate::config::{build_zone_map, load_config, ConfigLoad};
|
||||||
use crate::ctx::{handle_query, ServerCtx};
|
use crate::ctx::{handle_query, ServerCtx};
|
||||||
use crate::forward::{
|
use crate::forward::{
|
||||||
build_https_client, build_odoh_client, parse_upstream_list, Upstream, UpstreamPool,
|
build_https_client_with_resolver, parse_upstream_list, Upstream, UpstreamPool,
|
||||||
};
|
};
|
||||||
use crate::odoh::OdohConfigCache;
|
use crate::odoh::OdohConfigCache;
|
||||||
use crate::override_store::OverrideStore;
|
use crate::override_store::OverrideStore;
|
||||||
@@ -48,6 +49,23 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
|||||||
(dummy, "recursive (root hints)".to_string())
|
(dummy, "recursive (root hints)".to_string())
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Routes numa-originated HTTPS (DoH upstream, ODoH relay/target, blocklist
|
||||||
|
// CDN) away from the system resolver so lookups don't loop back through
|
||||||
|
// numa when it's its own system DNS.
|
||||||
|
// See `docs/implementation/bootstrap-resolver.md`.
|
||||||
|
let resolver_overrides = match config.upstream.mode {
|
||||||
|
crate::config::UpstreamMode::Odoh => config
|
||||||
|
.upstream
|
||||||
|
.odoh_upstream()
|
||||||
|
.map(|o| o.host_ip_overrides())
|
||||||
|
.unwrap_or_default(),
|
||||||
|
_ => std::collections::BTreeMap::new(),
|
||||||
|
};
|
||||||
|
let bootstrap_resolver: Arc<NumaResolver> = Arc::new(NumaResolver::new(
|
||||||
|
&config.upstream.fallback,
|
||||||
|
resolver_overrides,
|
||||||
|
));
|
||||||
|
|
||||||
let (resolved_mode, upstream_auto, pool, upstream_label) = match config.upstream.mode {
|
let (resolved_mode, upstream_auto, pool, upstream_label) = match config.upstream.mode {
|
||||||
crate::config::UpstreamMode::Auto => {
|
crate::config::UpstreamMode::Auto => {
|
||||||
info!("auto mode: probing recursive resolution...");
|
info!("auto mode: probing recursive resolution...");
|
||||||
@@ -57,7 +75,7 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
|||||||
(crate::config::UpstreamMode::Recursive, false, pool, label)
|
(crate::config::UpstreamMode::Recursive, false, pool, label)
|
||||||
} else {
|
} else {
|
||||||
log::warn!("recursive probe failed — falling back to Quad9 DoH");
|
log::warn!("recursive probe failed — falling back to Quad9 DoH");
|
||||||
let client = build_https_client();
|
let client = build_https_client_with_resolver(1, Some(bootstrap_resolver.clone()));
|
||||||
let url = DOH_FALLBACK.to_string();
|
let url = DOH_FALLBACK.to_string();
|
||||||
let label = url.clone();
|
let label = url.clone();
|
||||||
let pool = UpstreamPool::new(vec![Upstream::Doh { url, client }], vec![]);
|
let pool = UpstreamPool::new(vec![Upstream::Doh { url, client }], vec![]);
|
||||||
@@ -82,8 +100,16 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
|||||||
config.upstream.address.clone()
|
config.upstream.address.clone()
|
||||||
};
|
};
|
||||||
|
|
||||||
let primary = parse_upstream_list(&addrs, config.upstream.port)?;
|
let primary = parse_upstream_list(
|
||||||
let fallback = parse_upstream_list(&config.upstream.fallback, config.upstream.port)?;
|
&addrs,
|
||||||
|
config.upstream.port,
|
||||||
|
Some(bootstrap_resolver.clone()),
|
||||||
|
)?;
|
||||||
|
let fallback = parse_upstream_list(
|
||||||
|
&config.upstream.fallback,
|
||||||
|
config.upstream.port,
|
||||||
|
Some(bootstrap_resolver.clone()),
|
||||||
|
)?;
|
||||||
|
|
||||||
let pool = UpstreamPool::new(primary, fallback);
|
let pool = UpstreamPool::new(primary, fallback);
|
||||||
let label = pool.label();
|
let label = pool.label();
|
||||||
@@ -96,7 +122,7 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
|||||||
}
|
}
|
||||||
crate::config::UpstreamMode::Odoh => {
|
crate::config::UpstreamMode::Odoh => {
|
||||||
let odoh = config.upstream.odoh_upstream()?;
|
let odoh = config.upstream.odoh_upstream()?;
|
||||||
let client = build_odoh_client(&odoh);
|
let client = build_https_client_with_resolver(1, Some(bootstrap_resolver.clone()));
|
||||||
let target_config = Arc::new(OdohConfigCache::new(
|
let target_config = Arc::new(OdohConfigCache::new(
|
||||||
odoh.target_host.clone(),
|
odoh.target_host.clone(),
|
||||||
client.clone(),
|
client.clone(),
|
||||||
@@ -110,7 +136,11 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
|||||||
let fallback = if odoh.strict {
|
let fallback = if odoh.strict {
|
||||||
Vec::new()
|
Vec::new()
|
||||||
} else {
|
} else {
|
||||||
parse_upstream_list(&config.upstream.fallback, config.upstream.port)?
|
parse_upstream_list(
|
||||||
|
&config.upstream.fallback,
|
||||||
|
config.upstream.port,
|
||||||
|
Some(bootstrap_resolver.clone()),
|
||||||
|
)?
|
||||||
};
|
};
|
||||||
let pool = UpstreamPool::new(primary, fallback);
|
let pool = UpstreamPool::new(primary, fallback);
|
||||||
let label = pool.label();
|
let label = pool.label();
|
||||||
@@ -405,8 +435,9 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
|||||||
if config.blocking.enabled && !blocklist_lists.is_empty() {
|
if config.blocking.enabled && !blocklist_lists.is_empty() {
|
||||||
let bl_ctx = Arc::clone(&ctx);
|
let bl_ctx = Arc::clone(&ctx);
|
||||||
let bl_lists = blocklist_lists.clone();
|
let bl_lists = blocklist_lists.clone();
|
||||||
|
let bl_resolver = bootstrap_resolver.clone();
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
load_blocklists(&bl_ctx, &bl_lists).await;
|
load_blocklists(&bl_ctx, &bl_lists, Some(bl_resolver.clone())).await;
|
||||||
|
|
||||||
// Periodic refresh
|
// Periodic refresh
|
||||||
let mut interval = tokio::time::interval(Duration::from_secs(refresh_hours * 3600));
|
let mut interval = tokio::time::interval(Duration::from_secs(refresh_hours * 3600));
|
||||||
@@ -414,7 +445,7 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
|||||||
loop {
|
loop {
|
||||||
interval.tick().await;
|
interval.tick().await;
|
||||||
info!("refreshing blocklists...");
|
info!("refreshing blocklists...");
|
||||||
load_blocklists(&bl_ctx, &bl_lists).await;
|
load_blocklists(&bl_ctx, &bl_lists, Some(bl_resolver.clone())).await;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -596,8 +627,8 @@ async fn network_watch_loop(ctx: Arc<ServerCtx>) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn load_blocklists(ctx: &ServerCtx, lists: &[String]) {
|
async fn load_blocklists(ctx: &ServerCtx, lists: &[String], resolver: Option<Arc<NumaResolver>>) {
|
||||||
let downloaded = download_blocklists(lists).await;
|
let downloaded = download_blocklists(lists, resolver).await;
|
||||||
|
|
||||||
// Parse outside the lock to avoid blocking DNS queries during parse (~100ms)
|
// Parse outside the lock to avoid blocking DNS queries during parse (~100ms)
|
||||||
let mut all_domains = std::collections::HashSet::new();
|
let mut all_domains = std::collections::HashSet::new();
|
||||||
@@ -632,8 +663,10 @@ async fn warm_domain(ctx: &ServerCtx, domain: &str) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn doh_keepalive_loop(ctx: Arc<ServerCtx>) {
|
async fn doh_keepalive_loop(ctx: Arc<ServerCtx>) {
|
||||||
|
// First tick fires immediately so we surface bootstrap-resolver failures
|
||||||
|
// (unreachable Quad9/Cloudflare, blocked :53, bad upstream hostname) in
|
||||||
|
// the startup logs instead of on the first client query.
|
||||||
let mut interval = tokio::time::interval(Duration::from_secs(25));
|
let mut interval = tokio::time::interval(Duration::from_secs(25));
|
||||||
interval.tick().await; // skip first immediate tick
|
|
||||||
loop {
|
loop {
|
||||||
interval.tick().await;
|
interval.tick().await;
|
||||||
let pool = ctx.upstream_pool.lock().unwrap().clone();
|
let pool = ctx.upstream_pool.lock().unwrap().clone();
|
||||||
|
|||||||
155
tests/docker/self-resolver-loop.sh
Executable file
155
tests/docker/self-resolver-loop.sh
Executable file
@@ -0,0 +1,155 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
#
|
||||||
|
# Reproducer for issue #122 — chicken-and-egg when numa is its own system
|
||||||
|
# resolver (HAOS add-on, Pi-hole-style container, laptop with
|
||||||
|
# resolv.conf → 127.0.0.1).
|
||||||
|
#
|
||||||
|
# Topology:
|
||||||
|
# container /etc/resolv.conf → nameserver 127.0.0.1
|
||||||
|
# numa bound on :53 → upstream DoH by hostname (quad9)
|
||||||
|
# numa boots → spawns blocklist download
|
||||||
|
# reqwest::get → getaddrinfo("cdn.jsdelivr.net")
|
||||||
|
# → loopback UDP :53 → numa → cache miss → DoH upstream
|
||||||
|
# → getaddrinfo("dns.quad9.net") → same loop → glibc EAI_AGAIN
|
||||||
|
#
|
||||||
|
# Expected on master: both assertions FAIL (bug reproduced).
|
||||||
|
# Expected after bootstrap-IP fix: both assertions PASS.
|
||||||
|
#
|
||||||
|
# Requirements: docker (with internet access for external lists/DoH)
|
||||||
|
# Usage: ./tests/docker/self-resolver-loop.sh
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
cd "$(dirname "$0")/../.."
|
||||||
|
|
||||||
|
GREEN="\033[32m"; RED="\033[31m"; RESET="\033[0m"
|
||||||
|
|
||||||
|
pass() { printf " ${GREEN}✓${RESET} %s\n" "$1"; }
|
||||||
|
fail() { printf " ${RED}✗${RESET} %s\n" "$1"; printf " %s\n" "$2"; FAILED=$((FAILED+1)); }
|
||||||
|
FAILED=0
|
||||||
|
|
||||||
|
OUT=/tmp/numa-self-resolver.out
|
||||||
|
|
||||||
|
echo "── self-resolver-loop: building + reproducing on debian:bookworm ──"
|
||||||
|
echo " (first run is slow: image pull + cold cargo build, ~5-8 min)"
|
||||||
|
echo
|
||||||
|
|
||||||
|
docker run --rm \
|
||||||
|
-v "$PWD:/src:ro" \
|
||||||
|
-v numa-self-resolver-cargo:/root/.cargo \
|
||||||
|
-v numa-self-resolver-target:/work/target \
|
||||||
|
debian:bookworm bash -c '
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Phase 1: install deps + build with the container DNS as given by Docker
|
||||||
|
# (resolves deb.debian.org, static.rust-lang.org, crates.io).
|
||||||
|
apt-get update -qq && apt-get install -y -qq curl build-essential dnsutils 2>&1 | tail -3
|
||||||
|
|
||||||
|
if ! command -v cargo &>/dev/null; then
|
||||||
|
curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --quiet
|
||||||
|
fi
|
||||||
|
. "$HOME/.cargo/env"
|
||||||
|
|
||||||
|
mkdir -p /work
|
||||||
|
tar -C /src --exclude=./target --exclude=./.git -cf - . | tar -C /work -xf -
|
||||||
|
cd /work
|
||||||
|
|
||||||
|
echo "── cargo build --release --locked ──"
|
||||||
|
cargo build --release --locked 2>&1 | tail -5
|
||||||
|
echo
|
||||||
|
|
||||||
|
# Phase 2: flip system DNS to numa itself — this is the pathological
|
||||||
|
# topology from issue #122 (HAOS add-on, resolv.conf → 127.0.0.1).
|
||||||
|
# Everything after this point, any getaddrinfo call inside numa loops
|
||||||
|
# back through :53.
|
||||||
|
echo "nameserver 127.0.0.1" > /etc/resolv.conf
|
||||||
|
echo "── /etc/resolv.conf inside container (post-flip) ──"
|
||||||
|
cat /etc/resolv.conf
|
||||||
|
echo
|
||||||
|
|
||||||
|
cat > /tmp/numa.toml <<CONF
|
||||||
|
[server]
|
||||||
|
bind_addr = "0.0.0.0:53"
|
||||||
|
api_port = 5380
|
||||||
|
api_bind_addr = "127.0.0.1"
|
||||||
|
data_dir = "/tmp/numa-data"
|
||||||
|
|
||||||
|
[upstream]
|
||||||
|
mode = "forward"
|
||||||
|
address = ["https://dns.quad9.net/dns-query"]
|
||||||
|
timeout_ms = 3000
|
||||||
|
|
||||||
|
[blocking]
|
||||||
|
enabled = true
|
||||||
|
lists = ["https://cdn.jsdelivr.net/gh/hagezi/dns-blocklists@latest/hosts/pro.txt"]
|
||||||
|
CONF
|
||||||
|
|
||||||
|
mkdir -p /tmp/numa-data
|
||||||
|
|
||||||
|
echo "── starting numa ──"
|
||||||
|
RUST_LOG=info ./target/release/numa /tmp/numa.toml > /tmp/numa.log 2>&1 &
|
||||||
|
NUMA_PID=$!
|
||||||
|
|
||||||
|
# Wait up to 120s for blocklist to populate.
|
||||||
|
# Retry delays 2+10+30s = 42s, plus ~4 × ~10s getaddrinfo timeouts under
|
||||||
|
# self-loop = ~82s worst case. 120s leaves headroom.
|
||||||
|
LOADED=0
|
||||||
|
for i in $(seq 1 120); do
|
||||||
|
LOADED=$(curl -sf http://127.0.0.1:5380/blocking/stats 2>/dev/null \
|
||||||
|
| grep -o "\"domains_loaded\":[0-9]*" | cut -d: -f2 || echo 0)
|
||||||
|
[ "${LOADED:-0}" -gt 100 ] && break
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
|
||||||
|
# First cold DoH query — time it.
|
||||||
|
START=$(date +%s%N)
|
||||||
|
dig @127.0.0.1 example.com A +time=15 +tries=1 > /tmp/dig.out 2>&1 || true
|
||||||
|
END=$(date +%s%N)
|
||||||
|
LATENCY_MS=$(( (END - START) / 1000000 ))
|
||||||
|
STATUS=$(grep -oE "status: [A-Z]+" /tmp/dig.out | head -1 || echo "status: TIMEOUT")
|
||||||
|
|
||||||
|
kill $NUMA_PID 2>/dev/null || true
|
||||||
|
wait $NUMA_PID 2>/dev/null || true
|
||||||
|
|
||||||
|
echo
|
||||||
|
echo "=== RESULT ==="
|
||||||
|
echo "domains_loaded=$LOADED"
|
||||||
|
echo "first_query_latency_ms=$LATENCY_MS"
|
||||||
|
echo "first_query_${STATUS// /_}"
|
||||||
|
echo
|
||||||
|
echo "=== numa.log (tail 40) ==="
|
||||||
|
tail -40 /tmp/numa.log
|
||||||
|
echo
|
||||||
|
echo "=== dig.out ==="
|
||||||
|
cat /tmp/dig.out
|
||||||
|
' 2>&1 | tee "$OUT"
|
||||||
|
|
||||||
|
echo
|
||||||
|
echo "── assertions ──"
|
||||||
|
|
||||||
|
LOADED=$(grep '^domains_loaded=' "$OUT" | tail -1 | cut -d= -f2 || echo 0)
|
||||||
|
LATENCY=$(grep '^first_query_latency_ms=' "$OUT" | tail -1 | cut -d= -f2 || echo 999999)
|
||||||
|
STATUS_LINE=$(grep '^first_query_status_' "$OUT" | tail -1 || echo "first_query_status_TIMEOUT")
|
||||||
|
|
||||||
|
if [ "${LOADED:-0}" -gt 100 ]; then
|
||||||
|
pass "blocklist downloaded (domains_loaded=$LOADED)"
|
||||||
|
else
|
||||||
|
fail "blocklist downloaded (got domains_loaded=${LOADED:-0}, expected >100)" \
|
||||||
|
"chicken-and-egg: blocklist HTTPS client has no DNS bootstrap; getaddrinfo loops through numa"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "${LATENCY:-999999}" -lt 2000 ]; then
|
||||||
|
pass "first DoH query under 2s (latency=${LATENCY}ms, $STATUS_LINE)"
|
||||||
|
else
|
||||||
|
fail "first DoH query under 2s (got ${LATENCY}ms, $STATUS_LINE)" \
|
||||||
|
"self-loop on getaddrinfo(upstream_host); plain DoH needs bootstrap-IP symmetry with ODoH"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo
|
||||||
|
if [ "$FAILED" -eq 0 ]; then
|
||||||
|
printf "${GREEN}── self-resolver-loop passed (fix is in place) ──${RESET}\n"
|
||||||
|
exit 0
|
||||||
|
else
|
||||||
|
printf "${RED}── self-resolver-loop failed ($FAILED assertion(s)) — bug #122 reproduced ──${RESET}\n"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
@@ -975,6 +975,50 @@ check "Same-host relay+target rejected at startup" \
|
|||||||
"same host" \
|
"same host" \
|
||||||
"$STARTUP_OUT"
|
"$STARTUP_OUT"
|
||||||
|
|
||||||
|
# Guards ODoH's zero-plain-DNS-leak property: relay_ip / target_ip must
|
||||||
|
# land in the bootstrap resolver's override map so reqwest connects direct
|
||||||
|
# to the configured IPs instead of resolving the hostnames via plain DNS.
|
||||||
|
# RFC 5737 TEST-NET-1 IPs (unroutable).
|
||||||
|
cat > "$CONFIG" << 'CONF'
|
||||||
|
[server]
|
||||||
|
bind_addr = "127.0.0.1:5354"
|
||||||
|
api_port = 5381
|
||||||
|
|
||||||
|
[upstream]
|
||||||
|
mode = "odoh"
|
||||||
|
relay = "https://odoh-relay.example.com/proxy"
|
||||||
|
target = "https://odoh-target.example.org/dns-query"
|
||||||
|
relay_ip = "192.0.2.1"
|
||||||
|
target_ip = "192.0.2.2"
|
||||||
|
|
||||||
|
[cache]
|
||||||
|
max_entries = 10000
|
||||||
|
|
||||||
|
[blocking]
|
||||||
|
enabled = false
|
||||||
|
|
||||||
|
[proxy]
|
||||||
|
enabled = false
|
||||||
|
CONF
|
||||||
|
|
||||||
|
RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 &
|
||||||
|
NUMA_PID=$!
|
||||||
|
for _ in $(seq 1 30); do
|
||||||
|
curl -sf "http://127.0.0.1:$API_PORT/health" >/dev/null 2>&1 && break
|
||||||
|
sleep 0.1
|
||||||
|
done
|
||||||
|
|
||||||
|
OVERRIDE_LOG=$(grep 'bootstrap resolver: host overrides' "$LOG" || true)
|
||||||
|
check "relay_ip wired into bootstrap override map" \
|
||||||
|
"odoh-relay.example.com=192.0.2.1" \
|
||||||
|
"$OVERRIDE_LOG"
|
||||||
|
check "target_ip wired into bootstrap override map" \
|
||||||
|
"odoh-target.example.org=192.0.2.2" \
|
||||||
|
"$OVERRIDE_LOG"
|
||||||
|
|
||||||
|
kill "$NUMA_PID" 2>/dev/null || true
|
||||||
|
wait "$NUMA_PID" 2>/dev/null || true
|
||||||
|
|
||||||
fi # end Suite 8
|
fi # end Suite 8
|
||||||
|
|
||||||
# ---- Suite 9: Numa's own ODoH relay (--relay-mode) ----
|
# ---- Suite 9: Numa's own ODoH relay (--relay-mode) ----
|
||||||
|
|||||||
Reference in New Issue
Block a user