Compare commits
37 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
db6a105f77 | ||
|
|
bf977595b6 | ||
|
|
63a2d26276 | ||
|
|
cfef4f4160 | ||
|
|
38ddb59e00 | ||
|
|
441935af5a | ||
|
|
d090e049ec | ||
|
|
4aa91a5236 | ||
|
|
93f0ea7501 | ||
|
|
f7f35b3424 | ||
|
|
3913d42319 | ||
|
|
e702f5861b | ||
|
|
933643f2c7 | ||
|
|
96cf778bea | ||
|
|
2274151c17 | ||
|
|
c787de1548 | ||
|
|
e6e79273b9 | ||
|
|
3ec3b40830 | ||
|
|
90fa79bc0f | ||
|
|
b8a125b598 | ||
|
|
bc30be94e7 | ||
|
|
26b1cd5917 | ||
|
|
77d6d89f80 | ||
|
|
4fdd05f284 | ||
|
|
2e461ccc0f | ||
|
|
bf84c44346 | ||
|
|
df2062882c | ||
|
|
76dda89078 | ||
|
|
640b64bf7e | ||
|
|
5ba19e04c8 | ||
|
|
c98afafaa1 | ||
|
|
5cba02a6c8 | ||
|
|
46a95d58aa | ||
|
|
51cce0347b | ||
|
|
459395203d | ||
|
|
10469e96bd | ||
|
|
31adc31c9b |
4
.github/workflows/publish-aur.yml
vendored
4
.github/workflows/publish-aur.yml
vendored
@@ -126,6 +126,10 @@ jobs:
|
||||
# ssh://aur@aur.archlinux.org/<package-name>.git
|
||||
git clone ssh://aur@aur.archlinux.org/$AUR_PKGNAME.git aur-repo
|
||||
|
||||
# AUR's git server no longer advertises HEAD's symref, so clone
|
||||
# lands in detached HEAD. Attach to master before committing.
|
||||
git -C aur-repo checkout master
|
||||
|
||||
cp PKGBUILD aur-repo/
|
||||
cd aur-repo
|
||||
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,6 +1,7 @@
|
||||
/target
|
||||
/build-dir
|
||||
CLAUDE.md
|
||||
.claude/
|
||||
docs/
|
||||
site/blog/posts/
|
||||
ios/
|
||||
|
||||
6
Cargo.lock
generated
6
Cargo.lock
generated
@@ -1547,7 +1547,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "numa"
|
||||
version = "0.14.1"
|
||||
version = "0.14.2"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"axum",
|
||||
@@ -2130,9 +2130,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustls-webpki"
|
||||
version = "0.103.12"
|
||||
version = "0.103.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06"
|
||||
checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e"
|
||||
dependencies = [
|
||||
"aws-lc-rs",
|
||||
"ring",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "numa"
|
||||
version = "0.14.1"
|
||||
version = "0.14.2"
|
||||
authors = ["razvandimescu <razvan@dimescu.com>"]
|
||||
edition = "2021"
|
||||
description = "Portable DNS resolver in Rust — .numa local domains, ad blocking, developer overrides, DNS-over-HTTPS"
|
||||
|
||||
@@ -125,6 +125,10 @@ docker run -d --name numa --network host \
|
||||
|
||||
Multi-arch: `linux/amd64` and `linux/arm64`.
|
||||
|
||||
Turnkey compose recipes:
|
||||
- [`packaging/client/`](packaging/client/) — ODoH client mode (anonymous DNS), Numa + starter `numa.toml`.
|
||||
- [`packaging/relay/`](packaging/relay/) — public ODoH relay, Numa + Caddy + ACME.
|
||||
|
||||
## How It Compares
|
||||
|
||||
| | Pi-hole | AdGuard Home | Unbound | Numa |
|
||||
|
||||
@@ -383,7 +383,7 @@ fn run_default(rt: &tokio::runtime::Runtime) {
|
||||
|
||||
/// Library-to-library: Numa forward_query_raw vs Hickory resolver.lookup.
|
||||
fn run_direct(rt: &tokio::runtime::Runtime) {
|
||||
let upstream = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse");
|
||||
let upstream = numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse");
|
||||
let resolver = rt.block_on(build_hickory_resolver());
|
||||
let timeout = Duration::from_secs(10);
|
||||
|
||||
@@ -609,9 +609,11 @@ fn run_hedge_multi(rt: &tokio::runtime::Runtime, iterations: usize) {
|
||||
DOMAINS.len()
|
||||
);
|
||||
|
||||
let primary = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse");
|
||||
let primary_dual = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse");
|
||||
let secondary_dual = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse");
|
||||
let primary = numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse");
|
||||
let primary_dual =
|
||||
numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse");
|
||||
let secondary_dual =
|
||||
numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse");
|
||||
let resolver = rt.block_on(build_hickory_resolver());
|
||||
|
||||
println!("Warming up...");
|
||||
@@ -810,7 +812,7 @@ fn run_diag(rt: &tokio::runtime::Runtime) {
|
||||
fn run_diag_clients(rt: &tokio::runtime::Runtime) {
|
||||
println!("Client diagnostic: reqwest vs Hickory (20 queries to {DOH_UPSTREAM})\n");
|
||||
|
||||
let upstream = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse");
|
||||
let upstream = numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse");
|
||||
let resolver = rt.block_on(build_hickory_resolver());
|
||||
let timeout = Duration::from_secs(10);
|
||||
|
||||
|
||||
29
numa.toml
29
numa.toml
@@ -22,6 +22,7 @@ api_port = 5380
|
||||
# [upstream]
|
||||
# mode = "forward" # "forward" (default) — relay to upstream
|
||||
# # "recursive" — resolve from root hints (no address needed)
|
||||
# # "odoh" — Oblivious DoH (see ODoH block below)
|
||||
# address = "9.9.9.9" # single upstream (plain UDP)
|
||||
# address = ["192.168.1.1", "9.9.9.9:5353"] # multiple upstreams — SRTT picks fastest
|
||||
# address = "https://dns.quad9.net/dns-query" # DNS-over-HTTPS (encrypted)
|
||||
@@ -29,11 +30,29 @@ api_port = 5380
|
||||
# fallback = ["8.8.8.8", "1.1.1.1"] # tried only when all primaries fail
|
||||
# port = 53 # default port for addresses without :port
|
||||
# timeout_ms = 3000
|
||||
# hedge_ms = 10 # request hedging delay (ms). After this delay
|
||||
# # without a response, fires a parallel request
|
||||
# # to the same upstream. Rescues packet loss (UDP),
|
||||
# # dispatch spikes (DoH), TLS stalls (DoT).
|
||||
# # Set to 0 to disable. Default: 10
|
||||
# hedge_ms = 0 # request hedging delay (ms). Default: 0 (off).
|
||||
# # Set to e.g. 10 to fire a parallel upstream
|
||||
# # request after 10ms of silence — rescues packet
|
||||
# # loss (UDP), dispatch spikes (DoH), TLS stalls
|
||||
# # (DoT). Doubles the upstream query count, so
|
||||
# # leave off for quota'd providers (NextDNS,
|
||||
# # Control D).
|
||||
|
||||
# ODoH (Oblivious DNS-over-HTTPS, RFC 9230). The relay sees your IP but
|
||||
# not the question; the target sees the question but not your IP. Numa
|
||||
# refuses same-operator relay+target configs by default (eTLD+1 check).
|
||||
# [upstream]
|
||||
# mode = "odoh"
|
||||
# relay = "https://odoh-relay.numa.rs/relay"
|
||||
# target = "https://odoh.cloudflare-dns.com/dns-query"
|
||||
# strict = true # default: refuse to downgrade to `fallback`
|
||||
# # on relay failure. Set false to allow a
|
||||
# # non-oblivious fallback path.
|
||||
# relay_ip = "178.104.229.30" # optional: pin IPs so numa doesn't leak the
|
||||
# target_ip = "104.16.249.249" # relay/target hostnames via the bootstrap
|
||||
# # resolver on cold boot when numa is its
|
||||
# # own system DNS. See
|
||||
# # recipes/odoh-upstream.md.
|
||||
# root_hints = [ # only used in recursive mode
|
||||
# "198.41.0.4", # a.root-servers.net (Verisign)
|
||||
# "199.9.14.201", # b.root-servers.net (USC-ISI)
|
||||
|
||||
72
packaging/client/README.md
Normal file
72
packaging/client/README.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# Numa ODoH Client — Docker deploy
|
||||
|
||||
Single-container deploy that runs Numa as an ODoH (RFC 9230) client: every
|
||||
DNS query routes through an independent relay + target so neither operator
|
||||
sees both your IP and your question. See the [ODoH upstream recipe][odoh]
|
||||
for the protocol details and the bootstrap-pinning trade-offs.
|
||||
|
||||
[odoh]: ../../recipes/odoh-upstream.md
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Docker + Docker Compose v2.
|
||||
- Port 53 (UDP+TCP) free on the host — Numa listens there for DNS
|
||||
clients on your LAN.
|
||||
|
||||
## Configure
|
||||
|
||||
The shipped `numa.toml` points at Numa's own public relay
|
||||
(`odoh-relay.numa.rs`) paired with Cloudflare's ODoH target
|
||||
(`odoh.cloudflare-dns.com`). That's two independent operators with
|
||||
distinct eTLD+1s — the default configuration passes Numa's same-operator
|
||||
check and works out of the box.
|
||||
|
||||
To use a different relay or target, edit `numa.toml` and adjust the URLs.
|
||||
The `relay` and `target` must belong to distinct operators (different eTLD+1s) or Numa
|
||||
refuses to start.
|
||||
|
||||
## Deploy
|
||||
|
||||
```sh
|
||||
docker compose up -d
|
||||
docker compose logs -f numa # watch startup
|
||||
```
|
||||
|
||||
The first query fires the bootstrap resolver + ODoH config fetch;
|
||||
subsequent queries reuse the warm HTTP/2 connection.
|
||||
|
||||
## Point your devices at it
|
||||
|
||||
Set each device's DNS server to the IP of the Docker host. For a LAN-wide
|
||||
rollout, set the DNS server in your router's DHCP config so every device
|
||||
picks it up automatically.
|
||||
|
||||
Verify a query landed on the ODoH path:
|
||||
|
||||
```sh
|
||||
dig @<host-ip> example.com
|
||||
curl http://<host-ip>:5380/stats | jq '.upstream_transport.odoh'
|
||||
```
|
||||
|
||||
`upstream_transport.odoh` should increment on each query.
|
||||
|
||||
## What this does NOT buy you
|
||||
|
||||
ODoH protects the *path*, not the content:
|
||||
|
||||
- **The target (Cloudflare here) still sees the question.** It just
|
||||
doesn't know it's you asking. If Cloudflare logs every ODoH query, the
|
||||
query is still visible — it's simply unattributed.
|
||||
- **The relay is a trusted party for availability.** A malicious relay
|
||||
can drop or delay queries; it just can't read them.
|
||||
- **Traffic analysis defeats small relays.** If you're the only client
|
||||
talking to a relay, timing alone re-identifies you. Shared, busy relays
|
||||
give better anonymity sets.
|
||||
|
||||
See the [ODoH upstream recipe][odoh] for more.
|
||||
|
||||
## Relay operator?
|
||||
|
||||
If you'd rather run your own relay (same binary, different mode), see
|
||||
[`../relay/`](../relay/) — that package spins up a public-facing relay
|
||||
with Caddy + ACME in front of it.
|
||||
15
packaging/client/docker-compose.yml
Normal file
15
packaging/client/docker-compose.yml
Normal file
@@ -0,0 +1,15 @@
|
||||
services:
|
||||
numa:
|
||||
image: ghcr.io/razvandimescu/numa:latest
|
||||
command: ["/etc/numa/numa.toml"]
|
||||
ports:
|
||||
- "53:53/udp"
|
||||
- "53:53/tcp"
|
||||
- "5380:5380/tcp" # dashboard + REST API
|
||||
volumes:
|
||||
- ./numa.toml:/etc/numa/numa.toml:ro
|
||||
- numa_data:/var/lib/numa
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
numa_data:
|
||||
23
packaging/client/numa.toml
Normal file
23
packaging/client/numa.toml
Normal file
@@ -0,0 +1,23 @@
|
||||
# Numa — ODoH client mode (docker-compose starter).
|
||||
# Sends every DNS query through an independent relay + target pair so
|
||||
# neither operator sees both your IP and your question. See
|
||||
# recipes/odoh-upstream.md for the protocol details and
|
||||
# packaging/client/README.md for deploy notes.
|
||||
|
||||
[server]
|
||||
bind_addr = "0.0.0.0:53"
|
||||
api_bind_addr = "0.0.0.0"
|
||||
data_dir = "/var/lib/numa"
|
||||
|
||||
[upstream]
|
||||
mode = "odoh"
|
||||
# Numa's own relay (Hetzner, systemd + Caddy). Swap to any other public
|
||||
# ODoH relay if you'd rather not depend on a single operator; the protocol
|
||||
# tolerates it, and Numa refuses same-operator relay+target by default.
|
||||
relay = "https://odoh-relay.numa.rs/relay"
|
||||
target = "https://odoh.cloudflare-dns.com/dns-query"
|
||||
# strict = true (default). Relay failure → SERVFAIL, never silent downgrade.
|
||||
|
||||
[blocking]
|
||||
enabled = true
|
||||
# Default blocklist (Hagezi Pro). Edit the `lists` array to taste.
|
||||
@@ -39,10 +39,3 @@ curl https://<hostname>/health
|
||||
|
||||
Then point any ODoH client at `https://<hostname>/relay` and watch the
|
||||
counters tick.
|
||||
|
||||
## Listing on the public ecosystem
|
||||
|
||||
DNSCrypt's [v3/odoh-relays.md](https://github.com/DNSCrypt/dnscrypt-resolvers/blob/master/v3/odoh-relays.md)
|
||||
is the canonical list. The pruned 2025-09-16 commit shows one public ODoH
|
||||
relay survived the cull — running this compose file doubles global supply.
|
||||
Open a PR there once your relay has been up for ~24 hours.
|
||||
|
||||
11
recipes/README.md
Normal file
11
recipes/README.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# Recipes
|
||||
|
||||
Scenario-driven configs for common Numa deployments. Each recipe is self-contained: copy the snippet, adjust the marked fields, reload.
|
||||
|
||||
## Transport / encryption
|
||||
|
||||
- [DoH on the LAN](doh-on-lan.md) — expose Numa's built-in DNS-over-HTTPS to local clients.
|
||||
- [dnsdist in front of Numa](dnsdist-front.md) — terminate public TLS externally, keep Numa on loopback.
|
||||
- [ODoH upstream with bootstrap pinning](odoh-upstream.md) — oblivious DNS client mode without leaking the relay/target hostnames.
|
||||
|
||||
Missing a scenario? Open an issue or PR — these are plain Markdown with no build step.
|
||||
64
recipes/dnsdist-front.md
Normal file
64
recipes/dnsdist-front.md
Normal file
@@ -0,0 +1,64 @@
|
||||
# dnsdist in front of Numa
|
||||
|
||||
For public DoH with a real (ACME-signed) cert, terminate TLS outside Numa and forward plain DNS (or loopback-only DoH) to the resolver. Cert renewal, rate-limiting, and load-balancing live in the front-end; Numa stays focused on resolution.
|
||||
|
||||
## When to use this
|
||||
|
||||
- Public hostname (`dns.example.com`) with a Let's Encrypt or internal PKI cert.
|
||||
- You want a dedicated front-end for DoH/DoT/DoQ while Numa stays loopback-bound.
|
||||
- You plan to run multiple Numa instances behind one endpoint.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
public 443/DoH ┐
|
||||
public 853/DoT ├─► dnsdist ─► 127.0.0.1:53 (Numa UDP/TCP)
|
||||
public 853/DoQ ┘
|
||||
```
|
||||
|
||||
## dnsdist config
|
||||
|
||||
```lua
|
||||
-- /etc/dnsdist/dnsdist.conf
|
||||
|
||||
newServer({address="127.0.0.1:53", name="numa", checkType="A", checkName="numa.rs."})
|
||||
|
||||
addDOHLocal(
|
||||
"0.0.0.0:443",
|
||||
"/etc/letsencrypt/live/dns.example.com/fullchain.pem",
|
||||
"/etc/letsencrypt/live/dns.example.com/privkey.pem",
|
||||
"/dns-query",
|
||||
{doTCP=true, reusePort=true}
|
||||
)
|
||||
|
||||
addTLSLocal(
|
||||
"0.0.0.0:853",
|
||||
"/etc/letsencrypt/live/dns.example.com/fullchain.pem",
|
||||
"/etc/letsencrypt/live/dns.example.com/privkey.pem"
|
||||
)
|
||||
|
||||
addAction(AllRule(), PoolAction("", false))
|
||||
```
|
||||
|
||||
## Numa config
|
||||
|
||||
```toml
|
||||
[proxy]
|
||||
enabled = true # keep if you still use *.numa service routing
|
||||
bind_addr = "127.0.0.1" # stays default
|
||||
```
|
||||
|
||||
No changes to `[server]` — Numa keeps serving plain DNS on UDP/TCP 53, which dnsdist forwards.
|
||||
|
||||
## Caveat: client IPs
|
||||
|
||||
Without PROXY protocol support in Numa, the query log shows the front-end's IP on every query, not the real client. dnsdist can emit PROXY v2 (`useProxyProtocol=true` on `newServer`), but Numa doesn't yet parse it — tracked in the wish-list under #143. Until then, accept the blind spot or correlate against dnsdist's own logs.
|
||||
|
||||
## Verify
|
||||
|
||||
```bash
|
||||
kdig +https @dns.example.com example.com
|
||||
kdig +tls @dns.example.com example.com
|
||||
```
|
||||
|
||||
Both should return clean answers. Numa's `/queries` API should show the request landing, sourced from the front-end IP.
|
||||
61
recipes/doh-on-lan.md
Normal file
61
recipes/doh-on-lan.md
Normal file
@@ -0,0 +1,61 @@
|
||||
# DoH on the LAN
|
||||
|
||||
Numa ships an RFC 8484 DoH endpoint (`POST /dns-query`) on the `[proxy]` HTTPS listener. By default it binds `127.0.0.1:443` with a self-signed cert — invisible to anything off the box. Three changes make it reachable from the LAN.
|
||||
|
||||
## When to use this
|
||||
|
||||
- Your phone/laptop is on the same network as Numa and you want encrypted DNS without a cloud resolver.
|
||||
- You're OK installing Numa's self-signed CA on every client (one-time, via `/ca.pem` + the mobileconfig flow).
|
||||
|
||||
For a publicly-trusted cert, see [dnsdist in front of Numa](dnsdist-front.md) instead.
|
||||
|
||||
## Minimal config
|
||||
|
||||
```toml
|
||||
[proxy]
|
||||
enabled = true # default
|
||||
bind_addr = "0.0.0.0" # was 127.0.0.1 — expose to LAN
|
||||
tls_port = 443 # default; DoH is served here
|
||||
tld = "numa" # default — self-resolving, see below
|
||||
```
|
||||
|
||||
`tld` is the DoH gate: Numa accepts the DoH request only when the `Host` header is loopback or equals (or is a subdomain of) `tld`. Clients therefore dial `https://numa/dns-query`.
|
||||
|
||||
With the default `tld = "numa"`, there's no DNS bootstrap to configure: Numa already resolves `numa` and `*.numa` to its own LAN IP for remote clients (that's how the `*.numa` service-proxy feature works). Any client that uses Numa as its resolver will resolve `numa` correctly on first try.
|
||||
|
||||
If you'd rather use a hostname that resolves via normal DNS (e.g. you want DoH-only clients that never talk plain DNS to Numa), set `tld = "dns.example.com"` and add a matching A record in whichever DNS your clients consult before reaching Numa.
|
||||
|
||||
## Trust the CA on each client
|
||||
|
||||
Numa generates a self-signed CA at startup. Fetch it once, import it wherever you'll run the DoH client:
|
||||
|
||||
```bash
|
||||
curl -o numa-ca.pem http://<numa-ip>:5380/ca.pem
|
||||
```
|
||||
|
||||
- **macOS** — `sudo security add-trusted-cert -d -r trustRoot -k /Library/Keychains/System.keychain numa-ca.pem`
|
||||
- **iOS** — install the mobileconfig from the API (same CA, signed profile). After install, enable full trust for the CA under *Settings → General → About → Certificate Trust Settings*.
|
||||
- **Linux** — drop into `/usr/local/share/ca-certificates/` and run `sudo update-ca-certificates`.
|
||||
- **Android** — requires the user-installed CA path; browsers may still refuse it for DoH. Consider the [dnsdist front](dnsdist-front.md) route instead.
|
||||
|
||||
## Verify
|
||||
|
||||
```bash
|
||||
kdig +https @numa example.com
|
||||
```
|
||||
|
||||
Without `+https` kdig uses plain DNS. With `+https` the same answers should flow over port 443.
|
||||
|
||||
Raw check:
|
||||
|
||||
```bash
|
||||
curl -H 'accept: application/dns-message' -H 'content-type: application/dns-message' \
|
||||
--data-binary @query.bin \
|
||||
https://numa/dns-query
|
||||
```
|
||||
|
||||
## Gotchas
|
||||
|
||||
- Port 443 is privileged on Linux/macOS. Run Numa via the provided service units, or grant `CAP_NET_BIND_SERVICE` (`sudo setcap 'cap_net_bind_service=+ep' /path/to/numa`).
|
||||
- Non-matching `Host` header → HTTP 404 from the proxy's fallback handler. Double-check `tld`.
|
||||
- ChromeOS enrollment rejects user-installed CAs for some flows — known pain point, see issue #136.
|
||||
59
recipes/odoh-upstream.md
Normal file
59
recipes/odoh-upstream.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# ODoH upstream with bootstrap pinning
|
||||
|
||||
Numa can run as an Oblivious DoH (RFC 9230) client: the relay sees your IP but not the question, the target sees the question but not your IP. Neither party alone can re-identify a query. This recipe covers the minimal config and the bootstrap leak that `relay_ip` / `target_ip` close.
|
||||
|
||||
## When to use this
|
||||
|
||||
- You want split-trust encrypted DNS without a single provider seeing both who you are and what you asked.
|
||||
- Numa is your system resolver (so there's no "other" DNS to ask).
|
||||
|
||||
## Minimal config
|
||||
|
||||
```toml
|
||||
[upstream]
|
||||
mode = "odoh"
|
||||
relay = "https://odoh-relay.numa.rs/relay"
|
||||
target = "https://odoh.cloudflare-dns.com/dns-query"
|
||||
strict = true # refuse to fall back to a non-oblivious path on relay failure
|
||||
```
|
||||
|
||||
`strict = true` means a relay-level HTTPS failure returns SERVFAIL instead of silently downgrading. Set it to `false` and configure `[upstream].fallback` if you'd rather keep resolving (at the cost of the oblivious property).
|
||||
|
||||
## The bootstrap leak
|
||||
|
||||
When Numa is the system resolver and needs to reach the relay/target, *something* has to translate `odoh-relay.numa.rs` → IP. If Numa asks itself, you deadlock. If Numa asks a bootstrap resolver (1.1.1.1, 9.9.9.9), that resolver learns which ODoH endpoint you use in cleartext — it can't see your questions, but it sees the destination. That's the leak ODoH was supposed to close.
|
||||
|
||||
`relay_ip` and `target_ip` tell Numa the IPs directly, so it never asks anyone:
|
||||
|
||||
```toml
|
||||
[upstream]
|
||||
mode = "odoh"
|
||||
relay = "https://odoh-relay.numa.rs/relay"
|
||||
target = "https://odoh.cloudflare-dns.com/dns-query"
|
||||
relay_ip = "178.104.229.30" # pin the relay — no hostname lookup
|
||||
target_ip = "104.16.249.249" # pin the target — no hostname lookup
|
||||
```
|
||||
|
||||
Numa still validates TLS against the hostnames in `relay` / `target`, so a hijacked IP can't masquerade — pinning skips only the DNS step.
|
||||
|
||||
## Finding current IPs
|
||||
|
||||
```bash
|
||||
dig +short odoh-relay.numa.rs
|
||||
dig +short odoh.cloudflare-dns.com
|
||||
```
|
||||
|
||||
Re-pin whenever an operator rotates its IPs. The community-maintained list at <https://github.com/DNSCrypt/dnscrypt-resolvers/blob/master/v3/odoh-relays.md> is a useful cross-reference.
|
||||
|
||||
## Verify
|
||||
|
||||
```bash
|
||||
kdig @127.0.0.1 example.com
|
||||
```
|
||||
|
||||
Numa's `/queries` API and startup banner should label the upstream as `odoh://`. Look for `ODoH relay returned ...` errors in the logs if routing fails.
|
||||
|
||||
## Known gotchas
|
||||
|
||||
- **Same-operator refused.** Numa's eTLD+1 check blocks configs where the relay and target belong to the same operator (pointless — same party sees both sides). Override only when testing.
|
||||
- **Single relay.** Current config accepts one relay and one target. Multi-entry rotation/failover is tracked in #140.
|
||||
@@ -1,14 +1,41 @@
|
||||
#!/usr/bin/env bash
|
||||
# Dev server for site/: regenerates drafts on each MD change, reloads the
|
||||
# browser on each rendered HTML/CSS/JS change. Port is the first numeric arg
|
||||
# (default 9000); any other args are ignored for back-compat.
|
||||
#
|
||||
# First run downloads chokidar-cli + browser-sync into the npm cache — slow
|
||||
# once, instant after that.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
PORT="${1:-9000}"
|
||||
PORT=9000
|
||||
for arg in "$@"; do
|
||||
if [[ "$arg" =~ ^[0-9]+$ ]]; then
|
||||
PORT="$arg"
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
if [[ "${1:-}" == "--drafts" ]] || [[ "${2:-}" == "--drafts" ]]; then
|
||||
PORT="${PORT//--drafts/9000}" # default port if --drafts was first arg
|
||||
make blog-drafts
|
||||
else
|
||||
make blog
|
||||
fi
|
||||
command -v npx >/dev/null || { echo "npx not found. Install Node.js: https://nodejs.org" >&2; exit 1; }
|
||||
command -v pandoc >/dev/null || { echo "pandoc not found (required by 'make blog-drafts')." >&2; exit 1; }
|
||||
|
||||
echo "Serving site at http://localhost:$PORT"
|
||||
cd site && python3 -m http.server "$PORT"
|
||||
# Initial render so the first page load has everything.
|
||||
make blog-drafts
|
||||
|
||||
echo "Serving site at http://localhost:$PORT (drafts included, live reload)"
|
||||
|
||||
# Kill child processes on exit so re-runs don't leave orphaned watchers.
|
||||
trap 'kill $(jobs -p) 2>/dev/null' EXIT INT TERM
|
||||
|
||||
# Regenerate HTML when MD sources or the blog template change.
|
||||
npx --yes chokidar-cli \
|
||||
"drafts/*.md" "blog/*.md" "site/blog-template.html" \
|
||||
-c "make blog-drafts" &
|
||||
|
||||
# Serve + reload on rendered-asset changes.
|
||||
cd site && exec npx --yes browser-sync start \
|
||||
--server . \
|
||||
--port "$PORT" \
|
||||
--files "**/*.html,**/*.css,**/*.js" \
|
||||
--no-open \
|
||||
--no-notify
|
||||
|
||||
14
src/api.rs
14
src/api.rs
@@ -83,8 +83,13 @@ pub fn router(ctx: Arc<ServerCtx>) -> Router {
|
||||
}
|
||||
|
||||
async fn dashboard() -> impl IntoResponse {
|
||||
// Revalidate each load so browsers don't keep serving a stale
|
||||
// dashboard across numa upgrades.
|
||||
(
|
||||
[(header::CONTENT_TYPE, "text/html; charset=utf-8")],
|
||||
[
|
||||
(header::CONTENT_TYPE, "text/html; charset=utf-8"),
|
||||
(header::CACHE_CONTROL, "no-cache"),
|
||||
],
|
||||
DASHBOARD_HTML,
|
||||
)
|
||||
}
|
||||
@@ -1244,6 +1249,13 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), 200);
|
||||
assert_eq!(
|
||||
resp.headers()
|
||||
.get(header::CACHE_CONTROL)
|
||||
.map(|v| v.to_str().unwrap()),
|
||||
Some("no-cache"),
|
||||
"dashboard must revalidate to avoid stale HTML across upgrades"
|
||||
);
|
||||
let body = axum::body::to_bytes(resp.into_body(), 100000)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -357,12 +357,17 @@ mod tests {
|
||||
|
||||
const RETRY_DELAYS_SECS: &[u64] = &[2, 10, 30];
|
||||
|
||||
pub async fn download_blocklists(lists: &[String]) -> Vec<(String, String)> {
|
||||
let client = reqwest::Client::builder()
|
||||
pub async fn download_blocklists(
|
||||
lists: &[String],
|
||||
resolver: Option<std::sync::Arc<crate::bootstrap_resolver::NumaResolver>>,
|
||||
) -> Vec<(String, String)> {
|
||||
let mut builder = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(30))
|
||||
.gzip(true)
|
||||
.build()
|
||||
.unwrap_or_default();
|
||||
.gzip(true);
|
||||
if let Some(r) = resolver {
|
||||
builder = builder.dns_resolver(r);
|
||||
}
|
||||
let client = builder.build().unwrap_or_default();
|
||||
|
||||
let fetches = lists.iter().map(|url| {
|
||||
let client = &client;
|
||||
|
||||
234
src/bootstrap_resolver.rs
Normal file
234
src/bootstrap_resolver.rs
Normal file
@@ -0,0 +1,234 @@
|
||||
//! `reqwest` DNS resolver used by numa-originated HTTPS (DoH upstream, ODoH
|
||||
//! relay/target, blocklist CDN). When numa is its own system resolver
|
||||
//! (`/etc/resolv.conf → 127.0.0.1`, HAOS add-on, Pi-hole-style container),
|
||||
//! the default `getaddrinfo` path loops back through numa before numa can
|
||||
//! answer — a chicken-and-egg that deadlocks cold boot. See issue #122.
|
||||
//!
|
||||
//! Resolution order per hostname:
|
||||
//! 1. Per-hostname overrides (e.g. ODoH `relay_ip` / `target_ip`) → return
|
||||
//! immediately, no DNS query. Preserves ODoH's "zero plain-DNS leak"
|
||||
//! property for configured endpoints.
|
||||
//! 2. Otherwise, query A + AAAA in parallel via UDP to IP-literal bootstrap
|
||||
//! servers, with TCP fallback when the UDP attempt fails (for networks that block
|
||||
//! outbound UDP:53 — see memory: `project_network_udp_hostile.md`).
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||
use std::time::Duration;
|
||||
|
||||
use log::{debug, info, warn};
|
||||
use reqwest::dns::{Addrs, Name, Resolve, Resolving};
|
||||
|
||||
use crate::forward::{forward_tcp, forward_udp};
|
||||
use crate::packet::DnsPacket;
|
||||
use crate::question::QueryType;
|
||||
use crate::record::DnsRecord;
|
||||
|
||||
/// Time allowed for the UDP attempt of a bootstrap query before the TCP
/// retry is tried.
const UDP_TIMEOUT: Duration = Duration::from_millis(800);

/// Time allowed for the TCP retry that follows a failed UDP attempt.
const TCP_TIMEOUT: Duration = Duration::from_millis(1_500);

/// Bootstrap servers used when `upstream.fallback` contributes no usable
/// IP literal: 9.9.9.9 and 1.1.1.1, both on port 53.
const DEFAULT_BOOTSTRAP: &[SocketAddr] = &[
    SocketAddr::new(IpAddr::V4(Ipv4Addr::new(9, 9, 9, 9)), 53),
    SocketAddr::new(IpAddr::V4(Ipv4Addr::new(1, 1, 1, 1)), 53),
];
|
||||
|
||||
pub struct NumaResolver {
|
||||
bootstrap: Vec<SocketAddr>,
|
||||
overrides: BTreeMap<String, Vec<IpAddr>>,
|
||||
}
|
||||
|
||||
impl NumaResolver {
|
||||
/// Build a resolver from the configured `upstream.fallback` list and any
|
||||
/// per-hostname overrides (e.g. ODoH's `relay_ip`/`target_ip`).
|
||||
///
|
||||
/// `fallback` entries are filtered to IP literals only — hostnames would
|
||||
/// re-introduce the self-loop inside the resolver itself. Empty or
|
||||
/// unusable fallback yields the hardcoded default (Quad9 + Cloudflare).
|
||||
pub fn new(fallback: &[String], overrides: BTreeMap<String, Vec<IpAddr>>) -> Self {
|
||||
let mut bootstrap: Vec<SocketAddr> = Vec::with_capacity(fallback.len());
|
||||
for entry in fallback {
|
||||
match crate::forward::parse_upstream_addr(entry, 53) {
|
||||
Ok(addr) => bootstrap.push(addr),
|
||||
Err(_) => {
|
||||
warn!(
|
||||
"bootstrap_resolver: skipping non-IP fallback '{}' \
|
||||
(hostnames would re-enter the self-loop)",
|
||||
entry
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
let source = if bootstrap.is_empty() {
|
||||
bootstrap = DEFAULT_BOOTSTRAP.to_vec();
|
||||
"default (no IP-literal in upstream.fallback)"
|
||||
} else {
|
||||
"upstream.fallback"
|
||||
};
|
||||
let ips: Vec<String> = bootstrap.iter().map(|s| s.ip().to_string()).collect();
|
||||
info!(
|
||||
"bootstrap resolver: {} via {} — used for numa-originated HTTPS hostname resolution",
|
||||
ips.join(", "),
|
||||
source
|
||||
);
|
||||
if !overrides.is_empty() {
|
||||
let pairs: Vec<String> = overrides
|
||||
.iter()
|
||||
.flat_map(|(host, addrs)| addrs.iter().map(move |ip| format!("{}={}", host, ip)))
|
||||
.collect();
|
||||
info!(
|
||||
"bootstrap resolver: host overrides (skip DNS, connect direct): {}",
|
||||
pairs.join(", ")
|
||||
);
|
||||
}
|
||||
Self {
|
||||
bootstrap,
|
||||
overrides,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn bootstrap(&self) -> &[SocketAddr] {
|
||||
&self.bootstrap
|
||||
}
|
||||
}
|
||||
|
||||
impl Resolve for NumaResolver {
|
||||
fn resolve(&self, name: Name) -> Resolving {
|
||||
let hostname = name.as_str().to_string();
|
||||
|
||||
if let Some(ips) = self.overrides.get(&hostname) {
|
||||
let addrs: Vec<SocketAddr> = ips.iter().map(|ip| SocketAddr::new(*ip, 0)).collect();
|
||||
debug!(
|
||||
"bootstrap_resolver: override hit for {} → {:?}",
|
||||
hostname, ips
|
||||
);
|
||||
return Box::pin(async move { Ok(Box::new(addrs.into_iter()) as Addrs) });
|
||||
}
|
||||
|
||||
let bootstrap = self.bootstrap.clone();
|
||||
Box::pin(async move {
|
||||
let addrs = resolve_via_bootstrap(&hostname, &bootstrap).await?;
|
||||
debug!(
|
||||
"bootstrap_resolver: resolved {} → {} addr(s)",
|
||||
hostname,
|
||||
addrs.len()
|
||||
);
|
||||
Ok(Box::new(addrs.into_iter()) as Addrs)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
async fn resolve_via_bootstrap(
|
||||
hostname: &str,
|
||||
bootstrap: &[SocketAddr],
|
||||
) -> Result<Vec<SocketAddr>, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let mut last_err: Option<String> = None;
|
||||
for &server in bootstrap {
|
||||
let q_a = DnsPacket::query(0xBEEF, hostname, QueryType::A);
|
||||
let q_aaaa = DnsPacket::query(0xBEF0, hostname, QueryType::AAAA);
|
||||
let (a_res, aaaa_res) = tokio::join!(
|
||||
query_with_tcp_fallback(&q_a, server),
|
||||
query_with_tcp_fallback(&q_aaaa, server),
|
||||
);
|
||||
|
||||
let mut out = Vec::new();
|
||||
match a_res {
|
||||
Ok(pkt) => extract_addrs(&pkt, &mut out),
|
||||
Err(e) => last_err = Some(format!("{} A failed: {}", server, e)),
|
||||
}
|
||||
match aaaa_res {
|
||||
Ok(pkt) => extract_addrs(&pkt, &mut out),
|
||||
// AAAA is optional — many hosts return NXDOMAIN/empty. Don't
|
||||
// treat as the primary error if A succeeded.
|
||||
Err(e) => debug!("bootstrap {} AAAA for {} failed: {}", server, hostname, e),
|
||||
}
|
||||
if !out.is_empty() {
|
||||
return Ok(out);
|
||||
}
|
||||
}
|
||||
Err(last_err
|
||||
.unwrap_or_else(|| "no bootstrap servers reachable".into())
|
||||
.into())
|
||||
}
|
||||
|
||||
async fn query_with_tcp_fallback(
|
||||
query: &DnsPacket,
|
||||
server: SocketAddr,
|
||||
) -> crate::Result<DnsPacket> {
|
||||
match forward_udp(query, server, UDP_TIMEOUT).await {
|
||||
Ok(pkt) => Ok(pkt),
|
||||
Err(e) => {
|
||||
debug!(
|
||||
"bootstrap UDP {} failed ({}), falling back to TCP",
|
||||
server, e
|
||||
);
|
||||
forward_tcp(query, server, TCP_TIMEOUT).await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_addrs(pkt: &DnsPacket, out: &mut Vec<SocketAddr>) {
|
||||
for r in &pkt.answers {
|
||||
match r {
|
||||
DnsRecord::A { addr, .. } => out.push(SocketAddr::new(IpAddr::V4(*addr), 0)),
|
||||
DnsRecord::AAAA { addr, .. } => out.push(SocketAddr::new(IpAddr::V6(*addr), 0)),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::net::{Ipv4Addr, Ipv6Addr};

    /// Render the resolver's bootstrap servers as `ip:port` strings.
    fn bootstrap_strings(resolver: &NumaResolver) -> Vec<String> {
        resolver
            .bootstrap()
            .iter()
            .map(|addr| addr.to_string())
            .collect()
    }

    /// Drive `resolve` to completion on the current thread and collect the
    /// addresses it yields. Panics if the name fails to parse or resolve.
    fn resolve_blocking(resolver: &NumaResolver, host: &str) -> Vec<SocketAddr> {
        let name: Name = host.parse().unwrap();
        futures::executor::block_on(resolver.resolve(name))
            .unwrap()
            .collect()
    }

    #[test]
    fn empty_fallback_uses_defaults() {
        let resolver = NumaResolver::new(&[], BTreeMap::new());
        assert_eq!(bootstrap_strings(&resolver), vec!["9.9.9.9:53", "1.1.1.1:53"]);
    }

    #[test]
    fn fallback_accepts_ip_literals_only() {
        // The hostname entry must be dropped; a bare IP gets the default port
        // and an explicit port is kept.
        let fallback = vec![
            "9.9.9.9".to_string(),
            "dns.quad9.net".to_string(),
            "1.1.1.1:5353".to_string(),
        ];
        let resolver = NumaResolver::new(&fallback, BTreeMap::new());
        assert_eq!(
            bootstrap_strings(&resolver),
            vec!["9.9.9.9:53", "1.1.1.1:5353"]
        );
    }

    #[test]
    fn override_returns_configured_ips_without_dns() {
        let pinned = IpAddr::V4(Ipv4Addr::new(178, 104, 229, 30));
        let overrides = BTreeMap::from([("odoh-relay.example".to_string(), vec![pinned])]);
        let resolver = NumaResolver::new(&[], overrides);

        let addrs = resolve_blocking(&resolver, "odoh-relay.example");
        assert_eq!(addrs.len(), 1);
        assert_eq!(addrs[0].ip(), pinned);
    }

    #[test]
    fn override_supports_multiple_ips_including_ipv6() {
        let overrides = BTreeMap::from([(
            "dual.example".to_string(),
            vec![
                IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)),
                IpAddr::V6(Ipv6Addr::LOCALHOST),
            ],
        )]);
        let resolver = NumaResolver::new(&[], overrides);

        let addrs = resolve_blocking(&resolver, "dual.example");
        assert_eq!(addrs.len(), 2);
    }
}
|
||||
@@ -56,7 +56,7 @@ impl ForwardingRuleConfig {
|
||||
}
|
||||
let mut primary = Vec::with_capacity(self.upstream.len());
|
||||
for s in &self.upstream {
|
||||
let u = crate::forward::parse_upstream(s, 53)
|
||||
let u = crate::forward::parse_upstream(s, 53, None)
|
||||
.map_err(|e| format!("forwarding rule for upstream '{}': {}", s, e))?;
|
||||
primary.push(u);
|
||||
}
|
||||
@@ -241,6 +241,26 @@ pub struct OdohUpstream {
|
||||
pub target_bootstrap: Option<SocketAddr>,
|
||||
}
|
||||
|
||||
impl OdohUpstream {
|
||||
/// Per-host IP overrides for the bootstrap resolver, lifted from
|
||||
/// `relay_ip`/`target_ip`. Keeps the "zero plain-DNS leak for ODoH
|
||||
/// endpoints" property when numa is its own system resolver.
|
||||
pub fn host_ip_overrides(&self) -> std::collections::BTreeMap<String, Vec<std::net::IpAddr>> {
|
||||
let mut out = std::collections::BTreeMap::new();
|
||||
if let Some(addr) = self.relay_bootstrap {
|
||||
out.entry(self.relay_host.clone())
|
||||
.or_insert_with(Vec::new)
|
||||
.push(addr.ip());
|
||||
}
|
||||
if let Some(addr) = self.target_bootstrap {
|
||||
out.entry(self.target_host.clone())
|
||||
.or_insert_with(Vec::new)
|
||||
.push(addr.ip());
|
||||
}
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
impl UpstreamConfig {
|
||||
/// Validate and extract ODoH-specific fields. Called during `load_config`
|
||||
/// so misconfigured ODoH fails fast at startup, the same care we take
|
||||
@@ -431,8 +451,12 @@ fn default_upstream_port() -> u16 {
|
||||
/// Default value for the timeout setting, in milliseconds.
fn default_timeout_ms() -> u64 {
    const DEFAULT_TIMEOUT_MS: u64 = 5_000;
    DEFAULT_TIMEOUT_MS
}
|
||||
/// Off by default: hedging fires a second upstream query, which silently
|
||||
/// doubles the count at the provider — hurts quota'd DNS (NextDNS, Control
|
||||
/// D). Opt in with `hedge_ms = 10` for tail-latency rescue on flaky nets
|
||||
/// or handshake-slow DoT.
|
||||
fn default_hedge_ms() -> u64 {
|
||||
10
|
||||
0
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
|
||||
307
src/ctx.rs
307
src/ctx.rs
@@ -209,106 +209,68 @@ pub async fn resolve_query(
|
||||
{
|
||||
// Conditional forwarding takes priority over recursive mode
|
||||
// (e.g. Tailscale .ts.net, VPC private zones)
|
||||
upstream_transport = pool.preferred().map(|u| u.transport());
|
||||
match forward_with_failover_raw(
|
||||
raw_wire,
|
||||
pool,
|
||||
&ctx.srtt,
|
||||
ctx.timeout,
|
||||
ctx.hedge_delay,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(resp_wire) => match cache_and_parse(ctx, &qname, qtype, &resp_wire) {
|
||||
Ok(resp) => (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate),
|
||||
Err(e) => {
|
||||
error!("{} | {:?} {} | PARSE ERROR | {}", src_addr, qtype, qname, e);
|
||||
(
|
||||
DnsPacket::response_from(&query, ResultCode::SERVFAIL),
|
||||
QueryPath::UpstreamError,
|
||||
DnssecStatus::Indeterminate,
|
||||
)
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
error!(
|
||||
"{} | {:?} {} | FORWARD ERROR | {}",
|
||||
src_addr, qtype, qname, e
|
||||
);
|
||||
(
|
||||
DnsPacket::response_from(&query, ResultCode::SERVFAIL),
|
||||
QueryPath::UpstreamError,
|
||||
DnssecStatus::Indeterminate,
|
||||
let key = (qname.clone(), qtype);
|
||||
let (resp, path, err) =
|
||||
resolve_coalesced(&ctx.inflight, key, &query, QueryPath::Forwarded, || async {
|
||||
let wire = forward_with_failover_raw(
|
||||
raw_wire,
|
||||
pool,
|
||||
&ctx.srtt,
|
||||
ctx.timeout,
|
||||
ctx.hedge_delay,
|
||||
)
|
||||
}
|
||||
.await?;
|
||||
cache_and_parse(ctx, &qname, qtype, &wire)
|
||||
})
|
||||
.await;
|
||||
log_coalesced_outcome(src_addr, qtype, &qname, path, err.as_deref(), "FORWARD");
|
||||
if path == QueryPath::Forwarded {
|
||||
upstream_transport = pool.preferred().map(|u| u.transport());
|
||||
}
|
||||
(resp, path, DnssecStatus::Indeterminate)
|
||||
} else if ctx.upstream_mode == UpstreamMode::Recursive {
|
||||
// Recursive resolution makes UDP hops to roots/TLDs/auths;
|
||||
// tag as Udp so the dashboard can aggregate plaintext-wire
|
||||
// egress honestly. Only mark on success — errors stay None.
|
||||
let key = (qname.clone(), qtype);
|
||||
let (resp, path, err) = resolve_coalesced(&ctx.inflight, key, &query, || {
|
||||
crate::recursive::resolve_recursive(
|
||||
&qname,
|
||||
qtype,
|
||||
&ctx.cache,
|
||||
&query,
|
||||
&ctx.root_hints,
|
||||
&ctx.srtt,
|
||||
)
|
||||
})
|
||||
.await;
|
||||
if path == QueryPath::Coalesced {
|
||||
debug!("{} | {:?} {} | COALESCED", src_addr, qtype, qname);
|
||||
} else if path == QueryPath::UpstreamError {
|
||||
error!(
|
||||
"{} | {:?} {} | RECURSIVE ERROR | {}",
|
||||
src_addr,
|
||||
qtype,
|
||||
qname,
|
||||
err.as_deref().unwrap_or("leader failed")
|
||||
);
|
||||
} else {
|
||||
let (resp, path, err) =
|
||||
resolve_coalesced(&ctx.inflight, key, &query, QueryPath::Recursive, || {
|
||||
crate::recursive::resolve_recursive(
|
||||
&qname,
|
||||
qtype,
|
||||
&ctx.cache,
|
||||
&query,
|
||||
&ctx.root_hints,
|
||||
&ctx.srtt,
|
||||
)
|
||||
})
|
||||
.await;
|
||||
log_coalesced_outcome(src_addr, qtype, &qname, path, err.as_deref(), "RECURSIVE");
|
||||
if path == QueryPath::Recursive {
|
||||
upstream_transport = Some(crate::stats::UpstreamTransport::Udp);
|
||||
}
|
||||
(resp, path, DnssecStatus::Indeterminate)
|
||||
} else {
|
||||
let pool = ctx.upstream_pool.lock().unwrap().clone();
|
||||
match forward_with_failover_raw(
|
||||
raw_wire,
|
||||
&pool,
|
||||
&ctx.srtt,
|
||||
ctx.timeout,
|
||||
ctx.hedge_delay,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(resp_wire) => match cache_and_parse(ctx, &qname, qtype, &resp_wire) {
|
||||
Ok(resp) => {
|
||||
upstream_transport = pool.preferred().map(|u| u.transport());
|
||||
(resp, QueryPath::Upstream, DnssecStatus::Indeterminate)
|
||||
}
|
||||
Err(e) => {
|
||||
error!("{} | {:?} {} | PARSE ERROR | {}", src_addr, qtype, qname, e);
|
||||
(
|
||||
DnsPacket::response_from(&query, ResultCode::SERVFAIL),
|
||||
QueryPath::UpstreamError,
|
||||
DnssecStatus::Indeterminate,
|
||||
)
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
error!(
|
||||
"{} | {:?} {} | UPSTREAM ERROR | {}",
|
||||
src_addr, qtype, qname, e
|
||||
);
|
||||
(
|
||||
DnsPacket::response_from(&query, ResultCode::SERVFAIL),
|
||||
QueryPath::UpstreamError,
|
||||
DnssecStatus::Indeterminate,
|
||||
let key = (qname.clone(), qtype);
|
||||
let (resp, path, err) =
|
||||
resolve_coalesced(&ctx.inflight, key, &query, QueryPath::Upstream, || async {
|
||||
let wire = forward_with_failover_raw(
|
||||
raw_wire,
|
||||
&pool,
|
||||
&ctx.srtt,
|
||||
ctx.timeout,
|
||||
ctx.hedge_delay,
|
||||
)
|
||||
}
|
||||
.await?;
|
||||
cache_and_parse(ctx, &qname, qtype, &wire)
|
||||
})
|
||||
.await;
|
||||
log_coalesced_outcome(src_addr, qtype, &qname, path, err.as_deref(), "UPSTREAM");
|
||||
if path == QueryPath::Upstream {
|
||||
upstream_transport = pool.preferred().map(|u| u.transport());
|
||||
}
|
||||
(resp, path, DnssecStatus::Indeterminate)
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -446,6 +408,33 @@ fn cache_and_parse(
|
||||
/// Used for both stale-entry refresh and proactive cache warming.
|
||||
pub async fn refresh_entry(ctx: &ServerCtx, qname: &str, qtype: QueryType) {
|
||||
let query = DnsPacket::query(0, qname, qtype);
|
||||
|
||||
// Forwarding rules must win here, mirroring `resolve_query` — otherwise
|
||||
// refresh re-resolves private zones through the default upstream and
|
||||
// poisons the cache with NXDOMAIN.
|
||||
if let Some(pool) = crate::system_dns::match_forwarding_rule(qname, &ctx.forwarding_rules) {
|
||||
let mut buf = BytePacketBuffer::new();
|
||||
if query.write(&mut buf).is_ok() {
|
||||
if let Ok(wire) = forward_with_failover_raw(
|
||||
buf.filled(),
|
||||
pool,
|
||||
&ctx.srtt,
|
||||
ctx.timeout,
|
||||
ctx.hedge_delay,
|
||||
)
|
||||
.await
|
||||
{
|
||||
ctx.cache.write().unwrap().insert_wire(
|
||||
qname,
|
||||
qtype,
|
||||
&wire,
|
||||
DnssecStatus::Indeterminate,
|
||||
);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if ctx.upstream_mode == UpstreamMode::Recursive {
|
||||
if let Ok(resp) = crate::recursive::resolve_recursive(
|
||||
qname,
|
||||
@@ -611,11 +600,15 @@ fn acquire_inflight(inflight: &Mutex<InflightMap>, key: (String, QueryType)) ->
|
||||
|
||||
/// Run a resolve function with in-flight coalescing. Multiple concurrent calls
|
||||
/// for the same key share a single resolution — the first caller (leader)
|
||||
/// executes `resolve_fn`, and followers wait for the broadcast result.
|
||||
/// executes `resolve_fn`, and followers wait for the broadcast result. The
|
||||
/// leader's successful path is tagged with `leader_path` so callers that
|
||||
/// share this helper (recursive, forwarded-rule, forward-upstream) keep their
|
||||
/// own observability without duplicating the inflight map.
|
||||
async fn resolve_coalesced<F, Fut>(
|
||||
inflight: &Mutex<InflightMap>,
|
||||
key: (String, QueryType),
|
||||
query: &DnsPacket,
|
||||
leader_path: QueryPath,
|
||||
resolve_fn: F,
|
||||
) -> (DnsPacket, QueryPath, Option<String>)
|
||||
where
|
||||
@@ -644,7 +637,7 @@ where
|
||||
match result {
|
||||
Ok(resp) => {
|
||||
let _ = tx.send(Some(resp.clone()));
|
||||
(resp, QueryPath::Recursive, None)
|
||||
(resp, leader_path, None)
|
||||
}
|
||||
Err(e) => {
|
||||
let _ = tx.send(None);
|
||||
@@ -671,6 +664,33 @@ impl Drop for InflightGuard<'_> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit the log lines shared by the three upstream branches (Forwarded,
|
||||
/// Recursive, Upstream) after `resolve_coalesced` returns. Leader-success
|
||||
/// and transport-tagging stay at the call site since they diverge per
|
||||
/// branch, but the Coalesced debug and UpstreamError error are identical
|
||||
/// except for the label.
|
||||
fn log_coalesced_outcome(
|
||||
src_addr: SocketAddr,
|
||||
qtype: QueryType,
|
||||
qname: &str,
|
||||
path: QueryPath,
|
||||
err: Option<&str>,
|
||||
label: &str,
|
||||
) {
|
||||
match path {
|
||||
QueryPath::Coalesced => debug!("{} | {:?} {} | COALESCED", src_addr, qtype, qname),
|
||||
QueryPath::UpstreamError => error!(
|
||||
"{} | {:?} {} | {} ERROR | {}",
|
||||
src_addr,
|
||||
qtype,
|
||||
qname,
|
||||
label,
|
||||
err.unwrap_or("leader failed")
|
||||
),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn special_use_response(query: &DnsPacket, qname: &str, qtype: QueryType) -> DnsPacket {
|
||||
use std::net::{Ipv4Addr, Ipv6Addr};
|
||||
if qname == "ipv4only.arpa" {
|
||||
@@ -909,7 +929,7 @@ mod tests {
|
||||
let key = ("coalesce.test".to_string(), QueryType::A);
|
||||
let query = DnsPacket::query(100 + i, "coalesce.test", QueryType::A);
|
||||
handles.push(tokio::spawn(async move {
|
||||
resolve_coalesced(&inf, key, &query, || async {
|
||||
resolve_coalesced(&inf, key, &query, QueryPath::Recursive, || async {
|
||||
count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||
Ok(mock_response("coalesce.test"))
|
||||
@@ -953,6 +973,7 @@ mod tests {
|
||||
&inf1,
|
||||
("same.domain".to_string(), QueryType::A),
|
||||
&query_a,
|
||||
QueryPath::Recursive,
|
||||
|| async {
|
||||
count1.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
@@ -966,6 +987,7 @@ mod tests {
|
||||
&inf2,
|
||||
("same.domain".to_string(), QueryType::AAAA),
|
||||
&query_aaaa,
|
||||
QueryPath::Recursive,
|
||||
|| async {
|
||||
count2.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
@@ -995,6 +1017,7 @@ mod tests {
|
||||
&inflight,
|
||||
("will-fail.test".to_string(), QueryType::A),
|
||||
&query,
|
||||
QueryPath::Recursive,
|
||||
|| async { Err::<DnsPacket, _>("upstream timeout".into()) },
|
||||
)
|
||||
.await;
|
||||
@@ -1016,6 +1039,7 @@ mod tests {
|
||||
&inf,
|
||||
("fail.test".to_string(), QueryType::A),
|
||||
&query,
|
||||
QueryPath::Recursive,
|
||||
|| async {
|
||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||
Err::<DnsPacket, _>("upstream error".into())
|
||||
@@ -1056,6 +1080,7 @@ mod tests {
|
||||
&inflight,
|
||||
("question.test".to_string(), QueryType::A),
|
||||
&query,
|
||||
QueryPath::Recursive,
|
||||
|| async { Err::<DnsPacket, _>("fail".into()) },
|
||||
)
|
||||
.await;
|
||||
@@ -1080,6 +1105,7 @@ mod tests {
|
||||
&inflight,
|
||||
("err-msg.test".to_string(), QueryType::A),
|
||||
&query,
|
||||
QueryPath::Recursive,
|
||||
|| async { Err::<DnsPacket, _>("connection refused by upstream".into()) },
|
||||
)
|
||||
.await;
|
||||
@@ -1245,14 +1271,8 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn pipeline_filter_aaaa_leaves_a_queries_alone() {
|
||||
let mut upstream_resp = DnsPacket::new();
|
||||
upstream_resp.header.response = true;
|
||||
upstream_resp.header.rescode = ResultCode::NOERROR;
|
||||
upstream_resp.answers.push(DnsRecord::A {
|
||||
domain: "example.com".to_string(),
|
||||
addr: Ipv4Addr::new(93, 184, 216, 34),
|
||||
ttl: 300,
|
||||
});
|
||||
let upstream_resp =
|
||||
crate::testutil::a_record_response("example.com", Ipv4Addr::new(93, 184, 216, 34), 300);
|
||||
let upstream_addr = crate::testutil::mock_upstream(upstream_resp).await;
|
||||
|
||||
let mut ctx = crate::testutil::test_ctx().await;
|
||||
@@ -1472,14 +1492,8 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn pipeline_forwarding_returns_upstream_answer() {
|
||||
let mut upstream_resp = DnsPacket::new();
|
||||
upstream_resp.header.response = true;
|
||||
upstream_resp.header.rescode = ResultCode::NOERROR;
|
||||
upstream_resp.answers.push(DnsRecord::A {
|
||||
domain: "internal.corp".to_string(),
|
||||
addr: Ipv4Addr::new(10, 1, 2, 3),
|
||||
ttl: 600,
|
||||
});
|
||||
let upstream_resp =
|
||||
crate::testutil::a_record_response("internal.corp", Ipv4Addr::new(10, 1, 2, 3), 600);
|
||||
let upstream_addr = crate::testutil::mock_upstream(upstream_resp).await;
|
||||
|
||||
let mut ctx = crate::testutil::test_ctx().await;
|
||||
@@ -1506,14 +1520,8 @@ mod tests {
|
||||
async fn pipeline_forwarding_fails_over_to_second_upstream() {
|
||||
let dead = crate::testutil::blackhole_upstream();
|
||||
|
||||
let mut live_resp = DnsPacket::new();
|
||||
live_resp.header.response = true;
|
||||
live_resp.header.rescode = ResultCode::NOERROR;
|
||||
live_resp.answers.push(DnsRecord::A {
|
||||
domain: "internal.corp".to_string(),
|
||||
addr: Ipv4Addr::new(10, 9, 9, 9),
|
||||
ttl: 600,
|
||||
});
|
||||
let live_resp =
|
||||
crate::testutil::a_record_response("internal.corp", Ipv4Addr::new(10, 9, 9, 9), 600);
|
||||
let live = crate::testutil::mock_upstream(live_resp).await;
|
||||
|
||||
let mut ctx = crate::testutil::test_ctx().await;
|
||||
@@ -1535,14 +1543,8 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn pipeline_default_pool_reports_upstream_path() {
|
||||
let mut upstream_resp = DnsPacket::new();
|
||||
upstream_resp.header.response = true;
|
||||
upstream_resp.header.rescode = ResultCode::NOERROR;
|
||||
upstream_resp.answers.push(DnsRecord::A {
|
||||
domain: "example.com".to_string(),
|
||||
addr: Ipv4Addr::new(93, 184, 216, 34),
|
||||
ttl: 300,
|
||||
});
|
||||
let upstream_resp =
|
||||
crate::testutil::a_record_response("example.com", Ipv4Addr::new(93, 184, 216, 34), 300);
|
||||
let upstream_addr = crate::testutil::mock_upstream(upstream_resp).await;
|
||||
|
||||
let ctx = crate::testutil::test_ctx().await;
|
||||
@@ -1557,4 +1559,67 @@ mod tests {
|
||||
assert_eq!(resp.header.rescode, ResultCode::NOERROR);
|
||||
assert_eq!(resp.answers.len(), 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn refresh_entry_honors_forwarding_rule() {
|
||||
let rule_resp =
|
||||
crate::testutil::a_record_response("internal.corp", Ipv4Addr::new(10, 0, 0, 42), 300);
|
||||
let rule_upstream = crate::testutil::mock_upstream(rule_resp).await;
|
||||
|
||||
let mut ctx = crate::testutil::test_ctx().await;
|
||||
ctx.forwarding_rules = vec![ForwardingRule::new(
|
||||
"corp".to_string(),
|
||||
UpstreamPool::new(vec![Upstream::Udp(rule_upstream)], vec![]),
|
||||
)];
|
||||
// Default pool points at a blackhole — if the refresh queries it
|
||||
// instead of the rule, the test fails because nothing is cached.
|
||||
ctx.upstream_pool
|
||||
.lock()
|
||||
.unwrap()
|
||||
.set_primary(vec![Upstream::Udp(crate::testutil::blackhole_upstream())]);
|
||||
let ctx = Arc::new(ctx);
|
||||
|
||||
refresh_entry(&ctx, "internal.corp", QueryType::A).await;
|
||||
|
||||
let cached = ctx
|
||||
.cache
|
||||
.read()
|
||||
.unwrap()
|
||||
.lookup("internal.corp", QueryType::A)
|
||||
.expect("refresh must populate cache via forwarding rule");
|
||||
match &cached.answers[0] {
|
||||
DnsRecord::A { addr, .. } => assert_eq!(*addr, Ipv4Addr::new(10, 0, 0, 42)),
|
||||
other => panic!("expected A record, got {:?}", other),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn refresh_entry_prefers_forwarding_rule_over_recursive() {
|
||||
let rule_resp =
|
||||
crate::testutil::a_record_response("db.internal.corp", Ipv4Addr::new(10, 0, 0, 7), 300);
|
||||
let rule_upstream = crate::testutil::mock_upstream(rule_resp).await;
|
||||
|
||||
let mut ctx = crate::testutil::test_ctx().await;
|
||||
ctx.upstream_mode = UpstreamMode::Recursive;
|
||||
ctx.forwarding_rules = vec![ForwardingRule::new(
|
||||
"corp".to_string(),
|
||||
UpstreamPool::new(vec![Upstream::Udp(rule_upstream)], vec![]),
|
||||
)];
|
||||
// No root_hints — recursion would fail immediately, proving that
|
||||
// the rule branch fired instead.
|
||||
let ctx = Arc::new(ctx);
|
||||
|
||||
refresh_entry(&ctx, "db.internal.corp", QueryType::A).await;
|
||||
|
||||
let cached = ctx
|
||||
.cache
|
||||
.read()
|
||||
.unwrap()
|
||||
.lookup("db.internal.corp", QueryType::A)
|
||||
.expect("recursive-mode refresh must still consult forwarding rules");
|
||||
match &cached.answers[0] {
|
||||
DnsRecord::A { addr, .. } => assert_eq!(*addr, Ipv4Addr::new(10, 0, 0, 7)),
|
||||
other => panic!("expected A record, got {:?}", other),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -882,6 +882,28 @@ fn record_rdata_canonical(record: &DnsRecord) -> Vec<u8> {
|
||||
rdata.extend(type_bitmap);
|
||||
rdata
|
||||
}
|
||||
DnsRecord::SOA {
|
||||
mname,
|
||||
rname,
|
||||
serial,
|
||||
refresh,
|
||||
retry,
|
||||
expire,
|
||||
minimum,
|
||||
..
|
||||
} => {
|
||||
let mname_wire = name_to_wire(mname);
|
||||
let rname_wire = name_to_wire(rname);
|
||||
let mut rdata = Vec::with_capacity(mname_wire.len() + rname_wire.len() + 20);
|
||||
rdata.extend(&mname_wire);
|
||||
rdata.extend(&rname_wire);
|
||||
rdata.extend(&serial.to_be_bytes());
|
||||
rdata.extend(&refresh.to_be_bytes());
|
||||
rdata.extend(&retry.to_be_bytes());
|
||||
rdata.extend(&expire.to_be_bytes());
|
||||
rdata.extend(&minimum.to_be_bytes());
|
||||
rdata
|
||||
}
|
||||
DnsRecord::UNKNOWN { data, .. } => data.clone(),
|
||||
DnsRecord::RRSIG { .. } => Vec::new(),
|
||||
}
|
||||
|
||||
@@ -113,10 +113,7 @@ impl fmt::Display for Upstream {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn parse_upstream_addr(
|
||||
s: &str,
|
||||
default_port: u16,
|
||||
) -> std::result::Result<SocketAddr, String> {
|
||||
pub fn parse_upstream_addr(s: &str, default_port: u16) -> std::result::Result<SocketAddr, String> {
|
||||
// Try full socket addr first: "1.2.3.4:5353" or "[::1]:5353"
|
||||
if let Ok(addr) = s.parse::<SocketAddr>() {
|
||||
return Ok(addr);
|
||||
@@ -129,19 +126,28 @@ pub(crate) fn parse_upstream_addr(
|
||||
}
|
||||
|
||||
/// Parse a slice of upstream address strings into `Upstream` values, failing
|
||||
/// on the first invalid entry.
|
||||
pub fn parse_upstream_list(addrs: &[String], default_port: u16) -> Result<Vec<Upstream>> {
|
||||
/// on the first invalid entry. DoH entries use `resolver` (when provided) as
|
||||
/// their hostname resolver.
|
||||
pub fn parse_upstream_list(
|
||||
addrs: &[String],
|
||||
default_port: u16,
|
||||
resolver: Option<Arc<crate::bootstrap_resolver::NumaResolver>>,
|
||||
) -> Result<Vec<Upstream>> {
|
||||
addrs
|
||||
.iter()
|
||||
.map(|s| parse_upstream(s, default_port))
|
||||
.map(|s| parse_upstream(s, default_port, resolver.clone()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn parse_upstream(s: &str, default_port: u16) -> Result<Upstream> {
|
||||
pub fn parse_upstream(
|
||||
s: &str,
|
||||
default_port: u16,
|
||||
resolver: Option<Arc<crate::bootstrap_resolver::NumaResolver>>,
|
||||
) -> Result<Upstream> {
|
||||
if s.starts_with("https://") {
|
||||
return Ok(Upstream::Doh {
|
||||
url: s.to_string(),
|
||||
client: build_https_client(),
|
||||
client: build_https_client_with_resolver(1, resolver),
|
||||
});
|
||||
}
|
||||
// tls://IP:PORT#hostname or tls://IP#hostname (default port 853)
|
||||
@@ -163,12 +169,15 @@ pub fn parse_upstream(s: &str, default_port: u16) -> Result<Upstream> {
|
||||
}
|
||||
|
||||
/// HTTP/2 client tuned for DoH/ODoH: small windows for low latency, long-lived
|
||||
/// keep-alive. Shared by the DoH upstream and the ODoH config-fetcher +
|
||||
/// seal/open path. Pool defaults to one idle conn per host — good for
|
||||
/// resolvers that talk to a single upstream; relays that fan out to many
|
||||
/// targets should use [`build_https_client_with_pool`].
|
||||
/// keep-alive. Pool defaults to one idle conn per host — good for resolvers
|
||||
/// that talk to a single upstream; relays that fan out to many targets
|
||||
/// should use [`build_https_client_with_pool`].
|
||||
///
|
||||
/// Uses the system resolver. Callers running inside `serve::run` pass the
|
||||
/// shared [`crate::bootstrap_resolver::NumaResolver`] via
|
||||
/// [`build_https_client_with_resolver`] to avoid the self-loop (issue #122).
|
||||
pub fn build_https_client() -> reqwest::Client {
|
||||
build_https_client_with_pool(1)
|
||||
build_https_client_with_resolver(1, None)
|
||||
}
|
||||
|
||||
/// Same shape as [`build_https_client`], but caller picks
|
||||
@@ -176,20 +185,18 @@ pub fn build_https_client() -> reqwest::Client {
|
||||
/// and benefit from a larger pool so warm connections survive concurrent
|
||||
/// fan-out.
|
||||
pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::Client {
|
||||
https_client_builder(pool_max_idle_per_host)
|
||||
.build()
|
||||
.unwrap_or_default()
|
||||
build_https_client_with_resolver(pool_max_idle_per_host, None)
|
||||
}
|
||||
|
||||
/// HTTPS client for the ODoH upstream, with bootstrap-IP overrides applied
|
||||
/// so relay/target hostname resolution can bypass system DNS.
|
||||
pub fn build_odoh_client(odoh: &crate::config::OdohUpstream) -> reqwest::Client {
|
||||
let mut builder = https_client_builder(1);
|
||||
if let Some(addr) = odoh.relay_bootstrap {
|
||||
builder = builder.resolve(&odoh.relay_host, addr);
|
||||
}
|
||||
if let Some(addr) = odoh.target_bootstrap {
|
||||
builder = builder.resolve(&odoh.target_host, addr);
|
||||
/// [`build_https_client`] with an optional custom DNS resolver. Numa wires
|
||||
/// [`crate::bootstrap_resolver::NumaResolver`] here.
|
||||
pub fn build_https_client_with_resolver(
|
||||
pool_max_idle_per_host: usize,
|
||||
resolver: Option<Arc<crate::bootstrap_resolver::NumaResolver>>,
|
||||
) -> reqwest::Client {
|
||||
let mut builder = https_client_builder(pool_max_idle_per_host);
|
||||
if let Some(r) = resolver {
|
||||
builder = builder.dns_resolver(r);
|
||||
}
|
||||
builder.build().unwrap_or_default()
|
||||
}
|
||||
@@ -553,6 +560,9 @@ async fn forward_doh_raw(
|
||||
|
||||
/// Send a lightweight keepalive query to a DoH upstream to prevent
|
||||
/// the HTTP/2 + TLS connection from going idle and being torn down.
|
||||
/// The first call doubles as a startup warm-up: bootstrap-resolver failures
|
||||
/// (unreachable Quad9/Cloudflare defaults, misconfigured hostname upstream)
|
||||
/// surface here rather than on the first client query.
|
||||
pub async fn keepalive_doh(upstream: &Upstream) {
|
||||
if let Upstream::Doh { url, client } = upstream {
|
||||
// Query for . NS — minimal, always succeeds, response is small
|
||||
@@ -565,7 +575,9 @@ pub async fn keepalive_doh(upstream: &Upstream) {
|
||||
0x00, 0x02, // type NS
|
||||
0x00, 0x01, // class IN
|
||||
];
|
||||
let _ = forward_doh_raw(wire, url, client, Duration::from_secs(5)).await;
|
||||
if let Err(e) = forward_doh_raw(wire, url, client, Duration::from_secs(5)).await {
|
||||
log::warn!("DoH keepalive to {} failed: {}", url, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,11 +7,10 @@
|
||||
//! Both handlers call [`HealthResponse::build`] to assemble the JSON
|
||||
//! response from `HealthMeta` + live inputs.
|
||||
//!
|
||||
//! JSON schema is documented in `docs/implementation/ios-companion-app.md`
|
||||
//! §4.2. The iOS companion app's `HealthInfo` struct is the canonical
|
||||
//! consumer; any change to this response must keep that struct decoding
|
||||
//! cleanly (all consumed fields are optional on the Swift side, but
|
||||
//! `lan_ip` is load-bearing for the pipeline).
|
||||
//! The iOS companion app's `HealthInfo` struct is the canonical consumer;
|
||||
//! any change to this response must keep that struct decoding cleanly (all
|
||||
//! consumed fields are optional on the Swift side, but `lan_ip` is
|
||||
//! load-bearing for the pipeline).
|
||||
|
||||
use std::net::Ipv4Addr;
|
||||
use std::path::Path;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
pub mod api;
|
||||
pub mod blocklist;
|
||||
pub mod bootstrap_resolver;
|
||||
pub mod buffer;
|
||||
pub mod cache;
|
||||
pub mod config;
|
||||
|
||||
@@ -24,6 +24,17 @@ pub enum DnsRecord {
|
||||
host: String,
|
||||
ttl: u32,
|
||||
},
|
||||
SOA {
|
||||
domain: String,
|
||||
mname: String,
|
||||
rname: String,
|
||||
serial: u32,
|
||||
refresh: u32,
|
||||
retry: u32,
|
||||
expire: u32,
|
||||
minimum: u32,
|
||||
ttl: u32,
|
||||
},
|
||||
CNAME {
|
||||
domain: String,
|
||||
host: String,
|
||||
@@ -100,6 +111,7 @@ impl DnsRecord {
|
||||
| DnsRecord::RRSIG { domain, .. }
|
||||
| DnsRecord::NSEC { domain, .. }
|
||||
| DnsRecord::NSEC3 { domain, .. }
|
||||
| DnsRecord::SOA { domain, .. }
|
||||
| DnsRecord::UNKNOWN { domain, .. } => domain,
|
||||
}
|
||||
}
|
||||
@@ -111,6 +123,7 @@ impl DnsRecord {
|
||||
DnsRecord::NS { .. } => QueryType::NS,
|
||||
DnsRecord::CNAME { .. } => QueryType::CNAME,
|
||||
DnsRecord::MX { .. } => QueryType::MX,
|
||||
DnsRecord::SOA { .. } => QueryType::SOA,
|
||||
DnsRecord::DNSKEY { .. } => QueryType::DNSKEY,
|
||||
DnsRecord::DS { .. } => QueryType::DS,
|
||||
DnsRecord::RRSIG { .. } => QueryType::RRSIG,
|
||||
@@ -132,6 +145,7 @@ impl DnsRecord {
|
||||
| DnsRecord::RRSIG { ttl, .. }
|
||||
| DnsRecord::NSEC { ttl, .. }
|
||||
| DnsRecord::NSEC3 { ttl, .. }
|
||||
| DnsRecord::SOA { ttl, .. }
|
||||
| DnsRecord::UNKNOWN { ttl, .. } => *ttl,
|
||||
}
|
||||
}
|
||||
@@ -172,6 +186,12 @@ impl DnsRecord {
|
||||
+ next_hashed_owner.capacity()
|
||||
+ type_bitmap.capacity()
|
||||
}
|
||||
DnsRecord::SOA {
|
||||
domain,
|
||||
mname,
|
||||
rname,
|
||||
..
|
||||
} => domain.capacity() + mname.capacity() + rname.capacity(),
|
||||
DnsRecord::UNKNOWN { domain, data, .. } => domain.capacity() + data.capacity(),
|
||||
}
|
||||
}
|
||||
@@ -188,6 +208,7 @@ impl DnsRecord {
|
||||
| DnsRecord::RRSIG { ttl, .. }
|
||||
| DnsRecord::NSEC { ttl, .. }
|
||||
| DnsRecord::NSEC3 { ttl, .. }
|
||||
| DnsRecord::SOA { ttl, .. }
|
||||
| DnsRecord::UNKNOWN { ttl, .. } => *ttl = new_ttl,
|
||||
}
|
||||
}
|
||||
@@ -365,8 +386,31 @@ impl DnsRecord {
|
||||
ttl,
|
||||
})
|
||||
}
|
||||
QueryType::SOA => {
|
||||
// MNAME/RNAME compressible per RFC 1035 §3.3.13 — decompress to avoid stale pointers on re-emit.
|
||||
let mut mname = String::with_capacity(64);
|
||||
buffer.read_qname(&mut mname)?;
|
||||
let mut rname = String::with_capacity(64);
|
||||
buffer.read_qname(&mut rname)?;
|
||||
let serial = buffer.read_u32()?;
|
||||
let refresh = buffer.read_u32()?;
|
||||
let retry = buffer.read_u32()?;
|
||||
let expire = buffer.read_u32()?;
|
||||
let minimum = buffer.read_u32()?;
|
||||
Ok(DnsRecord::SOA {
|
||||
domain,
|
||||
mname,
|
||||
rname,
|
||||
serial,
|
||||
refresh,
|
||||
retry,
|
||||
expire,
|
||||
minimum,
|
||||
ttl,
|
||||
})
|
||||
}
|
||||
_ => {
|
||||
// SOA, TXT, SRV, etc. — stored as opaque bytes until parsed natively
|
||||
// TXT, SRV, HTTPS, SVCB, etc. — stored as opaque bytes until parsed natively
|
||||
let data = buffer.get_range(buffer.pos(), data_len as usize)?.to_vec();
|
||||
buffer.step(data_len as usize)?;
|
||||
Ok(DnsRecord::UNKNOWN {
|
||||
@@ -430,6 +474,30 @@ impl DnsRecord {
|
||||
let size = buffer.pos() - (pos + 2);
|
||||
buffer.set_u16(pos, size as u16)?;
|
||||
}
|
||||
DnsRecord::SOA {
|
||||
ref domain,
|
||||
ref mname,
|
||||
ref rname,
|
||||
serial,
|
||||
refresh,
|
||||
retry,
|
||||
expire,
|
||||
minimum,
|
||||
ttl,
|
||||
} => {
|
||||
write_header(buffer, domain, QueryType::SOA.to_num(), ttl)?;
|
||||
let rdlen_pos = buffer.pos();
|
||||
buffer.write_u16(0)?;
|
||||
buffer.write_qname(mname)?;
|
||||
buffer.write_qname(rname)?;
|
||||
buffer.write_u32(serial)?;
|
||||
buffer.write_u32(refresh)?;
|
||||
buffer.write_u32(retry)?;
|
||||
buffer.write_u32(expire)?;
|
||||
buffer.write_u32(minimum)?;
|
||||
let rdlen = buffer.pos() - (rdlen_pos + 2);
|
||||
buffer.set_u16(rdlen_pos, rdlen as u16)?;
|
||||
}
|
||||
DnsRecord::AAAA {
|
||||
ref domain,
|
||||
ref addr,
|
||||
|
||||
59
src/serve.rs
59
src/serve.rs
@@ -13,12 +13,13 @@ use log::{error, info};
|
||||
use tokio::net::UdpSocket;
|
||||
|
||||
use crate::blocklist::{download_blocklists, parse_blocklist, BlocklistStore};
|
||||
use crate::bootstrap_resolver::NumaResolver;
|
||||
use crate::buffer::BytePacketBuffer;
|
||||
use crate::cache::DnsCache;
|
||||
use crate::config::{build_zone_map, load_config, ConfigLoad};
|
||||
use crate::ctx::{handle_query, ServerCtx};
|
||||
use crate::forward::{
|
||||
build_https_client, build_odoh_client, parse_upstream_list, Upstream, UpstreamPool,
|
||||
build_https_client_with_resolver, parse_upstream_list, Upstream, UpstreamPool,
|
||||
};
|
||||
use crate::odoh::OdohConfigCache;
|
||||
use crate::override_store::OverrideStore;
|
||||
@@ -48,6 +49,22 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
||||
(dummy, "recursive (root hints)".to_string())
|
||||
};
|
||||
|
||||
// Routes numa-originated HTTPS (DoH upstream, ODoH relay/target, blocklist
|
||||
// CDN) away from the system resolver so lookups don't loop back through
|
||||
// numa when it's its own system DNS.
|
||||
let resolver_overrides = match config.upstream.mode {
|
||||
crate::config::UpstreamMode::Odoh => config
|
||||
.upstream
|
||||
.odoh_upstream()
|
||||
.map(|o| o.host_ip_overrides())
|
||||
.unwrap_or_default(),
|
||||
_ => std::collections::BTreeMap::new(),
|
||||
};
|
||||
let bootstrap_resolver: Arc<NumaResolver> = Arc::new(NumaResolver::new(
|
||||
&config.upstream.fallback,
|
||||
resolver_overrides,
|
||||
));
|
||||
|
||||
let (resolved_mode, upstream_auto, pool, upstream_label) = match config.upstream.mode {
|
||||
crate::config::UpstreamMode::Auto => {
|
||||
info!("auto mode: probing recursive resolution...");
|
||||
@@ -57,7 +74,7 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
||||
(crate::config::UpstreamMode::Recursive, false, pool, label)
|
||||
} else {
|
||||
log::warn!("recursive probe failed — falling back to Quad9 DoH");
|
||||
let client = build_https_client();
|
||||
let client = build_https_client_with_resolver(1, Some(bootstrap_resolver.clone()));
|
||||
let url = DOH_FALLBACK.to_string();
|
||||
let label = url.clone();
|
||||
let pool = UpstreamPool::new(vec![Upstream::Doh { url, client }], vec![]);
|
||||
@@ -82,8 +99,16 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
||||
config.upstream.address.clone()
|
||||
};
|
||||
|
||||
let primary = parse_upstream_list(&addrs, config.upstream.port)?;
|
||||
let fallback = parse_upstream_list(&config.upstream.fallback, config.upstream.port)?;
|
||||
let primary = parse_upstream_list(
|
||||
&addrs,
|
||||
config.upstream.port,
|
||||
Some(bootstrap_resolver.clone()),
|
||||
)?;
|
||||
let fallback = parse_upstream_list(
|
||||
&config.upstream.fallback,
|
||||
config.upstream.port,
|
||||
Some(bootstrap_resolver.clone()),
|
||||
)?;
|
||||
|
||||
let pool = UpstreamPool::new(primary, fallback);
|
||||
let label = pool.label();
|
||||
@@ -96,7 +121,7 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
||||
}
|
||||
crate::config::UpstreamMode::Odoh => {
|
||||
let odoh = config.upstream.odoh_upstream()?;
|
||||
let client = build_odoh_client(&odoh);
|
||||
let client = build_https_client_with_resolver(1, Some(bootstrap_resolver.clone()));
|
||||
let target_config = Arc::new(OdohConfigCache::new(
|
||||
odoh.target_host.clone(),
|
||||
client.clone(),
|
||||
@@ -110,7 +135,11 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
||||
let fallback = if odoh.strict {
|
||||
Vec::new()
|
||||
} else {
|
||||
parse_upstream_list(&config.upstream.fallback, config.upstream.port)?
|
||||
parse_upstream_list(
|
||||
&config.upstream.fallback,
|
||||
config.upstream.port,
|
||||
Some(bootstrap_resolver.clone()),
|
||||
)?
|
||||
};
|
||||
let pool = UpstreamPool::new(primary, fallback);
|
||||
let label = pool.label();
|
||||
@@ -313,12 +342,13 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
||||
};
|
||||
|
||||
// Title row: center within the box
|
||||
let tag_line = "DNS that governs itself";
|
||||
let title = format!(
|
||||
"{b}NUMA{r} {it}DNS that governs itself{r} {d}v{}{r}",
|
||||
"{b}NUMA{r} {it}{tag_line}{r} {d}v{}{r}",
|
||||
env!("CARGO_PKG_VERSION")
|
||||
);
|
||||
// The title contains ANSI codes; visible length is ~38 chars. Pad to fill the box.
|
||||
let title_visible_len = 4 + 2 + 24 + 2 + 1 + env!("CARGO_PKG_VERSION").len() + 1;
|
||||
let title_visible_len = 4 + 2 + tag_line.len() + 2 + 1 + env!("CARGO_PKG_VERSION").len() + 1;
|
||||
let title_pad = w.saturating_sub(title_visible_len);
|
||||
eprintln!("\n{o} ╔{bar_top}╗{r}");
|
||||
eprint!("{o} ║{r} {title}");
|
||||
@@ -405,8 +435,9 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
||||
if config.blocking.enabled && !blocklist_lists.is_empty() {
|
||||
let bl_ctx = Arc::clone(&ctx);
|
||||
let bl_lists = blocklist_lists.clone();
|
||||
let bl_resolver = bootstrap_resolver.clone();
|
||||
tokio::spawn(async move {
|
||||
load_blocklists(&bl_ctx, &bl_lists).await;
|
||||
load_blocklists(&bl_ctx, &bl_lists, Some(bl_resolver.clone())).await;
|
||||
|
||||
// Periodic refresh
|
||||
let mut interval = tokio::time::interval(Duration::from_secs(refresh_hours * 3600));
|
||||
@@ -414,7 +445,7 @@ pub async fn run(config_path: String) -> crate::Result<()> {
|
||||
loop {
|
||||
interval.tick().await;
|
||||
info!("refreshing blocklists...");
|
||||
load_blocklists(&bl_ctx, &bl_lists).await;
|
||||
load_blocklists(&bl_ctx, &bl_lists, Some(bl_resolver.clone())).await;
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -596,8 +627,8 @@ async fn network_watch_loop(ctx: Arc<ServerCtx>) {
|
||||
}
|
||||
}
|
||||
|
||||
async fn load_blocklists(ctx: &ServerCtx, lists: &[String]) {
|
||||
let downloaded = download_blocklists(lists).await;
|
||||
async fn load_blocklists(ctx: &ServerCtx, lists: &[String], resolver: Option<Arc<NumaResolver>>) {
|
||||
let downloaded = download_blocklists(lists, resolver).await;
|
||||
|
||||
// Parse outside the lock to avoid blocking DNS queries during parse (~100ms)
|
||||
let mut all_domains = std::collections::HashSet::new();
|
||||
@@ -632,8 +663,10 @@ async fn warm_domain(ctx: &ServerCtx, domain: &str) {
|
||||
}
|
||||
|
||||
async fn doh_keepalive_loop(ctx: Arc<ServerCtx>) {
|
||||
// First tick fires immediately so we surface bootstrap-resolver failures
|
||||
// (unreachable Quad9/Cloudflare, blocked :53, bad upstream hostname) in
|
||||
// the startup logs instead of on the first client query.
|
||||
let mut interval = tokio::time::interval(Duration::from_secs(25));
|
||||
interval.tick().await; // skip first immediate tick
|
||||
loop {
|
||||
interval.tick().await;
|
||||
let pool = ctx.upstream_pool.lock().unwrap().clone();
|
||||
|
||||
@@ -12,11 +12,13 @@ use crate::cache::DnsCache;
|
||||
use crate::config::UpstreamMode;
|
||||
use crate::ctx::ServerCtx;
|
||||
use crate::forward::{Upstream, UpstreamPool};
|
||||
use crate::header::ResultCode;
|
||||
use crate::health::HealthMeta;
|
||||
use crate::lan::PeerStore;
|
||||
use crate::override_store::OverrideStore;
|
||||
use crate::packet::DnsPacket;
|
||||
use crate::query_log::QueryLog;
|
||||
use crate::record::DnsRecord;
|
||||
use crate::service_store::ServiceStore;
|
||||
use crate::srtt::SrttCache;
|
||||
use crate::stats::ServerStats;
|
||||
@@ -67,6 +69,20 @@ pub async fn test_ctx() -> ServerCtx {
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a NOERROR response containing a single A record — the shape used
|
||||
/// repeatedly by pipeline/forwarding tests to seed `mock_upstream`.
|
||||
pub fn a_record_response(domain: &str, addr: Ipv4Addr, ttl: u32) -> DnsPacket {
|
||||
let mut pkt = DnsPacket::new();
|
||||
pkt.header.response = true;
|
||||
pkt.header.rescode = ResultCode::NOERROR;
|
||||
pkt.answers.push(DnsRecord::A {
|
||||
domain: domain.to_string(),
|
||||
addr,
|
||||
ttl,
|
||||
});
|
||||
pkt
|
||||
}
|
||||
|
||||
/// Spawn a UDP socket that replies to the first DNS query with the given
|
||||
/// response packet (patching the query ID to match). Returns the socket address.
|
||||
pub async fn mock_upstream(response: DnsPacket) -> SocketAddr {
|
||||
|
||||
155
tests/docker/self-resolver-loop.sh
Executable file
155
tests/docker/self-resolver-loop.sh
Executable file
@@ -0,0 +1,155 @@
|
||||
#!/usr/bin/env bash
#
# Reproducer for issue #122 — chicken-and-egg when numa is its own system
# resolver (HAOS add-on, Pi-hole-style container, laptop with
# resolv.conf → 127.0.0.1).
#
# Topology:
#   container /etc/resolv.conf → nameserver 127.0.0.1
#   numa bound on :53 → upstream DoH by hostname (quad9)
#   numa boots → spawns blocklist download
#   reqwest::get → getaddrinfo("cdn.jsdelivr.net")
#     → loopback UDP :53 → numa → cache miss → DoH upstream
#     → getaddrinfo("dns.quad9.net") → same loop → glibc EAI_AGAIN
#
# Expected on master: both assertions FAIL (bug reproduced).
# Expected after bootstrap-IP fix: both assertions PASS.
#
# Exit codes:
#   0  both assertions passed
#   1  at least one assertion failed (bug #122 reproduced)
#   2  container setup or cargo build failed — no verdict
#
# Requirements: docker (with internet access for external lists/DoH)
# Usage: ./tests/docker/self-resolver-loop.sh

set -euo pipefail

cd "$(dirname "$0")/../.."

GREEN="\033[32m"; RED="\033[31m"; RESET="\033[0m"

FAILED=0
pass() { printf "  ${GREEN}✓${RESET} %s\n" "$1"; }
fail() { printf "  ${RED}✗${RESET} %s\n" "$1"; printf "    %s\n" "$2"; FAILED=$((FAILED+1)); }

OUT=/tmp/numa-self-resolver.out

echo "── self-resolver-loop: building + reproducing on debian:bookworm ──"
echo "   (first run is slow: image pull + cold cargo build, ~5-8 min)"
echo

# NOTE: the inner script is single-quoted — it must not contain any single
# quote characters, including inside comments.
#
# pipefail is enabled inside the container so that a failing apt-get or
# cargo build is not hidden by the trailing "| tail"; otherwise a stale
# binary from the cached target volume could be tested instead.
if ! docker run --rm \
  -v "$PWD:/src:ro" \
  -v numa-self-resolver-cargo:/root/.cargo \
  -v numa-self-resolver-target:/work/target \
  debian:bookworm bash -c '
  set -eo pipefail

  # Phase 1: install deps + build with the container DNS as given by Docker
  # (resolves deb.debian.org, static.rust-lang.org, crates.io).
  apt-get update -qq
  apt-get install -y -qq curl build-essential dnsutils 2>&1 | tail -3

  if ! command -v cargo &>/dev/null; then
    curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --quiet
  fi
  . "$HOME/.cargo/env"

  mkdir -p /work
  tar -C /src --exclude=./target --exclude=./.git -cf - . | tar -C /work -xf -
  cd /work

  echo "── cargo build --release --locked ──"
  cargo build --release --locked 2>&1 | tail -5
  echo

  # Phase 2: flip system DNS to numa itself — this is the pathological
  # topology from issue #122 (HAOS add-on, resolv.conf → 127.0.0.1).
  # Everything after this point, any getaddrinfo call inside numa loops
  # back through :53.
  echo "nameserver 127.0.0.1" > /etc/resolv.conf
  echo "── /etc/resolv.conf inside container (post-flip) ──"
  cat /etc/resolv.conf
  echo

  cat > /tmp/numa.toml <<CONF
[server]
bind_addr = "0.0.0.0:53"
api_port = 5380
api_bind_addr = "127.0.0.1"
data_dir = "/tmp/numa-data"

[upstream]
mode = "forward"
address = ["https://dns.quad9.net/dns-query"]
timeout_ms = 3000

[blocking]
enabled = true
lists = ["https://cdn.jsdelivr.net/gh/hagezi/dns-blocklists@latest/hosts/pro.txt"]
CONF

  mkdir -p /tmp/numa-data

  echo "── starting numa ──"
  RUST_LOG=info ./target/release/numa /tmp/numa.toml > /tmp/numa.log 2>&1 &
  NUMA_PID=$!

  # Wait up to 120s for blocklist to populate.
  # Retry delays 2+10+30s = 42s, plus ~4 × ~10s getaddrinfo timeouts under
  # self-loop = ~82s worst case. 120s leaves headroom.
  LOADED=0
  for i in $(seq 1 120); do
    LOADED=$(curl -sf http://127.0.0.1:5380/blocking/stats 2>/dev/null \
      | grep -o "\"domains_loaded\":[0-9]*" | cut -d: -f2 || echo 0)
    [ "${LOADED:-0}" -gt 100 ] && break
    sleep 1
  done

  # First cold DoH query — time it.
  START=$(date +%s%N)
  dig @127.0.0.1 example.com A +time=15 +tries=1 > /tmp/dig.out 2>&1 || true
  END=$(date +%s%N)
  LATENCY_MS=$(( (END - START) / 1000000 ))
  # Keep only the bare rcode (NOERROR, SERVFAIL, ...). Empty means dig
  # printed no status line at all, which is reported as TIMEOUT below.
  STATUS=$(grep -oE "status: [A-Z]+" /tmp/dig.out | head -1 | cut -d" " -f2 || true)

  kill "$NUMA_PID" 2>/dev/null || true
  wait "$NUMA_PID" 2>/dev/null || true

  echo
  echo "=== RESULT ==="
  echo "domains_loaded=$LOADED"
  echo "first_query_latency_ms=$LATENCY_MS"
  echo "first_query_status_${STATUS:-TIMEOUT}"
  echo
  echo "=== numa.log (tail 40) ==="
  tail -40 /tmp/numa.log
  echo
  echo "=== dig.out ==="
  cat /tmp/dig.out
' 2>&1 | tee "$OUT"; then
  printf "${RED}── container setup/build failed — no verdict (see output above) ──${RESET}\n"
  exit 2
fi

echo
echo "── assertions ──"

LOADED=$(grep '^domains_loaded=' "$OUT" | tail -1 | cut -d= -f2 || echo 0)
LATENCY=$(grep '^first_query_latency_ms=' "$OUT" | tail -1 | cut -d= -f2 || echo 999999)
STATUS_LINE=$(grep '^first_query_status_' "$OUT" | tail -1 || echo "first_query_status_TIMEOUT")

if [ "${LOADED:-0}" -gt 100 ]; then
  pass "blocklist downloaded (domains_loaded=$LOADED)"
else
  fail "blocklist downloaded (got domains_loaded=${LOADED:-0}, expected >100)" \
       "chicken-and-egg: blocklist HTTPS client has no DNS bootstrap; getaddrinfo loops through numa"
fi

# A fast SERVFAIL must not count as success: require NOERROR as well as
# the latency bound.
if [ "$STATUS_LINE" = "first_query_status_NOERROR" ] && [ "${LATENCY:-999999}" -lt 2000 ]; then
  pass "first DoH query under 2s (latency=${LATENCY}ms, $STATUS_LINE)"
else
  fail "first DoH query under 2s (got ${LATENCY}ms, $STATUS_LINE)" \
       "self-loop on getaddrinfo(upstream_host); plain DoH needs bootstrap-IP symmetry with ODoH"
fi

echo
if [ "$FAILED" -eq 0 ]; then
  printf "${GREEN}── self-resolver-loop passed (fix is in place) ──${RESET}\n"
  exit 0
else
  printf "${RED}── self-resolver-loop failed ($FAILED assertion(s)) — bug #122 reproduced ──${RESET}\n"
  exit 1
fi
|
||||
@@ -975,6 +975,50 @@ check "Same-host relay+target rejected at startup" \
|
||||
"same host" \
|
||||
"$STARTUP_OUT"
|
||||
|
||||
# Guards ODoH's zero-plain-DNS-leak property: relay_ip / target_ip must
|
||||
# land in the bootstrap resolver's override map so reqwest connects direct
|
||||
# to the configured IPs instead of resolving the hostnames via plain DNS.
|
||||
# RFC 5737 TEST-NET-1 IPs (unroutable).
|
||||
cat > "$CONFIG" << 'CONF'
|
||||
[server]
|
||||
bind_addr = "127.0.0.1:5354"
|
||||
api_port = 5381
|
||||
|
||||
[upstream]
|
||||
mode = "odoh"
|
||||
relay = "https://odoh-relay.example.com/proxy"
|
||||
target = "https://odoh-target.example.org/dns-query"
|
||||
relay_ip = "192.0.2.1"
|
||||
target_ip = "192.0.2.2"
|
||||
|
||||
[cache]
|
||||
max_entries = 10000
|
||||
|
||||
[blocking]
|
||||
enabled = false
|
||||
|
||||
[proxy]
|
||||
enabled = false
|
||||
CONF
|
||||
|
||||
RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 &
|
||||
NUMA_PID=$!
|
||||
for _ in $(seq 1 30); do
|
||||
curl -sf "http://127.0.0.1:$API_PORT/health" >/dev/null 2>&1 && break
|
||||
sleep 0.1
|
||||
done
|
||||
|
||||
OVERRIDE_LOG=$(grep 'bootstrap resolver: host overrides' "$LOG" || true)
|
||||
check "relay_ip wired into bootstrap override map" \
|
||||
"odoh-relay.example.com=192.0.2.1" \
|
||||
"$OVERRIDE_LOG"
|
||||
check "target_ip wired into bootstrap override map" \
|
||||
"odoh-target.example.org=192.0.2.2" \
|
||||
"$OVERRIDE_LOG"
|
||||
|
||||
kill "$NUMA_PID" 2>/dev/null || true
|
||||
wait "$NUMA_PID" 2>/dev/null || true
|
||||
|
||||
fi # end Suite 8
|
||||
|
||||
# ---- Suite 9: Numa's own ODoH relay (--relay-mode) ----
|
||||
|
||||
115
tests/soa_compression_bug.rs
Normal file
115
tests/soa_compression_bug.rs
Normal file
@@ -0,0 +1,115 @@
|
||||
//! Regression test for issue #128: SOA with compressed MNAME/RNAME must
|
||||
//! survive Numa's round-trip — compression pointers reference the upstream
|
||||
//! packet's byte layout, so we have to decompress on read and re-compress
|
||||
//! on write.
|
||||
|
||||
use numa::buffer::BytePacketBuffer;
|
||||
use numa::packet::DnsPacket;
|
||||
|
||||
const COMPRESSION_FLAG: u16 = 0xC000;
|
||||
|
||||
fn upstream_packet() -> Vec<u8> {
|
||||
let mut p = Vec::<u8>::new();
|
||||
|
||||
p.extend_from_slice(&[
|
||||
0x12, 0x34, 0x81, 0x80, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00,
|
||||
]);
|
||||
|
||||
assert_eq!(p.len(), 12);
|
||||
write_name(&mut p, &["odin", "adobe", "com"]);
|
||||
p.extend_from_slice(&[0x00, 0x41, 0x00, 0x01]);
|
||||
|
||||
p.extend_from_slice(&[0xC0, 0x0C]);
|
||||
p.extend_from_slice(&[0x00, 0x05, 0x00, 0x01, 0x00, 0x00, 0x23, 0x7F]);
|
||||
let rdlen_pos_1 = p.len();
|
||||
p.extend_from_slice(&[0x00, 0x00]);
|
||||
let cname1_start = p.len();
|
||||
write_name(&mut p, &["cdn", "adobeaemcloud", "com"]);
|
||||
let rdlen_1 = (p.len() - cname1_start) as u16;
|
||||
p[rdlen_pos_1..rdlen_pos_1 + 2].copy_from_slice(&rdlen_1.to_be_bytes());
|
||||
|
||||
p.extend_from_slice(&(COMPRESSION_FLAG | cname1_start as u16).to_be_bytes());
|
||||
p.extend_from_slice(&[0x00, 0x05, 0x00, 0x01, 0x00, 0x00, 0x23, 0x7F]);
|
||||
let rdlen_pos_2 = p.len();
|
||||
p.extend_from_slice(&[0x00, 0x00]);
|
||||
let cname2_start = p.len();
|
||||
p.push(9);
|
||||
p.extend_from_slice(b"adobe-aem");
|
||||
let map_label_off = p.len();
|
||||
p.push(3);
|
||||
p.extend_from_slice(b"map");
|
||||
let fastly_label_off = p.len();
|
||||
p.push(6);
|
||||
p.extend_from_slice(b"fastly");
|
||||
p.push(3);
|
||||
p.extend_from_slice(b"net");
|
||||
p.push(0);
|
||||
let rdlen_2 = (p.len() - cname2_start) as u16;
|
||||
p[rdlen_pos_2..rdlen_pos_2 + 2].copy_from_slice(&rdlen_2.to_be_bytes());
|
||||
|
||||
p.extend_from_slice(&(COMPRESSION_FLAG | fastly_label_off as u16).to_be_bytes());
|
||||
p.extend_from_slice(&[0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x07, 0x08]);
|
||||
let rdlen_pos_soa = p.len();
|
||||
p.extend_from_slice(&[0x00, 0x00]);
|
||||
let soa_rdata_start = p.len();
|
||||
p.extend_from_slice(&(COMPRESSION_FLAG | map_label_off as u16).to_be_bytes());
|
||||
p.extend_from_slice(&(COMPRESSION_FLAG | fastly_label_off as u16).to_be_bytes());
|
||||
p.extend_from_slice(&1u32.to_be_bytes());
|
||||
p.extend_from_slice(&7200u32.to_be_bytes());
|
||||
p.extend_from_slice(&3600u32.to_be_bytes());
|
||||
p.extend_from_slice(&1209600u32.to_be_bytes());
|
||||
p.extend_from_slice(&1800u32.to_be_bytes());
|
||||
let rdlen_soa = (p.len() - soa_rdata_start) as u16;
|
||||
p[rdlen_pos_soa..rdlen_pos_soa + 2].copy_from_slice(&rdlen_soa.to_be_bytes());
|
||||
|
||||
p
|
||||
}
|
||||
|
||||
/// Append `labels` to `p` as an uncompressed DNS name: each label is written
/// as a one-byte length followed by its bytes, and the name is terminated by
/// a zero byte. An empty `labels` slice encodes the root name (a lone `0`).
///
/// # Panics
///
/// Panics if any label is empty or longer than 63 bytes. A zero length octet
/// would terminate the name early, and any value above 63 either sets the two
/// high bits reserved for compression pointers or wraps when cast to `u8`,
/// so such input would silently yield a malformed fixture.
fn write_name(p: &mut Vec<u8>, labels: &[&str]) {
    // Maximum label length permitted by RFC 1035 §2.3.4.
    const MAX_LABEL_LEN: usize = 63;

    for label in labels {
        let len = label.len();
        assert!(
            (1..=MAX_LABEL_LEN).contains(&len),
            "DNS label must be 1..=63 bytes, got {len} bytes: {label:?}"
        );
        // Cannot truncate: `len` was just checked to be at most 63.
        p.push(len as u8);
        p.extend_from_slice(label.as_bytes());
    }
    p.push(0);
}
|
||||
|
||||
/// Parse the hand-built upstream packet with Numa, re-serialize it, and check
/// with an independent parser (hickory) that the SOA in the authority section
/// still carries the same MNAME/RNAME and counters.
#[test]
fn compressed_soa_survives_numa_round_trip() {
    let wire_in = upstream_packet();

    // Fixture sanity check: the independent parser must already see the
    // expected SOA names in the input, otherwise the test proves nothing.
    let reference = hickory_proto::op::Message::from_vec(&wire_in)
        .expect("hand-crafted upstream must be valid");
    let fixture_soa = reference.name_servers()[0]
        .data()
        .clone()
        .into_soa()
        .expect("SOA rdata");
    assert_eq!(fixture_soa.mname().to_string(), "map.fastly.net.");
    assert_eq!(fixture_soa.rname().to_string(), "fastly.net.");

    // Read side: Numa parses the packet into its own record model.
    let mut reader = BytePacketBuffer::from_bytes(&wire_in);
    let parsed = DnsPacket::from_buffer(&mut reader).expect("numa parses upstream");
    assert_eq!(parsed.answers.len(), 2);
    assert_eq!(parsed.authorities.len(), 1);

    // Write side: Numa emits the packet into a fresh buffer, so any pointer
    // copied verbatim from the input would now reference the wrong bytes.
    let mut writer = BytePacketBuffer::new();
    parsed.write(&mut writer).expect("numa writes");
    let wire_out = writer.filled().to_vec();

    let reparsed = hickory_proto::op::Message::from_vec(&wire_out)
        .expect("numa re-emission must parse strictly");
    let emitted_soa = reparsed.name_servers()[0]
        .data()
        .clone()
        .into_soa()
        .expect("SOA rdata on output");

    assert_eq!(emitted_soa.mname().to_string(), "map.fastly.net.");
    assert_eq!(emitted_soa.rname().to_string(), "fastly.net.");
    assert_eq!(emitted_soa.serial(), 1);
    assert_eq!(emitted_soa.refresh(), 7200);
    assert_eq!(emitted_soa.retry(), 3600);
    assert_eq!(emitted_soa.expire(), 1209600);
    assert_eq!(emitted_soa.minimum(), 1800);
}
|
||||
Reference in New Issue
Block a user