From 241c40553b76bd7f5b5a7cdbcfc4005803803797 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 12:34:04 +0300 Subject: [PATCH 1/5] feat(odoh): ship ODoH client + self-hosted relay (RFC 9230) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Client (mode = "odoh"): URL-query target routing per RFC 9230 §5, /.well-known/odohconfigs TTL cache with 60s backoff on failure, HPKE seal/open via odoh-rs, strict-mode default that SERVFAILs on relay failure instead of silently downgrading. Host-equality config validation rejects same-operator relay/target pairs. Relay (`numa relay [PORT]`): axum server with /relay + /health. SSRF-hardened hostname validator (RFC 1035 ASCII + dot + dash), 4 KiB body cap at the axum layer, 5s full-transaction timeout, and static 502 on target failure (reqwest internals logged, not leaked). Aggregate counters only — no per-request logs. Observability: new `UpstreamTransport { Udp, Doh, Dot, Odoh }` orthogonal to `QueryPath`, so /stats can tally wire protocols symmetrically. Recursive mode records `Some(Udp)` for honest "bytes egressing in cleartext" accounting. Tests: Suite 8 exercises the client end-to-end via Frank Denis's public relay + Cloudflare target; Suite 9 exercises `numa relay` forwarding + guards against Cloudflare as the real far end. Full probe script at tests/probe-odoh-ecosystem.sh verifies the entire public ODoH ecosystem (4 targets + 1 relay per DNSCrypt's curated list — confirms deploying Numa's relay doubles global supply). 
--- Cargo.lock | 374 +++++++++++++++++++++++++- Cargo.toml | 4 + src/api.rs | 15 ++ src/config.rs | 177 +++++++++++- src/ctx.rs | 14 +- src/forward.rs | 119 +++++++-- src/lib.rs | 2 + src/main.rs | 17 ++ src/odoh.rs | 489 ++++++++++++++++++++++++++++++++++ src/relay.rs | 347 ++++++++++++++++++++++++ src/serve.rs | 39 +-- src/stats.rs | 62 ++++- tests/integration.sh | 197 ++++++++++++++ tests/probe-odoh-ecosystem.sh | 101 +++++++ 14 files changed, 1911 insertions(+), 46 deletions(-) create mode 100644 src/odoh.rs create mode 100644 src/relay.rs create mode 100755 tests/probe-odoh-ecosystem.sh diff --git a/Cargo.lock b/Cargo.lock index cf25b3a..2bfeaa6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,41 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array", +] + +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "aes-gcm" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "ghash", + "subtle", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -109,7 +144,7 @@ dependencies = [ "nom", "num-traits", "rusticata-macros", - "thiserror", + "thiserror 2.0.18", "time", ] @@ -257,6 +292,15 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -299,6 +343,30 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chacha20" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "chacha20poly1305" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10cd79432192d1c0f4e1a0fef9527696cc039165d729fb41b3f4f4f354c2dc35" +dependencies = [ + "aead", + "chacha20", + "cipher", + "poly1305", + "zeroize", +] + [[package]] name = "ciborium" version = "0.2.2" @@ -326,6 +394,17 @@ dependencies = [ "half", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", + "zeroize", +] + [[package]] name = "clap" version = "4.6.0" @@ -383,6 +462,15 @@ version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -473,6 +561,51 @@ version = "0.2.4" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "rand_core 0.6.4", + "typenum", +] + +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies = [ + "cipher", +] + +[[package]] +name = "curve25519-dalek" +version = "4.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be" +dependencies = [ + "cfg-if", + "cpufeatures", + "curve25519-dalek-derive", + "fiat-crypto", + "rustc_version", + "subtle", +] + +[[package]] +name = "curve25519-dalek-derive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "data-encoding" version = "2.10.0" @@ -502,6 +635,17 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -576,6 +720,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "fiat-crypto" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ 
-707,6 +857,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -747,6 +907,16 @@ dependencies = [ "wasip3", ] +[[package]] +name = "ghash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" +dependencies = [ + "opaque-debug", + "polyval", +] + [[package]] name = "h2" version = "0.4.13" @@ -820,7 +990,7 @@ dependencies = [ "rand", "ring", "rustls", - "thiserror", + "thiserror 2.0.18", "tinyvec", "tokio", "tokio-rustls", @@ -846,13 +1016,51 @@ dependencies = [ "resolv-conf", "rustls", "smallvec", - "thiserror", + "thiserror 2.0.18", "tokio", "tokio-rustls", "tracing", "webpki-roots 0.26.11", ] +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "hpke" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f65d16b699dd1a1fa2d851c970b0c971b388eeeb40f744252b8de48860980c8f" +dependencies = [ + "aead", + "aes-gcm", + "chacha20poly1305", + "digest", + "generic-array", + "hkdf", + "hmac", + "rand_core 0.9.5", + "sha2", + "subtle", + "x25519-dalek", + "zeroize", +] + [[package]] name = "http" version = "1.4.0" @@ -1081,6 +1289,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "inout" +version = "0.1.4" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "generic-array", +] + [[package]] name = "ipconfig" version = "0.3.4" @@ -1344,7 +1561,9 @@ dependencies = [ "hyper", "hyper-util", "log", + "odoh-rs", "qrcode", + "rand_core 0.9.5", "rcgen", "reqwest", "ring", @@ -1363,6 +1582,19 @@ dependencies = [ "x509-parser", ] +[[package]] +name = "odoh-rs" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbb89720b7dfdddc89bc7560669d41a0bb68eb64784a4aebd293308a489f3837" +dependencies = [ + "aes-gcm", + "bytes", + "hkdf", + "hpke", + "thiserror 1.0.69", +] + [[package]] name = "oid-registry" version = "0.8.1" @@ -1394,6 +1626,12 @@ version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + [[package]] name = "page_size" version = "0.6.0" @@ -1483,6 +1721,29 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "poly1305" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf" +dependencies = [ + "cpufeatures", + "opaque-debug", + "universal-hash", +] + +[[package]] +name = "polyval" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" +dependencies = [ + "cfg-if", + "cpufeatures", + "opaque-debug", + "universal-hash", +] + [[package]] name = "portable-atomic" version = "1.13.1" @@ -1561,7 +1822,7 @@ dependencies = [ "rustc-hash", "rustls", "socket2", - "thiserror", + "thiserror 2.0.18", "tokio", 
"tracing", "web-time", @@ -1582,7 +1843,7 @@ dependencies = [ "rustls", "rustls-pki-types", "slab", - "thiserror", + "thiserror 2.0.18", "tinyvec", "tracing", "web-time", @@ -1630,7 +1891,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha", - "rand_core", + "rand_core 0.9.5", ] [[package]] @@ -1640,7 +1901,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", ] [[package]] @@ -1789,6 +2059,15 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rusticata-macros" version = "4.1.0" @@ -1953,6 +2232,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -2046,13 +2336,33 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" +[[package]] +name = "thiserror" +version = "1.0.69" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + [[package]] name = "thiserror" version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -2298,6 +2608,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "typenum" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" + [[package]] name = "unicode-ident" version = "1.0.24" @@ -2310,6 +2626,16 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + [[package]] name = "untrusted" version = "0.9.0" @@ -2351,6 +2677,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "walkdir" version = "2.5.0" @@ -2860,6 +3192,16 @@ version = "0.6.2" source 
= "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "x25519-dalek" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7e468321c81fb07fa7f4c636c3972b9100f0346e5b6a9f2bd0603a52f7ed277" +dependencies = [ + "curve25519-dalek", + "rand_core 0.6.4", +] + [[package]] name = "x509-parser" version = "0.18.1" @@ -2874,7 +3216,7 @@ dependencies = [ "oid-registry", "ring", "rusticata-macros", - "thiserror", + "thiserror 2.0.18", "time", ] @@ -2956,6 +3298,20 @@ name = "zeroize" version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" +dependencies = [ + "zeroize_derive", +] + +[[package]] +name = "zeroize_derive" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "zerotrie" diff --git a/Cargo.toml b/Cargo.toml index 3b3234f..15601c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,10 @@ rustls = "0.23" tokio-rustls = "0.26" arc-swap = "1" ring = "0.17" +odoh-rs = "1" +# rand_core 0.9 matches the version odoh-rs (via hpke 0.13) depends on, so we +# share one RngCore trait and OsRng impl across the dep tree. 
+rand_core = { version = "0.9", features = ["os_rng"] } rustls-pemfile = "2.2.0" qrcode = { version = "0.14", default-features = false, features = ["svg"] } webpki-roots = "1" diff --git a/src/api.rs b/src/api.rs index dd1fe78..7f02920 100644 --- a/src/api.rs +++ b/src/api.rs @@ -170,6 +170,7 @@ struct StatsResponse { srtt: bool, queries: QueriesStats, transport: TransportStats, + upstream_transport: UpstreamTransportStats, cache: CacheStats, overrides: OverrideStats, blocking: BlockingStatsResponse, @@ -186,6 +187,14 @@ struct TransportStats { doh: u64, } +#[derive(Serialize)] +struct UpstreamTransportStats { + udp: u64, + doh: u64, + dot: u64, + odoh: u64, +} + #[derive(Serialize)] struct MobileStatsResponse { enabled: bool, @@ -566,6 +575,12 @@ async fn stats(State(ctx): State>) -> Json { dot: snap.transport_dot, doh: snap.transport_doh, }, + upstream_transport: UpstreamTransportStats { + udp: snap.upstream_transport_udp, + doh: snap.upstream_transport_doh, + dot: snap.upstream_transport_dot, + odoh: snap.upstream_transport_odoh, + }, cache: CacheStats { entries: cache_len, max_entries: cache_max, diff --git a/src/config.rs b/src/config.rs index 309344b..2d2f1ba 100644 --- a/src/config.rs +++ b/src/config.rs @@ -134,6 +134,7 @@ pub enum UpstreamMode { #[default] Forward, Recursive, + Odoh, } impl UpstreamMode { @@ -142,6 +143,7 @@ impl UpstreamMode { UpstreamMode::Auto => "auto", UpstreamMode::Forward => "forward", UpstreamMode::Recursive => "recursive", + UpstreamMode::Odoh => "odoh", } } } @@ -154,7 +156,7 @@ pub struct UpstreamConfig { pub address: Vec, #[serde(default = "default_upstream_port")] pub port: u16, - #[serde(default)] + #[serde(default, deserialize_with = "string_or_vec")] pub fallback: Vec, #[serde(default = "default_timeout_ms")] pub timeout_ms: u64, @@ -166,6 +168,20 @@ pub struct UpstreamConfig { pub prime_tlds: Vec, #[serde(default = "default_srtt")] pub srtt: bool, + + /// Only used when `mode = "odoh"`. 
Full https:// URL of the relay + /// endpoint (including path, e.g. `https://odoh-relay.numa.rs/relay`). + #[serde(default)] + pub relay: Option, + /// Only used when `mode = "odoh"`. Full https:// URL of the target + /// resolver (`https://odoh.cloudflare-dns.com/dns-query`). + #[serde(default)] + pub target: Option, + /// Only used when `mode = "odoh"`. When true (the default), relay failure + /// returns SERVFAIL instead of downgrading to the `fallback` upstream — + /// a user who configured ODoH rarely wants a silent non-oblivious path. + #[serde(default)] + pub strict: Option, } impl Default for UpstreamConfig { @@ -180,10 +196,75 @@ impl Default for UpstreamConfig { root_hints: default_root_hints(), prime_tlds: default_prime_tlds(), srtt: default_srtt(), + relay: None, + target: None, + strict: None, } } } +/// Parsed ODoH config fields. `mode = "odoh"` requires both URLs to be +/// present, to parse as `https://`, and to have distinct hostnames. +#[derive(Debug)] +pub struct OdohUpstream { + pub relay_url: String, + pub target_host: String, + pub target_path: String, + pub strict: bool, +} + +impl UpstreamConfig { + /// Validate and extract ODoH-specific fields. Called during `load_config` + /// so misconfigured ODoH fails fast at startup, the same care we take + /// with the DNSSEC strict boot check. 
+ pub fn odoh_upstream(&self) -> Result { + let relay = self + .relay + .as_deref() + .ok_or("mode = \"odoh\" requires upstream.relay")?; + let target = self + .target + .as_deref() + .ok_or("mode = \"odoh\" requires upstream.target")?; + + let relay_url = reqwest::Url::parse(relay) + .map_err(|e| format!("upstream.relay invalid URL '{}': {}", relay, e))?; + let target_url = reqwest::Url::parse(target) + .map_err(|e| format!("upstream.target invalid URL '{}': {}", target, e))?; + + if relay_url.scheme() != "https" || target_url.scheme() != "https" { + return Err("upstream.relay and upstream.target must both use https://".into()); + } + if relay_url.host_str().is_none() || target_url.host_str().is_none() { + return Err("upstream.relay and upstream.target must include a host".into()); + } + if relay_url.host_str() == target_url.host_str() { + return Err(format!( + "upstream.relay and upstream.target resolve to the same host ({}); the privacy property requires distinct operators", + relay_url.host_str().unwrap_or("?") + ) + .into()); + } + + let target_host = target_url + .host_str() + .ok_or("upstream.target has no host")? 
+ .to_string(); + let target_path = if target_url.path().is_empty() { + "/".to_string() + } else { + target_url.path().to_string() + }; + + Ok(OdohUpstream { + relay_url: relay.to_string(), + target_host, + target_path, + strict: self.strict.unwrap_or(true), + }) + } +} + fn string_or_vec<'de, D>(deserializer: D) -> std::result::Result, D::Error> where D: serde::Deserializer<'de>, @@ -643,12 +724,22 @@ mod tests { } #[test] - fn fallback_parses() { + fn fallback_array_parses() { let config: Config = toml::from_str("[upstream]\nfallback = [\"8.8.8.8\", \"1.1.1.1\"]").unwrap(); assert_eq!(config.upstream.fallback, vec!["8.8.8.8", "1.1.1.1"]); } + #[test] + fn fallback_string_parses_as_singleton_vec() { + let config: Config = + toml::from_str("[upstream]\nfallback = \"tls://1.1.1.1#cloudflare-dns.com\"").unwrap(); + assert_eq!( + config.upstream.fallback, + vec!["tls://1.1.1.1#cloudflare-dns.com"] + ); + } + #[test] fn empty_address_gives_empty_vec() { let config: Config = toml::from_str("").unwrap(); @@ -656,6 +747,88 @@ mod tests { assert!(config.upstream.fallback.is_empty()); } + // ── [upstream] mode = "odoh" ──────────────────────────────────────── + + #[test] + fn odoh_config_parses_and_validates() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + assert!(matches!(config.upstream.mode, UpstreamMode::Odoh)); + let odoh = config.upstream.odoh_upstream().unwrap(); + assert_eq!(odoh.relay_url, "https://odoh-relay.numa.rs/relay"); + assert_eq!(odoh.target_host, "odoh.cloudflare-dns.com"); + assert_eq!(odoh.target_path, "/dns-query"); + assert!(odoh.strict, "strict defaults to true under mode=odoh"); + } + + #[test] + fn odoh_strict_false_is_honoured() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +strict = false +"#; + 
let config: Config = toml::from_str(toml).unwrap(); + assert!(!config.upstream.odoh_upstream().unwrap().strict); + } + + #[test] + fn odoh_rejects_same_host_relay_and_target() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh.example.com/relay" +target = "https://odoh.example.com/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let err = config.upstream.odoh_upstream().unwrap_err().to_string(); + assert!(err.contains("same host"), "got: {err}"); + } + + #[test] + fn odoh_rejects_non_https() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "http://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let err = config.upstream.odoh_upstream().unwrap_err().to_string(); + assert!(err.contains("https"), "got: {err}"); + } + + #[test] + fn odoh_missing_relay_rejected() { + let toml = r#" +[upstream] +mode = "odoh" +target = "https://odoh.cloudflare-dns.com/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let err = config.upstream.odoh_upstream().unwrap_err().to_string(); + assert!(err.contains("upstream.relay"), "got: {err}"); + } + + #[test] + fn odoh_missing_target_rejected() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let err = config.upstream.odoh_upstream().unwrap_err().to_string(); + assert!(err.contains("upstream.target"), "got: {err}"); + } + // ── issue #82: [[forwarding]] config section ──────────────────────── #[test] diff --git a/src/ctx.rs b/src/ctx.rs index 71e81c9..511b678 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -105,6 +105,7 @@ pub async fn resolve_query( // Pipeline: overrides -> .localhost -> local zones -> special-use (unless forwarded) // -> .tld proxy -> blocklist -> cache -> forwarding -> recursive/upstream // Each lock is scoped to avoid holding MutexGuard across await points. 
+ let mut upstream_transport: Option = None; let (response, path, dnssec) = { let override_record = ctx.overrides.read().unwrap().lookup(&qname); if let Some(record) = override_record { @@ -208,6 +209,7 @@ pub async fn resolve_query( { // Conditional forwarding takes priority over recursive mode // (e.g. Tailscale .ts.net, VPC private zones) + upstream_transport = pool.preferred().map(|u| u.transport()); match forward_with_failover_raw( raw_wire, pool, @@ -241,6 +243,9 @@ pub async fn resolve_query( } } } else if ctx.upstream_mode == UpstreamMode::Recursive { + // Recursive resolution makes UDP hops to roots/TLDs/auths; + // tag as Udp so the dashboard can aggregate plaintext-wire + // egress honestly. Only mark on success — errors stay None. let key = (qname.clone(), qtype); let (resp, path, err) = resolve_coalesced(&ctx.inflight, key, &query, || { crate::recursive::resolve_recursive( @@ -263,6 +268,8 @@ pub async fn resolve_query( qname, err.as_deref().unwrap_or("leader failed") ); + } else { + upstream_transport = Some(crate::stats::UpstreamTransport::Udp); } (resp, path, DnssecStatus::Indeterminate) } else { @@ -277,7 +284,10 @@ pub async fn resolve_query( .await { Ok(resp_wire) => match cache_and_parse(ctx, &qname, qtype, &resp_wire) { - Ok(resp) => (resp, QueryPath::Upstream, DnssecStatus::Indeterminate), + Ok(resp) => { + upstream_transport = pool.preferred().map(|u| u.transport()); + (resp, QueryPath::Upstream, DnssecStatus::Indeterminate) + } Err(e) => { error!("{} | {:?} {} | PARSE ERROR | {}", src_addr, qtype, qname, e); ( @@ -397,7 +407,7 @@ pub async fn resolve_query( // Record stats and query log { let mut s = ctx.stats.lock().unwrap(); - let total = s.record(path, transport); + let total = s.record(path, transport, upstream_transport); if total.is_multiple_of(1000) { s.log_summary(); } diff --git a/src/forward.rs b/src/forward.rs index 9bfa426..bb91fcf 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -1,14 +1,16 @@ use std::fmt; use 
std::net::{IpAddr, SocketAddr}; -use std::sync::RwLock; +use std::sync::{Arc, RwLock}; use std::time::{Duration, Instant}; use tokio::net::UdpSocket; use tokio::time::timeout; use crate::buffer::BytePacketBuffer; +use crate::odoh::{query_through_relay, OdohConfigCache}; use crate::packet::DnsPacket; use crate::srtt::SrttCache; +use crate::stats::UpstreamTransport; use crate::Result; #[derive(Clone)] @@ -23,16 +25,34 @@ pub enum Upstream { tls_name: Option, connector: tokio_rustls::TlsConnector, }, + /// Oblivious DNS-over-HTTPS (RFC 9230). Queries are HPKE-sealed to the + /// target and forwarded through an independent relay. Target host lives + /// on `target_config` (single source of truth — the cache keys on it). + Odoh { + relay_url: String, + target_path: String, + client: reqwest::Client, + target_config: Arc, + }, } impl Upstream { /// IP address to key SRTT tracking on, if the upstream has a stable one. - /// `Doh` routes through a URL + connection pool, so there's no single IP - /// to track; SRTT is skipped for it. + /// `Doh` and `Odoh` route through a URL + connection pool, so there's no + /// single IP to track; SRTT is skipped for them. pub fn tracked_ip(&self) -> Option { match self { Upstream::Udp(addr) | Upstream::Dot { addr, .. } => Some(addr.ip()), - Upstream::Doh { .. } => None, + Upstream::Doh { .. } | Upstream::Odoh { .. } => None, + } + } + + pub fn transport(&self) -> UpstreamTransport { + match self { + Upstream::Udp(_) => UpstreamTransport::Udp, + Upstream::Doh { .. } => UpstreamTransport::Doh, + Upstream::Dot { .. } => UpstreamTransport::Dot, + Upstream::Odoh { .. } => UpstreamTransport::Odoh, } } } @@ -43,6 +63,20 @@ impl PartialEq for Upstream { (Self::Udp(a), Self::Udp(b)) => a == b, (Self::Doh { url: a, .. }, Self::Doh { url: b, .. }) => a == b, (Self::Dot { addr: a, .. }, Self::Dot { addr: b, .. }) => a == b, + ( + Self::Odoh { + relay_url: ra, + target_path: pa, + target_config: ca, + .. 
+ }, + Self::Odoh { + relay_url: rb, + target_path: pb, + target_config: cb, + .. + }, + ) => ra == rb && pa == pb && ca.target_host() == cb.target_host(), _ => false, } } @@ -63,6 +97,18 @@ impl fmt::Display for Upstream { Some(name) => write!(f, "tls://{}#{}", addr, name), None => write!(f, "tls://{}", addr), }, + Upstream::Odoh { + relay_url, + target_path, + target_config, + .. + } => write!( + f, + "odoh://{}{} via {}", + target_config.target_host(), + target_path, + relay_url + ), } } } @@ -82,22 +128,20 @@ pub(crate) fn parse_upstream_addr( Err(format!("invalid upstream address: {}", s)) } +/// Parse a slice of upstream address strings into `Upstream` values, failing +/// on the first invalid entry. +pub fn parse_upstream_list(addrs: &[String], default_port: u16) -> Result> { + addrs + .iter() + .map(|s| parse_upstream(s, default_port)) + .collect() +} + pub fn parse_upstream(s: &str, default_port: u16) -> Result { if s.starts_with("https://") { - let client = reqwest::Client::builder() - .use_rustls_tls() - .http2_initial_stream_window_size(65_535) - .http2_initial_connection_window_size(65_535) - .http2_keep_alive_interval(Duration::from_secs(15)) - .http2_keep_alive_while_idle(true) - .http2_keep_alive_timeout(Duration::from_secs(10)) - .pool_idle_timeout(Duration::from_secs(300)) - .pool_max_idle_per_host(1) - .build() - .unwrap_or_default(); return Ok(Upstream::Doh { url: s.to_string(), - client, + client: build_https_client(), }); } // tls://IP:PORT#hostname or tls://IP#hostname (default port 853) @@ -118,6 +162,33 @@ pub fn parse_upstream(s: &str, default_port: u16) -> Result { Ok(Upstream::Udp(addr)) } +/// HTTP/2 client tuned for DoH/ODoH: small windows for low latency, long-lived +/// keep-alive. Shared by the DoH upstream and the ODoH config-fetcher + +/// seal/open path. 
Pool defaults to one idle conn per host — good for +/// resolvers that talk to a single upstream; relays that fan out to many +/// targets should use [`build_https_client_with_pool`]. +pub fn build_https_client() -> reqwest::Client { + build_https_client_with_pool(1) +} + +/// Same shape as [`build_https_client`], but caller picks +/// `pool_max_idle_per_host`. Relay workloads hit many distinct target hosts +/// and benefit from a larger pool so warm connections survive concurrent +/// fan-out. +pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::Client { + reqwest::Client::builder() + .use_rustls_tls() + .http2_initial_stream_window_size(65_535) + .http2_initial_connection_window_size(65_535) + .http2_keep_alive_interval(Duration::from_secs(15)) + .http2_keep_alive_while_idle(true) + .http2_keep_alive_timeout(Duration::from_secs(10)) + .pool_idle_timeout(Duration::from_secs(300)) + .pool_max_idle_per_host(pool_max_idle_per_host) + .build() + .unwrap_or_default() +} + fn build_dot_connector() -> Result { let _ = rustls::crypto::ring::default_provider().install_default(); let mut root_store = rustls::RootCertStore::empty(); @@ -282,6 +353,22 @@ pub async fn forward_query_raw( tls_name, connector, } => forward_dot_raw(wire, *addr, tls_name, connector, timeout_duration).await, + Upstream::Odoh { + relay_url, + target_path, + client, + target_config, + } => { + query_through_relay( + wire, + relay_url, + target_path, + client, + target_config, + timeout_duration, + ) + .await + } } } diff --git a/src/lib.rs b/src/lib.rs index bce8833..aec568d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,7 @@ pub mod health; pub mod lan; pub mod mobile_api; pub mod mobileconfig; +pub mod odoh; pub mod override_store; pub mod packet; pub mod proxy; @@ -20,6 +21,7 @@ pub mod query_log; pub mod question; pub mod record; pub mod recursive; +pub mod relay; pub mod serve; pub mod service_store; pub mod setup_phone; diff --git a/src/main.rs b/src/main.rs 
index 34bf747..e077a2f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -60,6 +60,22 @@ fn main() -> numa::Result<()> { .block_on(numa::setup_phone::run()) .map_err(|e| e.into()); } + "relay" => { + let port: u16 = std::env::args() + .nth(2) + .as_deref() + .and_then(|s| s.parse().ok()) + .unwrap_or(8443); + let addr: std::net::SocketAddr = ([127, 0, 0, 1], port).into(); + eprintln!( + "\x1b[1;38;2;192;98;58mNuma\x1b[0m — ODoH relay on {}\n", + addr + ); + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?; + return runtime.block_on(numa::relay::run(addr)); + } "lan" => { let sub = std::env::args().nth(2).unwrap_or_default(); let config_path = std::env::args() @@ -91,6 +107,7 @@ fn main() -> numa::Result<()> { eprintln!(" service status Check if the service is running"); eprintln!(" lan on Enable LAN service discovery (mDNS)"); eprintln!(" lan off Disable LAN service discovery"); + eprintln!(" relay [PORT] Run as an ODoH relay (RFC 9230, default port 8443)"); eprintln!(" setup-phone Generate a QR code to install Numa DoT on a phone"); eprintln!(" help Show this help"); eprintln!(); diff --git a/src/odoh.rs b/src/odoh.rs new file mode 100644 index 0000000..2cfa9c5 --- /dev/null +++ b/src/odoh.rs @@ -0,0 +1,489 @@ +//! ODoH target-config fetcher and TTL cache (RFC 9230 §6). +//! +//! ## Ciphersuite policy +//! `odoh-rs` deserialization rejects any config whose KEM/KDF/AEAD triple is +//! not the mandatory `(X25519, HKDF-SHA256, AES-128-GCM)` (see +//! `ObliviousDoHConfigContents::deserialize`). This is stricter than the +//! plan's "pick the mandatory suite if mixed": a response containing *any* +//! non-mandatory config fails parse entirely. Real-world targets publish a +//! single mandatory config, so this is fine in practice; revisit if a target +//! that matters starts mixing suites. 
+ +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use arc_swap::ArcSwapOption; +use odoh_rs::{ + ObliviousDoHConfigContents, ObliviousDoHConfigs, ObliviousDoHMessage, + ObliviousDoHMessagePlaintext, +}; +use rand_core::{OsRng, TryRngCore}; +use reqwest::header::HeaderMap; +use tokio::sync::Mutex; +use tokio::time::timeout; + +use crate::Result; + +/// MIME type used for both directions of the ODoH exchange (RFC 9230 §4). +const ODOH_CONTENT_TYPE: &str = "application/oblivious-dns-message"; + +/// Cap on the response body we read into memory when the relay returns +/// non-success. Protects against a hostile relay streaming a huge body on +/// the error path; keeps enough room to carry a human-readable reason. +const ERROR_BODY_PREVIEW_BYTES: usize = 1024; + +/// Fallback TTL when the target's response lacks a usable `Cache-Control` +/// directive. RFC 9230 §6.2 places no hard floor; 24 h matches what Cloudflare +/// publishes in practice. +const DEFAULT_CONFIG_TTL: Duration = Duration::from_secs(24 * 60 * 60); + +/// Cap on any TTL we'll honour, regardless of what the target advertises. +/// Keeps a misconfigured server from pinning an old key indefinitely. +const MAX_CONFIG_TTL: Duration = Duration::from_secs(7 * 24 * 60 * 60); + +/// After a failed `/.well-known/odohconfigs` fetch, refuse to refetch again +/// within this window — a target that is genuinely broken would otherwise +/// receive one request per query. Queries that arrive during the backoff +/// return the cached error immediately. +const REFRESH_BACKOFF: Duration = Duration::from_secs(60); + +/// Parsed ODoH target config plus the freshness metadata needed to age it out. 
+#[derive(Debug)] +pub struct OdohTargetConfig { + pub contents: ObliviousDoHConfigContents, + pub key_id: Vec, + expires_at: Instant, +} + +impl OdohTargetConfig { + pub fn is_expired(&self) -> bool { + Instant::now() >= self.expires_at + } +} + +struct FailedRefresh { + at: Instant, + err: String, +} + +/// TTL-gated cache of a single target's HPKE config. +/// +/// Reads go through `ArcSwapOption` (lock-free hot path). Refreshes serialize +/// on an async mutex so a burst of simultaneous misses produces a single +/// outbound fetch, and a failed refresh blocks subsequent refetches for +/// [`REFRESH_BACKOFF`] to prevent hot-looping against a broken target. +pub struct OdohConfigCache { + target_host: String, + configs_url: String, + client: reqwest::Client, + current: ArcSwapOption, + last_failure: ArcSwapOption, + refresh_lock: Mutex<()>, +} + +impl OdohConfigCache { + pub fn new(target_host: String, client: reqwest::Client) -> Self { + let configs_url = format!("https://{}/.well-known/odohconfigs", target_host); + Self { + target_host, + configs_url, + client, + current: ArcSwapOption::from(None), + last_failure: ArcSwapOption::from(None), + refresh_lock: Mutex::new(()), + } + } + + pub fn target_host(&self) -> &str { + &self.target_host + } + + /// Return a valid config, refetching when the cache is cold or expired. + /// Within [`REFRESH_BACKOFF`] of a failed refresh, returns the cached + /// error without issuing another fetch. + pub async fn get(&self) -> Result> { + if let Some(cfg) = self.current.load_full() { + if !cfg.is_expired() { + return Ok(cfg); + } + } + + if let Some(err) = self.backoff_error() { + return Err(err); + } + + let _guard = self.refresh_lock.lock().await; + + // Another task may have refreshed or failed while we waited. 
+ if let Some(cfg) = self.current.load_full() { + if !cfg.is_expired() { + return Ok(cfg); + } + } + if let Some(err) = self.backoff_error() { + return Err(err); + } + + match fetch_odoh_config(&self.client, &self.configs_url).await { + Ok(fresh) => { + let fresh = Arc::new(fresh); + self.current.store(Some(fresh.clone())); + self.last_failure.store(None); + Ok(fresh) + } + Err(e) => { + let msg = format!("ODoH config fetch failed: {e}"); + self.last_failure.store(Some(Arc::new(FailedRefresh { + at: Instant::now(), + err: msg.clone(), + }))); + Err(msg.into()) + } + } + } + + /// Drop the cached config. Called after the target rejects ciphertext + /// (key rotation race) so the next `get()` refetches. + pub fn invalidate(&self) { + self.current.store(None); + } + + fn backoff_error(&self) -> Option { + let fail = self.last_failure.load_full()?; + if fail.at.elapsed() < REFRESH_BACKOFF { + Some(format!("{} (backoff active)", fail.err).into()) + } else { + None + } + } +} + +/// Fetch `/.well-known/odohconfigs` from `configs_url` and parse it into an +/// [`OdohTargetConfig`]. The TTL is taken from the response's +/// `Cache-Control: max-age=`, clamped to [`DEFAULT_CONFIG_TTL`, +/// [`MAX_CONFIG_TTL`]] when absent or obviously wrong. 
+pub async fn fetch_odoh_config( + client: &reqwest::Client, + configs_url: &str, +) -> Result { + let resp = client.get(configs_url).send().await?.error_for_status()?; + let ttl = cache_control_ttl(resp.headers()).unwrap_or(DEFAULT_CONFIG_TTL); + let body = resp.bytes().await?; + parse_odoh_config(&body, ttl) +} + +fn parse_odoh_config(body: &[u8], ttl: Duration) -> Result { + let mut buf = body; + let configs: ObliviousDoHConfigs = odoh_rs::parse(&mut buf) + .map_err(|e| format!("failed to parse ObliviousDoHConfigs: {e}"))?; + let first = configs + .into_iter() + .next() + .ok_or("target published no ODoH configs with a supported version + ciphersuite")?; + let contents: ObliviousDoHConfigContents = first.into(); + let key_id = contents + .identifier() + .map_err(|e| format!("failed to derive key_id from ODoH config: {e}"))?; + Ok(OdohTargetConfig { + contents, + key_id, + expires_at: Instant::now() + ttl.min(MAX_CONFIG_TTL), + }) +} + +/// Send a DNS wire query through an ODoH relay to a target and return the +/// plaintext DNS wire response. +/// +/// Flow: fetch the target's HPKE config (cached), seal the query, POST to the +/// relay with `Targethost`/`Targetpath` headers, then unseal the response. +/// On seal/unseal failure we invalidate the cache and retry once — this +/// handles the benign race where the target rotated its key between our +/// cached config and the POST. 
+pub async fn query_through_relay( + wire: &[u8], + relay_url: &str, + target_path: &str, + client: &reqwest::Client, + cache: &OdohConfigCache, + timeout_duration: Duration, +) -> Result> { + let req = OdohRequest { + wire, + relay_url, + target_path, + client, + cache, + timeout: timeout_duration, + }; + match attempt_query(&req).await { + Ok(v) => Ok(v), + Err(AttemptError::KeyRotation(_)) => { + cache.invalidate(); + attempt_query(&req).await.map_err(AttemptError::into_error) + } + Err(e) => Err(e.into_error()), + } +} + +struct OdohRequest<'a> { + wire: &'a [u8], + relay_url: &'a str, + target_path: &'a str, + client: &'a reqwest::Client, + cache: &'a OdohConfigCache, + timeout: Duration, +} + +/// Classification used only by the retry path in [`query_through_relay`]. +enum AttemptError { + /// Target signalled the config we used is stale (key rotation race). + /// Callers should invalidate the cache and retry exactly once. + KeyRotation(String), + /// Any other failure — transport, timeout, malformed response. + Other(crate::Error), +} + +impl AttemptError { + fn into_error(self) -> crate::Error { + match self { + AttemptError::KeyRotation(m) => format!("ODoH key rotation race: {m}").into(), + AttemptError::Other(e) => e, + } + } +} + +async fn attempt_query(req: &OdohRequest<'_>) -> std::result::Result, AttemptError> { + let cfg = req.cache.get().await.map_err(AttemptError::Other)?; + + let plaintext = ObliviousDoHMessagePlaintext::new(req.wire, 0); + // rand_core 0.9's OsRng is fallible-only; wrap for the infallible bound. + let mut os = OsRng; + let mut rng = os.unwrap_mut(); + let (encrypted_query, client_secret) = + odoh_rs::encrypt_query(&plaintext, &cfg.contents, &mut rng) + .map_err(|e| AttemptError::Other(format!("ODoH encrypt failed: {e}").into()))?; + let body = odoh_rs::compose(&encrypted_query) + .map_err(|e| AttemptError::Other(format!("ODoH compose failed: {e}").into()))? 
+ .freeze(); + + // RFC 9230 §5 and the reference client use URL query parameters, not + // HTTP headers, to carry the target routing. `Targethost`/`Targetpath` + // headers cause relays to treat the request as an unspecified-target and + // reject it. + let (status, resp_body) = timeout(req.timeout, async { + let resp = req + .client + .post(req.relay_url) + .header(reqwest::header::CONTENT_TYPE, ODOH_CONTENT_TYPE) + .header(reqwest::header::ACCEPT, ODOH_CONTENT_TYPE) + .header(reqwest::header::CACHE_CONTROL, "no-cache, no-store") + .query(&[ + ("targethost", req.cache.target_host()), + ("targetpath", req.target_path), + ]) + .body(body) + .send() + .await?; + let status = resp.status(); + let body = resp.bytes().await?; + Ok::<_, reqwest::Error>((status, body)) + }) + .await + .map_err(|_| AttemptError::Other("ODoH relay request timed out".into()))? + .map_err(|e| AttemptError::Other(format!("ODoH relay request failed: {e}").into()))?; + + // RFC 9230 §4.3 expects a target that can't decrypt to reply with a DNS + // error in a sealed 200 response; a 401 from the relay/target is the + // practical signal that our cached HPKE key is stale. Treat 400 as a + // client-side bug (malformed ODoH envelope) — retrying would loop-fail. 
+ if !status.is_success() { + let preview_len = resp_body.len().min(ERROR_BODY_PREVIEW_BYTES); + let body_preview = String::from_utf8_lossy(&resp_body[..preview_len]); + let msg = format!("ODoH relay returned {status}: {}", body_preview.trim()); + return Err(if status.as_u16() == 401 { + AttemptError::KeyRotation(msg) + } else { + AttemptError::Other(msg.into()) + }); + } + + let mut buf = resp_body; + let encrypted_response: ObliviousDoHMessage = odoh_rs::parse(&mut buf) + .map_err(|e| AttemptError::Other(format!("ODoH response parse failed: {e}").into()))?; + let plaintext_response = + odoh_rs::decrypt_response(&plaintext, &encrypted_response, client_secret) + .map_err(|e| AttemptError::KeyRotation(format!("ODoH decrypt failed: {e}")))?; + + Ok(plaintext_response.into_msg().to_vec()) +} + +fn cache_control_ttl(headers: &HeaderMap) -> Option { + let cc = headers.get(reqwest::header::CACHE_CONTROL)?.to_str().ok()?; + for directive in cc.split(',') { + let directive = directive.trim(); + if let Some(rest) = directive.strip_prefix("max-age=") { + if let Ok(secs) = rest.trim().parse::() { + if secs > 0 { + return Some(Duration::from_secs(secs)); + } + } + } + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + use odoh_rs::{ObliviousDoHConfig, ObliviousDoHKeyPair}; + + // RFC 9180 HPKE IDs for the sole ODoH mandatory suite: + // KEM = X25519, KDF = HKDF-SHA256, AEAD = AES-128-GCM. 
+ const KEM_X25519: u16 = 0x0020; + const KDF_SHA256: u16 = 0x0001; + const AEAD_AES128GCM: u16 = 0x0001; + + fn synth_configs_bytes() -> Vec { + let kp = ObliviousDoHKeyPair::from_parameters( + KEM_X25519, + KDF_SHA256, + AEAD_AES128GCM, + &[0u8; 32], + ); + let pk = kp.public().clone(); + let configs: ObliviousDoHConfigs = vec![ObliviousDoHConfig::from(pk)].into(); + odoh_rs::compose(&configs).unwrap().to_vec() + } + + #[test] + fn parse_accepts_well_formed_config() { + let bytes = synth_configs_bytes(); + let cfg = parse_odoh_config(&bytes, Duration::from_secs(3600)).unwrap(); + assert!(!cfg.key_id.is_empty()); + assert!(!cfg.is_expired()); + } + + #[test] + fn parse_rejects_garbage() { + let bytes = [0xffu8; 16]; + assert!(parse_odoh_config(&bytes, Duration::from_secs(3600)).is_err()); + } + + #[test] + fn parse_rejects_empty() { + assert!(parse_odoh_config(&[], Duration::from_secs(3600)).is_err()); + } + + #[test] + fn ttl_capped_at_max() { + let bytes = synth_configs_bytes(); + let cfg = parse_odoh_config(&bytes, Duration::from_secs(100 * 24 * 60 * 60)).unwrap(); + let remaining = cfg.expires_at.saturating_duration_since(Instant::now()); + assert!(remaining <= MAX_CONFIG_TTL); + assert!(remaining >= MAX_CONFIG_TTL - Duration::from_secs(1)); + } + + #[test] + fn cache_control_parses_max_age() { + let mut h = HeaderMap::new(); + h.insert("cache-control", "public, max-age=86400".parse().unwrap()); + assert_eq!(cache_control_ttl(&h), Some(Duration::from_secs(86400))); + } + + #[test] + fn cache_control_ignores_max_age_zero() { + let mut h = HeaderMap::new(); + h.insert("cache-control", "max-age=0, no-store".parse().unwrap()); + assert_eq!(cache_control_ttl(&h), None); + } + + #[test] + fn cache_control_missing_falls_back() { + let h = HeaderMap::new(); + assert_eq!(cache_control_ttl(&h), None); + } + + #[test] + fn is_expired_tracks_ttl() { + let bytes = synth_configs_bytes(); + let mut cfg = parse_odoh_config(&bytes, Duration::from_secs(3600)).unwrap(); + 
assert!(!cfg.is_expired()); + cfg.expires_at = Instant::now() - Duration::from_secs(1); + assert!(cfg.is_expired()); + } + + #[tokio::test] + async fn cache_backoff_blocks_refetch_after_failure() { + // Point the cache at a host that does not exist so the fetch fails + // deterministically; this exercises the backoff wiring without a + // network round-trip succeeding. + let cache = OdohConfigCache::new( + "odoh-target.invalid".to_string(), + reqwest::Client::builder() + .timeout(Duration::from_millis(200)) + .build() + .unwrap(), + ); + + let first = cache.get().await; + assert!(first.is_err(), "first fetch must fail against invalid host"); + + // Within the backoff window, the cached error is returned immediately. + let second = cache.get().await.unwrap_err().to_string(); + assert!( + second.contains("backoff active"), + "expected backoff hint, got: {second}" + ); + + // Reaching past the backoff window allows a fresh attempt — simulate + // by rewinding the recorded failure timestamp. + cache.last_failure.store(Some(Arc::new(FailedRefresh { + at: Instant::now() - (REFRESH_BACKOFF + Duration::from_secs(1)), + err: "prior".to_string(), + }))); + let third = cache.get().await.unwrap_err().to_string(); + assert!( + !third.contains("backoff active"), + "expected fresh fetch attempt, got: {third}" + ); + } + + /// Round-trip the HPKE seal/unseal path in isolation from HTTP, using the + /// odoh-rs primitives that `query_through_relay` wires together. Guards + /// against silently breaking the crypto glue if we refactor that path. 
+ #[test] + fn seal_unseal_round_trip() { + use odoh_rs::{decrypt_query, encrypt_response, ResponseNonce}; + + let kp = ObliviousDoHKeyPair::from_parameters( + KEM_X25519, + KDF_SHA256, + AEAD_AES128GCM, + &[0u8; 32], + ); + + let query_wire = b"\x12\x34\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x07example\x03com\x00\x00\x01\x00\x01"; + let query_pt = ObliviousDoHMessagePlaintext::new(query_wire, 0); + let mut os = OsRng; + let mut rng = os.unwrap_mut(); + let (query_enc, client_secret) = + odoh_rs::encrypt_query(&query_pt, kp.public(), &mut rng).unwrap(); + + let (query_back, server_secret) = decrypt_query(&query_enc, &kp).unwrap(); + assert_eq!(query_back.into_msg().as_ref(), query_wire); + + let response_wire = b"\x12\x34\x81\x80\x00\x01\x00\x01\x00\x00\x00\x00"; + let response_pt = ObliviousDoHMessagePlaintext::new(response_wire, 0); + let response_enc = encrypt_response( + &query_pt, + &response_pt, + server_secret, + ResponseNonce::default(), + ) + .unwrap(); + + let response_back = + odoh_rs::decrypt_response(&query_pt, &response_enc, client_secret).unwrap(); + assert_eq!(response_back.into_msg().as_ref(), response_wire); + } +} diff --git a/src/relay.rs b/src/relay.rs new file mode 100644 index 0000000..8d6ab40 --- /dev/null +++ b/src/relay.rs @@ -0,0 +1,347 @@ +//! ODoH relay (RFC 9230 §5) — the forward-without-reading half of the +//! protocol. Runs `numa relay`; skips all resolver initialisation (no port +//! 53, no cache, no recursion, no dashboard). The relay never reads the +//! HPKE-sealed payload and keeps no per-request logs — only aggregate +//! counters. 
+ +use std::net::SocketAddr; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; +use std::time::Duration; + +use axum::body::Bytes; +use axum::extract::{DefaultBodyLimit, Query, State}; +use axum::http::{header, StatusCode}; +use axum::response::{IntoResponse, Response}; +use axum::routing::{get, post}; +use axum::Router; +use log::{error, info}; +use serde::Deserialize; +use tokio::net::TcpListener; + +use crate::forward::build_https_client_with_pool; +use crate::Result; + +const ODOH_CONTENT_TYPE: &str = "application/oblivious-dns-message"; + +/// Cap on the opaque body we accept from a client. ODoH envelopes are +/// ~100–300 bytes in practice; anything larger is malformed or hostile. +const MAX_BODY_BYTES: usize = 4 * 1024; + +/// Cap on the body we read back from the target before streaming to client. +/// Slightly larger: target responses carry DNS answers plus HPKE overhead. +const MAX_TARGET_RESPONSE_BYTES: usize = 8 * 1024; + +/// Covers the whole client-to-target round trip — not just `.send()` — so a +/// slow-drip target can't hang a worker indefinitely after headers arrive. +const TARGET_REQUEST_TIMEOUT: Duration = Duration::from_secs(5); + +/// The relay hits many distinct target hosts on behalf of clients. A +/// per-host idle pool of 4 keeps warm TLS connections available for concurrent +/// fan-out without blowing up memory on a small VPS. 
+const RELAY_POOL_PER_HOST: usize = 4; + +#[derive(Deserialize)] +struct RelayParams { + targethost: String, + targetpath: String, +} + +struct RelayState { + client: reqwest::Client, + total_requests: AtomicU64, + forwarded_ok: AtomicU64, + forwarded_err: AtomicU64, + rejected_bad_request: AtomicU64, +} + +pub async fn run(addr: SocketAddr) -> Result<()> { + let state = Arc::new(RelayState { + client: build_https_client_with_pool(RELAY_POOL_PER_HOST), + total_requests: AtomicU64::new(0), + forwarded_ok: AtomicU64::new(0), + forwarded_err: AtomicU64::new(0), + rejected_bad_request: AtomicU64::new(0), + }); + + let app = Router::new() + .route("/relay", post(handle_relay)) + // Overrides axum's default (2 MiB) so hostile clients can't force + // the relay to buffer multi-MB bodies before our own cap check. + .layer(DefaultBodyLimit::max(MAX_BODY_BYTES)) + .route("/health", get(handle_health)) + .with_state(state); + + let listener = TcpListener::bind(addr).await?; + info!("ODoH relay listening on {}", addr); + axum::serve(listener, app).await?; + Ok(()) +} + +async fn handle_health(State(state): State>) -> impl IntoResponse { + let body = format!( + "ok\ntotal {}\nforwarded_ok {}\nforwarded_err {}\nrejected_bad_request {}\n", + state.total_requests.load(Ordering::Relaxed), + state.forwarded_ok.load(Ordering::Relaxed), + state.forwarded_err.load(Ordering::Relaxed), + state.rejected_bad_request.load(Ordering::Relaxed), + ); + ( + StatusCode::OK, + [(header::CONTENT_TYPE, "text/plain; charset=utf-8")], + body, + ) +} + +async fn handle_relay( + State(state): State>, + Query(params): Query, + headers: axum::http::HeaderMap, + body: Bytes, +) -> Response { + state.total_requests.fetch_add(1, Ordering::Relaxed); + + if !content_type_matches(&headers, ODOH_CONTENT_TYPE) { + state.rejected_bad_request.fetch_add(1, Ordering::Relaxed); + return ( + StatusCode::UNSUPPORTED_MEDIA_TYPE, + "expected application/oblivious-dns-message", + ) + .into_response(); + } + + if body.len() 
> MAX_BODY_BYTES { + state.rejected_bad_request.fetch_add(1, Ordering::Relaxed); + return (StatusCode::PAYLOAD_TOO_LARGE, "body exceeds 4 KiB cap").into_response(); + } + + if !is_valid_hostname(¶ms.targethost) || !params.targetpath.starts_with('/') { + state.rejected_bad_request.fetch_add(1, Ordering::Relaxed); + return (StatusCode::BAD_REQUEST, "invalid targethost or targetpath").into_response(); + } + + let target_url = format!("https://{}{}", params.targethost, params.targetpath); + match forward_to_target(&state.client, &target_url, body).await { + Ok((status, resp_body)) => { + state.forwarded_ok.fetch_add(1, Ordering::Relaxed); + ( + status, + [(header::CONTENT_TYPE, ODOH_CONTENT_TYPE)], + resp_body, + ) + .into_response() + } + Err(e) => { + // Log the underlying reason for operators; don't leak reqwest + // internals (which can reveal the target's TLS config, IP, etc.) + // back to arbitrary clients. + error!("relay forward to {} failed: {}", target_url, e); + state.forwarded_err.fetch_add(1, Ordering::Relaxed); + (StatusCode::BAD_GATEWAY, "target unreachable").into_response() + } + } +} + +async fn forward_to_target( + client: &reqwest::Client, + url: &str, + body: Bytes, +) -> Result<(StatusCode, Bytes)> { + let response = tokio::time::timeout(TARGET_REQUEST_TIMEOUT, async { + let resp = client + .post(url) + .header(header::CONTENT_TYPE, ODOH_CONTENT_TYPE) + .header(header::ACCEPT, ODOH_CONTENT_TYPE) + .body(body) + .send() + .await?; + let status = StatusCode::from_u16(resp.status().as_u16())?; + let resp_body = resp.bytes().await?; + Ok::<_, crate::Error>((status, resp_body)) + }) + .await + .map_err(|_| "timed out talking to target")??; + + if response.1.len() > MAX_TARGET_RESPONSE_BYTES { + return Err("target response exceeds cap".into()); + } + Ok(response) +} + +fn content_type_matches(headers: &axum::http::HeaderMap, expected: &str) -> bool { + headers + .get(header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .map(|ct| 
/// Strict DNS-hostname validator for the relay's `targethost` parameter,
/// aimed at closing the SSRF surface a naive `contains('.')` check leaves
/// open (e.g. `example.com@internal.host`, `evil.com/../admin`).
///
/// Accepts `h` only when all of the following hold:
/// - total length is 1..=253 bytes and there are at least two labels;
/// - every dot-separated label is 1..=63 bytes of ASCII letters, digits or
///   `-`, and neither starts nor ends with `-` (so no leading, trailing or
///   doubled dots);
/// - the last label starts with a letter. URL parsers treat a host whose
///   last label is numeric (decimal or `0x` hex) as an IPv4 literal, so this
///   rejects `127.0.0.1`, `169.254.169.254` and `0x7f.0.0.1`, while real
///   top-level domains are unaffected.
///
/// This is a syntactic check only: a well-formed name that *resolves* to a
/// private address still passes.
fn is_valid_hostname(h: &str) -> bool {
    if h.is_empty() || h.len() > 253 || !h.contains('.') {
        return false;
    }

    let label_ok = |label: &str| {
        !label.is_empty()
            && label.len() <= 63
            && !label.starts_with('-')
            && !label.ends_with('-')
            && label
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || b == b'-')
    };
    if !h.split('.').all(label_ok) {
        return false;
    }

    // Refuse anything a URL parser would read as an IPv4 address.
    h.rsplit('.')
        .next()
        .and_then(|last| last.bytes().next())
        .map_or(false, |first| first.is_ascii_alphabetic())
}
vec![0u8; MAX_BODY_BYTES + 1]; + let client = reqwest::Client::new(); + let resp = client + .post(format!( + "http://{}/relay?targethost=odoh.example.com&targetpath=/dns-query", + addr + )) + .header(header::CONTENT_TYPE, ODOH_CONTENT_TYPE) + .body(big) + .send() + .await + .unwrap(); + // axum's DefaultBodyLimit rejects before our handler runs, so the + // counter doesn't increment — but the status code proves the layer + // enforced the cap. Either status is acceptable evidence. + assert!(matches!( + resp.status(), + reqwest::StatusCode::PAYLOAD_TOO_LARGE | reqwest::StatusCode::BAD_REQUEST + )); + } + + #[tokio::test] + async fn rejects_targethost_without_dot() { + let (addr, state) = spawn_relay().await; + let client = reqwest::Client::new(); + let resp = client + .post(format!( + "http://{}/relay?targethost=localhost&targetpath=/dns-query", + addr + )) + .header(header::CONTENT_TYPE, ODOH_CONTENT_TYPE) + .body("body") + .send() + .await + .unwrap(); + assert_eq!(resp.status(), reqwest::StatusCode::BAD_REQUEST); + assert_eq!(state.rejected_bad_request.load(Ordering::Relaxed), 1); + } + + #[tokio::test] + async fn rejects_userinfo_ssrf_attempt() { + let (addr, state) = spawn_relay().await; + let client = reqwest::Client::new(); + // The naive contains('.') check would let this through and reqwest + // would route to `internal.host` using `evil.com` as userinfo. 
+ let resp = client + .post(format!( + "http://{}/relay?targethost=evil.com@internal.host&targetpath=/dns-query", + addr + )) + .header(header::CONTENT_TYPE, ODOH_CONTENT_TYPE) + .body("body") + .send() + .await + .unwrap(); + assert_eq!(resp.status(), reqwest::StatusCode::BAD_REQUEST); + assert_eq!(state.rejected_bad_request.load(Ordering::Relaxed), 1); + } + + #[tokio::test] + async fn rejects_targetpath_without_leading_slash() { + let (addr, state) = spawn_relay().await; + let client = reqwest::Client::new(); + let resp = client + .post(format!( + "http://{}/relay?targethost=odoh.example.com&targetpath=dns-query", + addr + )) + .header(header::CONTENT_TYPE, ODOH_CONTENT_TYPE) + .body("body") + .send() + .await + .unwrap(); + assert_eq!(resp.status(), reqwest::StatusCode::BAD_REQUEST); + assert_eq!(state.rejected_bad_request.load(Ordering::Relaxed), 1); + } + + #[tokio::test] + async fn health_endpoint_reports_counters() { + let (addr, _state) = spawn_relay().await; + let client = reqwest::Client::new(); + let resp = client + .get(format!("http://{}/health", addr)) + .send() + .await + .unwrap(); + assert_eq!(resp.status(), reqwest::StatusCode::OK); + let body = resp.text().await.unwrap(); + assert!(body.contains("ok\n")); + assert!(body.contains("forwarded_ok 0")); + } + + #[test] + fn hostname_validator_accepts_and_rejects() { + assert!(is_valid_hostname("odoh.cloudflare-dns.com")); + assert!(is_valid_hostname("a.b")); + assert!(!is_valid_hostname("")); + assert!(!is_valid_hostname("localhost")); + assert!(!is_valid_hostname(".leading.dot")); + assert!(!is_valid_hostname("trailing.dot.")); + assert!(!is_valid_hostname("-leading.dash")); + assert!(!is_valid_hostname("evil.com@internal.host")); + assert!(!is_valid_hostname("evil.com/../admin")); + assert!(!is_valid_hostname(&"a".repeat(254))); + } +} diff --git a/src/serve.rs b/src/serve.rs index 8e85b32..2037857 100644 --- a/src/serve.rs +++ b/src/serve.rs @@ -17,7 +17,8 @@ use crate::buffer::BytePacketBuffer; 
use crate::cache::DnsCache; use crate::config::{build_zone_map, load_config, ConfigLoad}; use crate::ctx::{handle_query, ServerCtx}; -use crate::forward::{parse_upstream, Upstream, UpstreamPool}; +use crate::forward::{build_https_client, parse_upstream_list, Upstream, UpstreamPool}; +use crate::odoh::OdohConfigCache; use crate::override_store::OverrideStore; use crate::query_log::QueryLog; use crate::service_store::ServiceStore; @@ -54,10 +55,7 @@ pub async fn run(config_path: String) -> crate::Result<()> { (crate::config::UpstreamMode::Recursive, false, pool, label) } else { log::warn!("recursive probe failed — falling back to Quad9 DoH"); - let client = reqwest::Client::builder() - .use_rustls_tls() - .build() - .unwrap_or_default(); + let client = build_https_client(); let url = DOH_FALLBACK.to_string(); let label = url.clone(); let pool = UpstreamPool::new(vec![Upstream::Doh { url, client }], vec![]); @@ -82,16 +80,8 @@ pub async fn run(config_path: String) -> crate::Result<()> { config.upstream.address.clone() }; - let primary: Vec = addrs - .iter() - .map(|s| parse_upstream(s, config.upstream.port)) - .collect::>>()?; - let fallback: Vec = config - .upstream - .fallback - .iter() - .map(|s| parse_upstream(s, config.upstream.port)) - .collect::>>()?; + let primary = parse_upstream_list(&addrs, config.upstream.port)?; + let fallback = parse_upstream_list(&config.upstream.fallback, config.upstream.port)?; let pool = UpstreamPool::new(primary, fallback); let label = pool.label(); @@ -102,6 +92,25 @@ pub async fn run(config_path: String) -> crate::Result<()> { label, ) } + crate::config::UpstreamMode::Odoh => { + let odoh = config.upstream.odoh_upstream()?; + let client = build_https_client(); + let target_config = Arc::new(OdohConfigCache::new(odoh.target_host, client.clone())); + let primary = vec![Upstream::Odoh { + relay_url: odoh.relay_url, + target_path: odoh.target_path, + client, + target_config, + }]; + let fallback = if odoh.strict { + Vec::new() + } 
else { + parse_upstream_list(&config.upstream.fallback, config.upstream.port)? + }; + let pool = UpstreamPool::new(primary, fallback); + let label = pool.label(); + (crate::config::UpstreamMode::Odoh, false, pool, label) + } }; let api_port = config.server.api_port; diff --git a/src/stats.rs b/src/stats.rs index df9127c..acedec1 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -102,6 +102,10 @@ pub struct ServerStats { transport_tcp: u64, transport_dot: u64, transport_doh: u64, + upstream_transport_udp: u64, + upstream_transport_doh: u64, + upstream_transport_dot: u64, + upstream_transport_odoh: u64, started_at: Instant, } @@ -124,6 +128,31 @@ impl Transport { } } +/// Wire protocol used for a forwarded upstream call. Orthogonal to +/// `QueryPath`: the path answers "where the answer came from"; this answers +/// "over what wire we spoke to the forwarder." Callers pass +/// `Option<UpstreamTransport>` — `None` for resolutions that never touched +/// a forwarder (cache/local/blocked) or for recursive mode, which has its +/// own counter via `QueryPath::Recursive`. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum UpstreamTransport { + Udp, + Doh, + Dot, + Odoh, +} + +impl UpstreamTransport { + pub fn as_str(&self) -> &'static str { + match self { + UpstreamTransport::Udp => "UDP", + UpstreamTransport::Doh => "DOH", + UpstreamTransport::Dot => "DOT", + UpstreamTransport::Odoh => "ODOH", + } + } +} + #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum QueryPath { Local, @@ -202,11 +231,20 @@ impl ServerStats { transport_tcp: 0, transport_dot: 0, transport_doh: 0, + upstream_transport_udp: 0, + upstream_transport_doh: 0, + upstream_transport_dot: 0, + upstream_transport_odoh: 0, started_at: Instant::now(), } } - pub fn record(&mut self, path: QueryPath, transport: Transport) -> u64 { + pub fn record( + &mut self, + path: QueryPath, + transport: Transport, + upstream_transport: Option, + ) -> u64 { self.queries_total += 1; match path { QueryPath::Local => self.queries_local += 1, @@ -225,6 +263,14 @@ impl ServerStats { Transport::Dot => self.transport_dot += 1, Transport::Doh => self.transport_doh += 1, } + if let Some(ut) = upstream_transport { + match ut { + UpstreamTransport::Udp => self.upstream_transport_udp += 1, + UpstreamTransport::Doh => self.upstream_transport_doh += 1, + UpstreamTransport::Dot => self.upstream_transport_dot += 1, + UpstreamTransport::Odoh => self.upstream_transport_odoh += 1, + } + } self.queries_total } @@ -253,6 +299,10 @@ impl ServerStats { transport_tcp: self.transport_tcp, transport_dot: self.transport_dot, transport_doh: self.transport_doh, + upstream_transport_udp: self.upstream_transport_udp, + upstream_transport_doh: self.upstream_transport_doh, + upstream_transport_dot: self.upstream_transport_dot, + upstream_transport_odoh: self.upstream_transport_odoh, } } @@ -263,7 +313,7 @@ impl ServerStats { let secs = uptime.as_secs() % 60; log::info!( - "STATS | uptime {}h{}m{}s | total {} | fwd {} | upstream {} | recursive {} | coalesced {} | cached {} | local {} | override {} | blocked {} | 
errors {}", + "STATS | uptime {}h{}m{}s | total {} | fwd {} | upstream {} | recursive {} | coalesced {} | cached {} | local {} | override {} | blocked {} | errors {} | up-udp {} | up-doh {} | up-dot {} | up-odoh {}", hours, mins, secs, self.queries_total, self.queries_forwarded, @@ -275,6 +325,10 @@ impl ServerStats { self.queries_overridden, self.queries_blocked, self.upstream_errors, + self.upstream_transport_udp, + self.upstream_transport_doh, + self.upstream_transport_dot, + self.upstream_transport_odoh, ); } } @@ -295,4 +349,8 @@ pub struct StatsSnapshot { pub transport_tcp: u64, pub transport_dot: u64, pub transport_doh: u64, + pub upstream_transport_udp: u64, + pub upstream_transport_doh: u64, + pub upstream_transport_dot: u64, + pub upstream_transport_odoh: u64, } diff --git a/tests/integration.sh b/tests/integration.sh index 81bd28d..77b874f 100755 --- a/tests/integration.sh +++ b/tests/integration.sh @@ -854,6 +854,203 @@ sleep 1 fi # end Suite 7 +# ---- Suite 8: ODoH (Oblivious DoH via public relay + target) ---- +# Exercises the full client pipeline: /.well-known/odohconfigs fetch, +# HPKE seal/unseal, URL-query target routing (RFC 9230 §5), dashboard +# UpstreamTransport::Odoh counter. Depends on the public ecosystem being up — +# the probe-odoh-ecosystem.sh script guards against flaky runs. 
+if should_run_suite 8; then +echo "" +echo "╔══════════════════════════════════════════╗" +echo "║ Suite 8: ODoH (Anonymous DNS) ║" +echo "╚══════════════════════════════════════════╝" + +run_test_suite "ODoH via edgecompute.app relay → Cloudflare target" " +[server] +bind_addr = \"127.0.0.1:5354\" +api_port = 5381 + +[upstream] +mode = \"odoh\" +relay = \"https://odoh-relay.edgecompute.app/proxy\" +target = \"https://odoh.cloudflare-dns.com/dns-query\" + +[cache] +max_entries = 10000 +min_ttl = 60 +max_ttl = 86400 + +[blocking] +enabled = false + +[proxy] +enabled = false +" + +# Re-start briefly to assert ODoH-specific observability: the odoh counter +# has to tick above zero after a query, and the stats label has to reflect +# the oblivious path. These guard against silent regressions in the +# UpstreamTransport::Odoh tagging and the /stats serialisation. +RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 & +NUMA_PID=$! +for _ in $(seq 1 30); do + curl -sf "http://127.0.0.1:$API_PORT/health" >/dev/null 2>&1 && break + sleep 0.1 +done + +$DIG example.com A +short > /dev/null 2>&1 || true +sleep 1 + +STATS=$(curl -sf http://127.0.0.1:$API_PORT/stats 2>/dev/null) +# upstream_transport.odoh lives inside the upstream_transport object. +ODOH_COUNT=$(echo "$STATS" | grep -o '"upstream_transport":{[^}]*}' \ + | grep -o '"odoh":[0-9]*' | cut -d: -f2) +check "upstream_transport.odoh > 0 after a query" "[1-9]" "${ODOH_COUNT:-0}" + +check "Upstream label advertises odoh://" \ + "odoh://" \ + "$(echo "$STATS" | grep -o '"upstream":"[^"]*"')" + +check "Stats mode field is 'odoh'" \ + '"mode":"odoh"' \ + "$(echo "$STATS" | grep -o '"mode":"odoh"')" + +# Strict-mode failure path: a clearly-unreachable relay must produce +# SERVFAIL without silent downgrade. We hijack the config to point at +# an .invalid host so we don't rely on external uptime. 
+kill "$NUMA_PID" 2>/dev/null || true +wait "$NUMA_PID" 2>/dev/null || true +sleep 1 + +cat > "$CONFIG" << 'CONF' +[server] +bind_addr = "127.0.0.1:5354" +api_port = 5381 + +[upstream] +mode = "odoh" +relay = "https://relay.invalid/proxy" +target = "https://odoh.cloudflare-dns.com/dns-query" +strict = true + +[cache] +max_entries = 10000 + +[blocking] +enabled = false + +[proxy] +enabled = false +CONF + +RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 & +NUMA_PID=$! +for _ in $(seq 1 30); do + curl -sf "http://127.0.0.1:$API_PORT/health" >/dev/null 2>&1 && break + sleep 0.1 +done + +check "Strict-mode relay outage returns SERVFAIL" \ + "SERVFAIL" \ + "$($DIG example.com A 2>&1 | grep 'status:')" + +kill "$NUMA_PID" 2>/dev/null || true +wait "$NUMA_PID" 2>/dev/null || true +sleep 1 + +# Negative: relay and target on the same host must be rejected at startup. +cat > "$CONFIG" << 'CONF' +[server] +bind_addr = "127.0.0.1:5354" +api_port = 5381 + +[upstream] +mode = "odoh" +relay = "https://odoh.cloudflare-dns.com/proxy" +target = "https://odoh.cloudflare-dns.com/dns-query" +CONF + +STARTUP_OUT=$("$BINARY" "$CONFIG" 2>&1 || true) +check "Same-host relay+target rejected at startup" \ + "same host" \ + "$STARTUP_OUT" + +fi # end Suite 8 + +# ---- Suite 9: Numa's own ODoH relay (`numa relay` subcommand) ---- +# Exercises `numa relay PORT` as a forwarding proxy to a real ODoH target. +# Validates the RFC 9230 §5 relay behaviour: URL-query routing, content-type +# gating, body-size cap, and /health observability. +if should_run_suite 9; then +echo "" +echo "╔══════════════════════════════════════════╗" +echo "║ Suite 9: Numa ODoH Relay (own) ║" +echo "╚══════════════════════════════════════════╝" + +RELAY_PORT=18443 +"$BINARY" relay $RELAY_PORT > "$LOG" 2>&1 & +NUMA_PID=$! 
+for _ in $(seq 1 30); do + curl -sf "http://127.0.0.1:$RELAY_PORT/health" >/dev/null 2>&1 && break + sleep 0.1 +done + +echo "" +echo "=== Relay Endpoints ===" + +check "Health endpoint returns ok" \ + "ok" \ + "$(curl -sf http://127.0.0.1:$RELAY_PORT/health | head -1)" + +# Happy path: forwards arbitrary body to Cloudflare's ODoH target. The +# target will reject the garbage envelope with HTTP 400 — which is exactly +# what proves our relay faithfully forwarded (otherwise we'd see our own +# 4xx from the relay itself). +HAPPY_STATUS=$(curl -sS -o /dev/null -w "%{http_code}" -X POST \ + -H "Content-Type: application/oblivious-dns-message" \ + --data-binary "garbage-forwarded-end-to-end" \ + "http://127.0.0.1:$RELAY_PORT/relay?targethost=odoh.cloudflare-dns.com&targetpath=/dns-query") +check "Relay forwards to target (target rejects garbage → 400)" \ + "400" \ + "$HAPPY_STATUS" + +echo "" +echo "=== Guards ===" + +check "Missing content-type → 415" \ + "415" \ + "$(curl -sS -o /dev/null -w '%{http_code}' -X POST --data-binary 'x' \ + 'http://127.0.0.1:'$RELAY_PORT'/relay?targethost=odoh.cloudflare-dns.com&targetpath=/dns-query')" + +check "Oversized body (>4 KiB) → 413" \ + "413" \ + "$(head -c 5000 /dev/urandom | curl -sS -o /dev/null -w '%{http_code}' -X POST \ + -H 'Content-Type: application/oblivious-dns-message' --data-binary @- \ + 'http://127.0.0.1:'$RELAY_PORT'/relay?targethost=odoh.cloudflare-dns.com&targetpath=/dns-query')" + +check "Invalid targethost (no dot) → 400" \ + "400" \ + "$(curl -sS -o /dev/null -w '%{http_code}' -X POST \ + -H 'Content-Type: application/oblivious-dns-message' --data-binary 'x' \ + 'http://127.0.0.1:'$RELAY_PORT'/relay?targethost=invalid&targetpath=/dns-query')" + +echo "" +echo "=== Counters ===" + +HEALTH=$(curl -sf "http://127.0.0.1:$RELAY_PORT/health") +check "Relay counted at least one forwarded_ok" \ + "[1-9]" \ + "$(echo "$HEALTH" | grep 'forwarded_ok' | awk '{print $2}')" +check "Relay counted at least one 
rejected_bad_request" \ + "[1-9]" \ + "$(echo "$HEALTH" | grep 'rejected_bad_request' | awk '{print $2}')" + +kill "$NUMA_PID" 2>/dev/null || true +wait "$NUMA_PID" 2>/dev/null || true +sleep 1 + +fi # end Suite 9 + # Summary echo "" TOTAL=$((PASSED + FAILED)) diff --git a/tests/probe-odoh-ecosystem.sh b/tests/probe-odoh-ecosystem.sh new file mode 100755 index 0000000..b2ff311 --- /dev/null +++ b/tests/probe-odoh-ecosystem.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# Probe the public ODoH ecosystem. +# +# Source of truth: DNSCrypt's curated list at +# https://github.com/DNSCrypt/dnscrypt-resolvers/tree/master/v3 +# - v3/odoh-servers.md (ODoH targets) +# - v3/odoh-relays.md (ODoH relays) +# +# As of commit 2025-09-16 ("odohrelay-crypto-sx seems to be the only ODoH +# relay left"), the full public ecosystem is 4 targets + 1 relay. Re-run this +# script against the upstream list before making any "only N public relays" +# claim publicly. +# +# Usage: ./tests/probe-odoh-ecosystem.sh + +set -uo pipefail + +GREEN="\033[32m" +RED="\033[31m" +YELLOW="\033[33m" +DIM="\033[90m" +RESET="\033[0m" + +UP=0 +DOWN=0 + +probe_target() { + local name="$1" + local host="$2" + local url="https://${host}/.well-known/odohconfigs" + local start=$(date +%s%N) + local headers + headers=$(curl -sS -o /tmp/odoh-probe-body -D - --max-time 5 -A "numa-odoh-probe/0.1" "$url" 2>&1) || { + DOWN=$((DOWN + 1)) + printf " ${RED}✗${RESET} %-25s ${DIM}unreachable${RESET}\n" "$name" + return + } + local elapsed_ms=$((($(date +%s%N) - start) / 1000000)) + local status + status=$(echo "$headers" | head -1 | awk '{print $2}') + local ctype + ctype=$(echo "$headers" | grep -i '^content-type:' | head -1 | tr -d '\r') + local size + size=$(stat -f%z /tmp/odoh-probe-body 2>/dev/null || stat -c%s /tmp/odoh-probe-body 2>/dev/null || echo 0) + + if [[ "$status" == "200" ]] && [[ "$size" -gt 0 ]]; then + UP=$((UP + 1)) + printf " ${GREEN}✓${RESET} %-25s ${DIM}%4dms %s bytes %s${RESET}\n" "$name" "$elapsed_ms" 
"$size" "$ctype" + else + DOWN=$((DOWN + 1)) + printf " ${RED}✗${RESET} %-25s ${DIM}status=%s size=%s${RESET}\n" "$name" "$status" "$size" + fi + rm -f /tmp/odoh-probe-body +} + +probe_relay() { + # Relays don't expose /.well-known/odohconfigs — we just verify TLS reachability + # and that the endpoint responds to a malformed POST with an HTTP error + # (indicating the relay path exists). A real ODoH validation requires HPKE. + local name="$1" + local url="$2" + local start=$(date +%s%N) + local status + status=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 5 -A "numa-odoh-probe/0.1" \ + -X POST -H "Content-Type: application/oblivious-dns-message" \ + --data-binary "" "$url" 2>&1) || { + DOWN=$((DOWN + 1)) + printf " ${RED}✗${RESET} %-25s ${DIM}unreachable${RESET}\n" "$name" + return + } + local elapsed_ms=$((($(date +%s%N) - start) / 1000000)) + # Any 2xx or 4xx means the endpoint is live (TLS works, HTTP responded). + # 5xx or 000 (curl failure) means broken. + if [[ "$status" =~ ^[24] ]]; then + UP=$((UP + 1)) + printf " ${GREEN}✓${RESET} %-25s ${DIM}%4dms status=%s (endpoint live)${RESET}\n" "$name" "$elapsed_ms" "$status" + else + DOWN=$((DOWN + 1)) + printf " ${RED}✗${RESET} %-25s ${DIM}status=%s${RESET}\n" "$name" "$status" + fi +} + +echo "ODoH targets:" +probe_target "Cloudflare" "odoh.cloudflare-dns.com" +probe_target "crypto.sx" "odoh.crypto.sx" +probe_target "Snowstorm" "dope.snowstorm.love" +probe_target "Tiarap" "doh.tiarap.org" + +echo +echo "ODoH relays:" +probe_relay "Frank Denis (Fastly)" "https://odoh-relay.edgecompute.app/proxy" + +echo +TOTAL=$((UP + DOWN)) +if [[ "$DOWN" -eq 0 ]]; then + printf "${GREEN}All %d endpoints up${RESET}\n" "$TOTAL" + exit 0 +else + printf "${YELLOW}%d/%d up, %d down${RESET}\n" "$UP" "$TOTAL" "$DOWN" + exit 1 +fi -- 2.34.1 From cf128c19af0cc2b747398ae4fd853e7150078edb Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 15:44:09 +0300 Subject: [PATCH 2/5] feat(odoh): bootstrap-IP overrides + 
zero hedge for ODoH (post-deploy fixes) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues surfaced from running mode = "odoh" against the live Hetzner relay as system DNS: 1. **Bootstrap deadlock.** The reqwest HTTPS client resolves the relay and target hostnames via system DNS. When numa is itself the system resolver, the ODoH client loops trying to resolve through itself. Adds optional `relay_ip` and `target_ip` to `[upstream]`, plumbed into reqwest's `resolve()` so the HTTPS client bypasses system DNS for those two hostnames. TLS still validates against the URL hostname, so a stale IP fails loudly rather than silently MITM'ing. 2. **2x relay load.** Default `hedge_ms = 10` triggers a duplicate in-flight query for every request. Useful for UDP/DoH/DoT (rescues tail latency cheaply); wasteful for ODoH (doubles HPKE seal/unseal, doubles sealed-byte footprint a passive observer can correlate, no latency win — relay hop dominates either way). Force-zero in oblivious mode regardless of configured hedge_ms. Validated end-to-end against odoh-relay.numa.rs → Cloudflare: 3 digs produced 3 forwarded_ok on the relay (was 6 before the hedge fix), upstream_transport.odoh ticks correctly. --- src/config.rs | 123 ++++++++++++++++++++++++++++++++++++++++++++++++- src/forward.rs | 21 ++++++++- src/serve.rs | 13 ++++-- 3 files changed, 149 insertions(+), 8 deletions(-) diff --git a/src/config.rs b/src/config.rs index 2d2f1ba..1205e37 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; -use std::net::Ipv4Addr; -use std::net::Ipv6Addr; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; +use std::time::Duration; use serde::Deserialize; @@ -146,6 +146,19 @@ impl UpstreamMode { UpstreamMode::Odoh => "odoh", } } + + /// Hedging duplicates the in-flight query against the same upstream to + /// rescue tail latency. 
Beneficial for UDP/DoH/DoT (cheap retransmit / + /// h2 stream multiplexing). For ODoH it doubles the relay's HPKE + /// seal/unseal load and the sealed-byte footprint a passive observer + /// can correlate, with no latency win — the relay hop dominates either + /// way. Force-zero in oblivious mode regardless of `hedge_ms`. + pub fn hedge_delay(self, hedge_ms: u64) -> Duration { + match self { + UpstreamMode::Odoh => Duration::ZERO, + _ => Duration::from_millis(hedge_ms), + } + } } #[derive(Deserialize)] @@ -182,6 +195,16 @@ pub struct UpstreamConfig { /// a user who configured ODoH rarely wants a silent non-oblivious path. #[serde(default)] pub strict: Option, + + /// Bootstrap IP for the relay host, used when numa is its own system + /// resolver (otherwise the ODoH HTTPS client loops resolving through + /// itself). TLS still validates the cert against `relay`'s hostname. + #[serde(default)] + pub relay_ip: Option, + + /// Same as `relay_ip` but for the target host. + #[serde(default)] + pub target_ip: Option, } impl Default for UpstreamConfig { @@ -199,6 +222,8 @@ impl Default for UpstreamConfig { relay: None, target: None, strict: None, + relay_ip: None, + target_ip: None, } } } @@ -208,9 +233,12 @@ impl Default for UpstreamConfig { #[derive(Debug)] pub struct OdohUpstream { pub relay_url: String, + pub relay_host: String, pub target_host: String, pub target_path: String, pub strict: bool, + pub relay_bootstrap: Option, + pub target_bootstrap: Option, } impl UpstreamConfig { @@ -246,6 +274,10 @@ impl UpstreamConfig { .into()); } + let relay_host = relay_url + .host_str() + .ok_or("upstream.relay has no host")? + .to_string(); let target_host = target_url .host_str() .ok_or("upstream.target has no host")? 
@@ -256,11 +288,17 @@ impl UpstreamConfig { target_url.path().to_string() }; + let relay_port = relay_url.port_or_known_default().unwrap_or(443); + let target_port = target_url.port_or_known_default().unwrap_or(443); + Ok(OdohUpstream { relay_url: relay.to_string(), + relay_host, target_host, target_path, strict: self.strict.unwrap_or(true), + relay_bootstrap: self.relay_ip.map(|ip| SocketAddr::new(ip, relay_port)), + target_bootstrap: self.target_ip.map(|ip| SocketAddr::new(ip, target_port)), }) } } @@ -817,6 +855,87 @@ target = "https://odoh.cloudflare-dns.com/dns-query" assert!(err.contains("upstream.relay"), "got: {err}"); } + #[test] + fn odoh_bootstrap_ips_parse_into_socket_addrs() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +relay_ip = "178.104.229.30" +target_ip = "104.16.249.249" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let odoh = config.upstream.odoh_upstream().unwrap(); + assert_eq!(odoh.relay_host, "odoh-relay.numa.rs"); + assert_eq!( + odoh.relay_bootstrap.unwrap().to_string(), + "178.104.229.30:443" + ); + assert_eq!( + odoh.target_bootstrap.unwrap().to_string(), + "104.16.249.249:443" + ); + } + + #[test] + fn odoh_bootstrap_ips_optional() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let odoh = config.upstream.odoh_upstream().unwrap(); + assert!(odoh.relay_bootstrap.is_none()); + assert!(odoh.target_bootstrap.is_none()); + } + + #[test] + fn odoh_bootstrap_ip_rejects_garbage() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +relay_ip = "not-an-ip" +"#; + let err = toml::from_str::(toml).err().unwrap().to_string(); + assert!(err.contains("relay_ip"), "got: {err}"); + } + + #[test] + 
fn odoh_bootstrap_uses_url_port_when_non_default() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs:8443/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +relay_ip = "178.104.229.30" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let odoh = config.upstream.odoh_upstream().unwrap(); + assert_eq!( + odoh.relay_bootstrap.unwrap().to_string(), + "178.104.229.30:8443" + ); + } + + #[test] + fn hedge_delay_zeroed_for_odoh_mode() { + assert_eq!( + UpstreamMode::Odoh.hedge_delay(50), + Duration::ZERO, + "ODoH mode must zero hedge regardless of configured hedge_ms" + ); + assert_eq!( + UpstreamMode::Forward.hedge_delay(50), + Duration::from_millis(50), + "non-ODoH modes honour configured hedge_ms" + ); + } + #[test] fn odoh_missing_target_rejected() { let toml = r#" diff --git a/src/forward.rs b/src/forward.rs index bb91fcf..530f1ed 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -176,6 +176,25 @@ pub fn build_https_client() -> reqwest::Client { /// and benefit from a larger pool so warm connections survive concurrent /// fan-out. pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::Client { + https_client_builder(pool_max_idle_per_host) + .build() + .unwrap_or_default() +} + +/// HTTPS client for the ODoH upstream, with bootstrap-IP overrides applied +/// so relay/target hostname resolution can bypass system DNS. 
+pub fn build_odoh_client(odoh: &crate::config::OdohUpstream) -> reqwest::Client { + let mut builder = https_client_builder(1); + if let Some(addr) = odoh.relay_bootstrap { + builder = builder.resolve(&odoh.relay_host, addr); + } + if let Some(addr) = odoh.target_bootstrap { + builder = builder.resolve(&odoh.target_host, addr); + } + builder.build().unwrap_or_default() +} + +fn https_client_builder(pool_max_idle_per_host: usize) -> reqwest::ClientBuilder { reqwest::Client::builder() .use_rustls_tls() .http2_initial_stream_window_size(65_535) @@ -185,8 +204,6 @@ pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::C .http2_keep_alive_timeout(Duration::from_secs(10)) .pool_idle_timeout(Duration::from_secs(300)) .pool_max_idle_per_host(pool_max_idle_per_host) - .build() - .unwrap_or_default() } fn build_dot_connector() -> Result { diff --git a/src/serve.rs b/src/serve.rs index 2037857..9b4b587 100644 --- a/src/serve.rs +++ b/src/serve.rs @@ -17,7 +17,9 @@ use crate::buffer::BytePacketBuffer; use crate::cache::DnsCache; use crate::config::{build_zone_map, load_config, ConfigLoad}; use crate::ctx::{handle_query, ServerCtx}; -use crate::forward::{build_https_client, parse_upstream_list, Upstream, UpstreamPool}; +use crate::forward::{ + build_https_client, build_odoh_client, parse_upstream_list, Upstream, UpstreamPool, +}; use crate::odoh::OdohConfigCache; use crate::override_store::OverrideStore; use crate::query_log::QueryLog; @@ -94,8 +96,11 @@ pub async fn run(config_path: String) -> crate::Result<()> { } crate::config::UpstreamMode::Odoh => { let odoh = config.upstream.odoh_upstream()?; - let client = build_https_client(); - let target_config = Arc::new(OdohConfigCache::new(odoh.target_host, client.clone())); + let client = build_odoh_client(&odoh); + let target_config = Arc::new(OdohConfigCache::new( + odoh.target_host.clone(), + client.clone(), + )); let primary = vec![Upstream::Odoh { relay_url: odoh.relay_url, target_path: 
odoh.target_path, @@ -222,7 +227,7 @@ pub async fn run(config_path: String) -> crate::Result<()> { upstream_port: config.upstream.port, lan_ip: Mutex::new(crate::lan::detect_lan_ip().unwrap_or(std::net::Ipv4Addr::LOCALHOST)), timeout: Duration::from_millis(config.upstream.timeout_ms), - hedge_delay: Duration::from_millis(config.upstream.hedge_ms), + hedge_delay: resolved_mode.hedge_delay(config.upstream.hedge_ms), proxy_tld_suffix: if config.proxy.tld.is_empty() { String::new() } else { -- 2.34.1 From a3cc64c94f6d7e53705455e4e384ac5b811eaa1e Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 15:44:20 +0300 Subject: [PATCH 3/5] feat(odoh): relay bind-address CLI arg + dashboard Outbound Wire panel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - `numa relay [PORT] [BIND]` accepts an optional bind address (defaults to 127.0.0.1, matching the Caddy reverse-proxy deployment shape). Required for Docker, where the relay needs 0.0.0.0 inside the container so Caddy can reach it across the bridge network. - Dashboard now surfaces the upstream_transport dimension as an "Outbound Wire" panel alongside the existing "Inbound Wire" (renamed from "Transport" for directional clarity). Sub-headers — "apps → numa" / "numa → internet" — make the threat-model split obvious without jargon. Bars: UDP/DoH/DoT/ODoH, headline "X% encrypted outbound". The PR description's promise that "the dashboard answers how much of my DNS traffic left in cleartext honestly" is now true. 
--- site/dashboard.html | 35 ++++++++++++++++++++++++++++++++--- src/main.rs | 15 +++++++++++++-- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/site/dashboard.html b/site/dashboard.html index fa2d965..710692b 100644 --- a/site/dashboard.html +++ b/site/dashboard.html @@ -228,6 +228,7 @@ body { .path-bar-fill.tcp { background: var(--violet); } .path-bar-fill.dot { background: var(--emerald); } .path-bar-fill.doh { background: var(--teal); } +.path-bar-fill.odoh { background: var(--violet-dim); } .path-pct { font-family: var(--font-mono); font-size: 0.75rem; @@ -637,16 +638,26 @@ body { - +
- Transport + Inbound Wire apps → numa
+ +
+
+ Outbound Wire numa → internet + +
+
+
+
+
@@ -992,7 +1003,24 @@ function renderTransport(transport) { renderBarChart('transportBars', TRANSPORT_DEFS, transport, total); const encPct = encryptionPct(transport); const el = document.getElementById('transportEncrypted'); - el.textContent = `${encPct}% encrypted`; + el.textContent = `${encPct}% encrypted inbound`; + el.style.color = encPct >= 80 ? 'var(--emerald)' : encPct >= 50 ? 'var(--amber)' : 'var(--rose)'; +} + +const UPSTREAM_WIRE_DEFS = [ + { key: 'udp', label: 'UDP', cls: 'udp' }, + { key: 'doh', label: 'DoH', cls: 'doh' }, + { key: 'dot', label: 'DoT', cls: 'dot' }, + { key: 'odoh', label: 'ODoH', cls: 'odoh' }, +]; + +function renderUpstreamWire(ut) { + const total = (ut.udp + ut.doh + ut.dot + ut.odoh) || 0; + renderBarChart('upstreamWireBars', UPSTREAM_WIRE_DEFS, ut, total || 1); + const encrypted = ut.doh + ut.dot + ut.odoh; + const encPct = total > 0 ? Math.round((encrypted / total) * 100) : 0; + const el = document.getElementById('upstreamWireEncrypted'); + el.textContent = total > 0 ? `${encPct}% encrypted outbound` : ''; el.style.color = encPct >= 80 ? 'var(--emerald)' : encPct >= 50 ? 
'var(--amber)' : 'var(--rose)'; } @@ -1234,6 +1262,7 @@ async function refresh() { // Panels renderPaths(q); renderTransport(stats.transport); + renderUpstreamWire(stats.upstream_transport || { udp: 0, doh: 0, dot: 0, odoh: 0 }); renderQueryLog(logs); renderOverrides(overrides); renderCache(cache); diff --git a/src/main.rs b/src/main.rs index e077a2f..8f9fecf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -66,7 +66,17 @@ fn main() -> numa::Result<()> { .as_deref() .and_then(|s| s.parse().ok()) .unwrap_or(8443); - let addr: std::net::SocketAddr = ([127, 0, 0, 1], port).into(); + let bind: std::net::IpAddr = std::env::args() + .nth(3) + .as_deref() + .map(|s| { + s.parse().unwrap_or_else(|e| { + eprintln!("invalid bind address '{}': {}", s, e); + std::process::exit(1); + }) + }) + .unwrap_or(std::net::IpAddr::V4(std::net::Ipv4Addr::LOCALHOST)); + let addr = std::net::SocketAddr::new(bind, port); eprintln!( "\x1b[1;38;2;192;98;58mNuma\x1b[0m — ODoH relay on {}\n", addr @@ -107,7 +117,8 @@ fn main() -> numa::Result<()> { eprintln!(" service status Check if the service is running"); eprintln!(" lan on Enable LAN service discovery (mDNS)"); eprintln!(" lan off Disable LAN service discovery"); - eprintln!(" relay [PORT] Run as an ODoH relay (RFC 9230, default port 8443)"); + eprintln!(" relay [PORT] [BIND]"); + eprintln!(" Run as an ODoH relay (RFC 9230, default 127.0.0.1:8443)"); eprintln!(" setup-phone Generate a QR code to install Numa DoT on a phone"); eprintln!(" help Show this help"); eprintln!(); -- 2.34.1 From be60f6ccbc33189d34bd5e02d894aec69ba6fe8c Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 15:44:29 +0300 Subject: [PATCH 4/5] chore(packaging): docker-compose + Caddyfile for ODoH relay deploy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two-container deploy: Caddy terminates TLS (auto-provisions Let's Encrypt via ACME) and reverse-proxies to a Numa relay on an internal Docker network. 
The relay never reads sealed payloads; Caddy's access log is discarded so per-request observability doesn't defeat the oblivious property. Validated against Hetzner CX22 + DNS at odoh-relay.numa.rs: - TLS-ALPN-01 challenge succeeded on first attempt - /health returned the relay's counter block - End-to-end ODoH client → relay → Cloudflare works Operators only need to: set a DNS A record, edit Caddyfile's hostname, docker compose up -d. README walks through the steps and the DNSCrypt v3/odoh-relays.md submission to claim a public listing. --- packaging/relay/Caddyfile | 15 ++++++++++ packaging/relay/README.md | 48 ++++++++++++++++++++++++++++++ packaging/relay/docker-compose.yml | 26 ++++++++++++++++ 3 files changed, 89 insertions(+) create mode 100644 packaging/relay/Caddyfile create mode 100644 packaging/relay/README.md create mode 100644 packaging/relay/docker-compose.yml diff --git a/packaging/relay/Caddyfile b/packaging/relay/Caddyfile new file mode 100644 index 0000000..ea368c8 --- /dev/null +++ b/packaging/relay/Caddyfile @@ -0,0 +1,15 @@ +odoh-relay.example.com { + handle /relay { + reverse_proxy numa-relay:8443 + } + handle /health { + reverse_proxy numa-relay:8443 + } + respond 404 + + # Per-request access logs defeat the point of an oblivious relay. + # Aggregate counters are exposed at /health on the relay itself. + log { + output discard + } +} diff --git a/packaging/relay/README.md b/packaging/relay/README.md new file mode 100644 index 0000000..373b263 --- /dev/null +++ b/packaging/relay/README.md @@ -0,0 +1,48 @@ +# Numa ODoH Relay — Docker deploy + +Two-container deploy: Caddy terminates TLS (auto-provisioning a Let's Encrypt +cert via ACME) and reverse-proxies to a Numa relay running on an internal +Docker network. The relay never reads sealed payloads; Caddy never logs them. + +## Prerequisites + +- A host with public 80/443 reachable from the internet. +- A DNS record (`A` or `AAAA`) pointing your chosen hostname at the host. 
+- Docker + Docker Compose v2. + +## Configure + +Edit `Caddyfile` and replace `odoh-relay.example.com` with your hostname. +That hostname is what ACME validates against and what ODoH clients will +configure as their relay URL: `https://<your-hostname>/relay`. + +## Deploy + +```sh +docker compose up -d +docker compose logs -f caddy # watch ACME provisioning +``` + +First boot takes a few seconds while Caddy obtains the cert. Subsequent +restarts reuse the cached cert from the `caddy_data` volume. + +## Verify + +```sh +curl https://<your-hostname>/health +# ok +# total 0 +# forwarded_ok 0 +# forwarded_err 0 +# rejected_bad_request 0 +``` + +Then point any ODoH client at `https://<your-hostname>/relay` and watch the +counters tick. + +## Listing on the public ecosystem + +DNSCrypt's [v3/odoh-relays.md](https://github.com/DNSCrypt/dnscrypt-resolvers/blob/master/v3/odoh-relays.md) +is the canonical list. The pruned 2025-09-16 commit shows one public ODoH +relay survived the cull — running this compose file doubles global supply. +Open a PR there once your relay has been up for ~24 hours. 
diff --git a/packaging/relay/docker-compose.yml b/packaging/relay/docker-compose.yml new file mode 100644 index 0000000..9561535 --- /dev/null +++ b/packaging/relay/docker-compose.yml @@ -0,0 +1,26 @@ +services: + numa-relay: + image: ghcr.io/razvandimescu/numa:latest + command: ["relay", "8443", "0.0.0.0"] + restart: unless-stopped + networks: [internal] + + caddy: + image: caddy:2 + ports: + - "80:80" + - "443:443" + volumes: + - ./Caddyfile:/etc/caddy/Caddyfile:ro + - caddy_data:/data + - caddy_config:/config + restart: unless-stopped + depends_on: [numa-relay] + networks: [internal] + +networks: + internal: + +volumes: + caddy_data: + caddy_config: -- 2.34.1 From eb5ea3b645f0e64806fc5975fba3ea2c76e651a8 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 16:03:34 +0300 Subject: [PATCH 5/5] refactor(odoh): deduplicate post-audit findings - Hoist ODOH_CONTENT_TYPE to a single pub(crate) constant in odoh.rs; relay.rs imports it instead of declaring its own. - Generalize dashboard encryptionPct(data, encryptedKeys, allKeys) so both Inbound Wire and Outbound Wire panels share the same math instead of drifting independently. - Extract RelayState::new() and build_app() helpers in relay.rs so the test spawn_relay() and production run() wire the same router + body-limit layer. Prevents future middleware from landing in one path but not the other. All 344 lib tests pass; no behavior change. 
--- site/dashboard.html | 13 ++++++------ src/odoh.rs | 2 +- src/relay.rs | 49 ++++++++++++++++++++------------------------- 3 files changed, 30 insertions(+), 34 deletions(-) diff --git a/site/dashboard.html b/site/dashboard.html index 710692b..7b20e17 100644 --- a/site/dashboard.html +++ b/site/dashboard.html @@ -971,9 +971,11 @@ function renderBarChart(containerId, defs, data, total) { }).join(''); } -function encryptionPct(transport) { - const total = (transport.udp + transport.tcp + transport.dot + transport.doh) || 1; - return (((transport.dot + transport.doh) / total) * 100).toFixed(0); +function encryptionPct(data, encryptedKeys, allKeys) { + const total = allKeys.reduce((s, k) => s + (data[k] || 0), 0); + if (total === 0) return 0; + const encrypted = encryptedKeys.reduce((s, k) => s + (data[k] || 0), 0); + return Math.round((encrypted / total) * 100); } const PATH_DEFS = [ @@ -1001,7 +1003,7 @@ const TRANSPORT_DEFS = [ function renderTransport(transport) { const total = (transport.udp + transport.tcp + transport.dot + transport.doh) || 1; renderBarChart('transportBars', TRANSPORT_DEFS, transport, total); - const encPct = encryptionPct(transport); + const encPct = encryptionPct(transport, ['dot', 'doh'], ['udp', 'tcp', 'dot', 'doh']); const el = document.getElementById('transportEncrypted'); el.textContent = `${encPct}% encrypted inbound`; el.style.color = encPct >= 80 ? 'var(--emerald)' : encPct >= 50 ? 'var(--amber)' : 'var(--rose)'; @@ -1017,8 +1019,7 @@ const UPSTREAM_WIRE_DEFS = [ function renderUpstreamWire(ut) { const total = (ut.udp + ut.doh + ut.dot + ut.odoh) || 0; renderBarChart('upstreamWireBars', UPSTREAM_WIRE_DEFS, ut, total || 1); - const encrypted = ut.doh + ut.dot + ut.odoh; - const encPct = total > 0 ? Math.round((encrypted / total) * 100) : 0; + const encPct = encryptionPct(ut, ['doh', 'dot', 'odoh'], ['udp', 'doh', 'dot', 'odoh']); const el = document.getElementById('upstreamWireEncrypted'); el.textContent = total > 0 ? 
`${encPct}% encrypted outbound` : ''; el.style.color = encPct >= 80 ? 'var(--emerald)' : encPct >= 50 ? 'var(--amber)' : 'var(--rose)'; diff --git a/src/odoh.rs b/src/odoh.rs index 2cfa9c5..0901c94 100644 --- a/src/odoh.rs +++ b/src/odoh.rs @@ -25,7 +25,7 @@ use tokio::time::timeout; use crate::Result; /// MIME type used for both directions of the ODoH exchange (RFC 9230 §4). -const ODOH_CONTENT_TYPE: &str = "application/oblivious-dns-message"; +pub(crate) const ODOH_CONTENT_TYPE: &str = "application/oblivious-dns-message"; /// Cap on the response body we read into memory when the relay returns /// non-success. Protects against a hostile relay streaming a huge body on diff --git a/src/relay.rs b/src/relay.rs index 8d6ab40..122796e 100644 --- a/src/relay.rs +++ b/src/relay.rs @@ -20,10 +20,9 @@ use serde::Deserialize; use tokio::net::TcpListener; use crate::forward::build_https_client_with_pool; +use crate::odoh::ODOH_CONTENT_TYPE; use crate::Result; -const ODOH_CONTENT_TYPE: &str = "application/oblivious-dns-message"; - /// Cap on the opaque body we accept from a client. ODoH envelopes are /// ~100–300 bytes in practice; anything larger is malformed or hostile. 
const MAX_BODY_BYTES: usize = 4 * 1024; @@ -55,23 +54,30 @@ struct RelayState { rejected_bad_request: AtomicU64, } -pub async fn run(addr: SocketAddr) -> Result<()> { - let state = Arc::new(RelayState { - client: build_https_client_with_pool(RELAY_POOL_PER_HOST), - total_requests: AtomicU64::new(0), - forwarded_ok: AtomicU64::new(0), - forwarded_err: AtomicU64::new(0), - rejected_bad_request: AtomicU64::new(0), - }); +impl RelayState { + fn new() -> Arc<RelayState> { + Arc::new(RelayState { + client: build_https_client_with_pool(RELAY_POOL_PER_HOST), + total_requests: AtomicU64::new(0), + forwarded_ok: AtomicU64::new(0), + forwarded_err: AtomicU64::new(0), + rejected_bad_request: AtomicU64::new(0), + }) + } +} - let app = Router::new() +/// `DefaultBodyLimit` overrides axum's 2 MiB default so hostile clients +/// can't force the relay to buffer multi-MB bodies before our own cap. +fn build_app(state: Arc<RelayState>) -> Router { + Router::new() .route("/relay", post(handle_relay)) - // Overrides axum's default (2 MiB) so hostile clients can't force - // the relay to buffer multi-MB bodies before our own cap check. 
.layer(DefaultBodyLimit::max(MAX_BODY_BYTES)) .route("/health", get(handle_health)) - .with_state(state); + .with_state(state) +} +pub async fn run(addr: SocketAddr) -> Result<()> { + let app = build_app(RelayState::new()); let listener = TcpListener::bind(addr).await?; info!("ODoH relay listening on {}", addr); axum::serve(listener, app).await?; @@ -199,19 +205,8 @@ mod tests { let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap(); - let state = Arc::new(RelayState { - client: build_https_client_with_pool(RELAY_POOL_PER_HOST), - total_requests: AtomicU64::new(0), - forwarded_ok: AtomicU64::new(0), - forwarded_err: AtomicU64::new(0), - rejected_bad_request: AtomicU64::new(0), - }); - - let app = Router::new() - .route("/relay", post(handle_relay)) - .layer(DefaultBodyLimit::max(MAX_BODY_BYTES)) - .route("/health", get(handle_health)) - .with_state(state.clone()); + let state = RelayState::new(); + let app = build_app(state.clone()); tokio::spawn(async move { let _ = axum::serve(listener, app).await; -- 2.34.1