From 80fcfd10ae73b7778245693204ea78714bd2a834 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Fri, 3 Apr 2026 10:27:47 +0200 Subject: [PATCH 01/30] flexible installation path --- com.numa.dns.plist | 2 +- numa.service | 2 +- src/system_dns.rs | 34 ++++++++++++++++------------------ 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/com.numa.dns.plist b/com.numa.dns.plist index 67c90fa..ad59f43 100644 --- a/com.numa.dns.plist +++ b/com.numa.dns.plist @@ -6,7 +6,7 @@ com.numa.dns ProgramArguments - /usr/local/bin/numa + {{exe_path}} RunAtLoad diff --git a/numa.service b/numa.service index 50b0909..7e67296 100644 --- a/numa.service +++ b/numa.service @@ -5,7 +5,7 @@ Wants=network-online.target [Service] Type=simple -ExecStart=/usr/local/bin/numa +ExecStart={{exe_path}} Restart=always RestartSec=2 StandardOutput=journal diff --git a/src/system_dns.rs b/src/system_dns.rs index a172608..89ef441 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -903,9 +903,12 @@ pub fn uninstall_service() -> Result<(), String> { /// Restart the service (kill process, launchd/systemd auto-restarts with new binary). pub fn restart_service() -> Result<(), String> { + let exe_path = + std::env::current_exe().map_err(|e| format!("failed to get current exe: {}", e))?; + #[cfg(any(target_os = "macos", target_os = "linux"))] let version = { - match std::process::Command::new("/usr/local/bin/numa") + match std::process::Command::new(&exe_path) .arg("--version") .output() { @@ -916,6 +919,7 @@ pub fn restart_service() -> Result<(), String> { #[cfg(target_os = "macos")] { + let exe_path = exe_path.to_string_lossy(); let output = std::process::Command::new("launchctl") .args(["list", PLIST_LABEL]) .output(); @@ -926,11 +930,11 @@ pub fn restart_service() -> Result<(), String> { // This will kill us too (we ARE /usr/local/bin/numa), so // codesign and print output first. 
let _ = std::process::Command::new("codesign") - .args(["-f", "-s", "-", "/usr/local/bin/numa"]) + .args(["-f", "-s", "-", &exe_path]) .output(); // use output() to suppress codesign stderr eprintln!(" Service restarting → {}\n", version); let _ = std::process::Command::new("pkill") - .args(["-f", "/usr/local/bin/numa"]) + .args(["-f", &exe_path]) .status(); Ok(()) } @@ -965,19 +969,22 @@ pub fn service_status() -> Result<(), String> { } } +fn replace_exe_path(service: &str) -> Result { + let exe_path = + std::env::current_exe().map_err(|e| format!("failed to get current exe: {}", e))?; + Ok(service.replace("{{exe_path}}", &exe_path.to_string_lossy())) +} + #[cfg(target_os = "macos")] fn install_service_macos() -> Result<(), String> { - // Check binary exists - if !std::path::Path::new("/usr/local/bin/numa").exists() { - return Err("numa binary not found at /usr/local/bin/numa. Run: sudo cp target/release/numa /usr/local/bin/numa".to_string()); - } - // Create log directory std::fs::create_dir_all("/usr/local/var/log") .map_err(|e| format!("failed to create log dir: {}", e))?; // Write plist let plist = include_str!("../com.numa.dns.plist"); + let plist = replace_exe_path(plist)?; + std::fs::write(PLIST_DEST, plist) .map_err(|e| format!("failed to write {}: {}", PLIST_DEST, e))?; @@ -1179,19 +1186,10 @@ fn uninstall_linux() -> Result<(), String> { Ok(()) } -#[cfg(target_os = "linux")] -fn ensure_binary_installed() -> Result<(), String> { - if !std::path::Path::new("/usr/local/bin/numa").exists() { - return Err("numa binary not found at /usr/local/bin/numa. 
Run: sudo cp target/release/numa /usr/local/bin/numa".to_string()); - } - Ok(()) -} - #[cfg(target_os = "linux")] fn install_service_linux() -> Result<(), String> { - ensure_binary_installed()?; - let unit = include_str!("../numa.service"); + let unit = replace_exe_path(plist)?; std::fs::write(SYSTEMD_UNIT, unit) .map_err(|e| format!("failed to write {}: {}", SYSTEMD_UNIT, e))?; From ad34fe2d9eb1a3f6a7ca88bbfb2a73f8d4b10720 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Sat, 4 Apr 2026 11:25:29 +0200 Subject: [PATCH 02/30] Fix unit replacement for linux --- src/system_dns.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/system_dns.rs b/src/system_dns.rs index 89ef441..25ab11e 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -1189,7 +1189,7 @@ fn uninstall_linux() -> Result<(), String> { #[cfg(target_os = "linux")] fn install_service_linux() -> Result<(), String> { let unit = include_str!("../numa.service"); - let unit = replace_exe_path(plist)?; + let unit = replace_exe_path(unit)?; std::fs::write(SYSTEMD_UNIT, unit) .map_err(|e| format!("failed to write {}: {}", SYSTEMD_UNIT, e))?; From efe36695407d8b01add433a24ce549c886600359 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 6 Apr 2026 22:38:22 +0300 Subject: [PATCH 03/30] fix: gate exe_path and replace_exe_path for Windows clippy, add macOS CI - Gate exe_path in restart_service() and replace_exe_path() behind #[cfg(any(target_os = "macos", target_os = "linux"))] to fix unused variable and dead code warnings on Windows - Add macOS CI job (clippy + tests) - Add test for template substitution in plist and systemd unit files Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/ci.yml | 11 +++++++++++ src/system_dns.rs | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f59e274..7884549 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,6 +27,17 @@ jobs: - 
name: audit run: cargo install cargo-audit && cargo audit + check-macos: + runs-on: macos-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + - name: clippy + run: cargo clippy -- -D warnings + - name: test + run: cargo test + check-windows: runs-on: windows-latest steps: diff --git a/src/system_dns.rs b/src/system_dns.rs index 25ab11e..ea5d05f 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -903,6 +903,7 @@ pub fn uninstall_service() -> Result<(), String> { /// Restart the service (kill process, launchd/systemd auto-restarts with new binary). pub fn restart_service() -> Result<(), String> { + #[cfg(any(target_os = "macos", target_os = "linux"))] let exe_path = std::env::current_exe().map_err(|e| format!("failed to get current exe: {}", e))?; @@ -969,6 +970,7 @@ pub fn service_status() -> Result<(), String> { } } +#[cfg(any(target_os = "macos", target_os = "linux"))] fn replace_exe_path(service: &str) -> Result { let exe_path = std::env::current_exe().map_err(|e| format!("failed to get current exe: {}", e))?; @@ -1411,6 +1413,22 @@ Wireless LAN adapter Wi-Fi: ); } + #[test] + #[cfg(any(target_os = "macos", target_os = "linux"))] + fn replace_exe_path_substitutes_template() { + let plist = include_str!("../com.numa.dns.plist"); + let unit = include_str!("../numa.service"); + + assert!(plist.contains("{{exe_path}}"), "plist missing placeholder"); + assert!(unit.contains("{{exe_path}}"), "unit file missing placeholder"); + + let result = replace_exe_path(plist).expect("replace_exe_path failed for plist"); + assert!(!result.contains("{{exe_path}}")); + + let result = replace_exe_path(unit).expect("replace_exe_path failed for unit"); + assert!(!result.contains("{{exe_path}}")); + } + #[test] fn parse_ipconfig_skips_disconnected() { let sample = "\ From 766935ec97b589e956f90804e7c9d43cc1ec42bc Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 6 Apr 2026 22:42:43 +0300 Subject: [PATCH 
04/30] style: fix rustfmt formatting Co-Authored-By: Claude Opus 4.6 (1M context) --- src/system_dns.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/system_dns.rs b/src/system_dns.rs index ea5d05f..8709e0d 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -1420,7 +1420,10 @@ Wireless LAN adapter Wi-Fi: let unit = include_str!("../numa.service"); assert!(plist.contains("{{exe_path}}"), "plist missing placeholder"); - assert!(unit.contains("{{exe_path}}"), "unit file missing placeholder"); + assert!( + unit.contains("{{exe_path}}"), + "unit file missing placeholder" + ); let result = replace_exe_path(plist).expect("replace_exe_path failed for plist"); assert!(!result.contains("{{exe_path}}")); From e4350ae81cd90f367d1d3197b0b07c89547700e5 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 30 Mar 2026 00:36:26 +0300 Subject: [PATCH 05/30] feat: add DNS-over-TLS (DoT) listener (RFC 7858) Refactor handle_query into transport-agnostic resolve_query that returns a BytePacketBuffer, keeping the UDP path zero-alloc. Add a TLS listener on port 853 with persistent connections, idle timeout, connection limits, and coalesced writes. Supports user-provided certs or self-signed CA fallback. Includes 5 integration tests. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 10 ++ Cargo.toml | 1 + src/config.rs | 39 ++++- src/ctx.rs | 38 +++-- src/dot.rs | 444 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/main.rs | 12 ++ 7 files changed, 534 insertions(+), 11 deletions(-) create mode 100644 src/dot.rs diff --git a/Cargo.lock b/Cargo.lock index ea8eb0a..722c413 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1159,6 +1159,7 @@ dependencies = [ "reqwest", "ring", "rustls", + "rustls-pemfile", "serde", "serde_json", "socket2 0.5.10", @@ -1546,6 +1547,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "rustls-pki-types" version = "1.14.0" diff --git a/Cargo.toml b/Cargo.toml index 79ccf9d..c6e9a2a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ rustls = "0.23" tokio-rustls = "0.26" arc-swap = "1" ring = "0.17" +rustls-pemfile = "2.2.0" [dev-dependencies] criterion = { version = "0.5", features = ["html_reports"] } diff --git a/src/config.rs b/src/config.rs index 0cf5cb0..acf4d37 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::net::Ipv4Addr; use std::net::Ipv6Addr; -use std::path::Path; +use std::path::{Path, PathBuf}; use serde::Deserialize; @@ -29,6 +29,8 @@ pub struct Config { pub lan: LanConfig, #[serde(default)] pub dnssec: DnssecConfig, + #[serde(default)] + pub dot: DotConfig, } #[derive(Deserialize)] @@ -370,6 +372,41 @@ pub struct DnssecConfig { pub strict: bool, } +#[derive(Deserialize, Clone)] +pub struct DotConfig { + #[serde(default)] + pub enabled: bool, + #[serde(default = "default_dot_port")] + pub port: u16, + #[serde(default = "default_dot_bind_addr")] + pub bind_addr: String, + /// Path to TLS certificate (PEM). 
If None, uses self-signed CA. + #[serde(default)] + pub cert_path: Option, + /// Path to TLS private key (PEM). If None, uses self-signed CA. + #[serde(default)] + pub key_path: Option, +} + +impl Default for DotConfig { + fn default() -> Self { + DotConfig { + enabled: false, + port: default_dot_port(), + bind_addr: default_dot_bind_addr(), + cert_path: None, + key_path: None, + } + } +} + +fn default_dot_port() -> u16 { + 853 +} +fn default_dot_bind_addr() -> String { + "0.0.0.0".to_string() +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/ctx.rs b/src/ctx.rs index 7529bc1..5ad1bbc 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -62,24 +62,27 @@ pub struct ServerCtx { pub dnssec_strict: bool, } -pub async fn handle_query( +/// Transport-agnostic DNS resolution. Runs the full pipeline (overrides, blocklist, +/// cache, upstream, DNSSEC) and returns the serialized response in a buffer. +/// Callers use `.filled()` to get the response bytes without heap allocation. +pub async fn resolve_query( mut buffer: BytePacketBuffer, src_addr: SocketAddr, ctx: &ServerCtx, -) -> crate::Result<()> { +) -> crate::Result { let start = Instant::now(); let query = match DnsPacket::from_buffer(&mut buffer) { Ok(packet) => packet, Err(e) => { warn!("{} | PARSE ERROR | {}", src_addr, e); - return Ok(()); + return Err(e); } }; let (qname, qtype) = match query.questions.first() { Some(q) => (q.name.clone(), q.qtype), - None => return Ok(()), + None => return Err("empty question section".into()), }; // Pipeline: overrides -> .tld interception -> blocklist -> local zones -> cache -> upstream @@ -306,17 +309,15 @@ pub async fn handle_query( response.resources.len(), ); + // Serialize response let mut resp_buffer = BytePacketBuffer::new(); if response.write(&mut resp_buffer).is_err() { - // Response too large for UDP — set TC bit and send header + question only + // Response too large — set TC bit and send header + question only debug!("response too large, setting TC bit for {}", 
qname); let mut tc_response = DnsPacket::response_from(&query, response.header.rescode); tc_response.header.truncated_message = true; - let mut tc_buffer = BytePacketBuffer::new(); - tc_response.write(&mut tc_buffer)?; - ctx.socket.send_to(tc_buffer.filled(), src_addr).await?; - } else { - ctx.socket.send_to(resp_buffer.filled(), src_addr).await?; + resp_buffer = BytePacketBuffer::new(); + tc_response.write(&mut resp_buffer)?; } // Record stats and query log @@ -339,6 +340,23 @@ pub async fn handle_query( dnssec, }); + Ok(resp_buffer) +} + +/// Handle a DNS query received over UDP. Thin wrapper around resolve_query. +pub async fn handle_query( + buffer: BytePacketBuffer, + src_addr: SocketAddr, + ctx: &ServerCtx, +) -> crate::Result<()> { + match resolve_query(buffer, src_addr, ctx).await { + Ok(resp_buffer) => { + ctx.socket.send_to(resp_buffer.filled(), src_addr).await?; + } + Err(e) => { + warn!("{} | RESOLVE ERROR | {}", src_addr, e); + } + } Ok(()) } diff --git a/src/dot.rs b/src/dot.rs new file mode 100644 index 0000000..4d86176 --- /dev/null +++ b/src/dot.rs @@ -0,0 +1,444 @@ +use std::net::SocketAddr; +use std::path::Path; +use std::sync::Arc; +use std::time::Duration; + +use log::{debug, error, info, warn}; +use rustls::ServerConfig; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::net::TcpListener; +use tokio::sync::Semaphore; +use tokio_rustls::TlsAcceptor; + +use crate::buffer::BytePacketBuffer; +use crate::config::DotConfig; +use crate::ctx::{resolve_query, ServerCtx}; + +const MAX_CONNECTIONS: usize = 512; +const IDLE_TIMEOUT: Duration = Duration::from_secs(30); + +/// Build a TLS ServerConfig for DoT from user-provided cert/key PEM files. +fn load_tls_config(cert_path: &Path, key_path: &Path) -> crate::Result> { + let cert_pem = std::fs::read(cert_path)?; + let key_pem = std::fs::read(key_path)?; + + let certs: Vec<_> = rustls_pemfile::certs(&mut &cert_pem[..]).collect::>()?; + let key = rustls_pemfile::private_key(&mut &key_pem[..])? 
+ .ok_or("no private key found in key file")?; + + let _ = rustls::crypto::ring::default_provider().install_default(); + + let config = ServerConfig::builder() + .with_no_client_auth() + .with_single_cert(certs, key)?; + + Ok(Arc::new(config)) +} + +/// Start the DNS-over-TLS listener (RFC 7858). +pub async fn start_dot(ctx: Arc, config: &DotConfig) { + let tls_config = match (&config.cert_path, &config.key_path) { + (Some(cert), Some(key)) => match load_tls_config(cert, key) { + Ok(cfg) => cfg, + Err(e) => { + warn!("DoT: failed to load TLS cert/key: {} — DoT disabled", e); + return; + } + }, + _ => match ctx.tls_config.as_ref() { + Some(arc_swap) => Arc::clone(&*arc_swap.load()), + None => match crate::tls::build_tls_config(&ctx.proxy_tld, &[]) { + Ok(cfg) => cfg, + Err(e) => { + warn!( + "DoT: failed to generate self-signed TLS: {} — DoT disabled", + e + ); + return; + } + }, + }, + }; + + let bind_addr: std::net::Ipv4Addr = config + .bind_addr + .parse() + .unwrap_or(std::net::Ipv4Addr::UNSPECIFIED); + let addr: SocketAddr = (bind_addr, config.port).into(); + let listener = match TcpListener::bind(addr).await { + Ok(l) => l, + Err(e) => { + warn!("DoT: could not bind {} ({}) — DoT disabled", addr, e); + return; + } + }; + info!("DoT listening on {}", addr); + + let acceptor = TlsAcceptor::from(tls_config); + let semaphore = Arc::new(Semaphore::new(MAX_CONNECTIONS)); + + loop { + let (tcp_stream, remote_addr) = match listener.accept().await { + Ok(conn) => conn, + Err(e) => { + error!("DoT: TCP accept error: {}", e); + continue; + } + }; + + let permit = match semaphore.clone().try_acquire_owned() { + Ok(p) => p, + Err(_) => { + debug!("DoT: connection limit reached, rejecting {}", remote_addr); + continue; + } + }; + let acceptor = acceptor.clone(); + let ctx = Arc::clone(&ctx); + + tokio::spawn(async move { + let _permit = permit; // held until task exits + + let mut tls_stream = match acceptor.accept(tcp_stream).await { + Ok(s) => s, + Err(e) => { + 
debug!("DoT: TLS handshake failed from {}: {}", remote_addr, e); + return; + } + }; + + // RFC 7858: connection is persistent — read queries until EOF or idle timeout + loop { + // Read 2-byte length prefix (RFC 1035 §4.2.2) with idle timeout + let mut len_buf = [0u8; 2]; + match tokio::time::timeout(IDLE_TIMEOUT, tls_stream.read_exact(&mut len_buf)).await + { + Ok(Ok(_)) => {} + Ok(Err(_)) => break, // read error or EOF + Err(_) => break, // idle timeout + } + let msg_len = u16::from_be_bytes(len_buf) as usize; + if msg_len == 0 || msg_len > 4096 { + debug!( + "DoT: invalid message length {} from {}", + msg_len, remote_addr + ); + break; + } + + let mut data = vec![0u8; msg_len]; + if tls_stream.read_exact(&mut data).await.is_err() { + break; + } + + let buffer = BytePacketBuffer::from_bytes(&data); + match resolve_query(buffer, remote_addr, &ctx).await { + Ok(resp_buffer) => { + let resp = resp_buffer.filled(); + // Coalesce length prefix + response into a single TLS write + let mut out = Vec::with_capacity(2 + resp.len()); + out.extend_from_slice(&(resp.len() as u16).to_be_bytes()); + out.extend_from_slice(resp); + if tls_stream.write_all(&out).await.is_err() { + break; + } + } + Err(e) => { + debug!("DoT: resolve error from {}: {}", remote_addr, e); + } + } + } + }); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + use std::sync::{Mutex, RwLock}; + + use rcgen::{CertificateParams, DnType, KeyPair}; + use rustls::pki_types::{CertificateDer, PrivateKeyDer, PrivatePkcs8KeyDer, ServerName}; + use tokio::io::{AsyncReadExt, AsyncWriteExt}; + + use crate::buffer::BytePacketBuffer; + use crate::header::ResultCode; + use crate::packet::DnsPacket; + use crate::question::QueryType; + use crate::record::DnsRecord; + + /// Generate a self-signed cert + key in memory, return (ServerConfig, ClientConfig). 
+ fn test_tls_configs() -> (Arc, Arc) { + let _ = rustls::crypto::ring::default_provider().install_default(); + + let key_pair = KeyPair::generate().unwrap(); + let mut params = CertificateParams::default(); + params + .distinguished_name + .push(DnType::CommonName, "localhost"); + params.subject_alt_names = vec![rcgen::SanType::DnsName("localhost".try_into().unwrap())]; + let cert = params.self_signed(&key_pair).unwrap(); + + let cert_der = CertificateDer::from(cert.der().to_vec()); + let key_der = PrivateKeyDer::Pkcs8(PrivatePkcs8KeyDer::from(key_pair.serialize_der())); + + let server_config = ServerConfig::builder() + .with_no_client_auth() + .with_single_cert(vec![cert_der.clone()], key_der) + .unwrap(); + + let mut root_store = rustls::RootCertStore::empty(); + root_store.add(cert_der).unwrap(); + let client_config = rustls::ClientConfig::builder() + .with_root_certificates(root_store) + .with_no_client_auth(); + + (Arc::new(server_config), Arc::new(client_config)) + } + + /// Spin up a DoT listener with a test TLS config. Returns (addr, client_config). 
+ async fn spawn_dot_server() -> (SocketAddr, Arc) { + let (server_tls, client_tls) = test_tls_configs(); + + let socket = tokio::net::UdpSocket::bind("127.0.0.1:0").await.unwrap(); + let ctx = Arc::new(ServerCtx { + socket, + zone_map: { + let mut m = HashMap::new(); + let mut inner = HashMap::new(); + inner.insert( + QueryType::A, + vec![DnsRecord::A { + domain: "dot-test.example".to_string(), + addr: std::net::Ipv4Addr::new(10, 0, 0, 1), + ttl: 300, + }], + ); + m.insert("dot-test.example".to_string(), inner); + m + }, + cache: RwLock::new(crate::cache::DnsCache::new(100, 60, 86400)), + stats: Mutex::new(crate::stats::ServerStats::new()), + overrides: RwLock::new(crate::override_store::OverrideStore::new()), + blocklist: RwLock::new(crate::blocklist::BlocklistStore::new()), + query_log: Mutex::new(crate::query_log::QueryLog::new(100)), + services: Mutex::new(crate::service_store::ServiceStore::new()), + lan_peers: Mutex::new(crate::lan::PeerStore::new(90)), + forwarding_rules: Vec::new(), + upstream: Mutex::new(crate::forward::Upstream::Udp( + "127.0.0.1:53".parse().unwrap(), + )), + upstream_auto: false, + upstream_port: 53, + lan_ip: Mutex::new(std::net::Ipv4Addr::LOCALHOST), + timeout: Duration::from_secs(3), + proxy_tld: "numa".to_string(), + proxy_tld_suffix: ".numa".to_string(), + lan_enabled: false, + config_path: String::new(), + config_found: false, + config_dir: std::path::PathBuf::from("/tmp"), + data_dir: std::path::PathBuf::from("/tmp"), + tls_config: Some(arc_swap::ArcSwap::from(server_tls)), + upstream_mode: crate::config::UpstreamMode::Forward, + root_hints: Vec::new(), + srtt: RwLock::new(crate::srtt::SrttCache::new(true)), + inflight: Mutex::new(HashMap::new()), + dnssec_enabled: false, + dnssec_strict: false, + }); + + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + + let tls_config = Arc::clone(&*ctx.tls_config.as_ref().unwrap().load()); + let acceptor = 
TlsAcceptor::from(tls_config); + let semaphore = Arc::new(Semaphore::new(MAX_CONNECTIONS)); + + tokio::spawn(async move { + loop { + let (tcp_stream, remote_addr) = match listener.accept().await { + Ok(conn) => conn, + Err(_) => return, + }; + let permit = match semaphore.clone().try_acquire_owned() { + Ok(p) => p, + Err(_) => continue, + }; + let acceptor = acceptor.clone(); + let ctx = Arc::clone(&ctx); + tokio::spawn(async move { + let _permit = permit; + let mut tls_stream = match acceptor.accept(tcp_stream).await { + Ok(s) => s, + Err(_) => return, + }; + loop { + let mut len_buf = [0u8; 2]; + match tokio::time::timeout( + IDLE_TIMEOUT, + tls_stream.read_exact(&mut len_buf), + ) + .await + { + Ok(Ok(_)) => {} + _ => break, + } + let msg_len = u16::from_be_bytes(len_buf) as usize; + if msg_len == 0 || msg_len > 4096 { + break; + } + let mut data = vec![0u8; msg_len]; + if tls_stream.read_exact(&mut data).await.is_err() { + break; + } + let buffer = BytePacketBuffer::from_bytes(&data); + match resolve_query(buffer, remote_addr, &ctx).await { + Ok(resp_buffer) => { + let resp = resp_buffer.filled(); + let mut out = Vec::with_capacity(2 + resp.len()); + out.extend_from_slice(&(resp.len() as u16).to_be_bytes()); + out.extend_from_slice(resp); + if tls_stream.write_all(&out).await.is_err() { + break; + } + } + Err(_) => {} + } + } + }); + } + }); + + (addr, client_tls) + } + + /// Open a TLS connection to the DoT server and return the stream. + async fn dot_connect( + addr: SocketAddr, + client_config: &Arc, + ) -> tokio_rustls::client::TlsStream { + let connector = tokio_rustls::TlsConnector::from(Arc::clone(client_config)); + let tcp = tokio::net::TcpStream::connect(addr).await.unwrap(); + connector + .connect(ServerName::try_from("localhost").unwrap(), tcp) + .await + .unwrap() + } + + /// Send a DNS query over a DoT stream and read the response. 
+ async fn dot_exchange( + stream: &mut tokio_rustls::client::TlsStream, + query: &DnsPacket, + ) -> DnsPacket { + let mut buf = BytePacketBuffer::new(); + query.write(&mut buf).unwrap(); + let msg = buf.filled(); + + let mut out = Vec::with_capacity(2 + msg.len()); + out.extend_from_slice(&(msg.len() as u16).to_be_bytes()); + out.extend_from_slice(msg); + stream.write_all(&out).await.unwrap(); + + let mut len_buf = [0u8; 2]; + stream.read_exact(&mut len_buf).await.unwrap(); + let resp_len = u16::from_be_bytes(len_buf) as usize; + + let mut data = vec![0u8; resp_len]; + stream.read_exact(&mut data).await.unwrap(); + + let mut resp_buf = BytePacketBuffer::from_bytes(&data); + DnsPacket::from_buffer(&mut resp_buf).unwrap() + } + + #[tokio::test] + async fn dot_resolves_local_zone() { + let (addr, client_config) = spawn_dot_server().await; + let mut stream = dot_connect(addr, &client_config).await; + + let query = DnsPacket::query(0x1234, "dot-test.example", QueryType::A); + let resp = dot_exchange(&mut stream, &query).await; + + assert_eq!(resp.header.id, 0x1234); + assert!(resp.header.response); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + assert_eq!(resp.answers.len(), 1); + match &resp.answers[0] { + DnsRecord::A { domain, addr, ttl } => { + assert_eq!(domain, "dot-test.example"); + assert_eq!(*addr, std::net::Ipv4Addr::new(10, 0, 0, 1)); + assert_eq!(*ttl, 300); + } + other => panic!("expected A record, got {:?}", other), + } + } + + #[tokio::test] + async fn dot_multiple_queries_on_persistent_connection() { + let (addr, client_config) = spawn_dot_server().await; + let mut stream = dot_connect(addr, &client_config).await; + + // Send 3 queries on the same TLS connection + for i in 0..3u16 { + let query = DnsPacket::query(0xA000 + i, "dot-test.example", QueryType::A); + let resp = dot_exchange(&mut stream, &query).await; + assert_eq!(resp.header.id, 0xA000 + i); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + assert_eq!(resp.answers.len(), 
1); + } + } + + #[tokio::test] + async fn dot_nxdomain_for_unknown() { + let (addr, client_config) = spawn_dot_server().await; + let mut stream = dot_connect(addr, &client_config).await; + + let query = DnsPacket::query(0xBEEF, "nonexistent.test", QueryType::A); + let resp = dot_exchange(&mut stream, &query).await; + + assert_eq!(resp.header.id, 0xBEEF); + assert!(resp.header.response); + // Query goes to upstream (127.0.0.1:53), which will fail — expect SERVFAIL + assert_eq!(resp.header.rescode, ResultCode::SERVFAIL); + } + + #[tokio::test] + async fn dot_concurrent_connections() { + let (addr, client_config) = spawn_dot_server().await; + + let mut handles = Vec::new(); + for i in 0..5u16 { + let cfg = Arc::clone(&client_config); + handles.push(tokio::spawn(async move { + let mut stream = dot_connect(addr, &cfg).await; + let query = DnsPacket::query(0xC000 + i, "dot-test.example", QueryType::A); + let resp = dot_exchange(&mut stream, &query).await; + assert_eq!(resp.header.id, 0xC000 + i); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + assert_eq!(resp.answers.len(), 1); + })); + } + + for h in handles { + h.await.unwrap(); + } + } + + #[tokio::test] + async fn dot_localhost_resolution() { + let (addr, client_config) = spawn_dot_server().await; + let mut stream = dot_connect(addr, &client_config).await; + + let query = DnsPacket::query(0xD000, "localhost", QueryType::A); + let resp = dot_exchange(&mut stream, &query).await; + + assert_eq!(resp.header.id, 0xD000); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + assert_eq!(resp.answers.len(), 1); + match &resp.answers[0] { + DnsRecord::A { addr, .. 
} => assert_eq!(*addr, std::net::Ipv4Addr::LOCALHOST), + other => panic!("expected A record, got {:?}", other), + } + } +} diff --git a/src/lib.rs b/src/lib.rs index cff1a48..36017fe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,7 @@ pub mod cache; pub mod config; pub mod ctx; pub mod dnssec; +pub mod dot; pub mod forward; pub mod header; pub mod lan; diff --git a/src/main.rs b/src/main.rs index 68022fc..b9316b8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -370,6 +370,9 @@ async fn main() -> numa::Result<()> { ); } } + if config.dot.enabled { + row("DoT", g, &format!("tls://:{}", config.dot.port)); + } if config.lan.enabled { row("LAN", g, "mDNS (_numa._tcp.local)"); } @@ -477,6 +480,15 @@ async fn main() -> numa::Result<()> { }); } + // Spawn DNS-over-TLS listener (RFC 7858) + if config.dot.enabled { + let dot_ctx = Arc::clone(&ctx); + let dot_config = config.dot.clone(); + tokio::spawn(async move { + numa::dot::start_dot(dot_ctx, &dot_config).await; + }); + } + // UDP DNS listener #[allow(clippy::infinite_loop)] loop { From 14efc51340fe419eb9d66166544b0478ff491bee Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 30 Mar 2026 00:50:04 +0300 Subject: [PATCH 06/30] fix: send SERVFAIL on DoT resolve errors, extract shared connection handler - Send SERVFAIL response (with correct query ID) when resolve_query fails, preventing DoT clients from hanging until idle timeout - Extract handle_dot_connection() so tests use the same logic as production, eliminating duplicated accept/read/resolve loop - Replace magic 4096 with named MAX_MSG_LEN constant tied to BUF_SIZE - Add flush() after each TLS write to prevent buffered responses - Extract fallback_tls() helper, handle partial cert/key config, support IPv6 bind address, remove redundant crypto provider init Co-Authored-By: Claude Opus 4.6 (1M context) --- src/dot.rs | 197 ++++++++++++++++++++++++++++------------------------- 1 file changed, 103 insertions(+), 94 deletions(-) diff --git a/src/dot.rs 
b/src/dot.rs index 4d86176..e10e7b7 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -1,4 +1,4 @@ -use std::net::SocketAddr; +use std::net::{IpAddr, SocketAddr}; use std::path::Path; use std::sync::Arc; use std::time::Duration; @@ -13,9 +13,14 @@ use tokio_rustls::TlsAcceptor; use crate::buffer::BytePacketBuffer; use crate::config::DotConfig; use crate::ctx::{resolve_query, ServerCtx}; +use crate::header::ResultCode; +use crate::packet::DnsPacket; const MAX_CONNECTIONS: usize = 512; const IDLE_TIMEOUT: Duration = Duration::from_secs(30); +// Matches BytePacketBuffer::BUF_SIZE — RFC 7858 allows up to 65535 but our +// buffer would silently truncate anything larger. +const MAX_MSG_LEN: usize = 4096; /// Build a TLS ServerConfig for DoT from user-provided cert/key PEM files. fn load_tls_config(cert_path: &Path, key_path: &Path) -> crate::Result> { @@ -26,8 +31,6 @@ fn load_tls_config(cert_path: &Path, key_path: &Path) -> crate::Result crate::Result Option> { + if let Some(arc_swap) = ctx.tls_config.as_ref() { + return Some(Arc::clone(&*arc_swap.load())); + } + match crate::tls::build_tls_config(&ctx.proxy_tld, &[]) { + Ok(cfg) => Some(cfg), + Err(e) => { + warn!( + "DoT: failed to generate self-signed TLS: {} — DoT disabled", + e + ); + None + } + } +} + /// Start the DNS-over-TLS listener (RFC 7858). 
pub async fn start_dot(ctx: Arc, config: &DotConfig) { let tls_config = match (&config.cert_path, &config.key_path) { @@ -45,26 +64,24 @@ pub async fn start_dot(ctx: Arc, config: &DotConfig) { return; } }, - _ => match ctx.tls_config.as_ref() { - Some(arc_swap) => Arc::clone(&*arc_swap.load()), - None => match crate::tls::build_tls_config(&ctx.proxy_tld, &[]) { - Ok(cfg) => cfg, - Err(e) => { - warn!( - "DoT: failed to generate self-signed TLS: {} — DoT disabled", - e - ); - return; - } - }, + (Some(_), None) | (None, Some(_)) => { + warn!("DoT: both cert_path and key_path must be set — ignoring partial config, using self-signed"); + match fallback_tls(&ctx) { + Some(cfg) => cfg, + None => return, + } + } + (None, None) => match fallback_tls(&ctx) { + Some(cfg) => cfg, + None => return, }, }; - let bind_addr: std::net::Ipv4Addr = config + let bind_addr: IpAddr = config .bind_addr .parse() - .unwrap_or(std::net::Ipv4Addr::UNSPECIFIED); - let addr: SocketAddr = (bind_addr, config.port).into(); + .unwrap_or(IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED)); + let addr = SocketAddr::new(bind_addr, config.port); let listener = match TcpListener::bind(addr).await { Ok(l) => l, Err(e) => { @@ -99,7 +116,7 @@ pub async fn start_dot(ctx: Arc, config: &DotConfig) { tokio::spawn(async move { let _permit = permit; // held until task exits - let mut tls_stream = match acceptor.accept(tcp_stream).await { + let tls_stream = match acceptor.accept(tcp_stream).await { Ok(s) => s, Err(e) => { debug!("DoT: TLS handshake failed from {}: {}", remote_addr, e); @@ -107,51 +124,75 @@ pub async fn start_dot(ctx: Arc, config: &DotConfig) { } }; - // RFC 7858: connection is persistent — read queries until EOF or idle timeout - loop { - // Read 2-byte length prefix (RFC 1035 §4.2.2) with idle timeout - let mut len_buf = [0u8; 2]; - match tokio::time::timeout(IDLE_TIMEOUT, tls_stream.read_exact(&mut len_buf)).await - { - Ok(Ok(_)) => {} - Ok(Err(_)) => break, // read error or EOF - Err(_) => break, 
// idle timeout - } - let msg_len = u16::from_be_bytes(len_buf) as usize; - if msg_len == 0 || msg_len > 4096 { - debug!( - "DoT: invalid message length {} from {}", - msg_len, remote_addr - ); - break; - } - - let mut data = vec![0u8; msg_len]; - if tls_stream.read_exact(&mut data).await.is_err() { - break; - } - - let buffer = BytePacketBuffer::from_bytes(&data); - match resolve_query(buffer, remote_addr, &ctx).await { - Ok(resp_buffer) => { - let resp = resp_buffer.filled(); - // Coalesce length prefix + response into a single TLS write - let mut out = Vec::with_capacity(2 + resp.len()); - out.extend_from_slice(&(resp.len() as u16).to_be_bytes()); - out.extend_from_slice(resp); - if tls_stream.write_all(&out).await.is_err() { - break; - } - } - Err(e) => { - debug!("DoT: resolve error from {}: {}", remote_addr, e); - } - } - } + handle_dot_connection(tls_stream, remote_addr, &ctx).await; }); } } +/// Handle a single persistent DoT connection (RFC 7858). +/// Reads length-prefixed DNS queries until EOF, idle timeout, or error. 
+async fn handle_dot_connection(mut stream: S, remote_addr: SocketAddr, ctx: &ServerCtx) +where + S: AsyncReadExt + AsyncWriteExt + Unpin, +{ + loop { + // Read 2-byte length prefix (RFC 1035 §4.2.2) with idle timeout + let mut len_buf = [0u8; 2]; + match tokio::time::timeout(IDLE_TIMEOUT, stream.read_exact(&mut len_buf)).await { + Ok(Ok(_)) => {} + Ok(Err(_)) => break, // read error or EOF + Err(_) => break, // idle timeout + } + let msg_len = u16::from_be_bytes(len_buf) as usize; + if msg_len == 0 || msg_len > MAX_MSG_LEN { + debug!( + "DoT: invalid message length {} from {}", + msg_len, remote_addr + ); + break; + } + + let mut data = vec![0u8; msg_len]; + if stream.read_exact(&mut data).await.is_err() { + break; + } + + // Extract query ID before resolve_query consumes the buffer + let query_id = data + .get(..2) + .map(|b| u16::from_be_bytes([b[0], b[1]])) + .unwrap_or(0); + + let buffer = BytePacketBuffer::from_bytes(&data); + let resp_buffer = match resolve_query(buffer, remote_addr, ctx).await { + Ok(buf) => buf, + Err(e) => { + debug!("DoT: resolve error from {}: {}", remote_addr, e); + // Send SERVFAIL so the client doesn't hang + let mut resp = DnsPacket::new(); + resp.header.id = query_id; + resp.header.response = true; + resp.header.rescode = ResultCode::SERVFAIL; + let mut buf = BytePacketBuffer::new(); + if resp.write(&mut buf).is_err() { + break; + } + buf + } + }; + let resp = resp_buffer.filled(); + let mut out = Vec::with_capacity(2 + resp.len()); + out.extend_from_slice(&(resp.len() as u16).to_be_bytes()); + out.extend_from_slice(resp); + if stream.write_all(&out).await.is_err() { + break; + } + if stream.flush().await.is_err() { + break; + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -270,43 +311,11 @@ mod tests { let ctx = Arc::clone(&ctx); tokio::spawn(async move { let _permit = permit; - let mut tls_stream = match acceptor.accept(tcp_stream).await { + let tls_stream = match acceptor.accept(tcp_stream).await { Ok(s) => s, Err(_) => 
return, }; - loop { - let mut len_buf = [0u8; 2]; - match tokio::time::timeout( - IDLE_TIMEOUT, - tls_stream.read_exact(&mut len_buf), - ) - .await - { - Ok(Ok(_)) => {} - _ => break, - } - let msg_len = u16::from_be_bytes(len_buf) as usize; - if msg_len == 0 || msg_len > 4096 { - break; - } - let mut data = vec![0u8; msg_len]; - if tls_stream.read_exact(&mut data).await.is_err() { - break; - } - let buffer = BytePacketBuffer::from_bytes(&data); - match resolve_query(buffer, remote_addr, &ctx).await { - Ok(resp_buffer) => { - let resp = resp_buffer.filled(); - let mut out = Vec::with_capacity(2 + resp.len()); - out.extend_from_slice(&(resp.len() as u16).to_be_bytes()); - out.extend_from_slice(resp); - if tls_stream.write_all(&out).await.is_err() { - break; - } - } - Err(_) => {} - } - } + handle_dot_connection(tls_stream, remote_addr, &ctx).await; }); } }); From aa8923b2c63b8b915762adb60566affb4c77a548 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 30 Mar 2026 01:31:51 +0300 Subject: [PATCH 07/30] fix: add debug logging for DoT SERVFAIL serialization failure, TC-bit TODO Co-Authored-By: Claude Opus 4.6 --- src/ctx.rs | 2 ++ src/dot.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/src/ctx.rs b/src/ctx.rs index 5ad1bbc..d3d4eb0 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -310,6 +310,8 @@ pub async fn resolve_query( ); // Serialize response + // TODO: TC bit is UDP-specific; DoT connections could carry up to 65535 bytes. + // Once BytePacketBuffer supports larger buffers, skip truncation for TCP/TLS. 
let mut resp_buffer = BytePacketBuffer::new(); if response.write(&mut resp_buffer).is_err() { // Response too large — set TC bit and send header + question only diff --git a/src/dot.rs b/src/dot.rs index e10e7b7..d780727 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -175,6 +175,7 @@ where resp.header.rescode = ResultCode::SERVFAIL; let mut buf = BytePacketBuffer::new(); if resp.write(&mut buf).is_err() { + debug!("DoT: failed to serialize SERVFAIL for {}", remote_addr); break; } buf From cb54ab3dfce794269aab155f55e8f843722ab9a4 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 6 Apr 2026 23:10:45 +0300 Subject: [PATCH 08/30] fix: harden DoT listener against slowloris and stale handshakes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add 10s timeout on TLS handshake — prevents clients from holding a semaphore permit without completing the handshake - Add IDLE_TIMEOUT on payload read_exact — prevents slowloris after sending a valid length prefix then trickling bytes - Extract accept_loop() shared between start_dot and tests — eliminates duplicated accept logic that could drift - Add 5s timeout on TCP reads in recursive test mock server Co-Authored-By: Claude Opus 4.6 (1M context) --- src/dot.rs | 70 ++++++++++++++++++++---------------------------- src/recursive.rs | 15 +++++++++-- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/src/dot.rs b/src/dot.rs index d780727..d9c1180 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -18,6 +18,7 @@ use crate::packet::DnsPacket; const MAX_CONNECTIONS: usize = 512; const IDLE_TIMEOUT: Duration = Duration::from_secs(30); +const HANDSHAKE_TIMEOUT: Duration = Duration::from_secs(10); // Matches BytePacketBuffer::BUF_SIZE — RFC 7858 allows up to 65535 but our // buffer would silently truncate anything larger. 
const MAX_MSG_LEN: usize = 4096; @@ -91,7 +92,10 @@ pub async fn start_dot(ctx: Arc, config: &DotConfig) { }; info!("DoT listening on {}", addr); - let acceptor = TlsAcceptor::from(tls_config); + accept_loop(listener, TlsAcceptor::from(tls_config), ctx).await; +} + +async fn accept_loop(listener: TcpListener, acceptor: TlsAcceptor, ctx: Arc) { let semaphore = Arc::new(Semaphore::new(MAX_CONNECTIONS)); loop { @@ -116,13 +120,18 @@ pub async fn start_dot(ctx: Arc, config: &DotConfig) { tokio::spawn(async move { let _permit = permit; // held until task exits - let tls_stream = match acceptor.accept(tcp_stream).await { - Ok(s) => s, - Err(e) => { - debug!("DoT: TLS handshake failed from {}: {}", remote_addr, e); - return; - } - }; + let tls_stream = + match tokio::time::timeout(HANDSHAKE_TIMEOUT, acceptor.accept(tcp_stream)).await { + Ok(Ok(s)) => s, + Ok(Err(e)) => { + debug!("DoT: TLS handshake failed from {}: {}", remote_addr, e); + return; + } + Err(_) => { + debug!("DoT: TLS handshake timeout from {}", remote_addr); + return; + } + }; handle_dot_connection(tls_stream, remote_addr, &ctx).await; }); @@ -152,18 +161,19 @@ where break; } - let mut data = vec![0u8; msg_len]; - if stream.read_exact(&mut data).await.is_err() { - break; + let mut buffer = BytePacketBuffer::new(); + match tokio::time::timeout(IDLE_TIMEOUT, stream.read_exact(&mut buffer.buf[..msg_len])) + .await + { + Ok(Ok(_)) => {} + Ok(Err(_)) => break, + Err(_) => { + debug!("DoT: payload read timeout from {}", remote_addr); + break; + } } - // Extract query ID before resolve_query consumes the buffer - let query_id = data - .get(..2) - .map(|b| u16::from_be_bytes([b[0], b[1]])) - .unwrap_or(0); - - let buffer = BytePacketBuffer::from_bytes(&data); + let query_id = u16::from_be_bytes([buffer.buf[0], buffer.buf[1]]); let resp_buffer = match resolve_query(buffer, remote_addr, ctx).await { Ok(buf) => buf, Err(e) => { @@ -296,30 +306,8 @@ mod tests { let tls_config = 
Arc::clone(&*ctx.tls_config.as_ref().unwrap().load()); let acceptor = TlsAcceptor::from(tls_config); - let semaphore = Arc::new(Semaphore::new(MAX_CONNECTIONS)); - tokio::spawn(async move { - loop { - let (tcp_stream, remote_addr) = match listener.accept().await { - Ok(conn) => conn, - Err(_) => return, - }; - let permit = match semaphore.clone().try_acquire_owned() { - Ok(p) => p, - Err(_) => continue, - }; - let acceptor = acceptor.clone(); - let ctx = Arc::clone(&ctx); - tokio::spawn(async move { - let _permit = permit; - let tls_stream = match acceptor.accept(tcp_stream).await { - Ok(s) => s, - Err(_) => return, - }; - handle_dot_connection(tls_stream, remote_addr, &ctx).await; - }); - } - }); + tokio::spawn(accept_loop(listener, acceptor, ctx)); (addr, client_tls) } diff --git a/src/recursive.rs b/src/recursive.rs index 7801bec..24d0367 100644 --- a/src/recursive.rs +++ b/src/recursive.rs @@ -870,14 +870,25 @@ mod tests { }; let handler = handler.clone(); tokio::spawn(async move { + let timeout = std::time::Duration::from_secs(5); // Read length-prefixed DNS query let mut len_buf = [0u8; 2]; - if stream.read_exact(&mut len_buf).await.is_err() { + if tokio::time::timeout(timeout, stream.read_exact(&mut len_buf)) + .await + .ok() + .and_then(|r| r.ok()) + .is_none() + { return; } let len = u16::from_be_bytes(len_buf) as usize; let mut data = vec![0u8; len]; - if stream.read_exact(&mut data).await.is_err() { + if tokio::time::timeout(timeout, stream.read_exact(&mut data)) + .await + .ok() + .and_then(|r| r.ok()) + .is_none() + { return; } From 1239ed0e729882383d050fbc2c62adf13f287c58 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 7 Apr 2026 16:47:54 +0300 Subject: [PATCH 09/30] fix: parse DoT queries up-front and echo question in SERVFAIL Address review findings on PR #25: - Refactor resolve_query to take a pre-parsed DnsPacket. Parse-error handling moves to the UDP caller, eliminating the double warn! line on malformed UDP queries. 
- Enforce MIN_MSG_LEN=12 (DNS header) in handle_dot_connection so query_id extraction is always reading client-sent bytes, not the zeroed buffer tail. - Parse the DoT query before calling resolve_query and retain it, so SERVFAIL responses can echo the original question section via response_from(). Parse failures send FORMERR with the client id. - Extract write_framed() helper for length-prefix + flush, reused by success, SERVFAIL, and FORMERR paths. - Back off 100ms on listener.accept() errors to avoid tight-looping on fd exhaustion. - Replace the hardcoded 127.0.0.1:53 upstream in dot_nxdomain_for_unknown with a bound-but-unresponsive UDP socket owned by the test, making it independent of the host's local resolver. Test now runs in ~220ms (timeout lowered to 200ms) instead of 3s and asserts the question is echoed in the SERVFAIL response. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/ctx.rs | 23 ++++++++------- src/dot.rs | 87 +++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 76 insertions(+), 34 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index d3d4eb0..17a4979 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -65,21 +65,15 @@ pub struct ServerCtx { /// Transport-agnostic DNS resolution. Runs the full pipeline (overrides, blocklist, /// cache, upstream, DNSSEC) and returns the serialized response in a buffer. /// Callers use `.filled()` to get the response bytes without heap allocation. +/// Callers are responsible for parsing the incoming buffer into a `DnsPacket` +/// (and logging parse errors) before calling this function. 
pub async fn resolve_query( - mut buffer: BytePacketBuffer, + query: DnsPacket, src_addr: SocketAddr, ctx: &ServerCtx, ) -> crate::Result { let start = Instant::now(); - let query = match DnsPacket::from_buffer(&mut buffer) { - Ok(packet) => packet, - Err(e) => { - warn!("{} | PARSE ERROR | {}", src_addr, e); - return Err(e); - } - }; - let (qname, qtype) = match query.questions.first() { Some(q) => (q.name.clone(), q.qtype), None => return Err("empty question section".into()), @@ -347,11 +341,18 @@ pub async fn resolve_query( /// Handle a DNS query received over UDP. Thin wrapper around resolve_query. pub async fn handle_query( - buffer: BytePacketBuffer, + mut buffer: BytePacketBuffer, src_addr: SocketAddr, ctx: &ServerCtx, ) -> crate::Result<()> { - match resolve_query(buffer, src_addr, ctx).await { + let query = match DnsPacket::from_buffer(&mut buffer) { + Ok(packet) => packet, + Err(e) => { + warn!("{} | PARSE ERROR | {}", src_addr, e); + return Ok(()); + } + }; + match resolve_query(query, src_addr, ctx).await { Ok(resp_buffer) => { ctx.socket.send_to(resp_buffer.filled(), src_addr).await?; } diff --git a/src/dot.rs b/src/dot.rs index d9c1180..2178c26 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -19,9 +19,12 @@ use crate::packet::DnsPacket; const MAX_CONNECTIONS: usize = 512; const IDLE_TIMEOUT: Duration = Duration::from_secs(30); const HANDSHAKE_TIMEOUT: Duration = Duration::from_secs(10); +const ACCEPT_ERROR_BACKOFF: Duration = Duration::from_millis(100); // Matches BytePacketBuffer::BUF_SIZE — RFC 7858 allows up to 65535 but our // buffer would silently truncate anything larger. const MAX_MSG_LEN: usize = 4096; +// DNS header is 12 bytes; anything shorter cannot be a valid query. +const MIN_MSG_LEN: usize = 12; /// Build a TLS ServerConfig for DoT from user-provided cert/key PEM files. 
fn load_tls_config(cert_path: &Path, key_path: &Path) -> crate::Result> { @@ -103,6 +106,8 @@ async fn accept_loop(listener: TcpListener, acceptor: TlsAcceptor, ctx: Arc conn, Err(e) => { error!("DoT: TCP accept error: {}", e); + // Back off to avoid tight-looping on persistent failures (e.g. fd exhaustion). + tokio::time::sleep(ACCEPT_ERROR_BACKOFF).await; continue; } }; @@ -153,7 +158,7 @@ where Err(_) => break, // idle timeout } let msg_len = u16::from_be_bytes(len_buf) as usize; - if msg_len == 0 || msg_len > MAX_MSG_LEN { + if !(MIN_MSG_LEN..=MAX_MSG_LEN).contains(&msg_len) { debug!( "DoT: invalid message length {} from {}", msg_len, remote_addr @@ -173,37 +178,66 @@ where } } - let query_id = u16::from_be_bytes([buffer.buf[0], buffer.buf[1]]); - let resp_buffer = match resolve_query(buffer, remote_addr, ctx).await { - Ok(buf) => buf, + // Parse query up-front so we can echo its question section in SERVFAIL + // responses when resolve_query fails. + let query = match DnsPacket::from_buffer(&mut buffer) { + Ok(q) => q, Err(e) => { - debug!("DoT: resolve error from {}: {}", remote_addr, e); - // Send SERVFAIL so the client doesn't hang + warn!("{} | PARSE ERROR | {}", remote_addr, e); + // msg_len >= MIN_MSG_LEN guarantees buf[0..2] is the client's query id. 
+ let query_id = u16::from_be_bytes([buffer.buf[0], buffer.buf[1]]); let mut resp = DnsPacket::new(); resp.header.id = query_id; resp.header.response = true; - resp.header.rescode = ResultCode::SERVFAIL; - let mut buf = BytePacketBuffer::new(); - if resp.write(&mut buf).is_err() { + resp.header.rescode = ResultCode::FORMERR; + let mut out_buf = BytePacketBuffer::new(); + if resp.write(&mut out_buf).is_err() { + debug!("DoT: failed to serialize FORMERR for {}", remote_addr); + break; + } + if write_framed(&mut stream, out_buf.filled()).await.is_err() { + break; + } + continue; + } + }; + + let resp_buffer = match resolve_query(query.clone(), remote_addr, ctx).await { + Ok(buf) => buf, + Err(e) => { + warn!("{} | RESOLVE ERROR | {}", remote_addr, e); + // Build SERVFAIL that echoes the original question section. + let resp = DnsPacket::response_from(&query, ResultCode::SERVFAIL); + let mut out_buf = BytePacketBuffer::new(); + if resp.write(&mut out_buf).is_err() { debug!("DoT: failed to serialize SERVFAIL for {}", remote_addr); break; } - buf + out_buf } }; - let resp = resp_buffer.filled(); - let mut out = Vec::with_capacity(2 + resp.len()); - out.extend_from_slice(&(resp.len() as u16).to_be_bytes()); - out.extend_from_slice(resp); - if stream.write_all(&out).await.is_err() { - break; - } - if stream.flush().await.is_err() { + if write_framed(&mut stream, resp_buffer.filled()) + .await + .is_err() + { break; } } } +/// Write a DNS message with its 2-byte length prefix, coalesced into one syscall. +async fn write_framed(stream: &mut S, msg: &[u8]) -> std::io::Result<()> +where + S: AsyncWriteExt + Unpin, +{ + let mut out = Vec::with_capacity(2 + msg.len()); + out.extend_from_slice(&(msg.len() as u16).to_be_bytes()); + out.extend_from_slice(msg); + stream.write_all(&out).await?; + stream.flush().await?; + Ok(()) +} + #[cfg(test)] mod tests { use super::*; @@ -250,10 +284,16 @@ mod tests { } /// Spin up a DoT listener with a test TLS config. 
Returns (addr, client_config). + /// The upstream is pointed at a bound-but-unresponsive UDP socket we own, so + /// any query that escapes to the upstream path times out deterministically + /// (SERVFAIL) regardless of what the host has running on port 53. async fn spawn_dot_server() -> (SocketAddr, Arc) { let (server_tls, client_tls) = test_tls_configs(); let socket = tokio::net::UdpSocket::bind("127.0.0.1:0").await.unwrap(); + // Bind an unresponsive upstream and leak it so it lives for the test duration. + let blackhole = Box::leak(Box::new(std::net::UdpSocket::bind("127.0.0.1:0").unwrap())); + let upstream_addr = blackhole.local_addr().unwrap(); let ctx = Arc::new(ServerCtx { socket, zone_map: { @@ -278,13 +318,11 @@ mod tests { services: Mutex::new(crate::service_store::ServiceStore::new()), lan_peers: Mutex::new(crate::lan::PeerStore::new(90)), forwarding_rules: Vec::new(), - upstream: Mutex::new(crate::forward::Upstream::Udp( - "127.0.0.1:53".parse().unwrap(), - )), + upstream: Mutex::new(crate::forward::Upstream::Udp(upstream_addr)), upstream_auto: false, upstream_port: 53, lan_ip: Mutex::new(std::net::Ipv4Addr::LOCALHOST), - timeout: Duration::from_secs(3), + timeout: Duration::from_millis(200), proxy_tld: "numa".to_string(), proxy_tld_suffix: ".numa".to_string(), lan_enabled: false, @@ -397,8 +435,11 @@ mod tests { assert_eq!(resp.header.id, 0xBEEF); assert!(resp.header.response); - // Query goes to upstream (127.0.0.1:53), which will fail — expect SERVFAIL + // Query goes to the blackhole upstream which never replies → SERVFAIL. + // The SERVFAIL response echoes the question section. 
assert_eq!(resp.header.rescode, ResultCode::SERVFAIL); + assert_eq!(resp.questions.len(), 1); + assert_eq!(resp.questions[0].name, "nonexistent.test"); } #[tokio::test] From 7742858b7bc6ca7b0091475ad4d62e1ab9884a9a Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 7 Apr 2026 20:10:51 +0300 Subject: [PATCH 10/30] refactor: simplify DoT cert/key match and extract send_response helper - Flatten 4-arm cert/key match in start_dot to 2 arms with the partial-config warning hoisted into a one-liner above the match. - Extract send_response() that serializes a DnsPacket and writes it framed, used by both the FORMERR-on-parse-error and SERVFAIL-on- resolve-error paths. Removes duplicated buffer/write/log boilerplate and unifies the rescode logging via {:?}. No behavior change; 126/126 tests still pass. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/dot.rs | 70 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/src/dot.rs b/src/dot.rs index 2178c26..291f6f0 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -60,6 +60,9 @@ fn fallback_tls(ctx: &ServerCtx) -> Option> { /// Start the DNS-over-TLS listener (RFC 7858). 
pub async fn start_dot(ctx: Arc, config: &DotConfig) { + if config.cert_path.is_some() != config.key_path.is_some() { + warn!("DoT: both cert_path and key_path must be set — ignoring partial config, using self-signed"); + } let tls_config = match (&config.cert_path, &config.key_path) { (Some(cert), Some(key)) => match load_tls_config(cert, key) { Ok(cfg) => cfg, @@ -68,14 +71,7 @@ pub async fn start_dot(ctx: Arc, config: &DotConfig) { return; } }, - (Some(_), None) | (None, Some(_)) => { - warn!("DoT: both cert_path and key_path must be set — ignoring partial config, using self-signed"); - match fallback_tls(&ctx) { - Some(cfg) => cfg, - None => return, - } - } - (None, None) => match fallback_tls(&ctx) { + _ => match fallback_tls(&ctx) { Some(cfg) => cfg, None => return, }, @@ -190,41 +186,55 @@ where resp.header.id = query_id; resp.header.response = true; resp.header.rescode = ResultCode::FORMERR; - let mut out_buf = BytePacketBuffer::new(); - if resp.write(&mut out_buf).is_err() { - debug!("DoT: failed to serialize FORMERR for {}", remote_addr); - break; - } - if write_framed(&mut stream, out_buf.filled()).await.is_err() { + if send_response(&mut stream, &resp, remote_addr).await.is_err() { break; } continue; } }; - let resp_buffer = match resolve_query(query.clone(), remote_addr, ctx).await { - Ok(buf) => buf, - Err(e) => { - warn!("{} | RESOLVE ERROR | {}", remote_addr, e); - // Build SERVFAIL that echoes the original question section. - let resp = DnsPacket::response_from(&query, ResultCode::SERVFAIL); - let mut out_buf = BytePacketBuffer::new(); - if resp.write(&mut out_buf).is_err() { - debug!("DoT: failed to serialize SERVFAIL for {}", remote_addr); + match resolve_query(query.clone(), remote_addr, ctx).await { + Ok(resp_buffer) => { + if write_framed(&mut stream, resp_buffer.filled()) + .await + .is_err() + { + break; + } + } + Err(e) => { + warn!("{} | RESOLVE ERROR | {}", remote_addr, e); + // SERVFAIL that echoes the original question section. 
+ let resp = DnsPacket::response_from(&query, ResultCode::SERVFAIL); + if send_response(&mut stream, &resp, remote_addr).await.is_err() { break; } - out_buf } - }; - if write_framed(&mut stream, resp_buffer.filled()) - .await - .is_err() - { - break; } } } +/// Serialize a DNS response and send it framed. Logs serialization failures +/// and returns Err so the caller can tear down the connection. +async fn send_response( + stream: &mut S, + resp: &DnsPacket, + remote_addr: SocketAddr, +) -> std::io::Result<()> +where + S: AsyncWriteExt + Unpin, +{ + let mut out_buf = BytePacketBuffer::new(); + if resp.write(&mut out_buf).is_err() { + debug!( + "DoT: failed to serialize {:?} response for {}", + resp.header.rescode, remote_addr + ); + return Err(std::io::Error::other("serialize failed")); + } + write_framed(stream, out_buf.filled()).await +} + /// Write a DNS message with its 2-byte length prefix, coalesced into one syscall. async fn write_framed(stream: &mut S, msg: &[u8]) -> std::io::Result<()> where From 357c710ec43b57a6809e9fa0dfef452d0b682774 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 7 Apr 2026 20:11:15 +0300 Subject: [PATCH 11/30] style: rustfmt Co-Authored-By: Claude Opus 4.6 (1M context) --- src/dot.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/dot.rs b/src/dot.rs index 291f6f0..de2f9a8 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -186,7 +186,10 @@ where resp.header.id = query_id; resp.header.response = true; resp.header.rescode = ResultCode::FORMERR; - if send_response(&mut stream, &resp, remote_addr).await.is_err() { + if send_response(&mut stream, &resp, remote_addr) + .await + .is_err() + { break; } continue; @@ -206,7 +209,10 @@ where warn!("{} | RESOLVE ERROR | {}", remote_addr, e); // SERVFAIL that echoes the original question section. 
let resp = DnsPacket::response_from(&query, ResultCode::SERVFAIL); - if send_response(&mut stream, &resp, remote_addr).await.is_err() { + if send_response(&mut stream, &resp, remote_addr) + .await + .is_err() + { break; } } From 2b0c4e3d5e2e04e695fbc24ac09cb63e659c4a2e Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 7 Apr 2026 20:35:05 +0300 Subject: [PATCH 12/30] =?UTF-8?q?refactor:=20trim=20DoT=20listener=20?= =?UTF-8?q?=E2=80=94=20let-else=20reads,=20drop=20MIN=5FMSG=5FLEN=20and=20?= =?UTF-8?q?redundant=20localhost=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Collapse two 4-arm read/timeout matches to let-else (lose one defensive debug log on payload-read timeout; idle timeouts are routine on persistent DoT connections anyway) - Drop MIN_MSG_LEN: DnsPacket::from_buffer rejects truncated input on its own, and BytePacketBuffer is zero-init so buf[0..2] for sub-2-byte messages just yields a harmless FORMERR with id=0 - Inline ACCEPT_ERROR_BACKOFF (single use site) - Drop the partial cert/key warning: missing one of cert_path/ key_path silently falls back to self-signed; users see the self-signed cert at startup and figure it out - Drop dot_localhost_resolution test: RFC 6761 localhost is tested in ctx.rs; this test only verified DoT transport, which dot_resolves_local_zone already covers - Drop self-documenting comment in dot_multiple_queries_on_persistent_connection Net -32 lines, 125/125 tests pass, no behavior change users would notice. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/dot.rs | 60 +++++++++++++----------------------------------------- 1 file changed, 14 insertions(+), 46 deletions(-) diff --git a/src/dot.rs b/src/dot.rs index de2f9a8..360bf4a 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -19,12 +19,9 @@ use crate::packet::DnsPacket; const MAX_CONNECTIONS: usize = 512; const IDLE_TIMEOUT: Duration = Duration::from_secs(30); const HANDSHAKE_TIMEOUT: Duration = Duration::from_secs(10); -const ACCEPT_ERROR_BACKOFF: Duration = Duration::from_millis(100); // Matches BytePacketBuffer::BUF_SIZE — RFC 7858 allows up to 65535 but our // buffer would silently truncate anything larger. const MAX_MSG_LEN: usize = 4096; -// DNS header is 12 bytes; anything shorter cannot be a valid query. -const MIN_MSG_LEN: usize = 12; /// Build a TLS ServerConfig for DoT from user-provided cert/key PEM files. fn load_tls_config(cert_path: &Path, key_path: &Path) -> crate::Result> { @@ -60,9 +57,6 @@ fn fallback_tls(ctx: &ServerCtx) -> Option> { /// Start the DNS-over-TLS listener (RFC 7858). pub async fn start_dot(ctx: Arc, config: &DotConfig) { - if config.cert_path.is_some() != config.key_path.is_some() { - warn!("DoT: both cert_path and key_path must be set — ignoring partial config, using self-signed"); - } let tls_config = match (&config.cert_path, &config.key_path) { (Some(cert), Some(key)) => match load_tls_config(cert, key) { Ok(cfg) => cfg, @@ -103,7 +97,7 @@ async fn accept_loop(listener: TcpListener, acceptor: TlsAcceptor, ctx: Arc { error!("DoT: TCP accept error: {}", e); // Back off to avoid tight-looping on persistent failures (e.g. fd exhaustion). 
- tokio::time::sleep(ACCEPT_ERROR_BACKOFF).await; + tokio::time::sleep(Duration::from_millis(100)).await; continue; } }; @@ -148,31 +142,22 @@ where loop { // Read 2-byte length prefix (RFC 1035 §4.2.2) with idle timeout let mut len_buf = [0u8; 2]; - match tokio::time::timeout(IDLE_TIMEOUT, stream.read_exact(&mut len_buf)).await { - Ok(Ok(_)) => {} - Ok(Err(_)) => break, // read error or EOF - Err(_) => break, // idle timeout - } + let Ok(Ok(_)) = tokio::time::timeout(IDLE_TIMEOUT, stream.read_exact(&mut len_buf)).await + else { + break; + }; let msg_len = u16::from_be_bytes(len_buf) as usize; - if !(MIN_MSG_LEN..=MAX_MSG_LEN).contains(&msg_len) { - debug!( - "DoT: invalid message length {} from {}", - msg_len, remote_addr - ); + if msg_len > MAX_MSG_LEN { + debug!("DoT: oversized message {} from {}", msg_len, remote_addr); break; } let mut buffer = BytePacketBuffer::new(); - match tokio::time::timeout(IDLE_TIMEOUT, stream.read_exact(&mut buffer.buf[..msg_len])) - .await - { - Ok(Ok(_)) => {} - Ok(Err(_)) => break, - Err(_) => { - debug!("DoT: payload read timeout from {}", remote_addr); - break; - } - } + let Ok(Ok(_)) = + tokio::time::timeout(IDLE_TIMEOUT, stream.read_exact(&mut buffer.buf[..msg_len])).await + else { + break; + }; // Parse query up-front so we can echo its question section in SERVFAIL // responses when resolve_query fails. @@ -180,7 +165,8 @@ where Ok(q) => q, Err(e) => { warn!("{} | PARSE ERROR | {}", remote_addr, e); - // msg_len >= MIN_MSG_LEN guarantees buf[0..2] is the client's query id. + // BytePacketBuffer is zero-initialized, so buf[0..2] reads as 0x0000 + // for sub-2-byte messages — harmless FORMERR with id=0. 
let query_id = u16::from_be_bytes([buffer.buf[0], buffer.buf[1]]); let mut resp = DnsPacket::new(); resp.header.id = query_id; @@ -431,7 +417,6 @@ mod tests { let (addr, client_config) = spawn_dot_server().await; let mut stream = dot_connect(addr, &client_config).await; - // Send 3 queries on the same TLS connection for i in 0..3u16 { let query = DnsPacket::query(0xA000 + i, "dot-test.example", QueryType::A); let resp = dot_exchange(&mut stream, &query).await; @@ -479,21 +464,4 @@ mod tests { h.await.unwrap(); } } - - #[tokio::test] - async fn dot_localhost_resolution() { - let (addr, client_config) = spawn_dot_server().await; - let mut stream = dot_connect(addr, &client_config).await; - - let query = DnsPacket::query(0xD000, "localhost", QueryType::A); - let resp = dot_exchange(&mut stream, &query).await; - - assert_eq!(resp.header.id, 0xD000); - assert_eq!(resp.header.rescode, ResultCode::NOERROR); - assert_eq!(resp.answers.len(), 1); - match &resp.answers[0] { - DnsRecord::A { addr, .. } => assert_eq!(*addr, std::net::Ipv4Addr::LOCALHOST), - other => panic!("expected A record, got {:?}", other), - } - } } From 0a73cdf4db3f6094cf33b17b7d0013116bef9bc5 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 7 Apr 2026 20:37:40 +0300 Subject: [PATCH 13/30] docs: add commented-out [dot] example to numa.toml Matches the style of the other opt-in sections (blocking, dnssec, lan). Documents all five DotConfig fields with their defaults. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- numa.toml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/numa.toml b/numa.toml index 4fa0a3d..b7f98de 100644 --- a/numa.toml +++ b/numa.toml @@ -83,6 +83,14 @@ tld = "numa" # enabled = false # opt-in: verify chain of trust from root KSK # strict = false # true = SERVFAIL on bogus signatures +# DNS-over-TLS listener (RFC 7858) — encrypted DNS on port 853 +# [dot] +# enabled = false # opt-in: accept DoT queries +# port = 853 # standard DoT port +# bind_addr = "0.0.0.0" # IPv4 or IPv6; unspecified binds all interfaces +# cert_path = "/etc/numa/dot.crt" # PEM cert; omit to use self-signed (proxy CA if available) +# key_path = "/etc/numa/dot.key" # PEM private key; must be set together with cert_path + # LAN service discovery via mDNS (disabled by default — no network traffic unless enabled) # [lan] # enabled = true # discover other Numa instances via mDNS (_numa._tcp.local) From 1632fc36f2bd6fb48be48ef1581b23aa5aead153 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 7 Apr 2026 22:51:52 +0300 Subject: [PATCH 14/30] feat: DoT write timeout and ALPN "dot" advertisement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two DoS/interop hardening items: 1. Bound write_framed by WRITE_TIMEOUT (10s) so a slow-reader attacker can't indefinitely hold a worker task and its connection permit. Symmetric to the existing handshake timeout. 2. Advertise ALPN "dot" per RFC 7858 §3.2. Required by some strict DoT clients (newer Apple stacks, some Android versions). rustls ServerConfig exposes alpn_protocols as a pub field so we set it after with_single_cert: - load_tls_config (user-provided cert/key): set directly - self_signed_tls (new, replaces fallback_tls): builds a fresh DoT-specific TLS config via build_tls_config with the ALPN list build_tls_config now takes an `alpn: Vec<Vec<u8>>` parameter so DoT and the proxy can pass different ALPN lists while sharing the same CA. 
Proxy callers pass Vec::new() (unchanged behavior). Dropped the ctx.tls_config reuse branch: we can't mutate a shared Arc to add DoT-specific ALPN, and reusing the proxy config was already quietly broken re: SAN (proxy cert covers *.{tld}, not the DoT server's bind hostname/IP). Added dot_negotiates_alpn test that asserts conn.alpn_protocol() returns Some(b"dot") after handshake. 126/126 tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/dot.rs | 48 ++++++++++++++++++++++++++++++++++++------------ src/main.rs | 2 +- src/tls.rs | 13 ++++++++++--- 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/src/dot.rs b/src/dot.rs index 360bf4a..898c2a1 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -19,10 +19,16 @@ use crate::packet::DnsPacket; const MAX_CONNECTIONS: usize = 512; const IDLE_TIMEOUT: Duration = Duration::from_secs(30); const HANDSHAKE_TIMEOUT: Duration = Duration::from_secs(10); +const WRITE_TIMEOUT: Duration = Duration::from_secs(10); // Matches BytePacketBuffer::BUF_SIZE — RFC 7858 allows up to 65535 but our // buffer would silently truncate anything larger. const MAX_MSG_LEN: usize = 4096; +/// ALPN protocol identifier for DNS-over-TLS (RFC 7858 §3.2). +fn dot_alpn() -> Vec> { + vec![b"dot".to_vec()] +} + /// Build a TLS ServerConfig for DoT from user-provided cert/key PEM files. fn load_tls_config(cert_path: &Path, key_path: &Path) -> crate::Result> { let cert_pem = std::fs::read(cert_path)?; @@ -32,18 +38,18 @@ fn load_tls_config(cert_path: &Path, key_path: &Path) -> crate::Result Option> { - if let Some(arc_swap) = ctx.tls_config.as_ref() { - return Some(Arc::clone(&*arc_swap.load())); - } - match crate::tls::build_tls_config(&ctx.proxy_tld, &[]) { +/// Build a self-signed DoT TLS config. Can't reuse `ctx.tls_config` (the +/// proxy's shared config) because DoT needs its own ALPN advertisement. 
+fn self_signed_tls(ctx: &ServerCtx) -> Option> { + match crate::tls::build_tls_config(&ctx.proxy_tld, &[], dot_alpn()) { Ok(cfg) => Some(cfg), Err(e) => { warn!( @@ -65,7 +71,7 @@ pub async fn start_dot(ctx: Arc, config: &DotConfig) { return; } }, - _ => match fallback_tls(&ctx) { + _ => match self_signed_tls(&ctx) { Some(cfg) => cfg, None => return, }, @@ -228,6 +234,7 @@ where } /// Write a DNS message with its 2-byte length prefix, coalesced into one syscall. +/// Bounded by WRITE_TIMEOUT so a stalled reader can't indefinitely hold a worker. async fn write_framed(stream: &mut S, msg: &[u8]) -> std::io::Result<()> where S: AsyncWriteExt + Unpin, @@ -235,9 +242,15 @@ where let mut out = Vec::with_capacity(2 + msg.len()); out.extend_from_slice(&(msg.len() as u16).to_be_bytes()); out.extend_from_slice(msg); - stream.write_all(&out).await?; - stream.flush().await?; - Ok(()) + match tokio::time::timeout(WRITE_TIMEOUT, async { + stream.write_all(&out).await?; + stream.flush().await + }) + .await + { + Ok(result) => result, + Err(_) => Err(std::io::Error::other("write timeout")), + } } #[cfg(test)] @@ -271,16 +284,18 @@ mod tests { let cert_der = CertificateDer::from(cert.der().to_vec()); let key_der = PrivateKeyDer::Pkcs8(PrivatePkcs8KeyDer::from(key_pair.serialize_der())); - let server_config = ServerConfig::builder() + let mut server_config = ServerConfig::builder() .with_no_client_auth() .with_single_cert(vec![cert_der.clone()], key_der) .unwrap(); + server_config.alpn_protocols = dot_alpn(); let mut root_store = rustls::RootCertStore::empty(); root_store.add(cert_der).unwrap(); - let client_config = rustls::ClientConfig::builder() + let mut client_config = rustls::ClientConfig::builder() .with_root_certificates(root_store) .with_no_client_auth(); + client_config.alpn_protocols = dot_alpn(); (Arc::new(server_config), Arc::new(client_config)) } @@ -443,6 +458,15 @@ mod tests { assert_eq!(resp.questions[0].name, "nonexistent.test"); } + #[tokio::test] + async fn 
dot_negotiates_alpn() { + let (addr, client_config) = spawn_dot_server().await; + let stream = dot_connect(addr, &client_config).await; + // After handshake, the negotiated ALPN protocol should be "dot" (RFC 7858 §3.2). + let (_io, conn) = stream.get_ref(); + assert_eq!(conn.alpn_protocol(), Some(&b"dot"[..])); + } + #[tokio::test] async fn dot_concurrent_connections() { let (addr, client_config) = spawn_dot_server().await; diff --git a/src/main.rs b/src/main.rs index b9316b8..adf266e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -207,7 +207,7 @@ async fn main() -> numa::Result<()> { // Build initial TLS config before ServerCtx (so ArcSwap is ready at construction) let initial_tls = if config.proxy.enabled && config.proxy.tls_port > 0 { let service_names = service_store.names(); - match numa::tls::build_tls_config(&config.proxy.tld, &service_names) { + match numa::tls::build_tls_config(&config.proxy.tld, &service_names, Vec::new()) { Ok(tls_config) => Some(ArcSwap::from(tls_config)), Err(e) => { log::warn!("TLS setup failed, HTTPS proxy disabled: {}", e); diff --git a/src/tls.rs b/src/tls.rs index a4d91bf..5746f3b 100644 --- a/src/tls.rs +++ b/src/tls.rs @@ -24,7 +24,7 @@ pub fn regenerate_tls(ctx: &ServerCtx) { names.extend(ctx.lan_peers.lock().unwrap().names()); let names: Vec = names.into_iter().collect(); - match build_tls_config(&ctx.proxy_tld, &names) { + match build_tls_config(&ctx.proxy_tld, &names, Vec::new()) { Ok(new_config) => { tls.store(new_config); info!("TLS cert regenerated for {} services", names.len()); @@ -36,7 +36,13 @@ pub fn regenerate_tls(ctx: &ServerCtx) { /// Build a TLS config with a cert covering all provided service names. /// Wildcards under single-label TLDs (*.numa) are rejected by browsers, /// so we list each service explicitly as a SAN. 
-pub fn build_tls_config(tld: &str, service_names: &[String]) -> crate::Result> { +/// `alpn` is advertised in the TLS ServerHello — pass empty for the proxy +/// (which accepts any ALPN), or `[b"dot"]` for DoT (RFC 7858 §3.2). +pub fn build_tls_config( + tld: &str, + service_names: &[String], + alpn: Vec>, +) -> crate::Result> { let dir = crate::data_dir(); let (ca_cert, ca_key) = ensure_ca(&dir)?; let (cert_chain, key) = generate_service_cert(&ca_cert, &ca_key, tld, service_names)?; @@ -44,9 +50,10 @@ pub fn build_tls_config(tld: &str, service_names: &[String]) -> crate::Result Date: Tue, 7 Apr 2026 22:56:44 +0300 Subject: [PATCH 15/30] style: drop narrating comments on dot_alpn and ALPN test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both were restating what the code already said — dot_alpn's doc narrated the function name and the test comment restated the assertion. RFC 7858 §3.2 is already cited on self_signed_tls and build_tls_config where the "why" actually matters. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/dot.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/dot.rs b/src/dot.rs index 898c2a1..b7e7875 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -24,7 +24,6 @@ const WRITE_TIMEOUT: Duration = Duration::from_secs(10); // buffer would silently truncate anything larger. const MAX_MSG_LEN: usize = 4096; -/// ALPN protocol identifier for DNS-over-TLS (RFC 7858 §3.2). fn dot_alpn() -> Vec> { vec![b"dot".to_vec()] } @@ -462,7 +461,6 @@ mod tests { async fn dot_negotiates_alpn() { let (addr, client_config) = spawn_dot_server().await; let stream = dot_connect(addr, &client_config).await; - // After handshake, the negotiated ALPN protocol should be "dot" (RFC 7858 §3.2). 
let (_io, conn) = stream.get_ref(); assert_eq!(conn.alpn_protocol(), Some(&b"dot"[..])); } From bacc49667ab5e28870765d9afbb6478ac8b4b073 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 7 Apr 2026 23:22:04 +0300 Subject: [PATCH 16/30] fix: DoT cert needs explicit {tld}.{tld} SAN, not just *.{tld} wildcard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit self_signed_tls was passing an empty service_names list, so the generated cert only had the *.numa wildcard SAN. Strict TLS clients (browsers, possibly some iOS versions) reject wildcards under single-label TLDs — see the existing comment in tls.rs explaining why the proxy lists each service explicitly. setup-phone's mobileconfig sends ServerName "numa.numa" as SNI, so the DoT cert must have an explicit numa.numa SAN. Pass proxy_tld itself as a service name, mirroring how main.rs already registers "numa" as a service for the proxy's TLS cert. Test fixture updated to mirror the production SAN shape (*.numa + numa.numa) and switched the client to SNI "numa.numa", so the existing DoT test suite implicitly exercises the SNI path used by setup-phone clients. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/dot.rs | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/dot.rs b/src/dot.rs index b7e7875..487c25f 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -47,8 +47,15 @@ fn load_tls_config(cert_path: &Path, key_path: &Path) -> crate::Result Option> { - match crate::tls::build_tls_config(&ctx.proxy_tld, &[], dot_alpn()) { + let service_names = [ctx.proxy_tld.clone()]; + match crate::tls::build_tls_config(&ctx.proxy_tld, &service_names, dot_alpn()) { Ok(cfg) => Some(cfg), Err(e) => { warn!( @@ -272,12 +279,17 @@ mod tests { fn test_tls_configs() -> (Arc, Arc) { let _ = rustls::crypto::ring::default_provider().install_default(); + // Mirror production self_signed_tls SAN shape: *.numa wildcard plus + // explicit numa.numa apex (the ServerName setup-phone uses as SNI). let key_pair = KeyPair::generate().unwrap(); let mut params = CertificateParams::default(); params .distinguished_name - .push(DnType::CommonName, "localhost"); - params.subject_alt_names = vec![rcgen::SanType::DnsName("localhost".try_into().unwrap())]; + .push(DnType::CommonName, "Numa .numa services"); + params.subject_alt_names = vec![ + rcgen::SanType::DnsName("*.numa".try_into().unwrap()), + rcgen::SanType::DnsName("numa.numa".try_into().unwrap()), + ]; let cert = params.self_signed(&key_pair).unwrap(); let cert_der = CertificateDer::from(cert.der().to_vec()); @@ -367,6 +379,7 @@ mod tests { } /// Open a TLS connection to the DoT server and return the stream. + /// Uses SNI "numa.numa" to mirror what setup-phone's mobileconfig sends. 
async fn dot_connect( addr: SocketAddr, client_config: &Arc, @@ -374,7 +387,7 @@ mod tests { let connector = tokio_rustls::TlsConnector::from(Arc::clone(client_config)); let tcp = tokio::net::TcpStream::connect(addr).await.unwrap(); connector - .connect(ServerName::try_from("localhost").unwrap(), tcp) + .connect(ServerName::try_from("numa.numa").unwrap(), tcp) .await .unwrap() } From 186e70937381b66dfa4236f968acb1d1def44683 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 8 Apr 2026 00:09:54 +0300 Subject: [PATCH 17/30] test: verify DoT server rejects mismatched ALPN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds dot_rejects_non_dot_alpn to assert the rustls server enforces ALPN strictness rather than silently accepting a mismatched negotiation. This is the load-bearing behavior behind the cross- protocol confusion defense — without enforcement, the ALPN "dot" advertisement is just a sign hung on an unlocked door. Refactors test_tls_configs to return the leaf cert DER instead of a prebuilt client config, and adds a dot_client(cert_der, alpn) helper so each test can build a client config with the ALPN list it needs. The five existing DoT tests gain one line each to call dot_client with dot_alpn(); behavior unchanged. 127/127 tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/dot.rs | 72 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/src/dot.rs b/src/dot.rs index 487c25f..0a917dd 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -275,8 +275,9 @@ mod tests { use crate::question::QueryType; use crate::record::DnsRecord; - /// Generate a self-signed cert + key in memory, return (ServerConfig, ClientConfig). - fn test_tls_configs() -> (Arc, Arc) { + /// Generate a self-signed DoT server config and return its leaf cert DER + /// so callers can build matching client configs with arbitrary ALPN. 
+ fn test_tls_configs() -> (Arc, CertificateDer<'static>) { let _ = rustls::crypto::ring::default_provider().install_default(); // Mirror production self_signed_tls SAN shape: *.numa wildcard plus @@ -301,22 +302,31 @@ mod tests { .unwrap(); server_config.alpn_protocols = dot_alpn(); - let mut root_store = rustls::RootCertStore::empty(); - root_store.add(cert_der).unwrap(); - let mut client_config = rustls::ClientConfig::builder() - .with_root_certificates(root_store) - .with_no_client_auth(); - client_config.alpn_protocols = dot_alpn(); - - (Arc::new(server_config), Arc::new(client_config)) + (Arc::new(server_config), cert_der) } - /// Spin up a DoT listener with a test TLS config. Returns (addr, client_config). + /// Build a TLS client config that trusts `cert_der` and advertises the + /// given ALPN protocols. Used by tests to vary ALPN per test case. + fn dot_client( + cert_der: &CertificateDer<'static>, + alpn: Vec>, + ) -> Arc { + let mut root_store = rustls::RootCertStore::empty(); + root_store.add(cert_der.clone()).unwrap(); + let mut config = rustls::ClientConfig::builder() + .with_root_certificates(root_store) + .with_no_client_auth(); + config.alpn_protocols = alpn; + Arc::new(config) + } + + /// Spin up a DoT listener with a test TLS config. Returns the bind addr + /// and the leaf cert DER so callers can build clients with arbitrary ALPN. /// The upstream is pointed at a bound-but-unresponsive UDP socket we own, so /// any query that escapes to the upstream path times out deterministically /// (SERVFAIL) regardless of what the host has running on port 53. - async fn spawn_dot_server() -> (SocketAddr, Arc) { - let (server_tls, client_tls) = test_tls_configs(); + async fn spawn_dot_server() -> (SocketAddr, CertificateDer<'static>) { + let (server_tls, cert_der) = test_tls_configs(); let socket = tokio::net::UdpSocket::bind("127.0.0.1:0").await.unwrap(); // Bind an unresponsive upstream and leak it so it lives for the test duration. 
@@ -375,7 +385,7 @@ mod tests { tokio::spawn(accept_loop(listener, acceptor, ctx)); - (addr, client_tls) + (addr, cert_der) } /// Open a TLS connection to the DoT server and return the stream. @@ -419,7 +429,8 @@ mod tests { #[tokio::test] async fn dot_resolves_local_zone() { - let (addr, client_config) = spawn_dot_server().await; + let (addr, cert_der) = spawn_dot_server().await; + let client_config = dot_client(&cert_der, dot_alpn()); let mut stream = dot_connect(addr, &client_config).await; let query = DnsPacket::query(0x1234, "dot-test.example", QueryType::A); @@ -441,7 +452,8 @@ mod tests { #[tokio::test] async fn dot_multiple_queries_on_persistent_connection() { - let (addr, client_config) = spawn_dot_server().await; + let (addr, cert_der) = spawn_dot_server().await; + let client_config = dot_client(&cert_der, dot_alpn()); let mut stream = dot_connect(addr, &client_config).await; for i in 0..3u16 { @@ -455,7 +467,8 @@ mod tests { #[tokio::test] async fn dot_nxdomain_for_unknown() { - let (addr, client_config) = spawn_dot_server().await; + let (addr, cert_der) = spawn_dot_server().await; + let client_config = dot_client(&cert_der, dot_alpn()); let mut stream = dot_connect(addr, &client_config).await; let query = DnsPacket::query(0xBEEF, "nonexistent.test", QueryType::A); @@ -472,15 +485,36 @@ mod tests { #[tokio::test] async fn dot_negotiates_alpn() { - let (addr, client_config) = spawn_dot_server().await; + let (addr, cert_der) = spawn_dot_server().await; + let client_config = dot_client(&cert_der, dot_alpn()); let stream = dot_connect(addr, &client_config).await; let (_io, conn) = stream.get_ref(); assert_eq!(conn.alpn_protocol(), Some(&b"dot"[..])); } + #[tokio::test] + async fn dot_rejects_non_dot_alpn() { + // Cross-protocol confusion defense: a client that only offers "h2" + // (e.g. an HTTP/2 client mistakenly hitting :853) must not complete + // a TLS handshake with the DoT server. 
Verifies the rustls server + // sends `no_application_protocol` rather than silently negotiating. + let (addr, cert_der) = spawn_dot_server().await; + let client_config = dot_client(&cert_der, vec![b"h2".to_vec()]); + let connector = tokio_rustls::TlsConnector::from(client_config); + let tcp = tokio::net::TcpStream::connect(addr).await.unwrap(); + let result = connector + .connect(ServerName::try_from("numa.numa").unwrap(), tcp) + .await; + assert!( + result.is_err(), + "DoT server must reject ALPN that doesn't include \"dot\"" + ); + } + #[tokio::test] async fn dot_concurrent_connections() { - let (addr, client_config) = spawn_dot_server().await; + let (addr, cert_der) = spawn_dot_server().await; + let client_config = dot_client(&cert_der, dot_alpn()); let mut handles = Vec::new(); for i in 0..5u16 { From c98e6c3ea9bea6a8e32ce1ad53b6f542e3e1dbf9 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 8 Apr 2026 00:54:51 +0300 Subject: [PATCH 18/30] fix: install rustls crypto provider when loading user DoT cert Adds tests/integration.sh Suite 5 (DoT via kdig + openssl) and fixes a startup panic caught by it. Bug: when [dot] cert_path/key_path was set AND [proxy] was disabled, numa panicked on the first DoT handshake with "Could not automatically determine the process-level CryptoProvider from Rustls crate features". In normal deployments the proxy's build_tls_config installs the default provider as a side effect, masking the missing call in dot.rs::load_tls_config. Disable the proxy and the panic surfaces. Fix: call rustls::crypto::ring::default_provider().install_default() at the top of load_tls_config (no-op if already installed). 
Suite 5 exercises: - DoT listener binds on configured port - Resolves a local zone A record over TLS (kdig +tls) - Persistent connection reuse (kdig +keepopen, 3 queries, 1 handshake) - ALPN "dot" negotiation (openssl s_client -alpn dot) - ALPN mismatch rejected with no_application_protocol (openssl -alpn h2) Uses a pre-generated cert at /tmp so the test runs non-root. Skips gracefully if kdig or openssl aren't installed. Also: Dockerfile now EXPOSE 853/tcp so docker run -p 853:853 works out of the box when users enable DoT. Co-Authored-By: Claude Opus 4.6 (1M context) --- Dockerfile | 2 +- src/dot.rs | 6 +++ tests/integration.sh | 122 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 0af2ee3..1d6f28f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,5 +13,5 @@ RUN cargo build --release FROM alpine:3.20 COPY --from=builder /app/target/release/numa /usr/local/bin/numa -EXPOSE 53/udp 80/tcp 443/tcp 5380/tcp +EXPOSE 53/udp 80/tcp 443/tcp 853/tcp 5380/tcp ENTRYPOINT ["numa"] diff --git a/src/dot.rs b/src/dot.rs index 0a917dd..d399649 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -30,6 +30,12 @@ fn dot_alpn() -> Vec> { /// Build a TLS ServerConfig for DoT from user-provided cert/key PEM files. fn load_tls_config(cert_path: &Path, key_path: &Path) -> crate::Result> { + // rustls needs a CryptoProvider installed before ServerConfig::builder(). + // The proxy's build_tls_config also does this; we repeat it here because + // running DoT with user-provided certs while the proxy is disabled would + // otherwise panic on first handshake (no default provider). 
+ let _ = rustls::crypto::ring::default_provider().install_default(); + let cert_pem = std::fs::read(cert_path)?; let key_pem = std::fs::read(key_path)?; diff --git a/tests/integration.sh b/tests/integration.sh index c83dd61..a19d3bc 100755 --- a/tests/integration.sh +++ b/tests/integration.sh @@ -404,6 +404,128 @@ check "Cache flushed" \ kill "$NUMA_PID" 2>/dev/null || true wait "$NUMA_PID" 2>/dev/null || true +sleep 1 + +# ---- Suite 5: DNS-over-TLS (RFC 7858) ---- +echo "" +echo "╔══════════════════════════════════════════╗" +echo "║ Suite 5: DNS-over-TLS (RFC 7858) ║" +echo "╚══════════════════════════════════════════╝" + +if ! command -v kdig >/dev/null 2>&1; then + printf " ${DIM}skipped — install 'knot' for kdig${RESET}\n" +elif ! command -v openssl >/dev/null 2>&1; then + printf " ${DIM}skipped — openssl not found${RESET}\n" +else + DOT_PORT=8853 + DOT_CERT=/tmp/numa-integration-dot.crt + DOT_KEY=/tmp/numa-integration-dot.key + + # Generate a test cert mirroring production self_signed_tls SAN shape + # (*.numa wildcard + explicit numa.numa apex). + openssl req -x509 -newkey rsa:2048 -nodes -days 1 \ + -keyout "$DOT_KEY" -out "$DOT_CERT" \ + -subj "/CN=Numa .numa services" \ + -addext "subjectAltName=DNS:*.numa,DNS:numa.numa" \ + >/dev/null 2>&1 + + # Suite 5 uses a local zone so it's upstream-independent — the point is + # to exercise the DoT transport layer (handshake, ALPN, framing, + # persistent connections), not re-test recursive resolution. 
+ cat > "$CONFIG" << CONF +[server] +bind_addr = "127.0.0.1:$PORT" +api_port = $API_PORT + +[upstream] +mode = "forward" +address = "127.0.0.1" +port = 65535 + +[cache] +max_entries = 10000 + +[blocking] +enabled = false + +[proxy] +enabled = false + +[dot] +enabled = true +port = $DOT_PORT +bind_addr = "127.0.0.1" +cert_path = "$DOT_CERT" +key_path = "$DOT_KEY" + +[[zones]] +domain = "dot-test.example" +record_type = "A" +value = "10.0.0.1" +ttl = 60 +CONF + + RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 & + NUMA_PID=$! + sleep 4 + + if ! kill -0 "$NUMA_PID" 2>/dev/null; then + FAILED=$((FAILED + 1)) + printf " ${RED}✗${RESET} DoT startup\n" + printf " ${DIM}%s${RESET}\n" "$(tail -5 "$LOG")" + else + echo "" + echo "=== Listener ===" + + check "DoT bound on 127.0.0.1:$DOT_PORT" \ + "DoT listening on 127.0.0.1:$DOT_PORT" \ + "$(grep 'DoT listening' "$LOG")" + + KDIG="kdig @127.0.0.1 -p $DOT_PORT +tls +tls-ca=$DOT_CERT +tls-hostname=numa.numa +time=5 +retry=0" + + echo "" + echo "=== Queries over DoT ===" + + check "DoT local zone A record" \ + "10.0.0.1" \ + "$($KDIG +short dot-test.example A 2>/dev/null)" + + # +keepopen reuses one TLS connection for multiple queries — tests + # persistent connection handling. kdig applies options left-to-right, + # so +short and +keepopen must come before the query specs. + check "DoT persistent connection (3 queries, 1 handshake)" \ + "10.0.0.1" \ + "$($KDIG +keepopen +short dot-test.example A dot-test.example A dot-test.example A 2>/dev/null | head -1)" + + echo "" + echo "=== ALPN ===" + + # Positive case: client offers "dot", server picks it. 
+ ALPN_OK=$(echo "" | openssl s_client -connect "127.0.0.1:$DOT_PORT" \ + -servername numa.numa -alpn dot -CAfile "$DOT_CERT" 2>&1 /dev/null 2>&1; then + ALPN_MISMATCH="handshake unexpectedly succeeded" + else + ALPN_MISMATCH="rejected" + fi + check "DoT rejects non-dot ALPN" \ + "rejected" \ + "$ALPN_MISMATCH" + fi + + kill "$NUMA_PID" 2>/dev/null || true + wait "$NUMA_PID" 2>/dev/null || true + rm -f "$DOT_CERT" "$DOT_KEY" +fi # Summary echo "" From 7f52bd8a324c689dbb8328229516aec4bc9d5e60 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 8 Apr 2026 01:12:16 +0300 Subject: [PATCH 19/30] =?UTF-8?q?test:=20Suite=206=20=E2=80=94=20proxy=20+?= =?UTF-8?q?=20DoT=20coexistence,=20NUMA=5FDATA=5FDIR=20override?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds integration test coverage for the realistic production shape where both the HTTPS proxy and DoT are enabled simultaneously. This was previously untested — every existing suite had either one or the other, so the interaction path was implicit. What Suite 6 verifies: - Both listeners bind without panic - DoT still resolves queries with the proxy enabled - Proxy HTTPS handshake still works with DoT enabled - Both certs validate against the same shared CA To run non-root, adds a NUMA_DATA_DIR env var override to data_dir() that lets callers point the CA/cert storage at any writable path. Useful beyond tests: containerized deployments, CI runners, dev testing without sudo. The fallback is the existing platform-specific path (unix: /usr/local/var/numa, windows: %PROGRAMDATA%\numa). Suite 6 sets NUMA_DATA_DIR=/tmp/numa-integration-data before starting numa, then trusts the generated CA at $NUMA_DATA_DIR/ca.pem for both kdig (DoT query) and openssl s_client (HTTPS proxy handshake) verification. All 6 suites, 32 checks, run non-root and pass locally. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/lib.rs | 5 ++ tests/integration.sh | 111 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 36017fe..05d18a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,9 +67,14 @@ fn config_dir_unix() -> std::path::PathBuf { } /// System-wide data directory for TLS certs. +/// Override with `NUMA_DATA_DIR` env var (useful for containerized +/// deployments and integration tests that can't write to the default path). /// Unix: /usr/local/var/numa /// Windows: %PROGRAMDATA%\numa pub fn data_dir() -> std::path::PathBuf { + if let Ok(dir) = std::env::var("NUMA_DATA_DIR") { + return std::path::PathBuf::from(dir); + } #[cfg(windows)] { std::path::PathBuf::from( diff --git a/tests/integration.sh b/tests/integration.sh index a19d3bc..f1c5205 100755 --- a/tests/integration.sh +++ b/tests/integration.sh @@ -526,6 +526,117 @@ CONF wait "$NUMA_PID" 2>/dev/null || true rm -f "$DOT_CERT" "$DOT_KEY" fi +sleep 1 + +# ---- Suite 6: Proxy + DoT coexistence ---- +echo "" +echo "╔══════════════════════════════════════════╗" +echo "║ Suite 6: Proxy + DoT Coexistence ║" +echo "╚══════════════════════════════════════════╝" + +if ! command -v kdig >/dev/null 2>&1 || ! command -v openssl >/dev/null 2>&1; then + printf " ${DIM}skipped — needs kdig + openssl${RESET}\n" +else + DOT_PORT=8853 + PROXY_HTTP_PORT=8080 + PROXY_HTTPS_PORT=8443 + NUMA_DATA=/tmp/numa-integration-data + + # Fresh data dir so we generate a fresh CA for this suite — NUMA_DATA_DIR + # env var lets numa write under $TMPDIR instead of /usr/local/var/numa. 
+ rm -rf "$NUMA_DATA" + mkdir -p "$NUMA_DATA" + + cat > "$CONFIG" << CONF +[server] +bind_addr = "127.0.0.1:$PORT" +api_port = $API_PORT + +[upstream] +mode = "forward" +address = "127.0.0.1" +port = 65535 + +[cache] +max_entries = 10000 + +[blocking] +enabled = false + +[proxy] +enabled = true +port = $PROXY_HTTP_PORT +tls_port = $PROXY_HTTPS_PORT +tld = "numa" +bind_addr = "127.0.0.1" + +[dot] +enabled = true +port = $DOT_PORT +bind_addr = "127.0.0.1" + +[[zones]] +domain = "dot-test.example" +record_type = "A" +value = "10.0.0.1" +ttl = 60 +CONF + + NUMA_DATA_DIR="$NUMA_DATA" RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 & + NUMA_PID=$! + sleep 4 + + if ! kill -0 "$NUMA_PID" 2>/dev/null; then + FAILED=$((FAILED + 1)) + printf " ${RED}✗${RESET} Startup with proxy + DoT\n" + printf " ${DIM}%s${RESET}\n" "$(tail -5 "$LOG")" + else + echo "" + echo "=== Both listeners ===" + + check "DoT listener bound" \ + "DoT listening on 127.0.0.1:$DOT_PORT" \ + "$(grep 'DoT listening' "$LOG")" + + check "HTTPS proxy listener bound" \ + "HTTPS proxy listening on 127.0.0.1:$PROXY_HTTPS_PORT" \ + "$(grep 'HTTPS proxy listening' "$LOG")" + + PANIC_COUNT=$(grep -c 'panicked' "$LOG" 2>/dev/null || echo 0) + check "No startup panics in log" \ + "^0$" \ + "$PANIC_COUNT" + + echo "" + echo "=== DoT works with proxy enabled ===" + + # Proxy's build_tls_config runs first and creates the CA in + # $NUMA_DATA_DIR. DoT self_signed_tls then loads the same CA and + # issues its own leaf cert. One CA trusts both listeners. + CA="$NUMA_DATA/ca.pem" + KDIG="kdig @127.0.0.1 -p $DOT_PORT +tls +tls-ca=$CA +tls-hostname=numa.numa +time=5 +retry=0" + + check "DoT local zone A (with proxy on)" \ + "10.0.0.1" \ + "$($KDIG +short dot-test.example A 2>/dev/null)" + + echo "" + echo "=== Proxy TLS works with DoT enabled ===" + + # Proxy cert has SAN numa.numa (auto-added "numa" service). 
A + # successful handshake validates that the proxy's separate + # ServerConfig wasn't disturbed by DoT's own cert generation. + PROXY_TLS=$(echo "" | openssl s_client -connect "127.0.0.1:$PROXY_HTTPS_PORT" \ + -servername numa.numa -CAfile "$CA" 2>&1 /dev/null || true + wait "$NUMA_PID" 2>/dev/null || true + rm -rf "$NUMA_DATA" +fi # Summary echo "" From 6887c8e02e6eec69dc21da8f19e9e406c8bd16aa Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 8 Apr 2026 01:31:16 +0300 Subject: [PATCH 20/30] refactor: move data_dir override from env var to [server] TOML field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reverts the NUMA_DATA_DIR env var added in the previous commit and replaces it with a [server] data_dir TOML field. Numa already has a well-developed config system; adding a parallel env-var mechanism for a single knob was wrong. The principle: TOML is for application behavior configuration. Env vars are for bootstrap values (HOME, SUDO_USER to discover paths before config loads) and standard ecosystem conventions (RUST_LOG). data_dir is neither — it's an app knob, so it belongs in the TOML. Changes: - lib.rs::data_dir() reverts to the platform-specific fallback only - config.rs adds `data_dir: Option` to ServerConfig - main.rs resolves config.server.data_dir with fallback to numa::data_dir() and passes it to build_tls_config, then stores the resolved path on ctx.data_dir for downstream consumers - tls.rs::build_tls_config takes `data_dir: &Path` as an explicit parameter instead of calling crate::data_dir() behind the caller's back. regenerate_tls and dot.rs self_signed_tls now pass &ctx.data_dir, honoring whatever path the config resolved to - tests/integration.sh Suite 6 uses `data_dir = "$NUMA_DATA"` in its test TOML instead of the NUMA_DATA_DIR env var prefix - numa.toml gains a commented-out data_dir example No behavior change for existing production deployments (the default path is unchanged). 
Test harness is now fully config-driven, and containerized deploys can override data_dir via mount+config without needing env var injection. 127/127 unit tests pass, Suite 6 passes end-to-end. Co-Authored-By: Claude Opus 4.6 (1M context) --- numa.toml | 5 +++++ src/config.rs | 5 +++++ src/dot.rs | 2 +- src/lib.rs | 9 +++------ src/main.rs | 17 +++++++++++++++-- src/tls.rs | 8 +++++--- tests/integration.sh | 8 +++++--- 7 files changed, 39 insertions(+), 15 deletions(-) diff --git a/numa.toml b/numa.toml index b7f98de..35d92de 100644 --- a/numa.toml +++ b/numa.toml @@ -2,6 +2,11 @@ bind_addr = "0.0.0.0:53" api_port = 5380 # api_bind_addr = "127.0.0.1" # default; set to "0.0.0.0" for LAN dashboard access +# data_dir = "/usr/local/var/numa" # where numa stores TLS CA and cert material + # (default: /usr/local/var/numa on unix, + # %PROGRAMDATA%\numa on windows). Override for + # containerized deploys or tests that can't + # write to the system path. # [upstream] # mode = "forward" # "forward" (default) — relay to upstream diff --git a/src/config.rs b/src/config.rs index acf4d37..45dc896 100644 --- a/src/config.rs +++ b/src/config.rs @@ -41,6 +41,10 @@ pub struct ServerConfig { pub api_port: u16, #[serde(default = "default_api_bind_addr")] pub api_bind_addr: String, + /// Where numa writes TLS material (CA, leaf certs, regenerated state). + /// Defaults to `crate::data_dir()` (platform-specific system path) if unset. 
+ #[serde(default)] + pub data_dir: Option, } impl Default for ServerConfig { @@ -49,6 +53,7 @@ impl Default for ServerConfig { bind_addr: default_bind_addr(), api_port: default_api_port(), api_bind_addr: default_api_bind_addr(), + data_dir: None, } } } diff --git a/src/dot.rs b/src/dot.rs index d399649..a09b160 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -61,7 +61,7 @@ fn load_tls_config(cert_path: &Path, key_path: &Path) -> crate::Result Option> { let service_names = [ctx.proxy_tld.clone()]; - match crate::tls::build_tls_config(&ctx.proxy_tld, &service_names, dot_alpn()) { + match crate::tls::build_tls_config(&ctx.proxy_tld, &service_names, dot_alpn(), &ctx.data_dir) { Ok(cfg) => Some(cfg), Err(e) => { warn!( diff --git a/src/lib.rs b/src/lib.rs index 05d18a0..347e72f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -66,15 +66,12 @@ fn config_dir_unix() -> std::path::PathBuf { std::path::PathBuf::from("/usr/local/var/numa") } -/// System-wide data directory for TLS certs. -/// Override with `NUMA_DATA_DIR` env var (useful for containerized -/// deployments and integration tests that can't write to the default path). +/// Default system-wide data directory for TLS certs. Overridable via +/// `[server] data_dir = "..."` in numa.toml — this function only provides +/// the fallback when the config doesn't set it. /// Unix: /usr/local/var/numa /// Windows: %PROGRAMDATA%\numa pub fn data_dir() -> std::path::PathBuf { - if let Ok(dir) = std::env::var("NUMA_DATA_DIR") { - return std::path::PathBuf::from(dir); - } #[cfg(windows)] { std::path::PathBuf::from( diff --git a/src/main.rs b/src/main.rs index adf266e..af0fb3a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -204,10 +204,23 @@ async fn main() -> numa::Result<()> { let forwarding_rules = system_dns.forwarding_rules; + // Resolve data_dir from config, falling back to the platform default. + // Used for TLS CA storage below and stored on ServerCtx for runtime use. 
+ let resolved_data_dir = config + .server + .data_dir + .clone() + .unwrap_or_else(numa::data_dir); + // Build initial TLS config before ServerCtx (so ArcSwap is ready at construction) let initial_tls = if config.proxy.enabled && config.proxy.tls_port > 0 { let service_names = service_store.names(); - match numa::tls::build_tls_config(&config.proxy.tld, &service_names, Vec::new()) { + match numa::tls::build_tls_config( + &config.proxy.tld, + &service_names, + Vec::new(), + &resolved_data_dir, + ) { Ok(tls_config) => Some(ArcSwap::from(tls_config)), Err(e) => { log::warn!("TLS setup failed, HTTPS proxy disabled: {}", e); @@ -248,7 +261,7 @@ async fn main() -> numa::Result<()> { config_path: resolved_config_path, config_found, config_dir: numa::config_dir(), - data_dir: numa::data_dir(), + data_dir: resolved_data_dir, tls_config: initial_tls, upstream_mode: resolved_mode, root_hints, diff --git a/src/tls.rs b/src/tls.rs index 5746f3b..c60714e 100644 --- a/src/tls.rs +++ b/src/tls.rs @@ -24,7 +24,7 @@ pub fn regenerate_tls(ctx: &ServerCtx) { names.extend(ctx.lan_peers.lock().unwrap().names()); let names: Vec = names.into_iter().collect(); - match build_tls_config(&ctx.proxy_tld, &names, Vec::new()) { + match build_tls_config(&ctx.proxy_tld, &names, Vec::new(), &ctx.data_dir) { Ok(new_config) => { tls.store(new_config); info!("TLS cert regenerated for {} services", names.len()); @@ -38,13 +38,15 @@ pub fn regenerate_tls(ctx: &ServerCtx) { /// so we list each service explicitly as a SAN. /// `alpn` is advertised in the TLS ServerHello — pass empty for the proxy /// (which accepts any ALPN), or `[b"dot"]` for DoT (RFC 7858 §3.2). +/// `data_dir` is where the CA material is stored — taken from +/// `[server] data_dir` in numa.toml (defaults to `crate::data_dir()`). 
pub fn build_tls_config( tld: &str, service_names: &[String], alpn: Vec>, + data_dir: &Path, ) -> crate::Result> { - let dir = crate::data_dir(); - let (ca_cert, ca_key) = ensure_ca(&dir)?; + let (ca_cert, ca_key) = ensure_ca(data_dir)?; let (cert_chain, key) = generate_service_cert(&ca_cert, &ca_key, tld, service_names)?; // Ensure a crypto provider is installed (rustls needs one) diff --git a/tests/integration.sh b/tests/integration.sh index f1c5205..473356e 100755 --- a/tests/integration.sh +++ b/tests/integration.sh @@ -542,8 +542,9 @@ else PROXY_HTTPS_PORT=8443 NUMA_DATA=/tmp/numa-integration-data - # Fresh data dir so we generate a fresh CA for this suite — NUMA_DATA_DIR - # env var lets numa write under $TMPDIR instead of /usr/local/var/numa. + # Fresh data dir so we generate a fresh CA for this suite. Path is set + # via [server] data_dir in the TOML below, not an env var — numa treats + # its config file as the single source of truth for all knobs. rm -rf "$NUMA_DATA" mkdir -p "$NUMA_DATA" @@ -551,6 +552,7 @@ else [server] bind_addr = "127.0.0.1:$PORT" api_port = $API_PORT +data_dir = "$NUMA_DATA" [upstream] mode = "forward" @@ -582,7 +584,7 @@ value = "10.0.0.1" ttl = 60 CONF - NUMA_DATA_DIR="$NUMA_DATA" RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 & + RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 & NUMA_PID=$! sleep 4 From 7001ba2e517a69909d83f9f2fdbf6f45bad8c9d2 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 8 Apr 2026 01:37:07 +0300 Subject: [PATCH 21/30] chore: bump version to 0.10.0 v0.10.0 ships DNS-over-TLS. Tagged release v0.10.0 on main after merge will pick up this Cargo.toml version, keeping tag and manifest aligned for release.yml. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 722c413..8750934 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1143,7 +1143,7 @@ dependencies = [ [[package]] name = "numa" -version = "0.9.1" +version = "0.10.0" dependencies = [ "arc-swap", "axum", diff --git a/Cargo.toml b/Cargo.toml index c6e9a2a..f0278b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "numa" -version = "0.9.1" +version = "0.10.0" authors = ["razvandimescu "] edition = "2021" description = "Portable DNS resolver in Rust — .numa local domains, ad blocking, developer overrides, DNS-over-HTTPS" From bc54ea930f3928f5436fa6ddb6ea59c2fc695798 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 8 Apr 2026 01:49:44 +0300 Subject: [PATCH 22/30] docs: document DNS-over-TLS listener in README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds DoT to the four existing touchpoints in the README where the feature naturally belongs: - Hero paragraph: mentions DoT alongside DNSSEC as a headline feature - Ad Blocking & Privacy section: dedicated paragraph with RFC 7858 reference, config hint, and the ALPN strictness guarantee - Comparison table: new "Encrypted clients (DoT listener)" row. Pi-hole "Needs stunnel sidecar" (verified — Pi-hole explicitly closed the native-DoT feature request as out of scope; community uses stunnel or AdGuard DNS Proxy as a TLS terminator) - Roadmap: checks off "DNS-over-TLS listener" alongside the existing DoH entry Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e96ecda..373c239 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ A portable DNS resolver in a single binary. 
Block ads on any network, name your local services (`frontend.numa`), and override any hostname with auto-revert — all from your laptop, no cloud account or Raspberry Pi required. -Built from scratch in Rust. Zero DNS libraries. RFC 1035 wire protocol parsed by hand. Caching, ad blocking, and local service domains out of the box. Optional recursive resolution from root nameservers with full DNSSEC chain-of-trust validation. One ~8MB binary, everything embedded. +Built from scratch in Rust. Zero DNS libraries. RFC 1035 wire protocol parsed by hand. Caching, ad blocking, and local service domains out of the box. Optional recursive resolution from root nameservers with full DNSSEC chain-of-trust validation, plus a DNS-over-TLS listener for encrypted client connections (iOS Private DNS, systemd-resolved, etc.). One ~8MB binary, everything embedded. ![Numa dashboard](assets/hero-demo.gif) @@ -67,6 +67,8 @@ Three resolution modes: DNSSEC validates the full chain of trust: RRSIG signatures, DNSKEY verification, DS delegation, NSEC/NSEC3 denial proofs. [Read how it works →](https://numa.rs/blog/posts/dnssec-from-scratch.html) +**DNS-over-TLS listener** (RFC 7858) — accept encrypted queries on port 853 from strict clients like iOS Private DNS, systemd-resolved, or stubby. Self-signed CA generated automatically, or bring your own cert via `[dot] cert_path` / `key_path` in `numa.toml`. ALPN `"dot"` is advertised and enforced; a handshake with mismatched ALPN is rejected as a cross-protocol confusion defense. + ## LAN Discovery Run Numa on multiple machines. They find each other automatically via mDNS: @@ -96,6 +98,7 @@ From Machine B: `curl http://api.numa` → proxied to Machine A's port 8000. 
Ena | Ad blocking | Yes | Yes | — | 385K+ domains | | Web admin UI | Full | Full | — | Dashboard | | Encrypted upstream (DoH) | Needs cloudflared | Yes | — | Native | +| Encrypted clients (DoT listener) | Needs stunnel sidecar | Yes | Yes | Native (RFC 7858) | | Portable (laptop) | No (appliance) | No (appliance) | Server | Single binary, macOS/Linux/Windows | | Community maturity | 56K stars, 10 years | 33K stars | 20 years | New | @@ -116,6 +119,7 @@ From Machine B: `curl http://api.numa` → proxied to Machine A's port 8000. Ena - [x] `.numa` local domains — auto TLS, path routing, WebSocket proxy - [x] LAN service discovery — mDNS, cross-machine DNS + proxy - [x] DNS-over-HTTPS — encrypted upstream +- [x] DNS-over-TLS listener — encrypted client connections (RFC 7858, ALPN strict) - [x] Recursive resolution + DNSSEC — chain-of-trust, NSEC/NSEC3 - [x] SRTT-based nameserver selection - [ ] pkarr integration — self-sovereign DNS via Mainline DHT From 82cc588c67548103df8fdd6483f2fe31a4da55a4 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 8 Apr 2026 01:59:41 +0300 Subject: [PATCH 23/30] docs: explain the two DoT cert modes in README Expands the DoT paragraph to make the trust model explicit. The previous version said "self-signed or bring your own cert" without explaining when to pick which or what the user experience looks like. The two modes close numa's gap vs AdGuard Home: BYO cert mode is functionally identical (Let's Encrypt via DNS-01 + cert_path/key_path), and the self-signed mode is numa's advantage on LAN-only deploys. Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 373c239..4c32370 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,12 @@ Three resolution modes: DNSSEC validates the full chain of trust: RRSIG signatures, DNSKEY verification, DS delegation, NSEC/NSEC3 denial proofs. 
[Read how it works →](https://numa.rs/blog/posts/dnssec-from-scratch.html) -**DNS-over-TLS listener** (RFC 7858) — accept encrypted queries on port 853 from strict clients like iOS Private DNS, systemd-resolved, or stubby. Self-signed CA generated automatically, or bring your own cert via `[dot] cert_path` / `key_path` in `numa.toml`. ALPN `"dot"` is advertised and enforced; a handshake with mismatched ALPN is rejected as a cross-protocol confusion defense. +**DNS-over-TLS listener** (RFC 7858) — accept encrypted queries on port 853 from strict clients like iOS Private DNS, systemd-resolved, or stubby. Two modes: + +- **Self-signed** (default) — numa generates a local CA automatically. Works on any network with zero DNS setup, but clients must manually trust the CA (on macOS/Linux add to the system trust store; on iOS install a `.mobileconfig`). +- **Bring-your-own cert** — point `[dot] cert_path` / `key_path` at a publicly-trusted cert (e.g., Let's Encrypt via DNS-01 challenge on a domain pointing at your numa instance). Clients connect without any trust-store setup — same UX as AdGuard Home or Cloudflare `1.1.1.1`. + +ALPN `"dot"` is advertised and enforced in both modes; a handshake with mismatched ALPN is rejected as a cross-protocol confusion defense. ## LAN Discovery From 1b2f68202696e73fa94c5e4434a3dbc534018f5d Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 8 Apr 2026 03:47:43 +0300 Subject: [PATCH 24/30] ci: auto-bump homebrew formula on release (#39) Add a workflow that runs on release:published (and via manual workflow_dispatch), fetches sha256 checksums from the published release assets, and rewrites razvandimescu/homebrew-tap/numa.rb in place: version, URL paths, and sha256 lines after each url. The formula's existing on_macos/on_linux structure is preserved. Uses HOMEBREW_TAP_GITHUB_TOKEN (already set as a repo secret) to push directly to the tap's main branch. 
Co-authored-by: Claude Opus 4.6 (1M context) --- .github/workflows/homebrew-bump.yml | 76 +++++++++++++++++++++++++++++ scripts/update-homebrew-formula.py | 57 ++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 .github/workflows/homebrew-bump.yml create mode 100755 scripts/update-homebrew-formula.py diff --git a/.github/workflows/homebrew-bump.yml b/.github/workflows/homebrew-bump.yml new file mode 100644 index 0000000..5bcac57 --- /dev/null +++ b/.github/workflows/homebrew-bump.yml @@ -0,0 +1,76 @@ +name: Bump Homebrew Tap + +on: + release: + types: [published] + workflow_dispatch: + inputs: + version: + description: 'Version to bump (e.g. 0.10.0 or v0.10.0)' + required: true + +permissions: + contents: read + +jobs: + bump: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Determine version + id: ver + run: | + if [ "${{ github.event_name }}" = "release" ]; then + V="${{ github.event.release.tag_name }}" + else + V="${{ github.event.inputs.version }}" + fi + V="${V#v}" + echo "version=$V" >> "$GITHUB_OUTPUT" + + - name: Fetch sha256 checksums from release assets + id: shas + env: + V: ${{ steps.ver.outputs.version }} + run: | + set -euo pipefail + base="https://github.com/razvandimescu/numa/releases/download/v${V}" + for t in macos-aarch64 macos-x86_64 linux-aarch64 linux-x86_64; do + sha=$(curl -fsSL "${base}/numa-${t}.tar.gz.sha256" | awk '{print $1}') + if [ -z "$sha" ]; then + echo "ERROR: failed to fetch sha256 for $t" >&2 + exit 1 + fi + key=$(echo "$t" | tr '[:lower:]-' '[:upper:]_') + echo "SHA_${key}=${sha}" >> "$GITHUB_ENV" + done + + - name: Clone homebrew-tap + env: + HOMEBREW_TAP_GITHUB_TOKEN: ${{ secrets.HOMEBREW_TAP_GITHUB_TOKEN }} + run: | + git clone "https://x-access-token:${HOMEBREW_TAP_GITHUB_TOKEN}@github.com/razvandimescu/homebrew-tap.git" tap + + - name: Update formula + env: + VERSION: ${{ steps.ver.outputs.version }} + run: | + python3 scripts/update-homebrew-formula.py tap/numa.rb + 
echo "--- updated numa.rb ---" + cat tap/numa.rb + + - name: Commit and push + working-directory: tap + env: + V: ${{ steps.ver.outputs.version }} + run: | + if git diff --quiet; then + echo "numa.rb already at v${V}, nothing to commit" + exit 0 + fi + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git add numa.rb + git commit -m "chore: bump numa to v${V}" + git push origin main diff --git a/scripts/update-homebrew-formula.py b/scripts/update-homebrew-formula.py new file mode 100755 index 0000000..c114784 --- /dev/null +++ b/scripts/update-homebrew-formula.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +"""Rewrite a Homebrew formula in place: bump version, URL paths, and sha256 lines. + +Reads the formula path from argv[1], and the following env vars: + VERSION e.g. "0.10.0" (no leading v) + SHA_MACOS_AARCH64 + SHA_MACOS_X86_64 + SHA_LINUX_AARCH64 + SHA_LINUX_X86_64 + +Assumptions about the formula: + - Has `version "X.Y.Z"` somewhere + - Has `url "...releases/download/vX.Y.Z/numa-<target>.tar.gz"` lines + - May or may not already have `sha256 "..."` lines immediately after each url +""" +import os +import re +import sys + +formula_path = sys.argv[1] +version = os.environ["VERSION"].lstrip("v") +shas = { + "macos-aarch64": os.environ["SHA_MACOS_AARCH64"], + "macos-x86_64": os.environ["SHA_MACOS_X86_64"], + "linux-aarch64": os.environ["SHA_LINUX_AARCH64"], + "linux-x86_64": os.environ["SHA_LINUX_X86_64"], +} + +with open(formula_path) as f: + content = f.read() + +content = re.sub(r'version "[^"]*"', f'version "{version}"', content) +content = re.sub( + r"releases/download/v[\d.]+/numa-", + f"releases/download/v{version}/numa-", + content, +) +content = re.sub(r'\n[ \t]*sha256 "[^"]*"', "", content) + + +def add_sha(match: re.Match) -> str: + indent = match.group(1) + target = match.group(2) + if target not in shas: + return match.group(0) + return f'{match.group(0)}\n{indent}sha256
"{shas[target]}"' + + +content = re.sub( + r'^([ \t]+)url "[^"]*numa-([\w-]+)\.tar\.gz"', + add_sha, + content, + flags=re.MULTILINE, +) + +with open(formula_path, "w") as f: + f.write(content) From 039254280b49655e561a92c54ade70d94d30a1e0 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 8 Apr 2026 15:18:01 +0300 Subject: [PATCH 25/30] fix: cross-platform CA trust (Arch/Fedora + Windows) (#41) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: cross-platform CA trust (Arch/Fedora + Windows) Closes #35. trust_ca_linux now detects which trust store the distro ships and runs the matching refresh command, instead of hardcoding Debian's update-ca-certificates. Detection walks a const table in priority order, picking the first whose anchor dir exists: - debian: /usr/local/share/ca-certificates (update-ca-certificates) - pki: /etc/pki/ca-trust/source/anchors (update-ca-trust extract) - p11kit: /etc/ca-certificates/trust-source/anchors (trust extract-compat) Falls back with a clear error listing every backend tried. Adds Windows support via certutil -addstore Root / -delstore Root, removing the silent CA-trust gap on numa install (previously the service installed but the trust step quietly errored, leaving every HTTPS .numa request throwing browser warnings). Refactor: trust_ca and untrust_ca are now thin dispatchers calling per-platform helpers. CA_COMMON_NAME and CA_FILE_NAME are centralized in tls.rs and reused from system_dns.rs and api.rs. untrust_ca_linux no longer pre-checks file existence (TOCTOU) and skips the refresh when no file was actually removed. Test: tests/docker/install-trust.sh runs the install/uninstall contract against debian:stable, fedora:latest, and archlinux:latest in containers, asserting the cert lands in (and is removed from) the system bundle. All three pass locally. README notes the Firefox/NSS limitation (separate trust store). 
Co-Authored-By: Claude Opus 4.6 (1M context) * style: rustfmt fixes for trust_ca_linux helpers Co-Authored-By: Claude Opus 4.6 (1M context) * test: macOS CA trust contract test (manual) Adds tests/manual/install-trust-macos.sh — a sudo bash script that mirrors trust_ca_macos / untrust_ca_macos against a fixture cert with a unique CN. Designed to coexist with a running production numa: - Refuses to run if a real "Numa Local CA" is already in System.keychain (fail-closed protection for dogfood installs) - Uses a unique CN ("Numa Local CA Test ") so the test cert can never collide with production - Mirrors the by-hash deletion loop from untrust_ca_macos - Trap-cleanup on success or interrupt Lives under tests/manual/ to signal "host-mutating, dev-only" — distinct from tests/docker/install-trust.sh which is hermetic. Co-Authored-By: Claude Opus 4.6 (1M context) * test: relax bail-out in macOS trust test (safe alongside production) The bail-out was overly defensive. The test cert uses a unique CN ("Numa Local CA Test ") that is strictly longer than the production CN, so `security find-certificate -c $TEST_CN` cannot substring-match the production cert. All deletes are by-hash, which can only target the test cert's specific hash. Coexistence is provably safe; document the reasoning in the header comment block and replace the refusal with an informational notice. 
Co-Authored-By: Claude Opus 4.6 (1M context) --------- Co-authored-by: Claude Opus 4.6 (1M context) --- README.md | 2 +- src/api.rs | 2 +- src/system_dns.rs | 259 ++++++++++++++++++++-------- src/tls.rs | 11 +- tests/docker/install-trust.sh | 123 +++++++++++++ tests/manual/install-trust-macos.sh | 94 ++++++++++ 6 files changed, 411 insertions(+), 80 deletions(-) create mode 100755 tests/docker/install-trust.sh create mode 100755 tests/manual/install-trust-macos.sh diff --git a/README.md b/README.md index 4c32370..5794268 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ DNSSEC validates the full chain of trust: RRSIG signatures, DNSKEY verification, **DNS-over-TLS listener** (RFC 7858) — accept encrypted queries on port 853 from strict clients like iOS Private DNS, systemd-resolved, or stubby. Two modes: -- **Self-signed** (default) — numa generates a local CA automatically. Works on any network with zero DNS setup, but clients must manually trust the CA (on macOS/Linux add to the system trust store; on iOS install a `.mobileconfig`). +- **Self-signed** (default) — numa generates a local CA automatically. `numa install` adds it to the system trust store on macOS, Linux (Debian/Ubuntu, Fedora/RHEL/SUSE, Arch), and Windows. On iOS, install the `.mobileconfig` from `numa setup-phone`. Firefox keeps its own NSS store and ignores the system one — trust the CA there manually if you need HTTPS for `.numa` services in Firefox. - **Bring-your-own cert** — point `[dot] cert_path` / `key_path` at a publicly-trusted cert (e.g., Let's Encrypt via DNS-01 challenge on a domain pointing at your numa instance). Clients connect without any trust-store setup — same UX as AdGuard Home or Cloudflare `1.1.1.1`. ALPN `"dot"` is advertised and enforced in both modes; a handshake with mismatched ALPN is rejected as a cross-protocol confusion defense. 
diff --git a/src/api.rs b/src/api.rs index 1a6b7ef..59938b4 100644 --- a/src/api.rs +++ b/src/api.rs @@ -906,7 +906,7 @@ async fn remove_route( } async fn serve_ca(State(ctx): State>) -> Result { - let ca_path = ctx.data_dir.join("ca.pem"); + let ca_path = ctx.data_dir.join(crate::tls::CA_FILE_NAME); let bytes = tokio::task::spawn_blocking(move || std::fs::read(ca_path)) .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? diff --git a/src/system_dns.rs b/src/system_dns.rs index 8709e0d..fc02393 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -1278,102 +1278,209 @@ fn run_systemctl(args: &[&str]) -> Result<(), String> { // --- CA trust management --- +/// One Linux trust-store backend (Debian, Fedora pki, Arch p11-kit). +#[cfg(target_os = "linux")] +struct LinuxTrustStore { + name: &'static str, + anchor_dir: &'static str, + anchor_file: &'static str, + refresh_install: &'static [&'static str], + refresh_uninstall: &'static [&'static str], +} + +// If you change this table, update tests/docker/install-trust.sh to match — +// it asserts the same paths/commands against real distro images. 
+#[cfg(target_os = "linux")] +const LINUX_TRUST_STORES: &[LinuxTrustStore] = &[ + // Debian / Ubuntu / Mint + LinuxTrustStore { + name: "debian", + anchor_dir: "/usr/local/share/ca-certificates", + anchor_file: "numa-local-ca.crt", + refresh_install: &["update-ca-certificates"], + refresh_uninstall: &["update-ca-certificates", "--fresh"], + }, + // Fedora / RHEL / CentOS / SUSE (p11-kit via update-ca-trust wrapper) + LinuxTrustStore { + name: "pki", + anchor_dir: "/etc/pki/ca-trust/source/anchors", + anchor_file: "numa-local-ca.pem", + refresh_install: &["update-ca-trust", "extract"], + refresh_uninstall: &["update-ca-trust", "extract"], + }, + // Arch / Manjaro (raw p11-kit) + LinuxTrustStore { + name: "p11kit", + anchor_dir: "/etc/ca-certificates/trust-source/anchors", + anchor_file: "numa-local-ca.pem", + refresh_install: &["trust", "extract-compat"], + refresh_uninstall: &["trust", "extract-compat"], + }, +]; + +#[cfg(target_os = "linux")] +fn detect_linux_trust_store() -> Option<&'static LinuxTrustStore> { + LINUX_TRUST_STORES + .iter() + .find(|s| std::path::Path::new(s.anchor_dir).is_dir()) +} + fn trust_ca() -> Result<(), String> { - let ca_path = crate::data_dir().join("ca.pem"); + let ca_path = crate::data_dir().join(crate::tls::CA_FILE_NAME); if !ca_path.exists() { return Err("CA not generated yet — start numa first to create certificates".into()); } #[cfg(target_os = "macos")] - { - let status = std::process::Command::new("security") - .args([ - "add-trusted-cert", - "-d", - "-r", - "trustRoot", - "-k", - "/Library/Keychains/System.keychain", - ]) - .arg(&ca_path) - .status() - .map_err(|e| format!("security: {}", e))?; - if !status.success() { - return Err("security add-trusted-cert failed".into()); - } - eprintln!(" Trusted Numa CA in system keychain"); - } - + let result = trust_ca_macos(&ca_path); #[cfg(target_os = "linux")] - { - let dest = std::path::Path::new("/usr/local/share/ca-certificates/numa-local-ca.crt"); - std::fs::copy(&ca_path, 
dest).map_err(|e| format!("copy CA: {}", e))?; - let status = std::process::Command::new("update-ca-certificates") - .status() - .map_err(|e| format!("update-ca-certificates: {}", e))?; - if !status.success() { - return Err("update-ca-certificates failed".into()); - } - eprintln!(" Trusted Numa CA system-wide"); - } + let result = trust_ca_linux(&ca_path); + #[cfg(windows)] + let result = trust_ca_windows(&ca_path); + #[cfg(not(any(target_os = "macos", target_os = "linux", windows)))] + let result = Err::<(), String>("CA trust not supported on this OS".to_string()); - #[cfg(not(any(target_os = "macos", target_os = "linux")))] - { - Err("CA trust not supported on this OS".into()) - } - - #[cfg(any(target_os = "macos", target_os = "linux"))] - Ok(()) + result } fn untrust_ca() -> Result<(), String> { - let ca_path = crate::data_dir().join("ca.pem"); - #[cfg(target_os = "macos")] + let result = untrust_ca_macos(); + #[cfg(target_os = "linux")] + let result = untrust_ca_linux(); + #[cfg(windows)] + let result = untrust_ca_windows(); + #[cfg(not(any(target_os = "macos", target_os = "linux", windows)))] + let result = Ok::<(), String>(()); + + result +} + +#[cfg(target_os = "macos")] +fn trust_ca_macos(ca_path: &std::path::Path) -> Result<(), String> { + let status = std::process::Command::new("security") + .args([ + "add-trusted-cert", + "-d", + "-r", + "trustRoot", + "-k", + "/Library/Keychains/System.keychain", + ]) + .arg(ca_path) + .status() + .map_err(|e| format!("security: {}", e))?; + if !status.success() { + return Err("security add-trusted-cert failed".into()); + } + eprintln!(" Trusted Numa CA in system keychain"); + Ok(()) +} + +#[cfg(target_os = "macos")] +fn untrust_ca_macos() -> Result<(), String> { + if let Ok(out) = std::process::Command::new("security") + .args([ + "find-certificate", + "-c", + crate::tls::CA_COMMON_NAME, + "-a", + "-Z", + "/Library/Keychains/System.keychain", + ]) + .output() { - // Find all Numa CA certs by hash and delete each one - 
if let Ok(out) = std::process::Command::new("security") - .args([ - "find-certificate", - "-c", - "Numa Local CA", - "-a", - "-Z", - "/Library/Keychains/System.keychain", - ]) - .output() - { - let stdout = String::from_utf8_lossy(&out.stdout); - for line in stdout.lines() { - if let Some(hash) = line.strip_prefix("SHA-1 hash: ") { - let hash = hash.trim(); - let _ = std::process::Command::new("security") - .args([ - "delete-certificate", - "-Z", - hash, - "/Library/Keychains/System.keychain", - ]) - .output(); - } + let stdout = String::from_utf8_lossy(&out.stdout); + for line in stdout.lines() { + if let Some(hash) = line.strip_prefix("SHA-1 hash: ") { + let hash = hash.trim(); + let _ = std::process::Command::new("security") + .args([ + "delete-certificate", + "-Z", + hash, + "/Library/Keychains/System.keychain", + ]) + .output(); } } - eprintln!(" Removed Numa CA from system keychain"); } + eprintln!(" Removed Numa CA from system keychain"); + Ok(()) +} - #[cfg(target_os = "linux")] - { - let dest = std::path::Path::new("/usr/local/share/ca-certificates/numa-local-ca.crt"); - if dest.exists() { - let _ = std::fs::remove_file(dest); - let _ = std::process::Command::new("update-ca-certificates") - .arg("--fresh") - .status(); - eprintln!(" Removed Numa CA from system trust store"); +#[cfg(target_os = "linux")] +fn trust_ca_linux(ca_path: &std::path::Path) -> Result<(), String> { + let store = detect_linux_trust_store().ok_or_else(|| { + let names: Vec<&str> = LINUX_TRUST_STORES.iter().map(|s| s.name).collect(); + format!( + "no supported CA trust store found (tried: {}). 
\ + Please report at https://github.com/razvandimescu/numa/issues", + names.join(", ") + ) + })?; + + let dest = std::path::Path::new(store.anchor_dir).join(store.anchor_file); + std::fs::copy(ca_path, &dest).map_err(|e| format!("copy CA to {}: {}", dest.display(), e))?; + + run_refresh(store.name, store.refresh_install)?; + eprintln!(" Trusted Numa CA system-wide ({})", store.name); + Ok(()) +} + +#[cfg(target_os = "linux")] +fn untrust_ca_linux() -> Result<(), String> { + let Some(store) = detect_linux_trust_store() else { + return Ok(()); + }; + + let dest = std::path::Path::new(store.anchor_dir).join(store.anchor_file); + match std::fs::remove_file(&dest) { + Ok(()) => { + let _ = run_refresh(store.name, store.refresh_uninstall); + eprintln!(" Removed Numa CA from system trust store ({})", store.name); } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => {} + Err(_) => {} // best-effort uninstall } + Ok(()) +} - let _ = ca_path; // suppress unused warning on other platforms +#[cfg(target_os = "linux")] +fn run_refresh(store_name: &str, argv: &[&str]) -> Result<(), String> { + let (cmd, args) = argv + .split_first() + .expect("refresh command must be non-empty"); + let status = std::process::Command::new(cmd) + .args(args) + .status() + .map_err(|e| format!("{} ({}): {}", cmd, store_name, e))?; + if !status.success() { + return Err(format!("{} ({}) failed", cmd, store_name)); + } + Ok(()) +} + +#[cfg(windows)] +fn trust_ca_windows(ca_path: &std::path::Path) -> Result<(), String> { + let status = std::process::Command::new("certutil") + .args(["-addstore", "-f", "Root"]) + .arg(ca_path) + .status() + .map_err(|e| format!("certutil: {}", e))?; + if !status.success() { + return Err("certutil -addstore Root failed (run as Administrator?)".into()); + } + eprintln!(" Trusted Numa CA in Windows Root store"); + Ok(()) +} + +#[cfg(windows)] +fn untrust_ca_windows() -> Result<(), String> { + let _ = std::process::Command::new("certutil") + .args(["-delstore", "Root", 
crate::tls::CA_COMMON_NAME]) + .status(); + eprintln!(" Removed Numa CA from Windows Root store"); Ok(()) } diff --git a/src/tls.rs b/src/tls.rs index c60714e..7c7620a 100644 --- a/src/tls.rs +++ b/src/tls.rs @@ -13,6 +13,13 @@ use time::{Duration, OffsetDateTime}; const CA_VALIDITY_DAYS: i64 = 3650; // 10 years const CERT_VALIDITY_DAYS: i64 = 365; // 1 year +/// Common Name on Numa's local CA. Referenced by trust-store helpers +/// (`security`, `certutil`) when locating the cert for removal. +pub const CA_COMMON_NAME: &str = "Numa Local CA"; + +/// Filename of the CA certificate inside the data dir. +pub const CA_FILE_NAME: &str = "ca.pem"; + /// Collect all service + LAN peer names and regenerate the TLS cert. pub fn regenerate_tls(ctx: &ServerCtx) { let tls = match &ctx.tls_config { @@ -67,7 +74,7 @@ pub fn build_tls_config( fn ensure_ca(dir: &Path) -> crate::Result<(rcgen::Certificate, KeyPair)> { let ca_key_path = dir.join("ca.key"); - let ca_cert_path = dir.join("ca.pem"); + let ca_cert_path = dir.join(CA_FILE_NAME); if ca_key_path.exists() && ca_cert_path.exists() { let key_pem = std::fs::read_to_string(&ca_key_path)?; @@ -86,7 +93,7 @@ fn ensure_ca(dir: &Path) -> crate::Result<(rcgen::Certificate, KeyPair)> { let mut params = CertificateParams::default(); params .distinguished_name - .push(DnType::CommonName, "Numa Local CA"); + .push(DnType::CommonName, CA_COMMON_NAME); params.is_ca = IsCa::Ca(BasicConstraints::Unconstrained); params.key_usages = vec![KeyUsagePurpose::KeyCertSign, KeyUsagePurpose::CrlSign]; params.not_before = OffsetDateTime::now_utc(); diff --git a/tests/docker/install-trust.sh b/tests/docker/install-trust.sh new file mode 100755 index 0000000..ec6d55c --- /dev/null +++ b/tests/docker/install-trust.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +# +# Cross-distro CA trust contract test for issue #35. 
+# +# Runs the exact shell commands `src/system_dns.rs::trust_ca_linux` would run +# on each Linux trust-store family (Debian, Fedora pki, Arch p11-kit), and +# asserts the certificate ends up in (and is removed from) the system bundle. +# +# This is a contract test, not an integration test: it doesn't drive the Rust +# code (that would need systemd-in-container). It verifies the assumptions in +# `LINUX_TRUST_STORES` against the real distro behavior. If you change that +# table in src/system_dns.rs, update the per-distro cases below to match. +# +# Requirements: docker, openssl (host). +# Usage: ./tests/docker/install-trust.sh + +set -euo pipefail + +cd "$(dirname "$0")/../.." + +GREEN="\033[32m"; RED="\033[31m"; RESET="\033[0m" + +# Self-signed CA fixture, mounted into each container as ca.pem. +# basicConstraints=CA:TRUE is required — without it, Debian's +# update-ca-certificates silently skips the cert during bundle build. +FIXTURE_DIR=$(mktemp -d) +trap 'rm -rf "$FIXTURE_DIR"' EXIT +openssl req -x509 -newkey rsa:2048 -nodes -days 1 \ + -keyout "$FIXTURE_DIR/ca.key" \ + -out "$FIXTURE_DIR/ca.pem" \ + -subj "/CN=Numa Local CA Test $(date +%s)" \ + -addext "basicConstraints=critical,CA:TRUE" \ + -addext "keyUsage=critical,keyCertSign,cRLSign" >/dev/null 2>&1 + +# Distro bundles store certs differently — Debian writes raw PEM only, +# Fedora prepends "# CN" comment headers, Arch via extract-compat is +# raw PEM. To detect cert presence uniformly we grep for a deterministic +# substring of the base64 body (first base64 line is unique per cert). 
+CERT_TAG=$(sed -n '2p' "$FIXTURE_DIR/ca.pem") + +PASSED=0; FAILED=0 + +run_case() { + local distro="$1"; shift + local image="$1"; shift + local platform="$1"; shift + local script="$1" + + printf "── %s (%s) ──\n" "$distro" "$image" + if docker run --rm \ + --platform "$platform" \ + --security-opt seccomp=unconfined \ + -e CERT_TAG="$CERT_TAG" \ + -e DEBIAN_FRONTEND=noninteractive \ + -v "$FIXTURE_DIR/ca.pem:/fixture/ca.pem:ro" \ + "$image" bash -c "$script"; then + printf "${GREEN}✓${RESET} %s\n\n" "$distro" + PASSED=$((PASSED + 1)) + else + printf "${RED}✗${RESET} %s\n\n" "$distro" + FAILED=$((FAILED + 1)) + fi +} + +# Debian / Ubuntu / Mint — anchor: /usr/local/share/ca-certificates/*.crt +run_case "debian" "debian:stable" "linux/amd64" ' + set -e + apt-get update -qq + apt-get install -qq -y ca-certificates >/dev/null + install -m 0644 /fixture/ca.pem /usr/local/share/ca-certificates/numa-local-ca.crt + update-ca-certificates >/dev/null 2>&1 + grep -q "$CERT_TAG" /etc/ssl/certs/ca-certificates.crt + echo " install: cert present in bundle" + rm /usr/local/share/ca-certificates/numa-local-ca.crt + update-ca-certificates --fresh >/dev/null 2>&1 + if grep -q "$CERT_TAG" /etc/ssl/certs/ca-certificates.crt; then + echo " uninstall: cert STILL present (regression)" >&2 + exit 1 + fi + echo " uninstall: cert removed from bundle" +' + +# Fedora / RHEL / CentOS / SUSE — anchor: /etc/pki/ca-trust/source/anchors/*.pem +run_case "fedora" "fedora:latest" "linux/amd64" ' + set -e + dnf install -q -y ca-certificates >/dev/null + install -m 0644 /fixture/ca.pem /etc/pki/ca-trust/source/anchors/numa-local-ca.pem + update-ca-trust extract + grep -q "$CERT_TAG" /etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem + echo " install: cert present in bundle" + rm /etc/pki/ca-trust/source/anchors/numa-local-ca.pem + update-ca-trust extract + if grep -q "$CERT_TAG" /etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem; then + echo " uninstall: cert STILL present (regression)" >&2 + exit 1 + 
fi + echo " uninstall: cert removed from bundle" +' + +# Arch / Manjaro — anchor: /etc/ca-certificates/trust-source/anchors/*.pem +# archlinux:latest is x86_64-only; --platform forces emulation on Apple Silicon. +run_case "arch" "archlinux:latest" "linux/amd64" ' + set -e + # pacman 7+ filters syscalls in its own sandbox; disable for Rosetta/qemu emulation. + sed -i "s/^#DisableSandboxSyscalls/DisableSandboxSyscalls/" /etc/pacman.conf + pacman -Sy --noconfirm --needed ca-certificates p11-kit >/dev/null 2>&1 + install -m 0644 /fixture/ca.pem /etc/ca-certificates/trust-source/anchors/numa-local-ca.pem + trust extract-compat + grep -q "$CERT_TAG" /etc/ssl/certs/ca-certificates.crt + echo " install: cert present in bundle" + rm /etc/ca-certificates/trust-source/anchors/numa-local-ca.pem + trust extract-compat + if grep -q "$CERT_TAG" /etc/ssl/certs/ca-certificates.crt; then + echo " uninstall: cert STILL present (regression)" >&2 + exit 1 + fi + echo " uninstall: cert removed from bundle" +' + +printf "── summary ──\n" +printf " ${GREEN}passed${RESET}: %d\n" "$PASSED" +printf " ${RED}failed${RESET}: %d\n" "$FAILED" +[ "$FAILED" -eq 0 ] diff --git a/tests/manual/install-trust-macos.sh b/tests/manual/install-trust-macos.sh new file mode 100755 index 0000000..5ad29d0 --- /dev/null +++ b/tests/manual/install-trust-macos.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash +# +# Manual macOS CA trust contract test. +# +# Mirrors src/system_dns.rs::trust_ca_macos / untrust_ca_macos by running +# the same `security` shell commands against a fixture cert with a unique +# CN. Safe to run alongside a production numa install: +# +# - Test cert CN = "Numa Local CA Test ", always strictly longer +# than the production CN "Numa Local CA". `security find-certificate -c` +# does substring matching, so the test's search for $TEST_CN can never +# match the production cert (the search term is longer than the prod CN). 
+# - All deletes use `delete-certificate -Z <hash>`, which only touches the +# cert with that exact hash. Production and test certs have different +# hashes by construction (different key material), so the delete cannot +# reach the production cert even if a CN search somehow returned both. +# +# Mutates the System keychain (briefly). Cleans up on success or interrupt. +# Requires sudo for `security add-trusted-cert` and `delete-certificate`. +# +# Usage: ./tests/manual/install-trust-macos.sh + +set -euo pipefail + +if [[ "$OSTYPE" != darwin* ]]; then + echo "This test is macOS-only." >&2 + exit 1 +fi + +GREEN="\033[32m"; RED="\033[31m"; RESET="\033[0m" + +# Production constant from src/tls.rs::CA_COMMON_NAME — keep in sync. +PROD_CN="Numa Local CA" +KEYCHAIN="/Library/Keychains/System.keychain" + +# Notice if production numa is already installed. We proceed regardless — +# see header for why coexistence is safe (unique CN + by-hash deletion). +if security find-certificate -c "$PROD_CN" "$KEYCHAIN" >/dev/null 2>&1; then + echo " note: production '$PROD_CN' detected — proceeding alongside (test cert can't touch it)" + echo +fi + +# Unique CN ensures the test cert can never collide with production. +TEST_CN="Numa Local CA Test $$-$(date +%s)" +FIXTURE_DIR=$(mktemp -d) + +cleanup() { + # Best-effort: remove any test certs by hash if still present. 
+ if security find-certificate -c "$TEST_CN" "$KEYCHAIN" >/dev/null 2>&1; then + echo " cleanup: removing leftover test cert" + security find-certificate -c "$TEST_CN" -a -Z "$KEYCHAIN" 2>/dev/null \ + | awk '/^SHA-1 hash:/ {print $NF}' \ + | while read -r hash; do + sudo security delete-certificate -Z "$hash" "$KEYCHAIN" >/dev/null 2>&1 || true + done + fi + rm -rf "$FIXTURE_DIR" +} +trap cleanup EXIT + +echo "── generating fixture CA ──" +openssl req -x509 -newkey rsa:2048 -nodes -days 1 \ + -keyout "$FIXTURE_DIR/ca.key" \ + -out "$FIXTURE_DIR/ca.pem" \ + -subj "/CN=$TEST_CN" \ + -addext "basicConstraints=critical,CA:TRUE" \ + -addext "keyUsage=critical,keyCertSign,cRLSign" >/dev/null 2>&1 +echo " CN: $TEST_CN" +echo + +echo "── trust step (mirrors trust_ca_macos) ──" +sudo security add-trusted-cert -d -r trustRoot -k "$KEYCHAIN" "$FIXTURE_DIR/ca.pem" +if security find-certificate -c "$TEST_CN" "$KEYCHAIN" >/dev/null 2>&1; then + printf " ${GREEN}✓${RESET} test cert found in keychain\n" +else + printf " ${RED}✗${RESET} test cert NOT found after add-trusted-cert\n" + exit 1 +fi +echo + +echo "── untrust step (mirrors untrust_ca_macos) ──" +security find-certificate -c "$TEST_CN" -a -Z "$KEYCHAIN" 2>/dev/null \ + | awk '/^SHA-1 hash:/ {print $NF}' \ + | while read -r hash; do + sudo security delete-certificate -Z "$hash" "$KEYCHAIN" >/dev/null + done +if security find-certificate -c "$TEST_CN" "$KEYCHAIN" >/dev/null 2>&1; then + printf " ${RED}✗${RESET} test cert STILL present after delete (regression)\n" + exit 1 +fi +printf " ${GREEN}✓${RESET} test cert removed from keychain\n" +echo + +printf "${GREEN}all checks passed${RESET}\n" From 679b346246c38bbc51ddd132a99b43efe82e3f81 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 8 Apr 2026 16:38:37 +0300 Subject: [PATCH 26/30] fix: prevent self-referential DNS backup on re-install (#40) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: prevent self-referential DNS 
backup on re-install The install flow previously captured current system DNS servers verbatim into the backup file. If numa was already installed, current DNS was 127.0.0.1, so the "backup" recorded 127.0.0.1 as the "original" — making a subsequent uninstall a no-op self-reference. Reproduced 2026-04-08 during v0.10.0 brew dogfood: after `sudo numa uninstall; sudo /opt/homebrew/bin/numa install`, `sudo numa uninstall` printed `restored DNS for "Wi-Fi" -> 127.0.0.1` because the brew binary's install step had overwritten the backup with the already-stub state. Fix (all three platforms): - macOS/Windows: if the existing backup already contains at least one non-loopback/non-stub upstream, preserve it as-is. If writing a fresh backup, filter loopback/stub addresses first so a capture from already-numa-managed state isn't self-referential. - Linux (resolv.conf fallback path): detect numa-managed or all-loopback resolv.conf content and skip the file copy in that case; preserve an existing useful backup rather than overwriting it. systemd-resolved path is unaffected (uses a drop-in, no backup file). Adds three unit tests for the predicates: macOS HashMap detection, Windows interface filter, and resolv.conf parsing (real upstream, self-referential, numa-marker, systemd stub, mixed). Co-Authored-By: Claude Opus 4.6 (1M context) * refactor: share iter_nameservers helper and reuse resolv.conf content Post-review simplifications on the stale-backup fix: - Extract iter_nameservers(&str) helper used by both parse_resolv_conf and resolv_conf_has_real_upstream. Eliminates the duplicated line-by-line nameserver parsing (findings from reuse review). - install_linux: reuse the already-read resolv.conf content via std::fs::write instead of a second read via std::fs::copy. - install_macos / install_windows: flatten the conditional eprintln pattern — always print a blank line, conditionally print the save message. Equivalent output, less branching. Net −12 lines. 
All 130 tests still pass, clippy clean. Co-Authored-By: Claude Opus 4.6 (1M context) * fix: drop redundant trim before split_whitespace CI caught `clippy::trim_split_whitespace` on Rust 1.94: `split_whitespace()` already skips leading/trailing whitespace, so `.trim()` first is redundant. Co-Authored-By: Claude Opus 4.6 (1M context) * refactor: extract load_backup helper Remove duplicated read+deserialize boilerplate shared by install_macos and install_windows. The two call sites each had an identical 4-line chain of read_to_string().ok().and_then(serde_json::from_str).ok() — collapse into a single generic helper load_backup(). Co-Authored-By: Claude Opus 4.6 (1M context) * Revert "refactor: extract load_backup helper" This reverts commit a54fb99428fb29da6f6ee2cc365bbb97e31cfbb1. * test: drop windows_backup_filters_loopback The test inlined the 3-line filter block from install_windows rather than calling a production helper, so it was testing stdlib Vec::retain + is_loopback_or_stub — both already covered elsewhere. Deleting it removes a test that would silently pass even if install_windows stopped filtering altogether. The predicate logic for macOS-shaped backups stays covered by macos_backup_real_upstream_detection (same inner Vec type). Co-Authored-By: Claude Opus 4.6 (1M context) * test: add windows_backup_filters_loopback unit test The PR description mentioned this test but it was missing from the diff, leaving backup_has_real_upstream_windows untested. Mirrors the shape of macos_backup_real_upstream_detection: empty map → false, all-loopback (127.0.0.1, ::1, 0.0.0.0) → false, one real entry alongside loopback → true. Also relax the cfg gate on backup_has_real_upstream_windows from cfg(windows) to cfg(any(windows, test)) so the test compiles cross-platform, matching how backup_has_real_upstream_macos and the resolv_conf helpers are gated. 
Co-Authored-By: Claude Opus 4.6 (1M context) --------- Co-authored-by: Claude Opus 4.6 (1M context) --- src/system_dns.rs | 254 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 220 insertions(+), 34 deletions(-) diff --git a/src/system_dns.rs b/src/system_dns.rs index fc02393..643b9d0 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -214,7 +214,18 @@ fn discover_linux() -> SystemDnsInfo { } } -/// Parse resolv.conf in a single pass, extracting both the first non-loopback +/// Yield each `nameserver` address from resolv.conf content. No filtering — +/// callers decide what counts as a real upstream. +#[cfg(any(target_os = "linux", test))] +fn iter_nameservers(content: &str) -> impl Iterator<Item = &str> { + content.lines().filter_map(|line| { + let mut parts = line.split_whitespace(); + (parts.next() == Some("nameserver")).then_some(())?; + parts.next() + }) +} + +/// Parse resolv.conf in a single pass, extracting the first non-loopback /// nameserver and all search domains. #[cfg(target_os = "linux")] fn parse_resolv_conf(path: &str) -> (Option<String>, Vec<String>) { @@ -222,19 +233,13 @@ fn parse_resolv_conf(path: &str) -> (Option<String>, Vec<String>) { Ok(t) => t, Err(_) => return (None, Vec::new()), }; - let mut upstream = None; + let upstream = iter_nameservers(&text) + .find(|ns| !is_loopback_or_stub(ns)) + .map(str::to_string); let mut search_domains = Vec::new(); for line in text.lines() { let line = line.trim(); - if line.starts_with("nameserver") { - if upstream.is_none() { - if let Some(ns) = line.split_whitespace().nth(1) { - if !is_loopback_or_stub(ns) { - upstream = Some(ns.to_string()); - } - } - } - } else if line.starts_with("search") || line.starts_with("domain") { + if line.starts_with("search") || line.starts_with("domain") { for domain in line.split_whitespace().skip(1) { search_domains.push(domain.to_string()); } @@ -243,6 +248,21 @@ fn parse_resolv_conf(path: &str) -> (Option<String>, Vec<String>) { (upstream, search_domains) } +/// True if the resolv.conf *content* appears to 
be written by numa itself, +/// or has no real upstream — either way, it's not a safe source of truth +/// for a backup. +#[cfg(any(target_os = "linux", test))] +fn resolv_conf_is_numa_managed(content: &str) -> bool { + content.contains("Generated by Numa") || !resolv_conf_has_real_upstream(content) +} + +/// True if the resolv.conf content has at least one non-loopback, non-stub +/// nameserver. An all-loopback resolv.conf is self-referential. +#[cfg(any(target_os = "linux", test))] +fn resolv_conf_has_real_upstream(content: &str) -> bool { + iter_nameservers(content).any(|ns| !is_loopback_or_stub(ns)) +} + /// Query resolvectl for the real upstream DNS server (e.g. VPC resolver on AWS). #[cfg(target_os = "linux")] fn resolvectl_dns_server() -> Option { @@ -526,9 +546,19 @@ fn enable_dnscache() { .status(); } +/// True if the backup map has at least one real upstream (non-loopback, non-stub). +#[cfg(any(windows, test))] +fn backup_has_real_upstream_windows( + interfaces: &std::collections::HashMap, +) -> bool { + interfaces + .values() + .any(|iface| iface.servers.iter().any(|s| !is_loopback_or_stub(s))) +} + #[cfg(windows)] fn install_windows() -> Result<(), String> { - let interfaces = get_windows_interfaces()?; + let mut interfaces = get_windows_interfaces()?; if interfaces.is_empty() { return Err("no active network interfaces found".to_string()); } @@ -538,9 +568,30 @@ fn install_windows() -> Result<(), String> { std::fs::create_dir_all(parent) .map_err(|e| format!("failed to create {}: {}", parent.display(), e))?; } - let json = serde_json::to_string_pretty(&interfaces) - .map_err(|e| format!("failed to serialize backup: {}", e))?; - std::fs::write(&path, json).map_err(|e| format!("failed to write backup: {}", e))?; + + // Preserve an existing useful backup rather than overwriting it with + // numa-managed state (which would be self-referential after uninstall). 
+ let existing: Option> = + std::fs::read_to_string(&path) + .ok() + .and_then(|json| serde_json::from_str(&json).ok()); + let has_useful_existing = existing + .as_ref() + .map(backup_has_real_upstream_windows) + .unwrap_or(false); + + if has_useful_existing { + eprintln!(" Existing DNS backup preserved at {}", path.display()); + } else { + // Filter loopback/stub addresses before saving so a fresh backup + // captured from already-numa-managed state isn't self-referential. + for iface in interfaces.values_mut() { + iface.servers.retain(|s| !is_loopback_or_stub(s)); + } + let json = serde_json::to_string_pretty(&interfaces) + .map_err(|e| format!("failed to serialize backup: {}", e))?; + std::fs::write(&path, json).map_err(|e| format!("failed to write backup: {}", e))?; + } for name in interfaces.keys() { let status = std::process::Command::new("netsh") @@ -570,7 +621,10 @@ fn install_windows() -> Result<(), String> { let needs_reboot = disable_dnscache()?; register_autostart(); - eprintln!("\n Original DNS saved to {}", path.display()); + eprintln!(); + if !has_useful_existing { + eprintln!(" Original DNS saved to {}", path.display()); + } eprintln!(" Run 'numa uninstall' to restore.\n"); if needs_reboot { eprintln!(" *** Reboot required. Numa will start automatically. ***\n"); @@ -754,27 +808,60 @@ fn get_dns_servers(service: &str) -> Result, String> { } } +/// True if the backup map has at least one real upstream (non-loopback, non-stub). +/// An all-loopback backup is self-referential — restoring it is a no-op. 
+#[cfg(any(target_os = "macos", test))] +fn backup_has_real_upstream_macos( + servers: &std::collections::HashMap<String, Vec<String>>, +) -> bool { + servers + .values() + .any(|list| list.iter().any(|s| !is_loopback_or_stub(s))) +} + #[cfg(target_os = "macos")] fn install_macos() -> Result<(), String> { use std::collections::HashMap; let services = get_network_services()?; - let mut original: HashMap<String, Vec<String>> = HashMap::new(); - - // Save current DNS for each service - for service in &services { - let servers = get_dns_servers(service)?; - original.insert(service.clone(), servers); - } - - // Save backup let dir = numa_data_dir(); std::fs::create_dir_all(&dir) .map_err(|e| format!("failed to create {}: {}", dir.display(), e))?; - let json = serde_json::to_string_pretty(&original) - .map_err(|e| format!("failed to serialize backup: {}", e))?; - std::fs::write(backup_path(), json).map_err(|e| format!("failed to write backup: {}", e))?; + // If a useful backup already exists (at least one non-loopback upstream), + // preserve it — overwriting would destroy the original DNS state when + // re-installing on top of a numa-managed configuration. + let existing_backup: Option<HashMap<String, Vec<String>>> = + std::fs::read_to_string(backup_path()) + .ok() + .and_then(|json| serde_json::from_str(&json).ok()); + let has_useful_existing = existing_backup + .as_ref() + .map(backup_has_real_upstream_macos) + .unwrap_or(false); + + if has_useful_existing { + eprintln!( + " Existing DNS backup preserved at {}", + backup_path().display() + ); + } else { + // Capture fresh, filtering out loopback and stub addresses so we + // never record a self-referential backup. + let mut original: HashMap<String, Vec<String>> = HashMap::new(); + for service in &services { + let servers: Vec<String> = get_dns_servers(service)? 
+ .into_iter() + .filter(|s| !is_loopback_or_stub(s)) + .collect(); + original.insert(service.clone(), servers); + } + + let json = serde_json::to_string_pretty(&original) + .map_err(|e| format!("failed to serialize backup: {}", e))?; + std::fs::write(backup_path(), json) + .map_err(|e| format!("failed to write backup: {}", e))?; + } // Set DNS to 127.0.0.1 and add "numa" search domain for each service for service in &services { @@ -795,7 +882,10 @@ fn install_macos() -> Result<(), String> { .status(); } - eprintln!("\n Original DNS saved to {}", backup_path().display()); + eprintln!(); + if !has_useful_existing { + eprintln!(" Original DNS saved to {}", backup_path().display()); + } eprintln!(" Run 'sudo numa uninstall' to restore.\n"); Ok(()) @@ -1132,11 +1222,31 @@ fn install_linux() -> Result<(), String> { .map_err(|e| format!("failed to create {}: {}", parent.display(), e))?; } - // Back up current resolv.conf (ignore NotFound) - match std::fs::copy(resolv, &backup) { - Ok(_) => eprintln!(" Saved /etc/resolv.conf to {}", backup.display()), - Err(e) if e.kind() == std::io::ErrorKind::NotFound => {} - Err(e) => return Err(format!("failed to backup /etc/resolv.conf: {}", e)), + // Back up current resolv.conf, but never overwrite a useful existing + // backup with a numa-managed file — that would leave uninstall with + // nothing to restore to. 
+ let current = std::fs::read_to_string(resolv).ok(); + let current_is_numa_managed = current + .as_deref() + .map(resolv_conf_is_numa_managed) + .unwrap_or(false); + let existing_backup_is_useful = std::fs::read_to_string(&backup) + .ok() + .as_deref() + .map(resolv_conf_has_real_upstream) + .unwrap_or(false); + + if existing_backup_is_useful { + eprintln!( + " Existing resolv.conf backup preserved at {}", + backup.display() + ); + } else if current_is_numa_managed { + eprintln!(" warning: /etc/resolv.conf is already numa-managed; no fresh backup written"); + } else if let Some(content) = current.as_deref() { + std::fs::write(&backup, content) + .map_err(|e| format!("failed to backup /etc/resolv.conf: {}", e))?; + eprintln!(" Saved /etc/resolv.conf to {}", backup.display()); } if resolv @@ -1539,6 +1649,82 @@ Wireless LAN adapter Wi-Fi: assert!(!result.contains("{{exe_path}}")); } + #[test] + fn macos_backup_real_upstream_detection() { + use std::collections::HashMap; + let mut map: HashMap> = HashMap::new(); + + // Empty backup → no real upstream + assert!(!backup_has_real_upstream_macos(&map)); + + // All-loopback backup → still no real upstream (the bug case) + map.insert("Wi-Fi".into(), vec!["127.0.0.1".into()]); + map.insert("Ethernet".into(), vec!["::1".into()]); + assert!(!backup_has_real_upstream_macos(&map)); + + // One real entry → useful + map.insert("Tailscale".into(), vec!["192.168.1.1".into()]); + assert!(backup_has_real_upstream_macos(&map)); + } + + #[test] + fn windows_backup_filters_loopback() { + use std::collections::HashMap; + let mut map: HashMap = HashMap::new(); + + // Empty backup → no real upstream + assert!(!backup_has_real_upstream_windows(&map)); + + // All-loopback backup → still no real upstream (the bug case) + map.insert( + "Wi-Fi".into(), + WindowsInterfaceDns { + dhcp: false, + servers: vec!["127.0.0.1".into()], + }, + ); + map.insert( + "Ethernet".into(), + WindowsInterfaceDns { + dhcp: false, + servers: vec!["::1".into(), 
"0.0.0.0".into()], + }, + ); + assert!(!backup_has_real_upstream_windows(&map)); + + // One real entry alongside loopback → useful + map.insert( + "Ethernet 2".into(), + WindowsInterfaceDns { + dhcp: false, + servers: vec!["192.168.1.1".into()], + }, + ); + assert!(backup_has_real_upstream_windows(&map)); + } + + #[test] + fn resolv_conf_real_upstream_detection() { + let real = "nameserver 192.168.1.1\nsearch lan\n"; + assert!(resolv_conf_has_real_upstream(real)); + assert!(!resolv_conf_is_numa_managed(real)); + + let self_ref = "nameserver 127.0.0.1\nsearch numa\n"; + assert!(!resolv_conf_has_real_upstream(self_ref)); + assert!(resolv_conf_is_numa_managed(self_ref)); + + let numa_marker = + "# Generated by Numa — run 'sudo numa uninstall' to restore\nnameserver 127.0.0.1\nsearch numa\n"; + assert!(resolv_conf_is_numa_managed(numa_marker)); + + let systemd_stub = "nameserver 127.0.0.53\noptions edns0\n"; + assert!(!resolv_conf_has_real_upstream(systemd_stub)); + + let mixed = "nameserver 127.0.0.1\nnameserver 1.1.1.1\n"; + assert!(resolv_conf_has_real_upstream(mixed)); + assert!(!resolv_conf_is_numa_managed(mixed)); + } + #[test] fn parse_ipconfig_skips_disconnected() { let sample = "\ From bf5565ac262e65322fc0eafa66b510cf7d8110ad Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 8 Apr 2026 16:54:21 +0300 Subject: [PATCH 27/30] fix: macOS use launchctl bootout/bootstrap instead of deprecated load (#42) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The deprecated `launchctl load -w` returns exit code 0 even when it cannot actually reload a service whose label is already in launchd's in-memory state. It prints `Load failed: 5: Input/output error` to stderr but exits 0, so the install path interprets it as success and continues — silently leaving the running daemon on whatever binary was first loaded, even though the on-disk plist now points elsewhere. 
The consequence: every macOS user running `brew upgrade numa` rewrites the plist to point at the new binary, but launchctl never actually loads it. They think they upgraded; they're still running the old version. Neither #41 (cross-platform CA trust) nor #40 (self-referential backup) would actually take effect for them until they manually run: sudo launchctl bootout system /Library/LaunchDaemons/com.numa.dns.plist sudo launchctl bootstrap system /Library/LaunchDaemons/com.numa.dns.plist The fix uses the modern API symmetrically across all three call sites: - install_service_macos: bootout (best-effort cleanup, no-op on first install) → bootstrap → wait for readiness → configure DNS - install_service_macos rollback path: bootout instead of `unload` - uninstall_service_macos: bootout BEFORE remove_file (the modern API needs the plist file path as the specifier; doing it after remove would leave the service in memory until reboot) No new tests — this is a shell-call substitution with no logic to unit-test. Verified manually on macOS: `sudo numa install` no longer prints `Load failed`, and the daemon is correctly running the binary the plist points at. Co-authored-by: Claude Opus 4.6 (1M context) --- src/system_dns.rs | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/src/system_dns.rs b/src/system_dns.rs index 643b9d0..b24b3ad 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -1080,14 +1080,23 @@ fn install_service_macos() -> Result<(), String> { std::fs::write(PLIST_DEST, plist) .map_err(|e| format!("failed to write {}: {}", PLIST_DEST, e))?; - // Load the service first so numa is listening before DNS redirect + // Modern launchctl API: explicitly tear down any existing in-memory + // state, then bootstrap fresh from the on-disk plist. 
The deprecated + // `load -w` returns exit 0 even when it cannot actually reload (label + // already in launchd state), silently leaving the daemon running a + // stale binary path after `numa install` rewrites the plist on disk — + // which is exactly what `brew upgrade numa` does. + let _ = std::process::Command::new("launchctl") + .args(["bootout", "system", PLIST_DEST]) + .status(); + let status = std::process::Command::new("launchctl") - .args(["load", "-w", PLIST_DEST]) + .args(["bootstrap", "system", PLIST_DEST]) .status() .map_err(|e| format!("failed to run launchctl: {}", e))?; if !status.success() { - return Err("launchctl load failed".to_string()); + return Err("launchctl bootstrap failed".to_string()); } // Wait for numa to be ready before redirecting DNS @@ -1100,7 +1109,7 @@ fn install_service_macos() -> Result<(), String> { if !api_up { // Service failed to start — don't redirect DNS to a dead endpoint let _ = std::process::Command::new("launchctl") - .args(["unload", PLIST_DEST]) + .args(["bootout", "system", PLIST_DEST]) .status(); return Err( "numa service did not start (port 53 may be in use). Service unloaded.".to_string(), @@ -1128,22 +1137,25 @@ fn uninstall_service_macos() -> Result<(), String> { eprintln!(" warning: failed to restore system DNS: {}", e); } - // Remove plist first so service won't restart on boot even if unload fails - if let Err(e) = std::fs::remove_file(PLIST_DEST) { - if e.kind() != std::io::ErrorKind::NotFound { - return Err(format!("failed to remove {}: {}", PLIST_DEST, e)); + // Bootout the service from launchd's in-memory state BEFORE removing + // the plist. The modern API needs the file path as the specifier; + // doing this in the wrong order would leave the service loaded in + // memory until reboot. (Deprecated `unload -w` had the same issue.) 
+ let bootout_status = std::process::Command::new("launchctl") + .args(["bootout", "system", PLIST_DEST]) + .status(); + if let Ok(s) = bootout_status { + if !s.success() { + eprintln!( + " warning: launchctl bootout returned non-zero (service may not have been loaded)" + ); } } - // Unload the service - let status = std::process::Command::new("launchctl") - .args(["unload", "-w", PLIST_DEST]) - .status(); - if let Ok(s) = status { - if !s.success() { - eprintln!( - " warning: launchctl unload returned non-zero (service may still be running)" - ); + // Remove plist so the service won't restart on boot + if let Err(e) = std::fs::remove_file(PLIST_DEST) { + if e.kind() != std::io::ErrorKind::NotFound { + return Err(format!("failed to remove {}: {}", PLIST_DEST, e)); } } From 79ecb73d8793a23a5fd15425d5769eaebc1fc159 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 8 Apr 2026 18:00:27 +0300 Subject: [PATCH 28/30] fix: use FHS-compliant /var/lib/numa as Linux data dir default (#43) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: use FHS-compliant /var/lib/numa as Linux data dir default numa's default system-wide data directory was hardcoded to /usr/local/var/numa for all Unix platforms. This is the right path on macOS (Homebrew prefix convention) but non-FHS on Linux, where Arch / Fedora / Debian / etc. expect persistent state under /var/lib/. The mismatch was invisible to existing users (numa creates the dir silently on first run) but immediately surfaces when packaging for a distro — see PR #33 (community contribution to add an Arch AUR package) which had to add fragile sed-based path patching at PKGBUILD build time. The fix moves the path decision into a small helper: - daemon_data_dir() — cfg-gated platform dispatch (linux/macos) - resolve_linux_data_dir() — pure function, takes "does X exist?" 
as parameters, returns the right path Linux behavior: - Fresh install → /var/lib/numa (FHS) - Upgrading from pre-v0.10.1 install → /usr/local/var/numa (legacy) - Both paths exist → /var/lib/numa (FHS wins) The legacy fallback is critical: existing v0.10.0 Linux users have their CA cert + services.json under /usr/local/var/numa. Returning the new path unconditionally would cause CA regeneration on upgrade, breaking every browser that had trusted the previous CA. The fallback is checked at startup via std::path::Path::exists, so the upgrade is seamless and zero-config. macOS behavior is unchanged — /usr/local/var/numa is still correct because Homebrew's prefix is /usr/local. Test coverage: - resolve_linux_data_dir is a pure function gated cfg(any(linux,test)) so the same code path is unit-tested on every platform's CI run. - Four tests cover all combinations of (legacy_exists, fhs_exists), asserting the migration logic stays correct under future edits. The default config in numa.toml is also updated to document the new per-platform default paths. Co-Authored-By: Claude Opus 4.6 (1M context) * test: end-to-end FHS path verification + simplify cleanup Two related changes from a /simplify pass and a follow-up testing finalization: 1. lib.rs cleanup (no behavior change): - Drop FHS_LINUX_DATA_DIR and LEGACY_LINUX_DATA_DIR consts. Both were used in only 4 places total and the unit tests already bypassed them with string literals, so they were over-engineering. Inline the strings in daemon_data_dir() and resolve_linux_data_dir(). - Trim narrating doc/comments on the helper and the test bodies. Keep only the non-obvious WHY (the macOS Homebrew note and the migration-keeps-legacy rationale). 2. tests/docker/smoke-arch.sh: - Cherry-picked the previously-uncommitted Arch compatibility smoke test from feat/smoke-arch. 
- Removed the [server] data_dir = "/tmp/numa-smoke" override from the test config so the script now exercises the DEFAULT data dir code path — which is exactly what the FHS fix touches. - Added a path assertion after the dig succeeds: verify that /var/lib/numa/ca.pem exists (FHS) and /usr/local/var/numa is absent (no accidental dual-creation on a fresh install). Verified end-to-end on archlinux:latest (Apple Silicon, Rosetta): ── building + running numa on archlinux:latest ── ── cargo build --release --locked ── Finished `release` profile [optimized] target(s) in 24.02s ── dig @127.0.0.1 -p 5354 google.com A ── 142.251.38.206 ── FHS path check ── ✓ CA cert at /var/lib/numa/ca.pem (FHS path) ✓ legacy path /usr/local/var/numa absent (fresh install used FHS) ── smoke-arch passed ── This closes the testing gap where the unit tests covered the path-decision LOGIC in isolation but nothing exercised the live wiring on a real Linux filesystem. Co-Authored-By: Claude Opus 4.6 (1M context) --------- Co-authored-by: Claude Opus 4.6 (1M context) --- numa.toml | 11 +-- src/lib.rs | 67 ++++++++++++++++- tests/docker/smoke-arch.sh | 147 +++++++++++++++++++++++++++++++++++++ 3 files changed, 217 insertions(+), 8 deletions(-) create mode 100755 tests/docker/smoke-arch.sh diff --git a/numa.toml b/numa.toml index 35d92de..77ba231 100644 --- a/numa.toml +++ b/numa.toml @@ -2,11 +2,12 @@ bind_addr = "0.0.0.0:53" api_port = 5380 # api_bind_addr = "127.0.0.1" # default; set to "0.0.0.0" for LAN dashboard access -# data_dir = "/usr/local/var/numa" # where numa stores TLS CA and cert material - # (default: /usr/local/var/numa on unix, - # %PROGRAMDATA%\numa on windows). Override for - # containerized deploys or tests that can't - # write to the system path. +# data_dir = "/var/lib/numa" # where numa stores TLS CA and cert material + # Defaults: /var/lib/numa on linux (FHS), + # /usr/local/var/numa on macos (homebrew prefix), + # %PROGRAMDATA%\numa on windows. 
Override for + # containerized deploys or tests that can't + # write to the system path. # [upstream] # mode = "forward" # "forward" (default) — relay to upstream diff --git a/src/lib.rs b/src/lib.rs index 347e72f..6455506 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,7 +26,10 @@ pub type Error = Box; pub type Result = std::result::Result; /// Shared config directory for persistent data (services.json, etc). -/// Unix: ~/.config/numa/ (or /usr/local/var/numa/ when running as root daemon) +/// Unix users: ~/.config/numa/ +/// Linux root daemon: /var/lib/numa (FHS) — falls back to /usr/local/var/numa +/// if a pre-v0.10.1 install already lives there. +/// macOS root daemon: /usr/local/var/numa (Homebrew prefix) /// Windows: %APPDATA%\numa pub fn config_dir() -> std::path::PathBuf { #[cfg(windows)] @@ -63,13 +66,15 @@ fn config_dir_unix() -> std::path::PathBuf { } // Running as root daemon (launchd/systemd) — use system-wide path - std::path::PathBuf::from("/usr/local/var/numa") + daemon_data_dir() } /// Default system-wide data directory for TLS certs. Overridable via /// `[server] data_dir = "..."` in numa.toml — this function only provides /// the fallback when the config doesn't set it. -/// Unix: /usr/local/var/numa +/// Linux: /var/lib/numa (FHS) — falls back to /usr/local/var/numa if a +/// pre-v0.10.1 install already has data there. +/// macOS: /usr/local/var/numa (Homebrew prefix) /// Windows: %PROGRAMDATA%\numa pub fn data_dir() -> std::path::PathBuf { #[cfg(windows)] @@ -81,6 +86,62 @@ pub fn data_dir() -> std::path::PathBuf { } #[cfg(not(windows))] { + daemon_data_dir() + } +} + +/// Resolve the system-wide data directory for the running platform. +/// Honors backwards compatibility with pre-v0.10.1 installs that still +/// have their CA cert + services.json under `/usr/local/var/numa`. 
+#[cfg(not(windows))]
+fn daemon_data_dir() -> std::path::PathBuf {
+    #[cfg(target_os = "linux")]
+    {
+        std::path::PathBuf::from(resolve_linux_data_dir(
+            std::path::Path::new("/usr/local/var/numa").exists(),
+            std::path::Path::new("/var/lib/numa").exists(),
+        ))
+    }
+    #[cfg(not(target_os = "linux"))]
+    {
+        // macOS (Homebrew prefix) and other non-Linux Unixes keep this path; no FHS migration.
         std::path::PathBuf::from("/usr/local/var/numa")
     }
 }
+
+/// Extracted as a pure function so the migration logic is unit-testable
+/// without touching the real filesystem.
+#[cfg(any(target_os = "linux", test))]
+fn resolve_linux_data_dir(legacy_exists: bool, fhs_exists: bool) -> &'static str {
+    if legacy_exists && !fhs_exists {
+        "/usr/local/var/numa"
+    } else {
+        "/var/lib/numa"
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn linux_data_dir_fresh_install_uses_fhs() {
+        assert_eq!(resolve_linux_data_dir(false, false), "/var/lib/numa");
+    }
+
+    #[test]
+    fn linux_data_dir_upgrading_install_keeps_legacy() {
+        // Migration must keep legacy so the user doesn't lose their CA on upgrade.
+        assert_eq!(resolve_linux_data_dir(true, false), "/usr/local/var/numa");
+    }
+
+    #[test]
+    fn linux_data_dir_after_migration_uses_fhs() {
+        assert_eq!(resolve_linux_data_dir(true, true), "/var/lib/numa");
+    }
+
+    #[test]
+    fn linux_data_dir_only_fhs_uses_fhs() {
+        assert_eq!(resolve_linux_data_dir(false, true), "/var/lib/numa");
+    }
+}
diff --git a/tests/docker/smoke-arch.sh b/tests/docker/smoke-arch.sh
new file mode 100755
index 0000000..12e779e
--- /dev/null
+++ b/tests/docker/smoke-arch.sh
@@ -0,0 +1,147 @@
+#!/usr/bin/env bash
+#
+# Arch Linux compatibility smoke test.
+#
+# Builds numa from source inside an archlinux:latest container, runs it
+# in forward mode on port 5354, and verifies a single DNS query returns
+# an A record. Validates the "Arch compatible" claim end-to-end before
+# release announcements.
+# +# Dogfooding: the test numa forwards to the host's running numa via +# host.docker.internal (Docker Desktop's host gateway). This avoids the +# Docker NAT/UDP issues with public resolvers and exercises the realistic +# numa-on-numa shape. Requires the host to be running numa on port 53. +# +# First run is slow (~8-12 min): image pull + pacman + cold cargo build. +# No caching across runs. +# +# Requirements: docker, host running numa on 0.0.0.0:53 +# Usage: ./tests/docker/smoke-arch.sh + +set -euo pipefail + +cd "$(dirname "$0")/../.." + +GREEN="\033[32m"; RED="\033[31m"; RESET="\033[0m" + +# Precondition: the test numa-on-arch forwards to the host numa as its +# upstream (dogfood pattern). Fail fast with a clear error if there is +# no working DNS on the host, rather than letting the dig inside the +# container time out with "deadline has elapsed". +if ! dig @127.0.0.1 google.com A +short +time=1 +tries=1 >/dev/null 2>&1; then + printf "${RED}error:${RESET} host numa is not answering on 127.0.0.1:53\n" >&2 + echo " This test forwards to the host numa via host.docker.internal." >&2 + echo " Start numa on the host first (sudo numa install), then rerun." >&2 + exit 1 +fi + +echo "── building + running numa on archlinux:latest ──" +echo " (first run is slow: image pull + pacman + cold cargo build, ~8-12 min)" +echo + +docker run --rm \ + --platform linux/amd64 \ + --security-opt seccomp=unconfined \ + -v "$PWD:/src:ro" \ + -v numa-arch-cargo:/root/.cargo \ + -v numa-arch-target:/work/target \ + archlinux:latest bash -c ' + set -e + + # pacman 7+ filters syscalls in its own sandbox; disable for Rosetta/qemu + sed -i "s/^#DisableSandboxSyscalls/DisableSandboxSyscalls/" /etc/pacman.conf + + echo "── pacman: installing build + runtime deps ──" + pacman -Sy --noconfirm --needed rust gcc pkgconf cmake make perl bind 2>&1 | tail -3 + echo + + # Copy source to a writable workdir, skipping target/ + .git so we + # do not pull in the host (macOS) build artifacts. 
+ mkdir -p /work + tar -C /src --exclude=./target --exclude=./.git -cf - . | tar -C /work -xf - + cd /work + + echo "── cargo build --release --locked ──" + cargo build --release --locked 2>&1 | tail -5 + echo + + # Dogfood: forward to the host numa via host.docker.internal. + # numa parses upstream.address as a literal SocketAddr, so we resolve + # the hostname to an IPv4 address first (force v4 — getent hosts may + # return IPv6 first, and IPv6 addresses need bracketed addr:port form). + HOST_IP=$(getent ahostsv4 host.docker.internal | awk "/STREAM/ {print \$1; exit}") + if [ -z "$HOST_IP" ]; then + echo " ✗ could not resolve host.docker.internal to IPv4 (not on Docker Desktop?)" + exit 1 + fi + echo "── starting numa on :5354 (forward to host numa at $HOST_IP:53) ──" + # Intentionally NOT setting [server] data_dir — we want to exercise the + # default code path (data_dir() → daemon_data_dir() → /var/lib/numa) so + # the FHS-path assertion below verifies the live wiring, not just the + # unit-tested helper. + cat > /tmp/numa.toml < /tmp/numa.log 2>&1 & + NUMA_PID=$! + + # Poll for readiness — numa is ready when it answers a query + READY=0 + for i in 1 2 3 4 5 6 7 8; do + sleep 1 + if dig @127.0.0.1 -p 5354 google.com A +short +time=1 +tries=1 2>/dev/null \ + | grep -qE "^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$"; then + READY=1 + break + fi + done + + if [ "$READY" -ne 1 ]; then + echo " ✗ numa did not return an A record after 8s" + echo " numa log:" + cat /tmp/numa.log + kill $NUMA_PID 2>/dev/null || true + exit 1 + fi + + echo "── dig @127.0.0.1 -p 5354 google.com A ──" + ANSWER=$(dig @127.0.0.1 -p 5354 google.com A +short +time=2 +tries=1) + echo "$ANSWER" | sed "s/^/ /" + + kill $NUMA_PID 2>/dev/null || true + + # FHS path assertion: the default data dir on Linux must be /var/lib/numa + # (not the legacy /usr/local/var/numa). The CA cert generated at startup + # is the canonical proof that numa wrote to the right place. 
+ echo + echo "── FHS path check ──" + if [ -f /var/lib/numa/ca.pem ]; then + echo " ✓ CA cert at /var/lib/numa/ca.pem (FHS path)" + else + echo " ✗ CA cert NOT at /var/lib/numa/ca.pem" + echo " ls /var/lib/numa/:" + ls -la /var/lib/numa/ 2>&1 | sed "s/^/ /" + echo " ls /usr/local/var/numa/:" + ls -la /usr/local/var/numa/ 2>&1 | sed "s/^/ /" + exit 1 + fi + if [ -e /usr/local/var/numa ]; then + echo " ✗ legacy path /usr/local/var/numa unexpectedly exists on a fresh container" + exit 1 + fi + echo " ✓ legacy path /usr/local/var/numa absent (fresh install used FHS)" + + echo + echo " ✓ numa built, ran, answered a forward query, and used the FHS data dir on Arch" +' + +echo +printf "${GREEN}── smoke-arch passed ──${RESET}\n" From b2ed2e6aec43b30ec26c477d63d32c0170e58fb3 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 8 Apr 2026 18:05:00 +0300 Subject: [PATCH 29/30] chore: bump version to 0.10.1 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8750934..c36808c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1143,7 +1143,7 @@ dependencies = [ [[package]] name = "numa" -version = "0.10.0" +version = "0.10.1" dependencies = [ "arc-swap", "axum", diff --git a/Cargo.toml b/Cargo.toml index f0278b9..8ca29d0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "numa" -version = "0.10.0" +version = "0.10.1" authors = ["razvandimescu "] edition = "2021" description = "Portable DNS resolver in Rust — .numa local domains, ad blocking, developer overrides, DNS-over-HTTPS" From 27dfaab36019d30f452332bbc56494cfb7a40c4b Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 8 Apr 2026 18:26:21 +0300 Subject: [PATCH 30/30] ci: pass PAT to action-gh-release so release events propagate (#44) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GitHub Actions deliberately does not propagate workflow events triggered by the default 
GITHUB_TOKEN — a safety feature against infinite loops. softprops/action-gh-release falls back to GITHUB_TOKEN when no `token` is supplied, so the resulting `release: published` event was silently swallowed and never reached homebrew-bump.yml. Discovered shipping v0.10.1: tag pushed cleanly, crates.io published cleanly, GitHub release page created cleanly, but the brew tap never auto-bumped. Had to trigger homebrew-bump.yml manually via workflow_dispatch. Fix: pass HOMEBREW_TAP_GITHUB_TOKEN explicitly. This is already a PAT (used by homebrew-bump.yml to push cross-repo to razvandimescu/ homebrew-tap), so reusing it keeps the secret surface flat. PAT-authored release events are the documented escape hatch from the GITHUB_TOKEN no-propagation rule. Applies to v0.10.2+. v0.10.1 was bumped manually. Co-authored-by: Claude Opus 4.6 (1M context) --- .github/workflows/release.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 057a8d0..3396667 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -103,6 +103,14 @@ jobs: - name: Create Release uses: softprops/action-gh-release@v2 with: + # Use a PAT (not the default GITHUB_TOKEN) so the resulting + # `release: published` event propagates to downstream workflows + # like homebrew-bump.yml. Events triggered by GITHUB_TOKEN are + # deliberately not propagated by GitHub Actions to prevent + # infinite loops; PAT-authored events are the documented escape + # hatch. Reusing HOMEBREW_TAP_GITHUB_TOKEN (already a PAT used + # by homebrew-bump.yml itself) keeps the secret surface flat. + token: ${{ secrets.HOMEBREW_TAP_GITHUB_TOKEN }} generate_release_notes: true files: | *.tar.gz