feat(odoh): ship ODoH client + self-hosted relay (RFC 9230)

Client (mode = "odoh"): URL-query target routing per RFC 9230 §5,
/.well-known/odohconfigs TTL cache with 60s backoff on failure, HPKE
seal/open via odoh-rs, strict-mode default that SERVFAILs on relay
failure instead of silently downgrading. Host-equality config
validation rejects same-operator relay/target pairs.

Relay (`numa relay [PORT]`): axum server with /relay + /health.
SSRF-hardened hostname validator (RFC 1035 ASCII + dot + dash),
4 KiB body cap at the axum layer, 5s full-transaction timeout, and
static 502 on target failure (reqwest internals logged, not leaked).
Aggregate counters only — no per-request logs.

Observability: new `UpstreamTransport { Udp, Doh, Dot, Odoh }`
orthogonal to `QueryPath`, so /stats can tally wire protocols
symmetrically. Recursive mode records `Some(Udp)` for honest
"bytes egressing in cleartext" accounting.

Tests: Suite 8 exercises the client end-to-end via Frank Denis's
public relay + Cloudflare target; Suite 9 exercises `numa relay`
forwarding + guards against Cloudflare as the real far end. Full
probe script at tests/probe-odoh-ecosystem.sh verifies the entire
public ODoH ecosystem (4 targets + 1 relay per DNSCrypt's curated
list — confirms deploying Numa's relay doubles global supply).
This commit is contained in:
Razvan Dimescu
2026-04-20 12:34:04 +03:00
parent f6cfb3ce1b
commit 241c40553b
14 changed files with 1911 additions and 46 deletions

View File

@@ -854,6 +854,203 @@ sleep 1
fi # end Suite 7
# ---- Suite 8: ODoH (Oblivious DoH via public relay + target) ----
# Exercises the full client pipeline: /.well-known/odohconfigs fetch,
# HPKE seal/unseal, URL-query target routing (RFC 9230 §5), dashboard
# QueryPath::Odoh counter. Depends on the public ecosystem being up —
# the probe-odoh-ecosystem.sh script guards against flaky runs.
if should_run_suite 8; then
echo ""
echo "╔══════════════════════════════════════════╗"
echo "║ Suite 8: ODoH (Anonymous DNS) ║"
echo "╚══════════════════════════════════════════╝"
run_test_suite "ODoH via edgecompute.app relay → Cloudflare target" "
[server]
bind_addr = \"127.0.0.1:5354\"
api_port = 5381
[upstream]
mode = \"odoh\"
relay = \"https://odoh-relay.edgecompute.app/proxy\"
target = \"https://odoh.cloudflare-dns.com/dns-query\"
[cache]
max_entries = 10000
min_ttl = 60
max_ttl = 86400
[blocking]
enabled = false
[proxy]
enabled = false
"
# Re-start briefly to assert ODoH-specific observability: the odoh counter
# has to tick above zero after a query, and the stats label has to reflect
# the oblivious path. These guard against silent regressions in the
# QueryPath::Odoh tagging and the /stats serialisation.
RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 &
NUMA_PID=$!
for _ in $(seq 1 30); do
curl -sf "http://127.0.0.1:$API_PORT/health" >/dev/null 2>&1 && break
sleep 0.1
done
$DIG example.com A +short > /dev/null 2>&1 || true
sleep 1
STATS=$(curl -sf http://127.0.0.1:$API_PORT/stats 2>/dev/null)
# upstream_transport.odoh lives inside the upstream_transport object.
ODOH_COUNT=$(echo "$STATS" | grep -o '"upstream_transport":{[^}]*}' \
| grep -o '"odoh":[0-9]*' | cut -d: -f2)
check "upstream_transport.odoh > 0 after a query" "[1-9]" "${ODOH_COUNT:-0}"
check "Upstream label advertises odoh://" \
"odoh://" \
"$(echo "$STATS" | grep -o '"upstream":"[^"]*"')"
check "Stats mode field is 'odoh'" \
'"mode":"odoh"' \
"$(echo "$STATS" | grep -o '"mode":"odoh"')"
# Strict-mode failure path: a clearly-unreachable relay must produce
# SERVFAIL without silent downgrade. We hijack the config to point at
# an .invalid host so we don't rely on external uptime.
kill "$NUMA_PID" 2>/dev/null || true
wait "$NUMA_PID" 2>/dev/null || true
sleep 1
cat > "$CONFIG" << 'CONF'
[server]
bind_addr = "127.0.0.1:5354"
api_port = 5381
[upstream]
mode = "odoh"
relay = "https://relay.invalid/proxy"
target = "https://odoh.cloudflare-dns.com/dns-query"
strict = true
[cache]
max_entries = 10000
[blocking]
enabled = false
[proxy]
enabled = false
CONF
RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 &
NUMA_PID=$!
for _ in $(seq 1 30); do
curl -sf "http://127.0.0.1:$API_PORT/health" >/dev/null 2>&1 && break
sleep 0.1
done
check "Strict-mode relay outage returns SERVFAIL" \
"SERVFAIL" \
"$($DIG example.com A 2>&1 | grep 'status:')"
kill "$NUMA_PID" 2>/dev/null || true
wait "$NUMA_PID" 2>/dev/null || true
sleep 1
# Negative: relay and target on the same host must be rejected at startup.
cat > "$CONFIG" << 'CONF'
[server]
bind_addr = "127.0.0.1:5354"
api_port = 5381
[upstream]
mode = "odoh"
relay = "https://odoh.cloudflare-dns.com/proxy"
target = "https://odoh.cloudflare-dns.com/dns-query"
CONF
STARTUP_OUT=$("$BINARY" "$CONFIG" 2>&1 || true)
check "Same-host relay+target rejected at startup" \
"same host" \
"$STARTUP_OUT"
fi # end Suite 8
# ---- Suite 9: Numa's own ODoH relay (--relay-mode) ----
# Exercises `numa relay PORT` as a forwarding proxy to a real ODoH target.
# Validates the RFC 9230 §5 relay behaviour: URL-query routing, content-type
# gating, body-size cap, and /health observability.
if should_run_suite 9; then
echo ""
echo "╔══════════════════════════════════════════╗"
echo "║ Suite 9: Numa ODoH Relay (own) ║"
echo "╚══════════════════════════════════════════╝"
RELAY_PORT=18443
"$BINARY" relay $RELAY_PORT > "$LOG" 2>&1 &
NUMA_PID=$!
for _ in $(seq 1 30); do
curl -sf "http://127.0.0.1:$RELAY_PORT/health" >/dev/null 2>&1 && break
sleep 0.1
done
echo ""
echo "=== Relay Endpoints ==="
check "Health endpoint returns ok" \
"ok" \
"$(curl -sf http://127.0.0.1:$RELAY_PORT/health | head -1)"
# Happy path: forwards arbitrary body to Cloudflare's ODoH target. The
# target will reject the garbage envelope with HTTP 400 — which is exactly
# what proves our relay faithfully forwarded (otherwise we'd see our own
# 4xx from the relay itself).
HAPPY_STATUS=$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
-H "Content-Type: application/oblivious-dns-message" \
--data-binary "garbage-forwarded-end-to-end" \
"http://127.0.0.1:$RELAY_PORT/relay?targethost=odoh.cloudflare-dns.com&targetpath=/dns-query")
check "Relay forwards to target (target rejects garbage → 400)" \
"400" \
"$HAPPY_STATUS"
echo ""
echo "=== Guards ==="
check "Missing content-type → 415" \
"415" \
"$(curl -sS -o /dev/null -w '%{http_code}' -X POST --data-binary 'x' \
'http://127.0.0.1:'$RELAY_PORT'/relay?targethost=odoh.cloudflare-dns.com&targetpath=/dns-query')"
check "Oversized body (>4 KiB) → 413" \
"413" \
"$(head -c 5000 /dev/urandom | curl -sS -o /dev/null -w '%{http_code}' -X POST \
-H 'Content-Type: application/oblivious-dns-message' --data-binary @- \
'http://127.0.0.1:'$RELAY_PORT'/relay?targethost=odoh.cloudflare-dns.com&targetpath=/dns-query')"
check "Invalid targethost (no dot) → 400" \
"400" \
"$(curl -sS -o /dev/null -w '%{http_code}' -X POST \
-H 'Content-Type: application/oblivious-dns-message' --data-binary 'x' \
'http://127.0.0.1:'$RELAY_PORT'/relay?targethost=invalid&targetpath=/dns-query')"
echo ""
echo "=== Counters ==="
HEALTH=$(curl -sf "http://127.0.0.1:$RELAY_PORT/health")
check "Relay counted at least one forwarded_ok" \
"[1-9]" \
"$(echo "$HEALTH" | grep 'forwarded_ok' | awk '{print $2}')"
check "Relay counted at least one rejected_bad_request" \
"[1-9]" \
"$(echo "$HEALTH" | grep 'rejected_bad_request' | awk '{print $2}')"
kill "$NUMA_PID" 2>/dev/null || true
wait "$NUMA_PID" 2>/dev/null || true
sleep 1
fi # end Suite 9
# Summary
echo ""
TOTAL=$((PASSED + FAILED))

101
tests/probe-odoh-ecosystem.sh Executable file
View File

@@ -0,0 +1,101 @@
#!/usr/bin/env bash
# Probe the public ODoH ecosystem.
#
# Source of truth: DNSCrypt's curated list at
# https://github.com/DNSCrypt/dnscrypt-resolvers/tree/master/v3
# - v3/odoh-servers.md (ODoH targets)
# - v3/odoh-relays.md (ODoH relays)
#
# As of commit 2025-09-16 ("odohrelay-crypto-sx seems to be the only ODoH
# relay left"), the full public ecosystem is 4 targets + 1 relay. Re-run this
# script against the upstream list before making any "only N public relays"
# claim publicly.
#
# Usage: ./tests/probe-odoh-ecosystem.sh
set -uo pipefail
GREEN="\033[32m"
RED="\033[31m"
YELLOW="\033[33m"
DIM="\033[90m"
RESET="\033[0m"
UP=0
DOWN=0
probe_target() {
local name="$1"
local host="$2"
local url="https://${host}/.well-known/odohconfigs"
local start=$(date +%s%N)
local headers
headers=$(curl -sS -o /tmp/odoh-probe-body -D - --max-time 5 -A "numa-odoh-probe/0.1" "$url" 2>&1) || {
DOWN=$((DOWN + 1))
printf " ${RED}${RESET} %-25s ${DIM}unreachable${RESET}\n" "$name"
return
}
local elapsed_ms=$((($(date +%s%N) - start) / 1000000))
local status
status=$(echo "$headers" | head -1 | awk '{print $2}')
local ctype
ctype=$(echo "$headers" | grep -i '^content-type:' | head -1 | tr -d '\r')
local size
size=$(stat -f%z /tmp/odoh-probe-body 2>/dev/null || stat -c%s /tmp/odoh-probe-body 2>/dev/null || echo 0)
if [[ "$status" == "200" ]] && [[ "$size" -gt 0 ]]; then
UP=$((UP + 1))
printf " ${GREEN}${RESET} %-25s ${DIM}%4dms %s bytes %s${RESET}\n" "$name" "$elapsed_ms" "$size" "$ctype"
else
DOWN=$((DOWN + 1))
printf " ${RED}${RESET} %-25s ${DIM}status=%s size=%s${RESET}\n" "$name" "$status" "$size"
fi
rm -f /tmp/odoh-probe-body
}
probe_relay() {
# Relays don't expose /.well-known/odohconfigs — we just verify TLS reachability
# and that the endpoint responds to a malformed POST with an HTTP error
# (indicating the relay path exists). A real ODoH validation requires HPKE.
local name="$1"
local url="$2"
local start=$(date +%s%N)
local status
status=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 5 -A "numa-odoh-probe/0.1" \
-X POST -H "Content-Type: application/oblivious-dns-message" \
--data-binary "" "$url" 2>&1) || {
DOWN=$((DOWN + 1))
printf " ${RED}${RESET} %-25s ${DIM}unreachable${RESET}\n" "$name"
return
}
local elapsed_ms=$((($(date +%s%N) - start) / 1000000))
# Any 2xx or 4xx means the endpoint is live (TLS works, HTTP responded).
# 5xx or 000 (curl failure) means broken.
if [[ "$status" =~ ^[24] ]]; then
UP=$((UP + 1))
printf " ${GREEN}${RESET} %-25s ${DIM}%4dms status=%s (endpoint live)${RESET}\n" "$name" "$elapsed_ms" "$status"
else
DOWN=$((DOWN + 1))
printf " ${RED}${RESET} %-25s ${DIM}status=%s${RESET}\n" "$name" "$status"
fi
}
echo "ODoH targets:"
probe_target "Cloudflare" "odoh.cloudflare-dns.com"
probe_target "crypto.sx" "odoh.crypto.sx"
probe_target "Snowstorm" "dope.snowstorm.love"
probe_target "Tiarap" "doh.tiarap.org"
echo
echo "ODoH relays:"
probe_relay "Frank Denis (Fastly)" "https://odoh-relay.edgecompute.app/proxy"
echo
TOTAL=$((UP + DOWN))
if [[ "$DOWN" -eq 0 ]]; then
printf "${GREEN}All %d endpoints up${RESET}\n" "$TOTAL"
exit 0
else
printf "${YELLOW}%d/%d up, %d down${RESET}\n" "$UP" "$TOTAL" "$DOWN"
exit 1
fi