net/dns/resolver: treat DNS REFUSED responses as soft errors in forwarder race (#19053)
When racing multiple upstream DNS resolvers, a REFUSED (RCode 5) response from a broken or misconfigured resolver could win the race and be returned to the client before healthier resolvers had a chance to respond with a valid answer. This caused complete DNS failure in cases where, e.g., a broken upstream resolver returned REFUSED quickly while a working resolver (such as 1.1.1.1) was still responding.

Previously, only SERVFAIL (RCode 2) was treated as a soft error. REFUSED responses were returned as successful bytes and could win the race immediately. This change also treats REFUSED as a soft error in the UDP and TCP forwarding paths, so the race continues until a better answer arrives. If all resolvers refuse, the first REFUSED response is returned to the client.

Additionally, SERVFAIL responses from upstream resolvers are now returned verbatim to the client rather than replaced with a locally synthesized packet. Synthesized SERVFAIL responses were authoritative and guaranteed to include a question section echoing the original query; upstream responses carry no such guarantees but may include extended error information (e.g. RFC 8914 extended DNS errors) that would otherwise be lost.

Fixes #19024

Signed-off-by: Brendan Creane <bcreane@gmail.com>
This commit is contained in:
@@ -1162,8 +1162,19 @@ func TestForwarderWithManyResolvers(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Refused",
|
||||
responses: [][]byte{ // All upstream servers return different failures.
|
||||
name: "AllRefused",
|
||||
responses: [][]byte{ // All upstream servers return REFUSED.
|
||||
makeTestResponse(t, domain, dns.RCodeRefused),
|
||||
makeTestResponse(t, domain, dns.RCodeRefused),
|
||||
makeTestResponse(t, domain, dns.RCodeRefused),
|
||||
},
|
||||
wantResponses: [][]byte{ // When all refuse, return REFUSED to the client.
|
||||
makeTestResponse(t, domain, dns.RCodeRefused),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Refused+Success",
|
||||
responses: [][]byte{ // Some upstream servers refuse, but one succeeds.
|
||||
makeTestResponse(t, domain, dns.RCodeRefused),
|
||||
makeTestResponse(t, domain, dns.RCodeRefused),
|
||||
makeTestResponse(t, domain, dns.RCodeRefused),
|
||||
@@ -1171,21 +1182,30 @@ func TestForwarderWithManyResolvers(t *testing.T) {
|
||||
makeTestResponse(t, domain, dns.RCodeRefused),
|
||||
makeTestResponse(t, domain, dns.RCodeSuccess, netip.MustParseAddr("127.0.0.1")),
|
||||
},
|
||||
wantResponses: [][]byte{ // Refused is not considered to be an error and can be forwarded.
|
||||
makeTestResponse(t, domain, dns.RCodeRefused),
|
||||
wantResponses: [][]byte{ // Refused is treated as a soft error; the Success response should win.
|
||||
makeTestResponse(t, domain, dns.RCodeSuccess, netip.MustParseAddr("127.0.0.1")),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Refused+ServFail",
|
||||
responses: [][]byte{ // Some servers refuse, at least one fails.
|
||||
makeTestResponse(t, domain, dns.RCodeRefused),
|
||||
makeTestResponse(t, domain, dns.RCodeServerFailure),
|
||||
makeTestResponse(t, domain, dns.RCodeRefused),
|
||||
},
|
||||
wantResponses: [][]byte{ // Any non-REFUSED failure triggers SERVFAIL regardless of arrival order.
|
||||
makeTestResponse(t, domain, dns.RCodeServerFailure),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "MixFail",
|
||||
responses: [][]byte{ // All upstream servers return different failures.
|
||||
responses: [][]byte{ // Upstream servers return different failures.
|
||||
makeTestResponse(t, domain, dns.RCodeServerFailure),
|
||||
makeTestResponse(t, domain, dns.RCodeNameError),
|
||||
makeTestResponse(t, domain, dns.RCodeRefused),
|
||||
},
|
||||
wantResponses: [][]byte{ // Both NXDomain and Refused can be forwarded.
|
||||
wantResponses: [][]byte{ // SERVFAIL and REFUSED are soft errors; NXDOMAIN wins.
|
||||
makeTestResponse(t, domain, dns.RCodeNameError),
|
||||
makeTestResponse(t, domain, dns.RCodeRefused),
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -1297,3 +1317,71 @@ func TestForwarderVerboseLogs(t *testing.T) {
|
||||
t.Errorf("expected forwarding log, got:\n%s", logStr)
|
||||
}
|
||||
}
|
||||
|
||||
// TestForwarderHealthOnContextExpiry verifies that when all resolvers fail and
|
||||
// the context expires before the response can be sent, the health tracker is
|
||||
// set unhealthy if and only if acceptDNS is true.
|
||||
func TestForwarderHealthOnContextExpiry(t *testing.T) {
|
||||
const domain = "health-test.example.com."
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
acceptDNS bool
|
||||
wantUnhealthy bool
|
||||
}{
|
||||
{"acceptDNS=true", true, true},
|
||||
{"acceptDNS=false", false, false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
request := makeTestRequest(t, domain, dns.TypeA, 0)
|
||||
logf := tstest.WhileTestRunningLogger(t)
|
||||
bus := eventbustest.NewBus(t)
|
||||
netMon, err := netmon.New(bus, logf)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var dialer tsdial.Dialer
|
||||
dialer.SetNetMon(netMon)
|
||||
dialer.SetBus(bus)
|
||||
|
||||
ht := health.NewTracker(bus)
|
||||
fwd := newForwarder(logf, netMon, nil, &dialer, ht, nil)
|
||||
fwd.acceptDNS = tt.acceptDNS
|
||||
|
||||
port1 := runDNSServer(t, nil, makeTestResponse(t, domain, dns.RCodeServerFailure), func(bool, []byte) {})
|
||||
port2 := runDNSServer(t, nil, makeTestResponse(t, domain, dns.RCodeServerFailure), func(bool, []byte) {})
|
||||
|
||||
resolvers := []resolverAndDelay{
|
||||
{name: &dnstype.Resolver{Addr: fmt.Sprintf("127.0.0.1:%d", port1)}},
|
||||
{name: &dnstype.Resolver{Addr: fmt.Sprintf("127.0.0.1:%d", port2)}},
|
||||
}
|
||||
|
||||
rpkt := packet{
|
||||
bs: request,
|
||||
family: "udp",
|
||||
addr: netip.MustParseAddrPort("127.0.0.1:12345"),
|
||||
}
|
||||
|
||||
// Use an unbuffered responseChan so the send blocks, forcing the
|
||||
// ctx.Done path and the SetUnhealthy call.
|
||||
responseChan := make(chan packet)
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
// Cancel after DNS servers have had time to respond and their errors
|
||||
// collected, leaving forwardWithDestChan blocked on responseChan.
|
||||
go func() {
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
cancel()
|
||||
}()
|
||||
|
||||
fwd.forwardWithDestChan(ctx, rpkt, responseChan, resolvers...)
|
||||
|
||||
if got := ht.IsUnhealthy(dnsForwarderFailing); got != tt.wantUnhealthy {
|
||||
t.Errorf("IsUnhealthy = %v, want %v", got, tt.wantUnhealthy)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user