net/dns: retrample resolv.conf when another process has trampled it (#18069)
When using the resolv.conf file for setting DNS, it is possible that some other services will trample the file and overwrite our set DNS server. Experiments have shown this to be a racy error depending on how quickly processes start. Make an attempt to trample back the file a limited number of times if the file is changed. Updates #16635 Signed-off-by: Claus Lensbøl <claus@tailscale.com>
This commit is contained in:
+35
-3
@@ -21,6 +21,7 @@ import (
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"tailscale.com/feature"
|
||||
@@ -29,6 +30,7 @@ import (
|
||||
"tailscale.com/net/tsaddr"
|
||||
"tailscale.com/types/logger"
|
||||
"tailscale.com/util/dnsname"
|
||||
"tailscale.com/util/eventbus"
|
||||
"tailscale.com/version/distro"
|
||||
)
|
||||
|
||||
@@ -135,6 +137,11 @@ type directManager struct {
|
||||
// but is better than having non-functioning DNS.
|
||||
renameBroken bool
|
||||
|
||||
trampleCount atomic.Int64
|
||||
trampleTimer *time.Timer
|
||||
eventClient *eventbus.Client
|
||||
trampleDNSPub *eventbus.Publisher[TrampleDNS]
|
||||
|
||||
ctx context.Context // valid until Close
|
||||
ctxClose context.CancelFunc // closes ctx
|
||||
|
||||
@@ -145,11 +152,13 @@ type directManager struct {
|
||||
}
|
||||
|
||||
//lint:ignore U1000 used in manager_{freebsd,openbsd}.go
|
||||
func newDirectManager(logf logger.Logf, health *health.Tracker) *directManager {
|
||||
return newDirectManagerOnFS(logf, health, directFS{})
|
||||
func newDirectManager(logf logger.Logf, health *health.Tracker, bus *eventbus.Bus) *directManager {
|
||||
return newDirectManagerOnFS(logf, health, bus, directFS{})
|
||||
}
|
||||
|
||||
func newDirectManagerOnFS(logf logger.Logf, health *health.Tracker, fs wholeFileFS) *directManager {
|
||||
var trampleWatchDuration = 5 * time.Second
|
||||
|
||||
func newDirectManagerOnFS(logf logger.Logf, health *health.Tracker, bus *eventbus.Bus, fs wholeFileFS) *directManager {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
m := &directManager{
|
||||
logf: logf,
|
||||
@@ -158,6 +167,13 @@ func newDirectManagerOnFS(logf logger.Logf, health *health.Tracker, fs wholeFile
|
||||
ctx: ctx,
|
||||
ctxClose: cancel,
|
||||
}
|
||||
if bus != nil {
|
||||
m.eventClient = bus.Client("dns.directManager")
|
||||
m.trampleDNSPub = eventbus.Publish[TrampleDNS](m.eventClient)
|
||||
}
|
||||
m.trampleTimer = time.AfterFunc(trampleWatchDuration, func() {
|
||||
m.trampleCount.Store(0)
|
||||
})
|
||||
go m.runFileWatcher()
|
||||
return m
|
||||
}
|
||||
@@ -481,10 +497,26 @@ func (m *directManager) checkForFileTrample() {
|
||||
}
|
||||
m.logf("trample: resolv.conf changed from what we expected. did some other program interfere? current contents: %q", show)
|
||||
m.health.SetUnhealthy(resolvTrampleWarnable, nil)
|
||||
if m.trampleDNSPub != nil {
|
||||
n := m.trampleCount.Add(1)
|
||||
|
||||
if n < 10 {
|
||||
m.trampleDNSPub.Publish(TrampleDNS{
|
||||
LastTrample: time.Now(),
|
||||
TramplesInTimeout: n,
|
||||
})
|
||||
m.trampleTimer.Reset(trampleWatchDuration)
|
||||
} else {
|
||||
m.logf("trample: resolv.conf overwritten %d times, no longer attempting to replace it.", n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *directManager) Close() error {
|
||||
m.ctxClose()
|
||||
if m.eventClient != nil {
|
||||
m.eventClient.Close()
|
||||
}
|
||||
|
||||
// We used to keep a file for the tailscale config and symlinked
|
||||
// to it, but then we stopped because /etc/resolv.conf being a
|
||||
|
||||
Reference in New Issue
Block a user