control,health,ipn: move IP forwarding check to health tracker (#19007)

Currently IP forwarding health check is done on sending MapRequests.

Move ip forwarding to the health service to gain the benefits
of the health tracker and perodic monitoring out of band from
the MapRequest path. ipnlocal now provides a closure to
the health service to provide the check if forwarding is broken.

Removed `skipIPForwardingCheck` from controlclient/direct.go,
it wasn't being used as the comments describe it, that check
has moved to ipnlocal for the closure to the health tracker.

Updates #18976

Signed-off-by: Mike O'Driscoll <mikeo@tailscale.com>
main
Mike O'Driscoll 1 month ago committed by GitHub
parent 156d97c549
commit 4e88d231d5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 2
      cmd/tailscaled/depaware-min.txt
  2. 2
      cmd/tailscaled/depaware-minbox.txt
  3. 9
      control/controlclient/auto.go
  4. 3
      control/controlclient/client.go
  5. 127
      control/controlclient/direct.go
  6. 33
      health/health.go
  7. 83
      health/health_test.go
  8. 13
      health/warnings.go
  9. 31
      ipn/ipnlocal/local.go
  10. 2
      ipn/ipnlocal/state_test.go

@ -101,7 +101,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/net/netknob from tailscale.com/logpolicy+ tailscale.com/net/netknob from tailscale.com/logpolicy+
tailscale.com/net/netmon from tailscale.com/cmd/tailscaled+ tailscale.com/net/netmon from tailscale.com/cmd/tailscaled+
tailscale.com/net/netns from tailscale.com/cmd/tailscaled+ tailscale.com/net/netns from tailscale.com/cmd/tailscaled+
tailscale.com/net/netutil from tailscale.com/control/controlclient+ tailscale.com/net/netutil from tailscale.com/control/controlhttp+
tailscale.com/net/netx from tailscale.com/control/controlclient+ tailscale.com/net/netx from tailscale.com/control/controlclient+
tailscale.com/net/packet from tailscale.com/ipn/ipnlocal+ tailscale.com/net/packet from tailscale.com/ipn/ipnlocal+
tailscale.com/net/packet/checksum from tailscale.com/net/tstun tailscale.com/net/packet/checksum from tailscale.com/net/tstun

@ -118,7 +118,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/net/netknob from tailscale.com/logpolicy+ tailscale.com/net/netknob from tailscale.com/logpolicy+
tailscale.com/net/netmon from tailscale.com/cmd/tailscaled+ tailscale.com/net/netmon from tailscale.com/cmd/tailscaled+
tailscale.com/net/netns from tailscale.com/cmd/tailscaled+ tailscale.com/net/netns from tailscale.com/cmd/tailscaled+
tailscale.com/net/netutil from tailscale.com/control/controlclient+ tailscale.com/net/netutil from tailscale.com/client/local+
tailscale.com/net/netx from tailscale.com/control/controlclient+ tailscale.com/net/netx from tailscale.com/control/controlclient+
tailscale.com/net/packet from tailscale.com/ipn/ipnlocal+ tailscale.com/net/packet from tailscale.com/ipn/ipnlocal+
tailscale.com/net/packet/checksum from tailscale.com/net/tstun tailscale.com/net/packet/checksum from tailscale.com/net/tstun

@ -773,6 +773,15 @@ func (c *Auto) SetDiscoPublicKey(key key.DiscoPublic) {
c.updateControl() c.updateControl()
} }
// SetIPForwardingBroken updates the IP forwarding broken state and sends
// a control update if the value changed.
func (c *Auto) SetIPForwardingBroken(v bool) {
if !c.direct.SetIPForwardingBroken(v) {
return
}
c.updateControl()
}
func (c *Auto) Shutdown() { func (c *Auto) Shutdown() {
c.mu.Lock() c.mu.Lock()
if c.closed { if c.closed {

@ -87,6 +87,9 @@ type Client interface {
// future map requests. This should be called after rotating the discovery key. // future map requests. This should be called after rotating the discovery key.
// Note: the auto client uploads the new key to control immediately. // Note: the auto client uploads the new key to control immediately.
SetDiscoPublicKey(key.DiscoPublic) SetDiscoPublicKey(key.DiscoPublic)
// SetIPForwardingBroken updates the IP forwarding broken state
// and sends a control update if the value changed.
SetIPForwardingBroken(bool)
// ClientID returns the ClientID of a client. This ID is meant to // ClientID returns the ClientID of a client. This ID is meant to
// distinguish one client from another. // distinguish one client from another.
ClientID() int64 ClientID() int64

@ -39,7 +39,6 @@ import (
"tailscale.com/net/dnscache" "tailscale.com/net/dnscache"
"tailscale.com/net/dnsfallback" "tailscale.com/net/dnsfallback"
"tailscale.com/net/netmon" "tailscale.com/net/netmon"
"tailscale.com/net/netutil"
"tailscale.com/net/netx" "tailscale.com/net/netx"
"tailscale.com/net/tlsdial" "tailscale.com/net/tlsdial"
"tailscale.com/net/tsdial" "tailscale.com/net/tsdial"
@ -64,30 +63,29 @@ import (
// Direct is the client that connects to a tailcontrol server for a node. // Direct is the client that connects to a tailcontrol server for a node.
type Direct struct { type Direct struct {
httpc *http.Client // HTTP client used to do TLS requests to control (just https://controlplane.tailscale.com/key?v=123) httpc *http.Client // HTTP client used to do TLS requests to control (just https://controlplane.tailscale.com/key?v=123)
interceptedDial *atomic.Bool // if non-nil, pointer to bool whether ScreenTime intercepted our dial interceptedDial *atomic.Bool // if non-nil, pointer to bool whether ScreenTime intercepted our dial
dialer *tsdial.Dialer dialer *tsdial.Dialer
dnsCache *dnscache.Resolver dnsCache *dnscache.Resolver
controlKnobs *controlknobs.Knobs // always non-nil controlKnobs *controlknobs.Knobs // always non-nil
serverURL string // URL of the tailcontrol server serverURL string // URL of the tailcontrol server
clock tstime.Clock clock tstime.Clock
logf logger.Logf logf logger.Logf
netMon *netmon.Monitor // non-nil netMon *netmon.Monitor // non-nil
health *health.Tracker health *health.Tracker
busClient *eventbus.Client busClient *eventbus.Client
clientVersionPub *eventbus.Publisher[tailcfg.ClientVersion] clientVersionPub *eventbus.Publisher[tailcfg.ClientVersion]
autoUpdatePub *eventbus.Publisher[AutoUpdate] autoUpdatePub *eventbus.Publisher[AutoUpdate]
controlTimePub *eventbus.Publisher[ControlTime] controlTimePub *eventbus.Publisher[ControlTime]
getMachinePrivKey func() (key.MachinePrivate, error) getMachinePrivKey func() (key.MachinePrivate, error)
debugFlags []string debugFlags []string
skipIPForwardingCheck bool pinger Pinger
pinger Pinger popBrowser func(url string) // or nil
popBrowser func(url string) // or nil polc policyclient.Client // always non-nil
polc policyclient.Client // always non-nil c2nHandler http.Handler // or nil
c2nHandler http.Handler // or nil panicOnUse bool // if true, panic if client is used (for testing)
panicOnUse bool // if true, panic if client is used (for testing) closedCtx context.Context // alive until Direct.Close is called
closedCtx context.Context // alive until Direct.Close is called closeCtx context.CancelFunc // cancels closedCtx
closeCtx context.CancelFunc // cancels closedCtx
dialPlan ControlDialPlanner // can be nil dialPlan ControlDialPlanner // can be nil
@ -95,6 +93,7 @@ type Direct struct {
serverLegacyKey key.MachinePublic // original ("legacy") nacl crypto_box-based public key; only used for signRegisterRequest on Windows now serverLegacyKey key.MachinePublic // original ("legacy") nacl crypto_box-based public key; only used for signRegisterRequest on Windows now
serverNoiseKey key.MachinePublic serverNoiseKey key.MachinePublic
discoPubKey key.DiscoPublic // protected by mu; can be updated via [SetDiscoPublicKey] discoPubKey key.DiscoPublic // protected by mu; can be updated via [SetDiscoPublicKey]
ipForwardBroken bool // protected by mu; can be updated via [SetIPForwardingBroken]
sfGroup singleflight.Group[struct{}, *ts2021.Client] // protects noiseClient creation. sfGroup singleflight.Group[struct{}, *ts2021.Client] // protects noiseClient creation.
noiseClient *ts2021.Client // also protected by mu noiseClient *ts2021.Client // also protected by mu
@ -159,11 +158,6 @@ type Options struct {
// If nil, no status updates are reported. // If nil, no status updates are reported.
Observer Observer Observer Observer
// SkipIPForwardingCheck declares that the host's IP
// forwarding works and should not be double-checked by the
// controlclient package.
SkipIPForwardingCheck bool
// Pinger optionally specifies the Pinger to use to satisfy // Pinger optionally specifies the Pinger to use to satisfy
// MapResponse.PingRequest queries from the control plane. // MapResponse.PingRequest queries from the control plane.
// If nil, PingRequest queries are not answered. // If nil, PingRequest queries are not answered.
@ -307,26 +301,25 @@ func NewDirect(opts Options) (*Direct, error) {
} }
c := &Direct{ c := &Direct{
httpc: httpc, httpc: httpc,
interceptedDial: interceptedDial, interceptedDial: interceptedDial,
controlKnobs: opts.ControlKnobs, controlKnobs: opts.ControlKnobs,
getMachinePrivKey: opts.GetMachinePrivateKey, getMachinePrivKey: opts.GetMachinePrivateKey,
serverURL: opts.ServerURL, serverURL: opts.ServerURL,
clock: opts.Clock, clock: opts.Clock,
logf: opts.Logf, logf: opts.Logf,
persist: opts.Persist.View(), persist: opts.Persist.View(),
authKey: opts.AuthKey, authKey: opts.AuthKey,
debugFlags: opts.DebugFlags, debugFlags: opts.DebugFlags,
netMon: netMon, netMon: netMon,
health: opts.HealthTracker, health: opts.HealthTracker,
skipIPForwardingCheck: opts.SkipIPForwardingCheck, pinger: opts.Pinger,
pinger: opts.Pinger, polc: cmp.Or(opts.PolicyClient, policyclient.Client(policyclient.NoPolicyClient{})),
polc: cmp.Or(opts.PolicyClient, policyclient.Client(policyclient.NoPolicyClient{})), popBrowser: opts.PopBrowserURL,
popBrowser: opts.PopBrowserURL, c2nHandler: opts.C2NHandler,
c2nHandler: opts.C2NHandler, dialer: opts.Dialer,
dialer: opts.Dialer, dnsCache: dnsCache,
dnsCache: dnsCache, dialPlan: opts.DialPlan,
dialPlan: opts.DialPlan,
} }
c.discoPubKey = opts.DiscoPublicKey c.discoPubKey = opts.DiscoPublicKey
c.closedCtx, c.closeCtx = context.WithCancel(context.Background()) c.closedCtx, c.closeCtx = context.WithCancel(context.Background())
@ -861,6 +854,18 @@ func (c *Direct) SetDiscoPublicKey(key key.DiscoPublic) {
c.discoPubKey = key c.discoPubKey = key
} }
// SetIPForwardingBroken updates the IP forwarding broken state.
// It reports whether the value changed.
func (c *Direct) SetIPForwardingBroken(v bool) bool {
c.mu.Lock()
defer c.mu.Unlock()
if c.ipForwardBroken == v {
return false
}
c.ipForwardBroken = v
return true
}
// ClientID returns the controlClientID of the controlClient. // ClientID returns the controlClientID of the controlClient.
func (c *Direct) ClientID() int64 { func (c *Direct) ClientID() int64 {
return c.controlClientID return c.controlClientID
@ -991,10 +996,6 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
} }
var extraDebugFlags []string var extraDebugFlags []string
if buildfeatures.HasAdvertiseRoutes && hi != nil && c.netMon != nil && !c.skipIPForwardingCheck &&
ipForwardingBroken(hi.RoutableIPs, c.netMon.InterfaceState()) {
extraDebugFlags = append(extraDebugFlags, "warn-ip-forwarding-off")
}
if c.health.RouterHealth() != nil { if c.health.RouterHealth() != nil {
extraDebugFlags = append(extraDebugFlags, "warn-router-unhealthy") extraDebugFlags = append(extraDebugFlags, "warn-router-unhealthy")
} }
@ -1413,24 +1414,6 @@ func initDevKnob() devKnobs {
var clock tstime.Clock = tstime.StdClock{} var clock tstime.Clock = tstime.StdClock{}
// ipForwardingBroken reports whether the system's IP forwarding is disabled
// and will definitely not work for the routes provided.
//
// It should not return false positives.
//
// TODO(bradfitz): Change controlclient.Options.SkipIPForwardingCheck into a
// func([]netip.Prefix) error signature instead.
func ipForwardingBroken(routes []netip.Prefix, state *netmon.State) bool {
warn, err := netutil.CheckIPForwarding(routes, state)
if err != nil {
// Oh well, we tried. This is just for debugging.
// We don't want false positives.
// TODO: maybe we want a different warning for inability to check?
return false
}
return warn != nil
}
// isUniquePingRequest reports whether pr contains a new PingRequest.URL // isUniquePingRequest reports whether pr contains a new PingRequest.URL
// not already handled, noting its value when returning true. // not already handled, noting its value when returning true.
func (c *Direct) isUniquePingRequest(pr *tailcfg.PingRequest) bool { func (c *Direct) isUniquePingRequest(pr *tailcfg.PingRequest) bool {

@ -132,6 +132,11 @@ type Tracker struct {
localLogConfigErr error localLogConfigErr error
tlsConnectionErrors map[string]error // map[ServerName]error tlsConnectionErrors map[string]error // map[ServerName]error
metricHealthMessage any // nil or *metrics.MultiLabelMap[metricHealthMessageLabel] metricHealthMessage any // nil or *metrics.MultiLabelMap[metricHealthMessageLabel]
// IP forwarding check
// If non-nil, called periodically to check if IP forwarding is broken.
// Should return true if broken, false if healthy.
isIPForwardingBroken func() bool
} }
// NewTracker contructs a new [Tracker] and attaches the given eventbus. // NewTracker contructs a new [Tracker] and attaches the given eventbus.
@ -1097,6 +1102,8 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
t.setHealthyLocked(NetworkStatusWarnable) t.setHealthyLocked(NetworkStatusWarnable)
} }
t.updateIPForwardingWarnableLocked()
if t.localLogConfigErr != nil { if t.localLogConfigErr != nil {
t.setUnhealthyLocked(localLogWarnable, Args{ t.setUnhealthyLocked(localLogWarnable, Args{
ArgError: t.localLogConfigErr.Error(), ArgError: t.localLogConfigErr.Error(),
@ -1389,3 +1396,29 @@ func (t *Tracker) LastNoiseDialWasRecent() bool {
t.lastNoiseDial = now t.lastNoiseDial = now
return dur < 2*time.Minute return dur < 2*time.Minute
} }
// SetIPForwardingCheck sets the function to check if IP forwarding is broken.
// The function should return true if IP forwarding is broken, false if healthy.
// Pass nil to disable IP forwarding checks.
func (t *Tracker) SetIPForwardingCheck(checkFunc func() bool) {
if t.nil() {
return
}
t.mu.Lock()
defer t.mu.Unlock()
t.isIPForwardingBroken = checkFunc
// Run an immediate check to set initial state
t.updateIPForwardingWarnableLocked()
}
// updateIPForwardingWarnableLocked checks the IP forwarding state and
// sets or clears the ipForwardingWarnable accordingly.
func (t *Tracker) updateIPForwardingWarnableLocked() {
if t.isIPForwardingBroken != nil && t.isIPForwardingBroken() {
t.setUnhealthyLocked(ipForwardingWarnable, Args{})
} else {
t.setHealthyLocked(ipForwardingWarnable)
}
}

@ -999,3 +999,86 @@ func TestCurrentStateETagWarnable(t *testing.T) {
} }
}) })
} }
func TestIPForwardingState(t *testing.T) {
tests := []struct {
name string
checkFunc func() bool // nil means no check function
wantUnhealthy bool
}{
{
name: "broken",
checkFunc: func() bool { return true },
wantUnhealthy: true,
},
{
name: "healthy",
checkFunc: func() bool { return false },
wantUnhealthy: false,
},
{
name: "no_check_function",
checkFunc: nil,
wantUnhealthy: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
bus := eventbus.New()
tr := NewTracker(bus)
defer bus.Close()
tr.SetIPNState("Running", true)
tr.SetIPForwardingCheck(tt.checkFunc)
tr.mu.Lock()
tr.updateBuiltinWarnablesLocked()
tr.mu.Unlock()
got := tr.IsUnhealthy(ipForwardingWarnable)
if got != tt.wantUnhealthy {
t.Errorf("IsUnhealthy(ipForwardingWarnable) = %v, want %v", got, tt.wantUnhealthy)
}
})
}
// Test state transitions
t.Run("transitions", func(t *testing.T) {
bus := eventbus.New()
tr := NewTracker(bus)
defer bus.Close()
tr.SetIPNState("Running", true)
// Start broken
tr.SetIPForwardingCheck(func() bool { return true })
tr.mu.Lock()
tr.updateBuiltinWarnablesLocked()
tr.mu.Unlock()
if !tr.IsUnhealthy(ipForwardingWarnable) {
t.Fatal("expected IP forwarding to be unhealthy initially")
}
// Transition to healthy
tr.SetIPForwardingCheck(func() bool { return false })
tr.mu.Lock()
tr.updateBuiltinWarnablesLocked()
tr.mu.Unlock()
if tr.IsUnhealthy(ipForwardingWarnable) {
t.Fatal("expected IP forwarding to be healthy after transition")
}
// Transition to nil (should stay healthy)
tr.SetIPForwardingCheck(nil)
tr.mu.Lock()
tr.updateBuiltinWarnablesLocked()
tr.mu.Unlock()
if tr.IsUnhealthy(ipForwardingWarnable) {
t.Fatal("expected IP forwarding to be healthy after clearing check")
}
})
}

@ -298,3 +298,16 @@ var warmingUpWarnable = condRegister(func() *Warnable {
Text: StaticMessage("Tailscale is starting. Please wait."), Text: StaticMessage("Tailscale is starting. Please wait."),
} }
}) })
// ipForwardingWarnable is a Warnable that warns the user that IP forwarding is disabled
// but subnet routing or exit node functionality is being used.
var ipForwardingWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "ip-forwarding-off",
Title: "IP forwarding is off",
Severity: SeverityMedium,
MapDebugFlag: "warn-ip-forwarding-off",
Text: StaticMessage("Subnet routing is enabled, but IP forwarding is disabled. Check that IP forwarding is enabled on your machine."),
ImpactsConnectivity: true,
}
})

@ -1072,6 +1072,14 @@ func (b *LocalBackend) onHealthChange(change health.Change) {
Health: state, Health: state,
}) })
// Update control if IP forwarding state changed
_, broken := state.Warnings["ip-forwarding-off"]
b.mu.Lock()
if b.cc != nil {
b.cc.SetIPForwardingBroken(broken)
}
b.mu.Unlock()
if f, ok := hookCaptivePortalHealthChange.GetOk(); ok { if f, ok := hookCaptivePortalHealthChange.GetOk(); ok {
f(b, state) f(b, state)
} }
@ -2633,10 +2641,6 @@ func (b *LocalBackend) startLocked(opts ipn.Options) error {
Shutdown: ccShutdown, Shutdown: ccShutdown,
Bus: b.sys.Bus.Get(), Bus: b.sys.Bus.Get(),
StartPaused: prefs.Sync().EqualBool(false), StartPaused: prefs.Sync().EqualBool(false),
// Don't warn about broken Linux IP forwarding when
// netstack is being used.
SkipIPForwardingCheck: b.sys.IsNetstackRouter(),
}) })
if err != nil { if err != nil {
return err return err
@ -5639,6 +5643,22 @@ func (b *LocalBackend) applyPrefsToHostinfoLocked(hi *tailcfg.Hostinfo, prefs ip
if buildfeatures.HasAdvertiseRoutes { if buildfeatures.HasAdvertiseRoutes {
b.metrics.advertisedRoutes.Set(float64(tsaddr.WithoutExitRoute(prefs.AdvertiseRoutes()).Len())) b.metrics.advertisedRoutes.Set(float64(tsaddr.WithoutExitRoute(prefs.AdvertiseRoutes()).Len()))
// Set up IP forwarding check when routes change
if len(hi.RoutableIPs) > 0 && b.NetMon() != nil && !b.sys.IsNetstackRouter() {
routes := hi.RoutableIPs
netMon := b.NetMon()
b.health.SetIPForwardingCheck(func() bool {
warn, err := netutil.CheckIPForwarding(routes, netMon.InterfaceState())
if err != nil {
metricIPForwardingCheckError.Add(1)
return false // don't want false positives
}
return warn != nil // true if broken
})
} else {
b.health.SetIPForwardingCheck(nil)
}
} }
var sshHostKeys []string var sshHostKeys []string
@ -7917,7 +7937,8 @@ func maybeUsernameOf(actor ipnauth.Actor) string {
} }
var ( var (
metricCurrentWatchIPNBus = clientmetric.NewGauge("localbackend_current_watch_ipn_bus") metricCurrentWatchIPNBus = clientmetric.NewGauge("localbackend_current_watch_ipn_bus")
metricIPForwardingCheckError = clientmetric.NewCounter("localbackend_ip_forwarding_check_error")
) )
func (b *LocalBackend) stateEncrypted() opt.Bool { func (b *LocalBackend) stateEncrypted() opt.Bool {

@ -336,6 +336,8 @@ func (cc *mockControl) ClientID() int64 {
return cc.controlClientID return cc.controlClientID
} }
func (cc *mockControl) SetIPForwardingBroken(bool) {}
func (b *LocalBackend) nonInteractiveLoginForStateTest() { func (b *LocalBackend) nonInteractiveLoginForStateTest() {
b.mu.Lock() b.mu.Lock()
if b.cc == nil { if b.cc == nil {

Loading…
Cancel
Save