net/netmon, wgengine/userspace: purge ChangeDelta.Major and address TODOs (#17823)

updates tailscale/corp#33891

Addresses several of the older TODOs in netmon.  This removes the
Major flag and precomputes the ChangeDelta state, rather than making
consumers of ChangeDeltas sort that out themselves.  We're also seeing
a lot of ChangeDeltas being flagged as "Major" when they are
not interesting, triggering rebinds in wgengine that are not needed.  This
cleans that up and adds a host of additional tests.

The dependencies are cleaned up, notably removing the dependency on netmon
itself for calculating what is interesting and what is not.  This includes letting
individual platforms set a bespoke global "IsInterestingInterface"
function.  This is only used on Darwin.

RebindLikelyRequired now roughly follows how "Major" was historically
calculated but includes some additional checks for various
uninteresting events, such as changes in interface addresses, that
shouldn't trigger a rebind.  This significantly reduces thrashing (by
roughly half on Darwin clients when switching between NICs).  The individual
values that we roll into RebindLikelyRequired are also exposed so that
components consuming netmon.ChangeDelta can ask more
targeted questions.

Signed-off-by: Jonathan Nobels <jonathan@tailscale.com>
This commit is contained in:
Jonathan Nobels
2025-12-17 12:32:40 -05:00
committed by GitHub
parent 0fd1670a59
commit 3e89068792
19 changed files with 754 additions and 273 deletions
+19 -14
View File
@@ -296,7 +296,7 @@ type LocalBackend struct {
authURLTime time.Time // when the authURL was received from the control server; TODO(nickkhyl): move to nodeBackend
authActor ipnauth.Actor // an actor who called [LocalBackend.StartLoginInteractive] last, or nil; TODO(nickkhyl): move to nodeBackend
egg bool
prevIfState *netmon.State
interfaceState *netmon.State // latest network interface state or nil
peerAPIServer *peerAPIServer // or nil
peerAPIListeners []*peerAPIListener // TODO(nickkhyl): move to nodeBackend
loginFlags controlclient.LoginFlags
@@ -561,10 +561,16 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
b.e.SetStatusCallback(b.setWgengineStatus)
b.prevIfState = netMon.InterfaceState()
b.interfaceState = netMon.InterfaceState()
// Call our linkChange code once with the current state.
// Following changes are triggered via the eventbus.
b.linkChange(&netmon.ChangeDelta{New: netMon.InterfaceState()})
cd, err := netmon.NewChangeDelta(nil, b.interfaceState, false, netMon.TailscaleInterfaceName(), false)
if err != nil {
b.logf("[unexpected] setting initial netmon state failed: %v", err)
} else {
b.linkChange(cd)
}
if buildfeatures.HasPeerAPIServer {
if tunWrap, ok := b.sys.Tun.GetOK(); ok {
@@ -936,7 +942,7 @@ func (b *LocalBackend) pauseOrResumeControlClientLocked() {
if b.cc == nil {
return
}
networkUp := b.prevIfState.AnyInterfaceUp()
networkUp := b.interfaceState.AnyInterfaceUp()
pauseForNetwork := (b.state == ipn.Stopped && b.NetMap() != nil) || (!networkUp && !testenv.InTest() && !assumeNetworkUpdateForTest())
prefs := b.pm.CurrentPrefs()
@@ -963,24 +969,23 @@ func (b *LocalBackend) linkChange(delta *netmon.ChangeDelta) {
b.mu.Lock()
defer b.mu.Unlock()
ifst := delta.New
hadPAC := b.prevIfState.HasPAC()
b.prevIfState = ifst
b.interfaceState = delta.CurrentState()
b.pauseOrResumeControlClientLocked()
prefs := b.pm.CurrentPrefs()
if delta.Major && prefs.AutoExitNode().IsSet() {
if delta.RebindLikelyRequired && prefs.AutoExitNode().IsSet() {
b.refreshAutoExitNode = true
}
var needReconfig bool
// If the network changed and we're using an exit node and allowing LAN access, we may need to reconfigure.
if delta.Major && prefs.ExitNodeID() != "" && prefs.ExitNodeAllowLANAccess() {
if delta.RebindLikelyRequired && prefs.ExitNodeID() != "" && prefs.ExitNodeAllowLANAccess() {
b.logf("linkChange: in state %v; updating LAN routes", b.state)
needReconfig = true
}
// If the PAC-ness of the network changed, reconfig wireguard+route to add/remove subnets.
if hadPAC != ifst.HasPAC() {
b.logf("linkChange: in state %v; PAC changed from %v->%v", b.state, hadPAC, ifst.HasPAC())
if delta.HasPACOrProxyConfigChanged {
b.logf("linkChange: in state %v; PAC or proxyConfig changed; updating routes", b.state)
needReconfig = true
}
if needReconfig {
@@ -998,7 +1003,7 @@ func (b *LocalBackend) linkChange(delta *netmon.ChangeDelta) {
// If the local network configuration has changed, our filter may
// need updating to tweak default routes.
b.updateFilterLocked(prefs)
updateExitNodeUsageWarning(prefs, delta.New, b.health)
updateExitNodeUsageWarning(prefs, delta.CurrentState(), b.health)
if buildfeatures.HasPeerAPIServer {
cn := b.currentNode()
@@ -5059,7 +5064,7 @@ func (b *LocalBackend) authReconfigLocked() {
}
prefs := b.pm.CurrentPrefs()
hasPAC := b.prevIfState.HasPAC()
hasPAC := b.interfaceState.HasPAC()
disableSubnetsIfPAC := cn.SelfHasCap(tailcfg.NodeAttrDisableSubnetsIfPAC)
dohURL, dohURLOK := cn.exitNodeCanProxyDNS(prefs.ExitNodeID())
dcfg := cn.dnsConfigForNetmap(prefs, b.keyExpired, version.OS())
@@ -5310,7 +5315,7 @@ func (b *LocalBackend) initPeerAPIListenerLocked() {
var err error
skipListen := i > 0 && isNetstack
if !skipListen {
ln, err = ps.listen(a.Addr(), b.prevIfState)
ln, err = ps.listen(a.Addr(), b.interfaceState.TailscaleInterfaceIndex)
if err != nil {
if peerAPIListenAsync {
b.logf("[v1] possibly transient peerapi listen(%q) error, will try again on linkChange: %v", a.Addr(), err)
+3 -3
View File
@@ -41,7 +41,7 @@ import (
"tailscale.com/wgengine/filter"
)
var initListenConfig func(*net.ListenConfig, netip.Addr, *netmon.State, string) error
var initListenConfig func(config *net.ListenConfig, addr netip.Addr, tunIfIndex int) error
// peerDNSQueryHandler is implemented by tsdns.Resolver.
type peerDNSQueryHandler interface {
@@ -53,7 +53,7 @@ type peerAPIServer struct {
resolver peerDNSQueryHandler
}
func (s *peerAPIServer) listen(ip netip.Addr, ifState *netmon.State) (ln net.Listener, err error) {
func (s *peerAPIServer) listen(ip netip.Addr, tunIfIndex int) (ln net.Listener, err error) {
// Android for whatever reason often has problems creating the peerapi listener.
// But since we started intercepting it with netstack, it's not even important that
// we have a real kernel-level listener. So just create a dummy listener on Android
@@ -69,7 +69,7 @@ func (s *peerAPIServer) listen(ip netip.Addr, ifState *netmon.State) (ln net.Lis
// On iOS/macOS, this sets the lc.Control hook to
// setsockopt the interface index to bind to, to get
// out of the network sandbox.
if err := initListenConfig(&lc, ip, ifState, s.b.dialer.TUNName()); err != nil {
if err := initListenConfig(&lc, ip, tunIfIndex); err != nil {
return nil, err
}
if runtime.GOOS == "darwin" || runtime.GOOS == "ios" {
+2 -8
View File
@@ -6,11 +6,9 @@
package ipnlocal
import (
"fmt"
"net"
"net/netip"
"tailscale.com/net/netmon"
"tailscale.com/net/netns"
)
@@ -21,10 +19,6 @@ func init() {
// initListenConfigNetworkExtension configures nc for listening on IP
// through the iOS/macOS Network/System Extension (Packet Tunnel
// Provider) sandbox.
func initListenConfigNetworkExtension(nc *net.ListenConfig, ip netip.Addr, st *netmon.State, tunIfName string) error {
tunIf, ok := st.Interface[tunIfName]
if !ok {
return fmt.Errorf("no interface with name %q", tunIfName)
}
return netns.SetListenConfigInterfaceIndex(nc, tunIf.Index)
func initListenConfigNetworkExtension(nc *net.ListenConfig, ip netip.Addr, ifaceIndex int) error {
return netns.SetListenConfigInterfaceIndex(nc, ifaceIndex)
}
+1 -1
View File
@@ -171,7 +171,7 @@ func (s *localListener) Run() {
// required by the network sandbox to allow binding to
// a specific interface. Without this hook, the system
// chooses a default interface to bind to.
if err := initListenConfig(&lc, ip, s.b.prevIfState, s.b.dialer.TUNName()); err != nil {
if err := initListenConfig(&lc, ip, s.b.interfaceState.TailscaleInterfaceIndex); err != nil {
s.logf("localListener failed to init listen config %v, backing off: %v", s.ap, err)
s.bo.BackOff(s.ctx, err)
continue