wgengine/magicsock,ipn/ipnlocal: store and load homeDERP from cache (#19491)
With netmap caching, the home DERP of the self node was neither saved to
the cache nor loaded from it, so nodes did not stick to a DERP when
starting without a connection to control.
Instead, when a cache is available, load it before looking for DERP
servers. This is implemented by allowing SetDERPMap to skip ReSTUN when
setting the DERP map (we must have a DERP map before setting the home
DERP), so the home DERP from the cache is applied and stays sticky until
a connection to control is established.
Making DERP only change when connected to control is handled by existing
code from f072d017bd.
Updates #19490
Signed-off-by: Claus Lensbøl <claus@tailscale.com>
This commit is contained in:
@@ -35,7 +35,19 @@ func (b *LocalBackend) writeNetmapToDiskLocked(nm *netmap.NetworkMap) error {
|
||||
b.diskCache.cache = netmapcache.NewCache(netmapcache.FileStore(dir))
|
||||
b.diskCache.dir = dir
|
||||
}
|
||||
return b.diskCache.cache.Store(b.currentNode().Context(), nm)
|
||||
|
||||
// Set the homeDERP on the self node before saving. The self node homeDERP is
|
||||
// generally not used since the homeDERP for self is stored in magicsock, but
|
||||
// to be able to load it during loading the cache, we use the existing field
|
||||
// to save it.
|
||||
|
||||
// Make a shallow copy and mutate a copy of the selfNode.
|
||||
nmCopy := *nm
|
||||
selfNode := nm.SelfNode.AsStruct()
|
||||
selfNode.HomeDERP = int(b.currentNode().homeDERP.Load())
|
||||
nmCopy.SelfNode = selfNode.View()
|
||||
|
||||
return b.diskCache.cache.Store(b.currentNode().Context(), &nmCopy)
|
||||
}
|
||||
|
||||
func (b *LocalBackend) loadDiskCacheLocked() (om *netmap.NetworkMap, ok bool) {
|
||||
|
||||
@@ -0,0 +1,229 @@
|
||||
// Copyright (c) Tailscale Inc & contributors
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package ipnlocal
|
||||
|
||||
import (
|
||||
"net/netip"
|
||||
"testing"
|
||||
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/tstest"
|
||||
"tailscale.com/types/netmap"
|
||||
"tailscale.com/util/eventbus"
|
||||
"tailscale.com/wgengine/magicsock"
|
||||
)
|
||||
|
||||
// newCacheTestNetmap returns a minimal valid netmap suitable for testing disk
|
||||
// cache operations.
|
||||
func newCacheTestNetmap() *netmap.NetworkMap {
|
||||
return &netmap.NetworkMap{
|
||||
SelfNode: (&tailcfg.Node{
|
||||
Name: "test-node.ts.net",
|
||||
User: tailcfg.UserID(1),
|
||||
Addresses: []netip.Prefix{
|
||||
netip.MustParsePrefix("100.64.0.1/32"),
|
||||
},
|
||||
}).View(),
|
||||
UserProfiles: map[tailcfg.UserID]tailcfg.UserProfileView{
|
||||
tailcfg.UserID(1): (&tailcfg.UserProfile{
|
||||
LoginName: "user@example.com",
|
||||
DisplayName: "Test User",
|
||||
}).View(),
|
||||
},
|
||||
DERPMap: &tailcfg.DERPMap{
|
||||
Regions: map[int]*tailcfg.DERPRegion{
|
||||
1: {},
|
||||
2: {},
|
||||
3: {},
|
||||
4: {},
|
||||
5: {},
|
||||
6: {},
|
||||
7: {},
|
||||
8: {},
|
||||
9: {},
|
||||
10: {},
|
||||
11: {},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteAndLoadHomeDERP(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
|
||||
nm := newCacheTestNetmap()
|
||||
b.currentNode().SetNetMap(nm)
|
||||
|
||||
const wantDERP = 7
|
||||
b.currentNode().homeDERP.Store(wantDERP)
|
||||
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
|
||||
if err := b.writeNetmapToDiskLocked(nm); err != nil {
|
||||
t.Fatalf("writeNetmapToDiskLocked: %v", err)
|
||||
}
|
||||
|
||||
loaded, ok := b.loadDiskCacheLocked()
|
||||
if !ok {
|
||||
t.Fatal("loadDiskCacheLocked returned ok=false")
|
||||
}
|
||||
if !loaded.SelfNode.Valid() {
|
||||
t.Fatal("loaded netmap SelfNode is invalid")
|
||||
}
|
||||
if got := loaded.SelfNode.HomeDERP(); got != wantDERP {
|
||||
t.Errorf("loaded SelfNode.HomeDERP() = %d, want %d", got, wantDERP)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOnHomeDERPUpdate(t *testing.T) {
|
||||
t.Run("normal_derp_change", func(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
done := make(chan struct{})
|
||||
tstest.Replace(t, &testOnlyHomeDERPUpdate, func() { close(done) })
|
||||
|
||||
nm := newCacheTestNetmap()
|
||||
b.currentNode().SetNetMap(nm)
|
||||
|
||||
// Publish a HomeDERPChanged event via the backend's event bus.
|
||||
bus := b.Sys().Bus.Get()
|
||||
ec := bus.Client("test.TestOnHomeDERPUpdate")
|
||||
pub := eventbus.Publish[magicsock.HomeDERPChanged](ec)
|
||||
|
||||
const wantDERP = 11
|
||||
pub.Publish(magicsock.HomeDERPChanged{Old: 0, New: wantDERP})
|
||||
<-done
|
||||
|
||||
if got := b.currentNode().homeDERP.Load(); got != wantDERP {
|
||||
t.Errorf("b.homeDERP = %d, want %d", got, wantDERP)
|
||||
}
|
||||
|
||||
// Verify the value was persisted to the disk cache.
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
loaded, ok := b.loadDiskCacheLocked()
|
||||
if !ok {
|
||||
t.Fatal("loadDiskCacheLocked returned ok=false after homeDERP update")
|
||||
}
|
||||
if got := loaded.SelfNode.HomeDERP(); got != wantDERP {
|
||||
t.Errorf("cached SelfNode.HomeDERP() = %d, want %d", got, wantDERP)
|
||||
}
|
||||
})
|
||||
t.Run("old_does_not_match", func(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
done := make(chan struct{})
|
||||
tstest.Replace(t, &testOnlyHomeDERPUpdate, func() { close(done) })
|
||||
|
||||
const setDERP = 11
|
||||
const wantDERP = 4
|
||||
|
||||
nm := newCacheTestNetmap()
|
||||
selfNode := nm.SelfNode.AsStruct()
|
||||
selfNode.HomeDERP = wantDERP
|
||||
nm.SelfNode = selfNode.View()
|
||||
b.currentNode().SetNetMap(nm)
|
||||
b.currentNode().homeDERP.Store(wantDERP)
|
||||
|
||||
// Write an initial cache entry so we can verify it is not overwritten.
|
||||
b.mu.Lock()
|
||||
if err := b.writeNetmapToDiskLocked(nm); err != nil {
|
||||
b.mu.Unlock()
|
||||
t.Fatalf("setup writeNetmapToDiskLocked: %v", err)
|
||||
}
|
||||
b.mu.Unlock()
|
||||
|
||||
// Publish a HomeDERPChanged event via the backend's event bus.
|
||||
bus := b.Sys().Bus.Get()
|
||||
ec := bus.Client("test.TestOnHomeDERPUpdate")
|
||||
pub := eventbus.Publish[magicsock.HomeDERPChanged](ec)
|
||||
pub.Publish(magicsock.HomeDERPChanged{Old: wantDERP + 1, New: setDERP})
|
||||
<-done
|
||||
|
||||
if got := b.currentNode().homeDERP.Load(); got != wantDERP {
|
||||
t.Errorf("b.homeDERP = %d, wanted no change %d", got, wantDERP)
|
||||
}
|
||||
|
||||
// Verify the cache still exists and still holds the original value.
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
loaded, ok := b.loadDiskCacheLocked()
|
||||
if !ok {
|
||||
t.Fatal("loadDiskCacheLocked returned ok=false; expected cache to still exist")
|
||||
}
|
||||
if got := loaded.SelfNode.HomeDERP(); got != wantDERP {
|
||||
t.Errorf("cached SelfNode.HomeDERP() = %d after rejected event, want original %d", got, wantDERP)
|
||||
}
|
||||
})
|
||||
t.Run("new_does_not_exist_in_map", func(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
done := make(chan struct{})
|
||||
tstest.Replace(t, &testOnlyHomeDERPUpdate, func() { close(done) })
|
||||
|
||||
const setDERP = 111
|
||||
const wantDERP = 4
|
||||
|
||||
nm := newCacheTestNetmap()
|
||||
selfNode := nm.SelfNode.AsStruct()
|
||||
selfNode.HomeDERP = wantDERP
|
||||
nm.SelfNode = selfNode.View()
|
||||
b.currentNode().SetNetMap(nm)
|
||||
b.currentNode().homeDERP.Store(wantDERP)
|
||||
|
||||
// Write an initial cache entry so we can verify it is not overwritten.
|
||||
b.mu.Lock()
|
||||
if err := b.writeNetmapToDiskLocked(nm); err != nil {
|
||||
b.mu.Unlock()
|
||||
t.Fatalf("setup writeNetmapToDiskLocked: %v", err)
|
||||
}
|
||||
b.mu.Unlock()
|
||||
|
||||
// Publish a HomeDERPChanged event via the backend's event bus.
|
||||
// Old matches the stored homeDERP so only the "new region not in map"
|
||||
// guard is exercised.
|
||||
bus := b.Sys().Bus.Get()
|
||||
ec := bus.Client("test.TestOnHomeDERPUpdate")
|
||||
pub := eventbus.Publish[magicsock.HomeDERPChanged](ec)
|
||||
pub.Publish(magicsock.HomeDERPChanged{Old: wantDERP, New: setDERP})
|
||||
<-done
|
||||
|
||||
if got := b.currentNode().homeDERP.Load(); got != wantDERP {
|
||||
t.Errorf("b.homeDERP = %d, wanted no change %d", got, wantDERP)
|
||||
}
|
||||
|
||||
// Verify the cache still exists and still holds the original value.
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
loaded, ok := b.loadDiskCacheLocked()
|
||||
if !ok {
|
||||
t.Fatal("loadDiskCacheLocked returned ok=false; expected cache to still exist")
|
||||
}
|
||||
if got := loaded.SelfNode.HomeDERP(); got != wantDERP {
|
||||
t.Errorf("cached SelfNode.HomeDERP() = %d after rejected event, want original %d", got, wantDERP)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestWriteNetmapDoesNotMutateOriginal(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
|
||||
nm := newCacheTestNetmap()
|
||||
b.currentNode().SetNetMap(nm)
|
||||
|
||||
originalDERP := nm.SelfNode.HomeDERP() // expected to be 0 initially
|
||||
|
||||
const storeDERP = 5
|
||||
b.currentNode().homeDERP.Store(storeDERP)
|
||||
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
|
||||
if err := b.writeNetmapToDiskLocked(nm); err != nil {
|
||||
t.Fatalf("writeNetmapToDiskLocked: %v", err)
|
||||
}
|
||||
|
||||
// The original netmap must not have been mutated.
|
||||
if got := nm.SelfNode.HomeDERP(); got != originalDERP {
|
||||
t.Errorf("original nm.SelfNode.HomeDERP() = %d after write, want %d (original was mutated)", got, originalDERP)
|
||||
}
|
||||
}
|
||||
+60
-4
@@ -627,6 +627,7 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
|
||||
}
|
||||
eventbus.SubscribeFunc(ec, b.onAppConnectorRouteUpdate)
|
||||
eventbus.SubscribeFunc(ec, b.onAppConnectorStoreRoutes)
|
||||
eventbus.SubscribeFunc(ec, b.onHomeDERPUpdate)
|
||||
mConn.SetNetInfoCallback(b.setNetInfo) // TODO(tailscale/tailscale#17887): move to eventbus
|
||||
|
||||
return b, nil
|
||||
@@ -658,6 +659,51 @@ func (b *LocalBackend) onAppConnectorStoreRoutes(ri appctype.RouteInfo) {
|
||||
}
|
||||
}
|
||||
|
||||
// testOnlyHomeDERPUpdate if non-nil is called after setting home DERP and
|
||||
// writing netmap to disk.
|
||||
var testOnlyHomeDERPUpdate func()
|
||||
|
||||
func (b *LocalBackend) onHomeDERPUpdate(du magicsock.HomeDERPChanged) {
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
|
||||
b.onHomeDERPUpdateLocked(du)
|
||||
|
||||
if testOnlyHomeDERPUpdate != nil {
|
||||
testOnlyHomeDERPUpdate()
|
||||
}
|
||||
}
|
||||
|
||||
// onHomeDERPUpdateLocked considitonally updates the homeDERP for use in the
|
||||
// netmap cache.
|
||||
// If we switched our currentNode by switching profiles, we might be trying
|
||||
// to update the homeDERP from another profile. If the old homeDERP does not
|
||||
// match what we expect, don't swap the homeDERP.
|
||||
// In practice, it is possible that one profile with a homeDERP of 0 (no-derp)
|
||||
// got switched before setting any home DERP or that DERP IDs match across
|
||||
// DERP maps. Since the risk of this happening is small and the consequences
|
||||
// of this is is just a possible less optimal DERP until the next reSTUN,
|
||||
// accept this possibility.
|
||||
func (b *LocalBackend) onHomeDERPUpdateLocked(du magicsock.HomeDERPChanged) {
|
||||
cn := b.currentNode()
|
||||
|
||||
if cn == nil || cn.DERPMap() == nil || cn.DERPMap().Regions == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if _, ok := cn.DERPMap().Regions[du.New]; !ok {
|
||||
return
|
||||
}
|
||||
|
||||
if !cn.homeDERP.CompareAndSwap(int64(du.Old), int64(du.New)) {
|
||||
return
|
||||
}
|
||||
|
||||
if err := b.writeNetmapToDiskLocked(b.NetMap()); err != nil {
|
||||
b.logf("write netmap to cache: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Clock returns the clock held by b.
func (b *LocalBackend) Clock() tstime.Clock {
	return b.clock
}
|
||||
// Sys returns the tsd.System held by b.
func (b *LocalBackend) Sys() *tsd.System {
	return b.sys
}
|
||||
|
||||
@@ -1821,7 +1867,18 @@ func (b *LocalBackend) setControlClientStatusLocked(c controlclient.Client, st c
|
||||
}
|
||||
|
||||
b.e.SetNetworkMap(st.NetMap)
|
||||
b.MagicConn().SetDERPMap(st.NetMap.DERPMap)
|
||||
b.MagicConn().SetDERPMap(st.NetMap.DERPMap, false)
|
||||
if c == nil && st.NetMap.Cached && st.NetMap.SelfNode.Valid() {
|
||||
// Loading from a cached netmap (c == nil means no live control
|
||||
// client). Pre-seed the home DERP from the cached self node so
|
||||
// that the guard in maybeSetNearestDERP prevents changing the
|
||||
// DERP home before we reconnect to the control plane. If the cache has
|
||||
// nothing in it, skip this, and let the node pick a DERP itself.
|
||||
if cachedHome := st.NetMap.SelfNode.HomeDERP(); cachedHome != 0 {
|
||||
b.health.SetOutOfPollNetMap()
|
||||
b.MagicConn().ForceSetNearestDERP(cachedHome)
|
||||
}
|
||||
}
|
||||
b.MagicConn().SetOnlyTCP443(st.NetMap.HasCap(tailcfg.NodeAttrOnlyTCP443))
|
||||
|
||||
// Update our cached DERP map
|
||||
@@ -3388,7 +3445,7 @@ func (b *LocalBackend) DebugForceNetmapUpdate() {
|
||||
nm := b.currentNode().NetMap()
|
||||
b.e.SetNetworkMap(nm)
|
||||
if nm != nil {
|
||||
b.MagicConn().SetDERPMap(nm.DERPMap)
|
||||
b.MagicConn().SetDERPMap(nm.DERPMap, true)
|
||||
}
|
||||
b.setNetMapLocked(nm)
|
||||
}
|
||||
@@ -4846,7 +4903,7 @@ func (b *LocalBackend) setPrefsLocked(newp *ipn.Prefs) ipn.PrefsView {
|
||||
}
|
||||
|
||||
if netMap != nil {
|
||||
b.MagicConn().SetDERPMap(netMap.DERPMap)
|
||||
b.MagicConn().SetDERPMap(netMap.DERPMap, true)
|
||||
}
|
||||
|
||||
if !oldp.WantRunning() && newp.WantRunning && cc != nil {
|
||||
@@ -5208,7 +5265,6 @@ func (b *LocalBackend) authReconfig() {
|
||||
//
|
||||
// b.mu must be held.
|
||||
func (b *LocalBackend) authReconfigLocked() {
|
||||
|
||||
if b.shutdownCalled {
|
||||
b.logf("[v1] authReconfig: skipping because in shutdown")
|
||||
return
|
||||
|
||||
@@ -80,6 +80,13 @@ type nodeBackend struct {
|
||||
eventClient *eventbus.Client
|
||||
derpMapViewPub *eventbus.Publisher[tailcfg.DERPMapView]
|
||||
|
||||
// homeDERP lives here temporarily. As long as mapSession is short lived, we
|
||||
// don't have a location delivering netmaps to local backend that knows our
|
||||
// homeDERP hence why it is cached here for now.
|
||||
// TODO(cmol): move this field into a refactored mapSession that is not
|
||||
// short lived.
|
||||
homeDERP atomic.Int64
|
||||
|
||||
// TODO(nickkhyl): maybe use sync.RWMutex?
|
||||
mu syncs.Mutex // protects the following fields
|
||||
|
||||
|
||||
Reference in New Issue
Block a user