wgengine/magicsock,ipn/ipnlocal: store and load homeDERP from cache (#19491)

With netmap caching, the home DERP of the self node was neither saved to
the cache or loaded from it, making nodes not stick to a DERP when
starting without a connection to control.

Instead, make sure that when a cache is available, load that cache,
before looking for DERP servers. This is implemented by allowing a skip
of ReSTUN in setting the DERP map (we must have a DERP map before
setting the home DERP), so the DERP from cache will set itself and be
sticky until a connection to control is established.

Making DERP only change when connected to control is handled by existing
code from f072d017bd.

Updates #19490

Signed-off-by: Claus Lensbøl <claus@tailscale.com>
This commit is contained in:
Claus Lensbøl
2026-04-29 10:24:09 -04:00
committed by GitHub
parent 1841a93ab2
commit 78627c132f
9 changed files with 493 additions and 20 deletions
+2 -2
View File
@@ -156,8 +156,8 @@ func setupWGTest(b *testing.B, logf logger.Logf, traf *TrafficGen, a1, a2 netip.
})
// Not using DERP in this test (for now?).
s1.MagicSock.Get().SetDERPMap(&tailcfg.DERPMap{})
s2.MagicSock.Get().SetDERPMap(&tailcfg.DERPMap{})
s1.MagicSock.Get().SetDERPMap(&tailcfg.DERPMap{}, true)
s2.MagicSock.Get().SetDERPMap(&tailcfg.DERPMap{}, true)
wait.Wait()
}
+38 -7
View File
@@ -102,6 +102,7 @@ type activeDerp struct {
var (
pickDERPFallbackForTests func() int
reSTUNHookForTests func(why string)
)
// pickDERPFallback returns a non-zero but deterministic DERP node to
@@ -155,7 +156,7 @@ var checkControlHealthDuringNearestDERPInTests = false
// region that it selected and set (via setNearestDERP).
//
// c.mu must NOT be held.
func (c *Conn) maybeSetNearestDERP(report *netcheck.Report) (preferredDERP int) {
func (c *Conn) maybeSetNearestDERP(report *netcheck.Report, force bool) (preferredDERP int) {
// Don't change our PreferredDERP if we don't have a connection to
// control; if we don't, then we can't inform peers about a DERP home
// change, which breaks all connectivity. Even if this DERP region is
@@ -169,7 +170,10 @@ func (c *Conn) maybeSetNearestDERP(report *netcheck.Report) (preferredDERP int)
//
// Despite the above behaviour, ensure that we set the nearest DERP if
// we don't currently have one set; any DERP server is better than
// none, even if not connected to control.
// none, even if not connected to control. The exception here is if we have
// a cached netmap with a previous DERP server. Retaining the previous DERP
// makes it easier for other nodes to find each other when control is not
// available.
var connectedToControl bool
if testenv.InTest() && !checkControlHealthDuringNearestDERPInTests {
connectedToControl = true
@@ -179,7 +183,7 @@ func (c *Conn) maybeSetNearestDERP(report *netcheck.Report) (preferredDERP int)
c.mu.Lock()
myDerp := c.myDerp
c.mu.Unlock()
if !connectedToControl {
if !connectedToControl && !force {
if myDerp != 0 {
metricDERPHomeNoChangeNoControl.Add(1)
return myDerp
@@ -198,15 +202,32 @@ func (c *Conn) maybeSetNearestDERP(report *netcheck.Report) (preferredDERP int)
}
if preferredDERP != myDerp {
c.logf(
"magicsock: home DERP changing from derp-%d [%dms] to derp-%d [%dms]",
c.myDerp, report.RegionLatency[myDerp].Milliseconds(), preferredDERP, report.RegionLatency[preferredDERP].Milliseconds())
"magicsock: home DERP changing from derp-%d [%dms] to derp-%d [%dms] (forced=%t)",
c.myDerp, report.RegionLatency[myDerp].Milliseconds(), preferredDERP, report.RegionLatency[preferredDERP].Milliseconds(), force)
}
if !c.setNearestDERP(preferredDERP) {
preferredDERP = 0
} else if preferredDERP != myDerp {
c.homeDERPChangedPub.Publish(HomeDERPChanged{Old: myDerp, New: preferredDERP})
}
return
}
// HomeDERPChanged is an event sent on the [eventbus.Bus] when a new home DERP
// server has been selected. Its publisher is [magicsock.Coon]; its main
// subscriber is [ipnlocal.LocalBackend] that updates the homeDERP used by the
// netmap cache.
// TODO(cmol): Move the subscriber to not inject into localBackend, but rather
// into the netmap at the controlClient mapSession level once there is a stable
// abstraction to use.
type HomeDERPChanged struct {
Old, New int
}
func (c *Conn) ForceSetNearestDERP(regionID int) int {
return c.maybeSetNearestDERP(&netcheck.Report{PreferredDERP: regionID}, true)
}
func (c *Conn) derpRegionCodeLocked(regionID int) string {
if c.derpMap == nil {
return ""
@@ -771,7 +792,11 @@ func (c *Conn) SetOnlyTCP443(v bool) {
// SetDERPMap controls which (if any) DERP servers are used.
// A nil value means to disable DERP; it's disabled by default.
func (c *Conn) SetDERPMap(dm *tailcfg.DERPMap) {
//
// If doReStun is false, the post setting ReSTUN is not performed.
// This flag is used for setting the map from a cache to make it possible to
// also set the homeDERP from cache.
func (c *Conn) SetDERPMap(dm *tailcfg.DERPMap, doReStun bool) {
c.mu.Lock()
defer c.mu.Unlock()
@@ -828,8 +853,14 @@ func (c *Conn) SetDERPMap(dm *tailcfg.DERPMap) {
}
}
go c.ReSTUN("derp-map-update")
if doReStun {
if reSTUNHookForTests != nil {
reSTUNHookForTests("derp-map-update")
}
go c.ReSTUN("derp-map-update")
}
}
func (c *Conn) wantDerpLocked() bool { return c.derpMap != nil }
// c.mu must be held.
+114
View File
@@ -4,9 +4,15 @@
package magicsock
import (
"fmt"
"testing"
"tailscale.com/health"
"tailscale.com/net/netcheck"
"tailscale.com/tailcfg"
"tailscale.com/tstest"
"tailscale.com/util/eventbus"
"tailscale.com/util/eventbus/eventbustest"
)
func CheckDERPHeuristicTimes(t *testing.T) {
@@ -14,3 +20,111 @@ func CheckDERPHeuristicTimes(t *testing.T) {
t.Errorf("PreferredDERPFrameTime too low; should be at least frameReceiveRecordRate")
}
}
func TestForceSetNearestDERP(t *testing.T) {
derpMap := &tailcfg.DERPMap{
Regions: map[int]*tailcfg.DERPRegion{
7: {
RegionID: 7,
RegionCode: "test",
Nodes: []*tailcfg.DERPNode{
{
Name: "7a",
RegionID: 7,
HostName: "derp7.test.unused",
IPv4: "127.0.0.1",
IPv6: "none",
},
},
},
},
}
// Force the real control health check so we can verify force=true bypasses it.
tstest.Replace(t, &checkControlHealthDuringNearestDERPInTests, true)
bus := eventbustest.NewBus(t)
ht := health.NewTracker(bus)
c := newConn(t.Logf)
ec := bus.Client("magicsock.Conn.Test")
c.eventClient = ec
c.homeDERPChangedPub = eventbus.Publish[HomeDERPChanged](ec)
c.eventBus = bus
c.derpMap = derpMap
c.health = ht
ht.SetOutOfPollNetMap()
tw := eventbustest.NewWatcher(t, bus)
got := c.ForceSetNearestDERP(7)
if got != 7 {
t.Fatalf("ForceSetNearestDERP(7) = %d, want 7", got)
}
if c.myDerp != 7 {
t.Errorf("c.myDerp = %d after ForceSetNearestDERP, want 7", c.myDerp)
}
if err := eventbustest.Expect(tw, func(e HomeDERPChanged) error {
if e.Old != 0 || e.New != 7 {
return fmt.Errorf("got HomeDERPChanged{Old:%d, New:%d}, want {Old:0, New:7}", e.Old, e.New)
}
return nil
}); err != nil {
t.Errorf("expected HomeDERPChanged event: %v", err)
}
}
func TestSetDERPMapDoReStun(t *testing.T) {
derpMap1 := &tailcfg.DERPMap{
Regions: map[int]*tailcfg.DERPRegion{
1: {
RegionID: 1,
RegionCode: "cph",
Nodes: []*tailcfg.DERPNode{
{Name: "1a", RegionID: 1, HostName: "cph.test.unused", IPv4: "127.0.0.1", IPv6: "none"},
},
},
},
}
derpMap2 := &tailcfg.DERPMap{
Regions: map[int]*tailcfg.DERPRegion{
2: {
RegionID: 2,
RegionCode: "inc",
Nodes: []*tailcfg.DERPNode{
{Name: "2a", RegionID: 2, HostName: "inc.test.unused", IPv4: "127.0.0.1", IPv6: "none"},
},
},
},
}
var reSTUNCalls int
tstest.Replace(t, &reSTUNHookForTests, func(_ string) {
reSTUNCalls++
})
bus := eventbustest.NewBus(t)
ht := health.NewTracker(bus)
c := newConn(t.Logf)
ec := bus.Client("magicsock.Conn.Test")
c.eventClient = ec
c.homeDERPChangedPub = eventbus.Publish[HomeDERPChanged](ec)
c.eventBus = bus
c.health = ht
// With a zero private key and everHadKey=true, ReSTUN returns early without
// spawning updateEndpoints.
c.everHadKey = true
// Should not trigger a ReSTUN.
c.SetDERPMap(derpMap1, false)
if reSTUNCalls != 0 {
t.Errorf("SetDERPMap(dm, doReStun=false): got %d ReSTUN calls, want 0", reSTUNCalls)
}
// doReStun=true: should trigger a ReSTUN.
c.SetDERPMap(derpMap2, true)
if reSTUNCalls != 1 {
t.Errorf("SetDERPMap(dm, doReStun=true): got %d ReSTUN calls, want 1", reSTUNCalls)
}
}
+3 -1
View File
@@ -183,6 +183,7 @@ type Conn struct {
allocRelayEndpointPub *eventbus.Publisher[UDPRelayAllocReq]
portUpdatePub *eventbus.Publisher[router.PortUpdate]
tsmpDiscoKeyAvailablePub *eventbus.Publisher[NewDiscoKeyAvailable]
homeDERPChangedPub *eventbus.Publisher[HomeDERPChanged]
// pconn4 and pconn6 are the underlying UDP sockets used to
// send/receive packets for wireguard and other magicsock
@@ -657,6 +658,7 @@ func NewConn(opts Options) (*Conn, error) {
c.allocRelayEndpointPub = eventbus.Publish[UDPRelayAllocReq](ec)
c.portUpdatePub = eventbus.Publish[router.PortUpdate](ec)
c.tsmpDiscoKeyAvailablePub = eventbus.Publish[NewDiscoKeyAvailable](ec)
c.homeDERPChangedPub = eventbus.Publish[HomeDERPChanged](ec)
eventbus.SubscribeFunc(ec, c.onPortMapChanged)
eventbus.SubscribeFunc(ec, c.onUDPRelayAllocResp)
@@ -1056,7 +1058,7 @@ func (c *Conn) updateNetInfo(ctx context.Context) (*netcheck.Report, error) {
ni.OSHasIPv6.Set(report.OSHasIPv6)
ni.WorkingUDP.Set(report.UDP)
ni.WorkingICMPv4.Set(report.ICMPv4)
ni.PreferredDERP = c.maybeSetNearestDERP(report)
ni.PreferredDERP = c.maybeSetNearestDERP(report, false)
ni.FirewallMode = hostinfo.FirewallMode()
c.callNetInfoCallback(ni)
+27 -5
View File
@@ -203,7 +203,7 @@ func newMagicStackWithKey(t testing.TB, logf logger.Logf, ln nettype.PacketListe
if err != nil {
t.Fatalf("constructing magicsock: %v", err)
}
conn.SetDERPMap(derpMap)
conn.SetDERPMap(derpMap, true)
if err := conn.SetPrivateKey(privateKey); err != nil {
t.Fatalf("setting private key in magicsock: %v", err)
}
@@ -435,7 +435,7 @@ func TestNewConn(t *testing.T) {
t.Fatal("LocalPort returned 0")
}
conn.SetDERPMap(stuntest.DERPMapOf(stunAddr.String()))
conn.SetDERPMap(stuntest.DERPMapOf(stunAddr.String()), true)
conn.SetPrivateKey(key.NewNode())
go func() {
@@ -567,7 +567,7 @@ func TestDERPActiveFuncCalledAfterConnect(t *testing.T) {
}
defer conn.Close()
conn.SetDERPMap(derpMap)
conn.SetDERPMap(derpMap, true)
if err := conn.SetPrivateKey(key.NewNode()); err != nil {
t.Fatal(err)
}
@@ -3080,6 +3080,7 @@ func TestMaybeSetNearestDERP(t *testing.T) {
old int
reportDERP int
connectedToControl bool
force bool
want int
}{
{
@@ -3103,6 +3104,22 @@ func TestMaybeSetNearestDERP(t *testing.T) {
connectedToControl: false, // not connected...
want: 21, // ... but want to change to new DERP
},
{
name: "force_not_connected_with_report_derp",
old: 1,
reportDERP: 21,
connectedToControl: false,
force: true,
want: 21, // force bypasses the no-change-without-control guard
},
{
name: "force_not_connected_no_derp_no_current",
old: 0,
reportDERP: 0,
connectedToControl: false,
force: true,
want: 31, // force + no report DERP → deterministic fallback
},
{
name: "not_connected_with_fallback_and_no_current",
old: 0, // no current DERP
@@ -3127,8 +3144,13 @@ func TestMaybeSetNearestDERP(t *testing.T) {
}
for _, tt := range testCases {
t.Run(tt.name, func(t *testing.T) {
ht := health.NewTracker(eventbustest.NewBus(t))
bus := eventbustest.NewBus(t)
ht := health.NewTracker(bus)
c := newConn(t.Logf)
ec := bus.Client("magicsock.Conn.Test")
c.eventClient = ec
c.homeDERPChangedPub = eventbus.Publish[HomeDERPChanged](ec)
c.eventBus = bus
c.myDerp = tt.old
c.derpMap = derpMap
c.health = ht
@@ -3146,7 +3168,7 @@ func TestMaybeSetNearestDERP(t *testing.T) {
}
}
got := c.maybeSetNearestDERP(report)
got := c.maybeSetNearestDERP(report, tt.force)
if got != tt.want {
t.Errorf("got new DERP region %d, want %d", got, tt.want)
}