magicsock: invalidate endpoint on trust timeout (#19415)

Endpoint's best address was cleared on trustBestAddrUntil expiry
only if it was a udprelay connection. This generalizes invalidation
to also cover direct UDP.

Trust deadline is checked in two cases:

On disco ping timeout from the endpoint's best address.
Traffic goes DERP-only, heartbeats to the old address stop.
The discovery pings are still in flight, handled by the following.

On disco ping success from an alternative. BestAddr switches to the
working path, trust refreshed, eager discovery stops. The still
in flight pongs are handled by betterAddr().

Updates #19407


Change-Id: Ic41ed18edb4a6e4350a2d49271ba01566a6a6964

Signed-off-by: Alex Valiushko <alexvaliushko@tailscale.com>
This commit is contained in:
Alex Valiushko
2026-04-15 19:22:07 -07:00
committed by GitHub
parent b39ee0445d
commit d3ba1480f5
2 changed files with 237 additions and 8 deletions
+3 -8
View File
@@ -1183,8 +1183,7 @@ func (de *endpoint) discoPingTimeout(txid stun.TxID) {
return
}
bestUntrusted := mono.Now().After(de.trustBestAddrUntil)
if sp.to == de.bestAddr.epAddr && sp.to.vni.IsSet() && bestUntrusted {
// TODO(jwhited): consider applying this to direct UDP paths as well
if sp.to == de.bestAddr.epAddr && bestUntrusted {
de.clearBestAddrLocked()
}
if debugDisco() || !de.bestAddr.ap.IsValid() || bestUntrusted {
@@ -1778,12 +1777,8 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src epAdd
latency: latency,
wireMTU: pingSizeToPktLen(sp.size, sp.to),
}
// TODO(jwhited): consider checking de.trustBestAddrUntil as well. If
// de.bestAddr is untrusted we may want to clear it, otherwise we could
// get stuck with a forever untrusted bestAddr that blackholes, since
// we don't clear direct UDP paths on disco ping timeout (see
// discoPingTimeout).
if betterAddr(thisPong, de.bestAddr) {
bestUntrusted := now.After(de.trustBestAddrUntil)
if betterAddr(thisPong, de.bestAddr) || bestUntrusted {
de.c.logf("magicsock: disco: node %v %v now using %v mtu=%v tx=%x", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.wireMTU, m.TxID[:6])
de.debugUpdates.Add(EndpointChange{
When: time.Now(),
+234
View File
@@ -6,12 +6,16 @@ package magicsock
import (
"net/netip"
"testing"
"testing/synctest"
"time"
"tailscale.com/disco"
"tailscale.com/net/packet"
"tailscale.com/net/stun"
"tailscale.com/tailcfg"
"tailscale.com/tstime/mono"
"tailscale.com/types/key"
"tailscale.com/util/ringlog"
)
func TestProbeUDPLifetimeConfig_Equals(t *testing.T) {
@@ -453,3 +457,233 @@ func Test_endpoint_udpRelayEndpointReady(t *testing.T) {
})
}
}
func Test_endpoint_discoPingTimeout(t *testing.T) {
expired := -1 * time.Hour
valid := 1 * time.Hour
directAddrA := epAddr{ap: netip.MustParseAddrPort("192.0.2.1:7")}
relayAddrA := epAddr{ap: netip.MustParseAddrPort("192.0.2.2:77")}
relayAddrA.vni.Set(1)
directAddrB := epAddr{ap: netip.MustParseAddrPort("192.0.2.3:7")}
relayAddrB := epAddr{ap: netip.MustParseAddrPort("192.0.2.4:77")}
relayAddrB.vni.Set(1)
for _, tc := range []struct {
name string
bestAddr addrQuality
trustBestAddrUntil time.Duration
pingTo epAddr
wantBestAddrCleared bool
}{
{
name: "relay-path-trust-expired",
bestAddr: addrQuality{epAddr: relayAddrA},
trustBestAddrUntil: expired,
pingTo: relayAddrA,
wantBestAddrCleared: true,
},
{
name: "direct-udp-path-trust-expired",
bestAddr: addrQuality{epAddr: directAddrA},
trustBestAddrUntil: expired,
pingTo: directAddrA,
wantBestAddrCleared: true,
},
{
name: "direct-udp-path-trust-valid",
bestAddr: addrQuality{epAddr: directAddrA},
trustBestAddrUntil: valid,
pingTo: directAddrA,
wantBestAddrCleared: false,
},
{
name: "relay-path-trust-valid",
bestAddr: addrQuality{epAddr: relayAddrA},
trustBestAddrUntil: valid,
pingTo: relayAddrA,
wantBestAddrCleared: false,
},
{
name: "ping-to-different-direct-addr-trust-expired",
bestAddr: addrQuality{epAddr: directAddrA},
trustBestAddrUntil: expired,
pingTo: directAddrB,
wantBestAddrCleared: false,
},
{
name: "ping-to-different-relay-addr-trust-expired",
bestAddr: addrQuality{epAddr: relayAddrA},
trustBestAddrUntil: expired,
pingTo: relayAddrB,
wantBestAddrCleared: false,
},
} {
t.Run(tc.name, func(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
now := mono.Now() // synctest to match this to the internal 'now'
c := &Conn{
logf: func(msg string, args ...any) {},
}
c.discoAtomic.Set(key.NewDisco())
de := &endpoint{
c: c,
bestAddr: tc.bestAddr,
trustBestAddrUntil: now.Add(tc.trustBestAddrUntil),
sentPing: make(map[stun.TxID]sentPing),
}
txid := stun.NewTxID()
timer := time.NewTimer(time.Hour)
timer.Stop()
de.sentPing[txid] = sentPing{
to: tc.pingTo,
at: now.Add(-100 * time.Millisecond),
timer: timer,
purpose: pingDiscovery,
}
de.discoPingTimeout(txid)
if tc.wantBestAddrCleared {
if de.bestAddr.ap.IsValid() {
t.Errorf("expected bestAddr to be cleared, but bestAddr.ap is valid: %v", de.bestAddr.ap)
}
if de.trustBestAddrUntil != 0 {
t.Errorf("expected trustBestAddrUntil to be cleared, but got: %v", de.trustBestAddrUntil)
}
} else {
if de.bestAddr != tc.bestAddr {
t.Errorf("expected bestAddr to be unchanged, got: %v, want: %v", de.bestAddr, tc.bestAddr)
}
}
if _, ok := de.sentPing[txid]; ok {
t.Errorf("expected sentPing[txid] to be removed, but it still exists")
}
})
})
}
}
func Test_endpoint_handlePongConnLocked(t *testing.T) {
goodLatency := 50 * time.Millisecond
badLatency := 100 * time.Millisecond
expired := -1 * time.Hour
valid := 1 * time.Hour
directAddrA := epAddr{ap: netip.MustParseAddrPort("192.0.2.1:7")}
directAddrB := epAddr{ap: netip.MustParseAddrPort("192.0.2.2:8")}
derpAddr := epAddr{ap: netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, 0)}
for _, tc := range []struct {
name string
bestAddr addrQuality
trustBestAddrUntil time.Duration
pongFrom epAddr
pongLatency time.Duration
wantBestAddr epAddr
}{
{
name: "better-latency-trust-valid",
bestAddr: addrQuality{epAddr: directAddrA, latency: badLatency},
trustBestAddrUntil: valid,
pongFrom: directAddrB,
pongLatency: goodLatency,
wantBestAddr: directAddrB,
},
{
name: "worse-latency-trust-valid",
bestAddr: addrQuality{epAddr: directAddrA, latency: goodLatency},
trustBestAddrUntil: valid,
pongFrom: directAddrB,
pongLatency: badLatency,
wantBestAddr: directAddrA,
},
{
name: "worse-latency-trust-expired",
bestAddr: addrQuality{epAddr: directAddrA, latency: goodLatency},
trustBestAddrUntil: expired,
pongFrom: directAddrB,
pongLatency: badLatency,
wantBestAddr: directAddrB,
},
{
name: "same-path-trust-expired",
bestAddr: addrQuality{epAddr: directAddrA, latency: badLatency},
trustBestAddrUntil: expired,
pongFrom: directAddrA,
pongLatency: goodLatency, // updated latency
wantBestAddr: directAddrA,
},
{
name: "derp-pong-trust-expired",
bestAddr: addrQuality{epAddr: directAddrA, latency: badLatency},
trustBestAddrUntil: expired,
pongFrom: derpAddr,
pongLatency: goodLatency,
wantBestAddr: directAddrA,
},
{
name: "better-latency-trust-expired",
bestAddr: addrQuality{epAddr: directAddrA, latency: badLatency},
trustBestAddrUntil: expired,
pongFrom: directAddrB,
pongLatency: goodLatency,
wantBestAddr: directAddrB,
},
} {
t.Run(tc.name, func(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
now := mono.Now() // synctest to match this to the internal 'now'
pm := newPeerMap()
c := &Conn{
logf: func(msg string, args ...any) {},
peerMap: pm,
}
c.discoAtomic.Set(key.NewDisco())
de := &endpoint{
c: c,
bestAddr: tc.bestAddr,
bestAddrAt: now.Add(-5 * time.Minute),
trustBestAddrUntil: now.Add(tc.trustBestAddrUntil),
sentPing: make(map[stun.TxID]sentPing),
endpointState: make(map[netip.AddrPort]*endpointState),
debugUpdates: ringlog.New[EndpointChange](10),
}
txid := stun.NewTxID()
pong := &disco.Pong{
TxID: txid,
Src: tc.pongFrom.ap,
}
timer := time.NewTimer(time.Hour)
timer.Stop()
de.sentPing[txid] = sentPing{
to: tc.pongFrom,
at: now.Add(-tc.pongLatency),
timer: timer,
purpose: pingDiscovery,
}
if tc.pongFrom.ap.Addr() != tailcfg.DerpMagicIPAddr && !tc.pongFrom.vni.IsSet() {
de.endpointState[tc.pongFrom.ap] = &endpointState{}
}
di := &discoInfo{
discoKey: key.NewDisco().Public(),
discoShort: "test",
}
knownTxID := de.handlePongConnLocked(pong, di, tc.pongFrom)
if !knownTxID {
t.Errorf("expected knownTxID to be true, got false")
}
if de.bestAddr.epAddr != tc.wantBestAddr {
t.Errorf("expected bestAddr.epAddr to be %v, got: %v", tc.wantBestAddr, de.bestAddr.epAddr)
}
if tc.pongFrom == tc.bestAddr.epAddr && de.bestAddr.latency-tc.pongLatency > 0 {
t.Errorf("expected latency to be %v, got: %v", tc.pongLatency, de.bestAddr.latency)
}
if tc.pongFrom != derpAddr && de.trustBestAddrUntil.Before(now) {
t.Errorf("expected trustBestAddrUntil to be refreshed, but it's in the past: %v", de.trustBestAddrUntil)
}
if _, ok := de.sentPing[txid]; ok {
t.Errorf("expected sentPing[txid] to be removed, but it still exists")
}
})
})
}
}