wgengine/magicsock: only cache N most recent endpoints per-Addr

If a node is flapping or otherwise generating lots of STUN endpoints, we
can end up caching a ton of useless values and sending them to peers.
Instead, let's apply a fixed per-Addr limit of endpoints that we cache,
so that we're only sending peers up to the N most recent.

Updates tailscale/corp#13890

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Change-Id: I8079a05b44220c46da55016c0e5fc96dd2135ef8
This commit is contained in:
Andrew Dunham
2023-08-14 21:06:38 -07:00
parent 9c4364e0b7
commit 95d776bd8c
5 changed files with 438 additions and 191 deletions
-79
View File
@@ -53,7 +53,6 @@ import (
"tailscale.com/util/clientmetric"
"tailscale.com/util/mak"
"tailscale.com/util/ringbuffer"
"tailscale.com/util/set"
"tailscale.com/util/uniq"
"tailscale.com/version"
"tailscale.com/wgengine/capture"
@@ -2594,11 +2593,6 @@ const (
// STUN-derived endpoint valid for. UDP NAT mappings typically
// expire at 30 seconds, so this is a few seconds shy of that.
endpointsFreshEnoughDuration = 27 * time.Second
// endpointTrackerLifetime is how long we continue advertising an
// endpoint after we last see it. This is intentionally chosen to be
// slightly longer than a full netcheck period.
endpointTrackerLifetime = 5*time.Minute + 10*time.Second
)
// Constants that are variable for testing.
@@ -2683,79 +2677,6 @@ type discoInfo struct {
lastPingTime time.Time
}
type endpointTrackerEntry struct {
endpoint tailcfg.Endpoint
until time.Time
}
type endpointTracker struct {
mu sync.Mutex
cache map[netip.AddrPort]endpointTrackerEntry
}
func (et *endpointTracker) update(now time.Time, eps []tailcfg.Endpoint) (epsPlusCached []tailcfg.Endpoint) {
epsPlusCached = eps
var inputEps set.Slice[netip.AddrPort]
for _, ep := range eps {
inputEps.Add(ep.Addr)
}
et.mu.Lock()
defer et.mu.Unlock()
// Add entries to the return array that aren't already there.
for k, ep := range et.cache {
// If the endpoint was in the input list, or has expired, skip it.
if inputEps.Contains(k) {
continue
} else if now.After(ep.until) {
continue
}
// We haven't seen this endpoint; add to the return array
epsPlusCached = append(epsPlusCached, ep.endpoint)
}
// Add entries from the original input array into the cache, and/or
// extend the lifetime of entries that are already in the cache.
until := now.Add(endpointTrackerLifetime)
for _, ep := range eps {
et.addLocked(now, ep, until)
}
// Remove everything that has now expired.
et.removeExpiredLocked(now)
return epsPlusCached
}
// add will store the provided endpoint(s) in the cache for a fixed period of
// time, and remove any entries in the cache that have expired.
//
// et.mu must be held.
func (et *endpointTracker) addLocked(now time.Time, ep tailcfg.Endpoint, until time.Time) {
// If we already have an entry for this endpoint, update the timeout on
// it; otherwise, add it.
entry, found := et.cache[ep.Addr]
if found {
entry.until = until
} else {
entry = endpointTrackerEntry{ep, until}
}
mak.Set(&et.cache, ep.Addr, entry)
}
// removeExpired will remove all expired entries from the cache
//
// et.mu must be held
func (et *endpointTracker) removeExpiredLocked(now time.Time) {
for k, ep := range et.cache {
if now.After(ep.until) {
delete(et.cache, k)
}
}
}
var (
metricNumPeers = clientmetric.NewGauge("magicsock_netmap_num_peers")
metricNumDERPConns = clientmetric.NewGauge("magicsock_num_derp_conns")