wgengine/magicsock: only cache N most recent endpoints per-Addr
If a node is flapping or otherwise generating lots of STUN endpoints, we can end up caching a ton of useless values and sending them to peers. Instead, let's apply a fixed per-Addr limit of endpoints that we cache, so that we're only sending peers up to the N most recent. Updates tailscale/corp#13890 Signed-off-by: Andrew Dunham <andrew@du.nham.ca> Change-Id: I8079a05b44220c46da55016c0e5fc96dd2135ef8
This commit is contained in:
@@ -53,7 +53,6 @@ import (
|
||||
"tailscale.com/util/clientmetric"
|
||||
"tailscale.com/util/mak"
|
||||
"tailscale.com/util/ringbuffer"
|
||||
"tailscale.com/util/set"
|
||||
"tailscale.com/util/uniq"
|
||||
"tailscale.com/version"
|
||||
"tailscale.com/wgengine/capture"
|
||||
@@ -2594,11 +2593,6 @@ const (
|
||||
// STUN-derived endpoint valid for. UDP NAT mappings typically
|
||||
// expire at 30 seconds, so this is a few seconds shy of that.
|
||||
endpointsFreshEnoughDuration = 27 * time.Second
|
||||
|
||||
// endpointTrackerLifetime is how long we continue advertising an
|
||||
// endpoint after we last see it. This is intentionally chosen to be
|
||||
// slightly longer than a full netcheck period.
|
||||
endpointTrackerLifetime = 5*time.Minute + 10*time.Second
|
||||
)
|
||||
|
||||
// Constants that are variable for testing.
|
||||
@@ -2683,79 +2677,6 @@ type discoInfo struct {
|
||||
lastPingTime time.Time
|
||||
}
|
||||
|
||||
type endpointTrackerEntry struct {
|
||||
endpoint tailcfg.Endpoint
|
||||
until time.Time
|
||||
}
|
||||
|
||||
type endpointTracker struct {
|
||||
mu sync.Mutex
|
||||
cache map[netip.AddrPort]endpointTrackerEntry
|
||||
}
|
||||
|
||||
func (et *endpointTracker) update(now time.Time, eps []tailcfg.Endpoint) (epsPlusCached []tailcfg.Endpoint) {
|
||||
epsPlusCached = eps
|
||||
|
||||
var inputEps set.Slice[netip.AddrPort]
|
||||
for _, ep := range eps {
|
||||
inputEps.Add(ep.Addr)
|
||||
}
|
||||
|
||||
et.mu.Lock()
|
||||
defer et.mu.Unlock()
|
||||
|
||||
// Add entries to the return array that aren't already there.
|
||||
for k, ep := range et.cache {
|
||||
// If the endpoint was in the input list, or has expired, skip it.
|
||||
if inputEps.Contains(k) {
|
||||
continue
|
||||
} else if now.After(ep.until) {
|
||||
continue
|
||||
}
|
||||
|
||||
// We haven't seen this endpoint; add to the return array
|
||||
epsPlusCached = append(epsPlusCached, ep.endpoint)
|
||||
}
|
||||
|
||||
// Add entries from the original input array into the cache, and/or
|
||||
// extend the lifetime of entries that are already in the cache.
|
||||
until := now.Add(endpointTrackerLifetime)
|
||||
for _, ep := range eps {
|
||||
et.addLocked(now, ep, until)
|
||||
}
|
||||
|
||||
// Remove everything that has now expired.
|
||||
et.removeExpiredLocked(now)
|
||||
return epsPlusCached
|
||||
}
|
||||
|
||||
// add will store the provided endpoint(s) in the cache for a fixed period of
|
||||
// time, and remove any entries in the cache that have expired.
|
||||
//
|
||||
// et.mu must be held.
|
||||
func (et *endpointTracker) addLocked(now time.Time, ep tailcfg.Endpoint, until time.Time) {
|
||||
// If we already have an entry for this endpoint, update the timeout on
|
||||
// it; otherwise, add it.
|
||||
entry, found := et.cache[ep.Addr]
|
||||
if found {
|
||||
entry.until = until
|
||||
} else {
|
||||
entry = endpointTrackerEntry{ep, until}
|
||||
}
|
||||
mak.Set(&et.cache, ep.Addr, entry)
|
||||
}
|
||||
|
||||
// removeExpired will remove all expired entries from the cache
|
||||
//
|
||||
// et.mu must be held
|
||||
func (et *endpointTracker) removeExpiredLocked(now time.Time) {
|
||||
for k, ep := range et.cache {
|
||||
if now.After(ep.until) {
|
||||
delete(et.cache, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
metricNumPeers = clientmetric.NewGauge("magicsock_netmap_num_peers")
|
||||
metricNumDERPConns = clientmetric.NewGauge("magicsock_num_derp_conns")
|
||||
|
||||
Reference in New Issue
Block a user