WIP: rebase for 2026-05-18 #7

Draft
codinget wants to merge 234 commits from rebase/2026-05-18 into webnet
5 changed files with 360 additions and 193 deletions
Showing only changes of commit 311dd3839d - Show all commits
+2 -2
View File
@@ -108,8 +108,8 @@ func (c *Conn) ServeHTTPDebug(w http.ResponseWriter, r *http.Request) {
}
sort.Slice(ent, func(i, j int) bool { return ent[i].pub.Less(ent[j].pub) })
peers := map[key.NodePublic]tailcfg.NodeView{}
for _, p := range c.peers.All() {
peers := make(map[key.NodePublic]tailcfg.NodeView, len(c.peersByID))
for _, p := range c.peersByID {
peers[p.Key()] = p
}
+2 -2
View File
@@ -897,7 +897,7 @@ func (de *endpoint) wantUDPRelayPathDiscoveryLocked(now mono.Time) bool {
if runtime.GOOS == "js" {
return false
}
if !de.c.hasPeerRelayServers.Load() {
if !de.c.relayManager.hasPeerRelayServers.Load() {
// Changes in this value between its access and a call to
// [endpoint.discoverUDPRelayPathsLocked] are fine, we will eventually
// do the "right" thing during future path discovery. The worst case is
@@ -2093,7 +2093,7 @@ func (de *endpoint) setDERPHome(regionID uint16) {
de.mu.Lock()
defer de.mu.Unlock()
de.derpAddr = netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, uint16(regionID))
if de.c.hasPeerRelayServers.Load() {
if de.c.relayManager.hasPeerRelayServers.Load() {
de.c.relayManager.handleDERPHomeChange(de.publicKey, regionID)
}
}
+285 -183
View File
@@ -269,12 +269,6 @@ type Conn struct {
// captureHook, if non-nil, is the pcap logging callback when capturing.
captureHook syncs.AtomicValue[packet.CaptureCallback]
// hasPeerRelayServers is whether [relayManager] is configured with at least
// one peer relay server via [relayManager.handleRelayServersSet]. It exists
// to suppress calls into [relayManager] leading to wasted work involving
// channel operations and goroutine creation.
hasPeerRelayServers atomic.Bool
// discoAtomic is the current disco private and public keypair for this conn.
discoAtomic discoAtomic
@@ -361,18 +355,18 @@ type Conn struct {
// magicsock could do with any complexity reduction it can get.
netInfoLast *tailcfg.NetInfo
derpMap *tailcfg.DERPMap // nil (or zero regions/nodes) means DERP is disabled
self tailcfg.NodeView // from last SetNetworkMap
peers views.Slice[tailcfg.NodeView] // from last SetNetworkMap, sorted by Node.ID; Note: [netmap.NodeMutation]'s rx'd in UpdateNetmapDelta are never applied
filt *filter.Filter // from last SetFilter
relayClientEnabled bool // whether we can allocate UDP relay endpoints on UDP relay servers or receive CallMeMaybeVia messages from peers
lastFlags debugFlags // at time of last SetNetworkMap
privateKey key.NodePrivate // WireGuard private key for this node
everHadKey bool // whether we ever had a non-zero private key
myDerp int // nearest DERP region ID; 0 means none/unknown
homeless bool // if true, don't try to find & stay conneted to a DERP home (myDerp will stay 0)
derpStarted chan struct{} // closed on first connection to DERP; for tests & cleaner Close
activeDerp map[int]activeDerp // DERP regionID -> connection to a node in that region
derpMap *tailcfg.DERPMap // nil (or zero regions/nodes) means DERP is disabled
self tailcfg.NodeView // from last SetNetworkMap
peersByID map[tailcfg.NodeID]tailcfg.NodeView // current peer set, keyed by NodeID. Maintained by SetNetworkMap/UpsertPeer/RemovePeer. Note: per-field NodeMutation patches received in UpdateNetmapDelta are never applied to these snapshots.
filt *filter.Filter // from last SetFilter
relayClientEnabled bool // whether we can allocate UDP relay endpoints on UDP relay servers or receive CallMeMaybeVia messages from peers
lastFlags debugFlags // at time of last SetNetworkMap
privateKey key.NodePrivate // WireGuard private key for this node
everHadKey bool // whether we ever had a non-zero private key
myDerp int // nearest DERP region ID; 0 means none/unknown
homeless bool // if true, don't try to find & stay conneted to a DERP home (myDerp will stay 0)
derpStarted chan struct{} // closed on first connection to DERP; for tests & cleaner Close
activeDerp map[int]activeDerp // DERP regionID -> connection to a node in that region
prevDerp map[int]*syncs.WaitGroupChan
// derpRoute contains optional alternate routes to use as an
@@ -2430,24 +2424,12 @@ func (c *Conn) handleDiscoMessage(msg []byte, src epAddr, shouldBeRelayHandshake
if c.filt == nil {
return
}
// Binary search of peers is O(log n) while c.mu is held.
// TODO: We might be able to use ep.nodeAddr instead of all addresses,
// or we might be able to release c.mu before doing this work. Keep it
// simple and slow for now. c.peers.AsSlice is a copy. We may need to
// write our own binary search for a [views.Slice].
peerI, ok := slices.BinarySearchFunc(c.peers.AsSlice(), ep.nodeID, func(peer tailcfg.NodeView, target tailcfg.NodeID) int {
if peer.ID() < target {
return -1
} else if peer.ID() > target {
return 1
}
return 0
})
peer, ok := c.peersByID[ep.nodeID]
if !ok {
// unexpected
return
}
if !nodeHasCap(c.filt, c.peers.At(peerI), c.self, tailcfg.PeerCapabilityRelay) {
if !nodeHasCap(c.filt, peer, c.self, tailcfg.PeerCapabilityRelay) {
return
}
// [Conn.mu] must not be held while publishing, or [Conn.onUDPRelayAllocResp]
@@ -2784,18 +2766,6 @@ func (c *Conn) UpdatePeers(newPeers set.Set[key.NodePublic]) {
}
}
func nodesEqual(x, y views.Slice[tailcfg.NodeView]) bool {
if x.Len() != y.Len() {
return false
}
for i := range x.Len() {
if !x.At(i).Equal(y.At(i)) {
return false
}
}
return true
}
// debugRingBufferSize returns a maximum size for our set of endpoint ring
// buffers by assuming that a single large update is ~500 bytes, and that we
// want to not use more than 1MiB of memory on phones / 4MiB on other devices.
@@ -2883,7 +2853,7 @@ func (c *Conn) SetFilter(f *filter.Filter) {
c.mu.Lock()
c.filt = f
self := c.self
peers := c.peers
peers := c.peerSnapshotLocked()
relayClientEnabled := c.relayClientEnabled
c.mu.Unlock() // release c.mu before potentially calling c.updateRelayServersSet which is O(m * n)
@@ -2897,11 +2867,26 @@ func (c *Conn) SetFilter(f *filter.Filter) {
c.updateRelayServersSet(f, self, peers)
}
// peerSnapshotLocked returns a freshly-allocated slice of the current peers.
// It's used by callers that need to pass peer state to an O(m * n) callee
// (like [Conn.updateRelayServersSet]) after releasing c.mu. c.mu must be held.
func (c *Conn) peerSnapshotLocked() []tailcfg.NodeView {
if len(c.peersByID) == 0 {
return nil
}
out := make([]tailcfg.NodeView, 0, len(c.peersByID))
for _, p := range c.peersByID {
out = append(out, p)
}
return out
}
// updateRelayServersSet iterates all peers and self, evaluating filt for each
// one in order to determine which are relay server candidates. filt, self, and
// peers are passed as args (vs c.mu-guarded fields) to enable callers to
// release c.mu before calling as this is O(m * n) (we iterate all cap rules 'm'
// in filt for every peer 'n').
// in filt for every peer 'n'). peers must be a snapshot owned by the caller;
// this function does not retain it after return.
//
// Calls to updateRelayServersSet must never run concurrent to
// [endpoint.setDERPHome], otherwise [candidatePeerRelay] DERP home changes may
@@ -2913,9 +2898,9 @@ func (c *Conn) SetFilter(f *filter.Filter) {
// them.
// 2. Moving this work upstream into [nodeBackend] or similar, and publishing
// the computed result over the eventbus instead.
func (c *Conn) updateRelayServersSet(filt *filter.Filter, self tailcfg.NodeView, peers views.Slice[tailcfg.NodeView]) {
func (c *Conn) updateRelayServersSet(filt *filter.Filter, self tailcfg.NodeView, peers []tailcfg.NodeView) {
relayServers := make(set.Set[candidatePeerRelay])
nodes := append(peers.AsSlice(), self)
nodes := append(peers, self)
for _, maybeCandidate := range nodes {
if maybeCandidate.ID() != self.ID() && !capVerIsRelayCapable(maybeCandidate.Cap()) {
// If maybeCandidate's [tailcfg.CapabilityVersion] is not relay-capable,
@@ -2933,12 +2918,9 @@ func (c *Conn) updateRelayServersSet(filt *filter.Filter, self tailcfg.NodeView,
derpHomeRegionID: uint16(maybeCandidate.HomeDERP()),
})
}
// [relayManager]'s run loop updates [relayManager.hasPeerRelayServers]
// to reflect the new server count.
c.relayManager.handleRelayServersSet(relayServers)
if len(relayServers) > 0 {
c.hasPeerRelayServers.Store(true)
} else {
c.hasPeerRelayServers.Store(false)
}
}
// nodeHasCap returns true if src has cap on dst, otherwise it returns false.
@@ -2990,6 +2972,12 @@ func (c *candidatePeerRelay) isValid() bool {
// magicsock has the current state before subsequent operations proceed.
//
// self may be invalid if there's no network map.
//
// SetNetworkMap takes the full peer list and walks all of it. For incremental
// updates where only a single peer changes, prefer the O(1) [Conn.UpsertPeer]
// and [Conn.RemovePeer] methods. SetNetworkMap remains the right call for the
// initial netmap and for changes to self or to global state (filter, DERP,
// etc.) that aren't covered by the per-peer methods.
func (c *Conn) SetNetworkMap(self tailcfg.NodeView, peers []tailcfg.NodeView) {
peersChanged := c.updateNodes(self, peers)
@@ -3002,7 +2990,7 @@ func (c *Conn) SetNetworkMap(self tailcfg.NodeView, peers []tailcfg.NodeView) {
c.relayClientEnabled = relayClientEnabled
filt := c.filt
selfView := c.self
peersView := c.peers
peersSnap := c.peerSnapshotLocked()
isClosed := c.closed
c.mu.Unlock() // release c.mu before potentially calling c.updateRelayServersSet which is O(m * n)
@@ -3012,16 +3000,16 @@ func (c *Conn) SetNetworkMap(self tailcfg.NodeView, peers []tailcfg.NodeView) {
if peersChanged || relayClientChanged {
if !relayClientEnabled {
// [relayManager]'s run loop updates [relayManager.hasPeerRelayServers].
c.relayManager.handleRelayServersSet(nil)
c.hasPeerRelayServers.Store(false)
} else {
c.updateRelayServersSet(filt, selfView, peersView)
c.updateRelayServersSet(filt, selfView, peersSnap)
}
}
}
// updateNodes updates [Conn] to reflect the given self node and peers.
// It reports whether the peers were changed from before.
// It reports whether the peer set (membership or any field) changed.
func (c *Conn) updateNodes(self tailcfg.NodeView, peers []tailcfg.NodeView) (peersChanged bool) {
c.mu.Lock()
defer c.mu.Unlock()
@@ -3030,13 +3018,9 @@ func (c *Conn) updateNodes(self tailcfg.NodeView, peers []tailcfg.NodeView) (pee
return false
}
priorPeers := c.peers
metricNumPeers.Set(int64(len(peers)))
// Update c.self & c.peers regardless, before the following early return.
c.self = self
curPeers := views.SliceOf(peers)
c.peers = curPeers
// [debugFlags] are mutable in [Conn.SetSilentDisco] &
// [Conn.SetProbeUDPLifetime]. These setters are passed [controlknobs.Knobs]
@@ -3049,137 +3033,43 @@ func (c *Conn) updateNodes(self tailcfg.NodeView, peers []tailcfg.NodeView) (pee
// TODO: mutate [debugFlags] here instead of in various [Conn] setters.
flags := c.debugFlagsLocked()
peersChanged = !nodesEqual(priorPeers, curPeers)
if !peersChanged && c.lastFlags == flags {
// The rest of this function is all adjusting state for peers that have
// changed. But if the set of peers is equal and the debug flags (for
// silent disco and probe UDP lifetime) haven't changed, there is no
// need to do anything else.
return
// Fast path: if the peer set and every peer's NodeView are unchanged,
// and flags are unchanged, skip all further work.
if c.lastFlags == flags && len(peers) == len(c.peersByID) {
allSame := true
for _, n := range peers {
if prev, ok := c.peersByID[n.ID()]; !ok || !prev.Equal(n) {
allSame = false
break
}
}
if allSame {
return false
}
}
c.lastFlags = flags
c.logf("[v1] magicsock: got updated network map; %d peers", len(peers))
entriesPerBuffer := debugRingBufferSize(len(peers))
// Try a pass of just upserting nodes and creating missing
// endpoints. If the set of nodes is the same, this is an
// efficient alloc-free update. If the set of nodes is different,
// we'll fall through to the next pass, which allocates but can
// handle full set updates.
// Build the new peer map while upserting each peer.
newPeers := make(map[tailcfg.NodeID]tailcfg.NodeView, len(peers))
for _, n := range peers {
if n.ID() == 0 {
devPanicf("node with zero ID")
continue
}
if n.Key().IsZero() {
devPanicf("node with zero key")
continue
}
ep, ok := c.peerMap.endpointForNodeID(n.ID())
if ok && ep.publicKey != n.Key() {
// The node rotated public keys. Delete the old endpoint and create
// it anew.
c.peerMap.deleteEndpoint(ep)
ok = false
}
if ok {
// At this point we're modifying an existing endpoint (ep) whose
// public key and nodeID match n. Its other fields (such as disco
// key or endpoints) might've changed.
if n.DiscoKey().IsZero() && !n.IsWireGuardOnly() {
// Discokey transitioned from non-zero to zero? This should not
// happen in the wild, however it could mean:
// 1. A node was downgraded from post 0.100 to pre 0.100.
// 2. A Tailscale node key was extracted and used on a
// non-Tailscale node (should not enter here due to the
// IsWireGuardOnly check)
// 3. The server is misbehaving.
c.peerMap.deleteEndpoint(ep)
continue
}
var oldDiscoKey key.DiscoPublic
if epDisco := ep.disco.Load(); epDisco != nil {
oldDiscoKey = epDisco.key
}
ep.updateFromNode(n, flags.heartbeatDisabled, flags.probeUDPLifetimeOn)
c.peerMap.upsertEndpoint(ep, oldDiscoKey) // maybe update discokey mappings in peerMap
continue
}
if ep, ok := c.peerMap.endpointForNodeKey(n.Key()); ok {
// At this point n.Key() should be for a key we've never seen before. If
// ok was true above, it was an update to an existing matching key and
// we don't get this far. If ok was false above, that means it's a key
// that differs from the one the NodeID had. But double check.
if ep.nodeID != n.ID() {
// Server error. This is known to be a particular issue for Mullvad
// nodes (http://go/corp/27300), so log a distinct error for the
// Mullvad and non-Mullvad cases. The error will be logged either way,
// so an approximate heuristic is fine.
//
// When #27300 is fixed, we can delete this branch and log the same
// panic for any public key moving.
if strings.HasSuffix(n.Name(), ".mullvad.ts.net.") {
devPanicf("public key moved between Mullvad nodeIDs (old=%v new=%v, key=%s); see http://go/corp/27300", ep.nodeID, n.ID(), n.Key().String())
} else {
devPanicf("public key moved between nodeIDs (old=%v new=%v, key=%s)", ep.nodeID, n.ID(), n.Key().String())
}
} else {
// Internal data structures out of sync.
devPanicf("public key found in peerMap but not by nodeID")
}
continue
}
if n.DiscoKey().IsZero() && !n.IsWireGuardOnly() {
// Ancient pre-0.100 node, which does not have a disco key.
// No longer supported.
continue
}
ep = &endpoint{
c: c,
nodeID: n.ID(),
publicKey: n.Key(),
publicKeyHex: n.Key().UntypedHexString(),
sentPing: map[stun.TxID]sentPing{},
endpointState: map[netip.AddrPort]*endpointState{},
heartbeatDisabled: flags.heartbeatDisabled,
isWireguardOnly: n.IsWireGuardOnly(),
}
switch runtime.GOOS {
case "ios", "android":
// Omit, to save memory. Prior to 2024-03-20 we used to limit it to
// ~1MB on mobile but we never used the data so the memory was just
// wasted.
default:
ep.debugUpdates = ringlog.New[EndpointChange](entriesPerBuffer)
}
if n.Addresses().Len() > 0 {
ep.nodeAddr = n.Addresses().At(0).Addr()
}
ep.initFakeUDPAddr()
ep.updateDiscoKey(n.DiscoKey())
if debugPeerMap() {
c.logEndpointCreated(n)
}
ep.updateFromNode(n, flags.heartbeatDisabled, flags.probeUDPLifetimeOn)
c.peerMap.upsertEndpoint(ep, key.DiscoPublic{})
newPeers[n.ID()] = n
c.upsertPeerLocked(n, flags, entriesPerBuffer)
}
if len(newPeers) != len(peers) {
// Duplicate NodeIDs in the input shouldn't happen, but log if so.
c.logf("[unexpected] magicsock.updateNodes: %d peers input but %d unique IDs", len(peers), len(newPeers))
}
c.peersByID = newPeers
// If the set of nodes changed since the last SetNetworkMap, the
// upsert loop just above made c.peerMap contain the union of the
// old and new peers - which will be larger than the set from the
// current netmap. If that happens, go through the allocful
// deletion path to clean up moribund nodes.
if c.peerMap.nodeCount() != len(peers) {
// If the upsert pass left stale endpoints in peerMap (peers removed
// relative to before), clean them up.
if c.peerMap.nodeCount() != len(newPeers) {
keep := set.Set[key.NodePublic]{}
for _, n := range peers {
for _, n := range newPeers {
keep.Add(n.Key())
}
c.peerMap.forEachEndpoint(func(ep *endpoint) {
@@ -3189,14 +3079,226 @@ func (c *Conn) updateNodes(self tailcfg.NodeView, peers []tailcfg.NodeView) (pee
})
}
// discokeys might have changed in the above. Discard unused info.
// discokeys might have changed above. Discard unused info.
for dk := range c.discoInfo {
if !c.peerMap.knownPeerDiscoKey(dk) {
delete(c.discoInfo, dk)
}
}
return peersChanged
return true
}
// upsertPeerLocked upserts a single peer's endpoint in c.peerMap. It is the
// per-peer body shared by [Conn.SetNetworkMap]'s upsert pass and by the
// efficient per-peer [Conn.UpsertPeer] path.
//
// c.mu must be held.
func (c *Conn) upsertPeerLocked(n tailcfg.NodeView, flags debugFlags, entriesPerBuffer int) {
if n.ID() == 0 {
devPanicf("node with zero ID")
return
}
if n.Key().IsZero() {
devPanicf("node with zero key")
return
}
ep, ok := c.peerMap.endpointForNodeID(n.ID())
if ok && ep.publicKey != n.Key() {
// The node rotated public keys. Delete the old endpoint and create
// it anew.
c.peerMap.deleteEndpoint(ep)
ok = false
}
if ok {
// At this point we're modifying an existing endpoint (ep) whose
// public key and nodeID match n. Its other fields (such as disco
// key or endpoints) might've changed.
if n.DiscoKey().IsZero() && !n.IsWireGuardOnly() {
// Discokey transitioned from non-zero to zero? This should not
// happen in the wild, however it could mean:
// 1. A node was downgraded from post 0.100 to pre 0.100.
// 2. A Tailscale node key was extracted and used on a
// non-Tailscale node (should not enter here due to the
// IsWireGuardOnly check)
// 3. The server is misbehaving.
c.peerMap.deleteEndpoint(ep)
return
}
var oldDiscoKey key.DiscoPublic
if epDisco := ep.disco.Load(); epDisco != nil {
oldDiscoKey = epDisco.key
}
ep.updateFromNode(n, flags.heartbeatDisabled, flags.probeUDPLifetimeOn)
c.peerMap.upsertEndpoint(ep, oldDiscoKey) // maybe update discokey mappings in peerMap
return
}
if ep, ok := c.peerMap.endpointForNodeKey(n.Key()); ok {
// At this point n.Key() should be for a key we've never seen before. If
// ok was true above, it was an update to an existing matching key and
// we don't get this far. If ok was false above, that means it's a key
// that differs from the one the NodeID had. But double check.
if ep.nodeID != n.ID() {
// Server error. This is known to be a particular issue for Mullvad
// nodes (http://go/corp/27300), so log a distinct error for the
// Mullvad and non-Mullvad cases. The error will be logged either way,
// so an approximate heuristic is fine.
//
// When #27300 is fixed, we can delete this branch and log the same
// panic for any public key moving.
if strings.HasSuffix(n.Name(), ".mullvad.ts.net.") {
devPanicf("public key moved between Mullvad nodeIDs (old=%v new=%v, key=%s); see http://go/corp/27300", ep.nodeID, n.ID(), n.Key().String())
} else {
devPanicf("public key moved between nodeIDs (old=%v new=%v, key=%s)", ep.nodeID, n.ID(), n.Key().String())
}
} else {
// Internal data structures out of sync.
devPanicf("public key found in peerMap but not by nodeID")
}
return
}
if n.DiscoKey().IsZero() && !n.IsWireGuardOnly() {
// Ancient pre-0.100 node, which does not have a disco key.
// No longer supported.
return
}
ep = &endpoint{
c: c,
nodeID: n.ID(),
publicKey: n.Key(),
publicKeyHex: n.Key().UntypedHexString(),
sentPing: map[stun.TxID]sentPing{},
endpointState: map[netip.AddrPort]*endpointState{},
heartbeatDisabled: flags.heartbeatDisabled,
isWireguardOnly: n.IsWireGuardOnly(),
}
switch runtime.GOOS {
case "ios", "android":
// Omit, to save memory. Prior to 2024-03-20 we used to limit it to
// ~1MB on mobile but we never used the data so the memory was just
// wasted.
default:
ep.debugUpdates = ringlog.New[EndpointChange](entriesPerBuffer)
}
if n.Addresses().Len() > 0 {
ep.nodeAddr = n.Addresses().At(0).Addr()
}
ep.initFakeUDPAddr()
ep.updateDiscoKey(n.DiscoKey())
if debugPeerMap() {
c.logEndpointCreated(n)
}
ep.updateFromNode(n, flags.heartbeatDisabled, flags.probeUDPLifetimeOn)
c.peerMap.upsertEndpoint(ep, key.DiscoPublic{})
}
// UpsertPeer adds or updates a single peer in c. It is the efficient
// O(1)-per-peer alternative to [Conn.SetNetworkMap] when a single peer was
// added or its fields changed. The caller is responsible for serializing
// UpsertPeer/RemovePeer/SetNetworkMap calls relative to one another.
//
// UpsertPeer updates the relay-server set incrementally (O(1)) when the
// upserted peer's relay candidacy changed, rather than rebuilding the
// whole set with [Conn.updateRelayServersSet].
func (c *Conn) UpsertPeer(n tailcfg.NodeView) {
c.mu.Lock()
if c.closed {
c.mu.Unlock()
return
}
if n.ID() == 0 {
c.mu.Unlock()
devPanicf("UpsertPeer: node with zero ID")
return
}
flags := c.debugFlagsLocked()
c.peersByID[n.ID()] = n
c.upsertPeerLocked(n, flags, debugRingBufferSize(len(c.peersByID)))
var relayUpsert candidatePeerRelay
relayQualifies := false
if c.relayClientEnabled {
relayQualifies, relayUpsert = c.relayCandidateLocked(n)
}
relayClientEnabled := c.relayClientEnabled
c.mu.Unlock()
if relayClientEnabled {
if relayQualifies {
c.relayManager.handleRelayServerUpsert(relayUpsert)
} else {
// The peer may have previously qualified; remove covers that
// case and is a no-op otherwise.
c.relayManager.handleRelayServerRemove(n.Key())
}
}
}
// RemovePeer removes a single peer from c. It is the efficient
// O(1)-per-peer alternative to [Conn.SetNetworkMap] when a single peer was
// removed. The caller is responsible for serializing UpsertPeer/RemovePeer/
// SetNetworkMap calls relative to one another.
func (c *Conn) RemovePeer(nid tailcfg.NodeID) {
c.mu.Lock()
if c.closed {
c.mu.Unlock()
return
}
prev, ok := c.peersByID[nid]
if !ok {
c.mu.Unlock()
return
}
delete(c.peersByID, nid)
if ep, ok := c.peerMap.endpointForNodeID(nid); ok {
c.peerMap.deleteEndpoint(ep)
}
// If the peer we just removed held the only reference to its disco
// key, drop the now-orphaned c.discoInfo entry. No need to scan the
// whole map — only this peer's disco key can have become unreferenced
// by this single removal.
if dk := prev.DiscoKey(); !dk.IsZero() && !c.peerMap.knownPeerDiscoKey(dk) {
delete(c.discoInfo, dk)
}
relayClientEnabled := c.relayClientEnabled
c.mu.Unlock()
if relayClientEnabled {
// Tell the relay manager to drop the peer. The run loop no-ops
// this if the peer wasn't a relay server.
c.relayManager.handleRelayServerRemove(prev.Key())
}
}
// relayCandidateLocked reports whether peer p is eligible to be a relay
// server candidate for self, and if so returns the [candidatePeerRelay]
// that would be added to the relay-server set. c.mu must be held.
//
// It mirrors the per-peer predicate in [Conn.updateRelayServersSet].
func (c *Conn) relayCandidateLocked(p tailcfg.NodeView) (ok bool, cp candidatePeerRelay) {
if !p.Valid() {
return false, candidatePeerRelay{}
}
// The cap-version gate in updateRelayServersSet only applies to peers
// (not self). This helper is only called for peers, so always check.
if !capVerIsRelayCapable(p.Cap()) {
return false, candidatePeerRelay{}
}
if !nodeHasCap(c.filt, p, c.self, tailcfg.PeerCapabilityRelayTarget) {
return false, candidatePeerRelay{}
}
return true, candidatePeerRelay{
nodeKey: p.Key(),
discoKey: p.DiscoKey(),
derpHomeRegionID: uint16(p.HomeDERP()),
}
}
func devPanicf(format string, a ...any) {
+5 -6
View File
@@ -62,7 +62,6 @@ import (
"tailscale.com/types/netlogtype"
"tailscale.com/types/netmap"
"tailscale.com/types/nettype"
"tailscale.com/types/views"
"tailscale.com/util/cibuild"
"tailscale.com/util/clientmetric"
"tailscale.com/util/eventbus"
@@ -3847,7 +3846,7 @@ func TestConn_SetNetworkMap_updateRelayServersSet(t *testing.T) {
c.filt = tt.filt
if len(tt.wantRelayServers) == 0 {
// So we can verify it gets flipped back.
c.hasPeerRelayServers.Store(true)
c.relayManager.hasPeerRelayServers.Store(true)
}
c.SetNetworkMap(tt.self, tt.peers)
@@ -3855,8 +3854,8 @@ func TestConn_SetNetworkMap_updateRelayServersSet(t *testing.T) {
if !got.Equal(tt.wantRelayServers) {
t.Fatalf("got: %v != want: %v", got, tt.wantRelayServers)
}
if len(tt.wantRelayServers) > 0 != c.hasPeerRelayServers.Load() {
t.Fatalf("c.hasPeerRelayServers: %v != len(tt.wantRelayServers) > 0: %v", c.hasPeerRelayServers.Load(), len(tt.wantRelayServers) > 0)
if got, want := c.relayManager.hasPeerRelayServers.Load(), len(tt.wantRelayServers) > 0; got != want {
t.Fatalf("c.relayManager.hasPeerRelayServers: %v != len(tt.wantRelayServers) > 0: %v", got, want)
}
if c.relayClientEnabled != tt.wantRelayClientEnabled {
t.Fatalf("c.relayClientEnabled: %v != wantRelayClientEnabled: %v", c.relayClientEnabled, tt.wantRelayClientEnabled)
@@ -4422,7 +4421,7 @@ func TestReceiveTSMPDiscoKeyAdvertisement(t *testing.T) {
netip.MustParsePrefix("100.64.0.1/32"),
},
}).View()
conn.peers = views.SliceOf([]tailcfg.NodeView{nodeView})
conn.peersByID = map[tailcfg.NodeID]tailcfg.NodeView{nodeView.ID(): nodeView}
conn.mu.Unlock()
conn.peerMap.upsertEndpoint(ep, key.DiscoPublic{})
@@ -4468,7 +4467,7 @@ func TestSendingTSMPDiscoTimer(t *testing.T) {
netip.MustParsePrefix("100.64.0.1/32"),
},
}).View()
conn.peers = views.SliceOf([]tailcfg.NodeView{nodeView})
conn.peersByID = map[tailcfg.NodeID]tailcfg.NodeView{nodeView.ID(): nodeView}
conn.mu.Unlock()
conn.peerMap.upsertEndpoint(ep, key.DiscoPublic{})
+66
View File
@@ -9,6 +9,7 @@ import (
"fmt"
"net/netip"
"sync"
"sync/atomic"
"time"
"tailscale.com/disco"
@@ -34,6 +35,14 @@ import (
type relayManager struct {
initOnce sync.Once
// hasPeerRelayServers is whether relayManager is configured with at
// least one peer relay server via [relayManager.handleRelayServersSet]
// (or per-peer variants). Exposed as an atomic so [endpoint] hot paths
// can short-circuit when there are no relay servers without taking any
// lock or entering the run loop. Written only from runLoop() via
// [relayManager.publishHasServersRunLoop].
hasPeerRelayServers atomic.Bool
// ===================================================================
// The following fields are owned by a single goroutine, runLoop().
serversByNodeKey map[key.NodePublic]candidatePeerRelay
@@ -56,6 +65,8 @@ type relayManager struct {
newServerEndpointCh chan newRelayServerEndpointEvent
rxDiscoMsgCh chan relayDiscoMsgEvent
serversCh chan set.Set[candidatePeerRelay]
serverUpsertCh chan candidatePeerRelay
serverRemoveCh chan key.NodePublic
getServersCh chan chan set.Set[candidatePeerRelay]
derpHomeChangeCh chan derpHomeChangeEvent
@@ -228,6 +239,16 @@ func (r *relayManager) runLoop() {
if !r.hasActiveWorkRunLoop() {
return
}
case upsert := <-r.serverUpsertCh:
r.handleServerUpsertRunLoop(upsert)
if !r.hasActiveWorkRunLoop() {
return
}
case nk := <-r.serverRemoveCh:
r.handleServerRemoveRunLoop(nk)
if !r.hasActiveWorkRunLoop() {
return
}
case getServersCh := <-r.getServersCh:
r.handleGetServersRunLoop(getServersCh)
if !r.hasActiveWorkRunLoop() {
@@ -265,6 +286,34 @@ func (r *relayManager) handleServersUpdateRunLoop(update set.Set[candidatePeerRe
for _, v := range update.Slice() {
r.serversByNodeKey[v.nodeKey] = v
}
r.publishHasServersRunLoop()
}
// handleServerUpsertRunLoop inserts or updates cp in serversByNodeKey. It is
// the per-peer analog of [relayManager.handleServersUpdateRunLoop] used by
// [Conn.UpsertPeer].
func (r *relayManager) handleServerUpsertRunLoop(cp candidatePeerRelay) {
r.serversByNodeKey[cp.nodeKey] = cp
r.publishHasServersRunLoop()
}
// handleServerRemoveRunLoop deletes nk from serversByNodeKey. It is a no-op
// if nk isn't currently a known server. It is the per-peer analog of
// [relayManager.handleServersUpdateRunLoop] used by [Conn.RemovePeer] and by
// [Conn.UpsertPeer] when a peer is upserted with fields that make it no
// longer a relay candidate.
func (r *relayManager) handleServerRemoveRunLoop(nk key.NodePublic) {
if _, ok := r.serversByNodeKey[nk]; !ok {
return
}
delete(r.serversByNodeKey, nk)
r.publishHasServersRunLoop()
}
// publishHasServersRunLoop updates [relayManager.hasPeerRelayServers] to
// reflect whether any relay servers are currently known.
func (r *relayManager) publishHasServersRunLoop() {
r.hasPeerRelayServers.Store(len(r.serversByNodeKey) > 0)
}
type relayDiscoMsgEvent struct {
@@ -330,6 +379,8 @@ func (r *relayManager) init() {
r.newServerEndpointCh = make(chan newRelayServerEndpointEvent)
r.rxDiscoMsgCh = make(chan relayDiscoMsgEvent)
r.serversCh = make(chan set.Set[candidatePeerRelay])
r.serverUpsertCh = make(chan candidatePeerRelay)
r.serverRemoveCh = make(chan key.NodePublic)
r.getServersCh = make(chan chan set.Set[candidatePeerRelay])
r.derpHomeChangeCh = make(chan derpHomeChangeEvent)
r.runLoopStoppedCh = make(chan struct{}, 1)
@@ -436,6 +487,21 @@ func (r *relayManager) handleRelayServersSet(servers set.Set[candidatePeerRelay]
relayManagerInputEvent(r, nil, &r.serversCh, servers)
}
// handleRelayServerUpsert is the O(1) per-peer variant of
// [relayManager.handleRelayServersSet]: it inserts or updates a single
// relay server entry.
func (r *relayManager) handleRelayServerUpsert(cp candidatePeerRelay) {
relayManagerInputEvent(r, nil, &r.serverUpsertCh, cp)
}
// handleRelayServerRemove is the O(1) per-peer variant of
// [relayManager.handleRelayServersSet]: it removes a single relay server
// entry by node key. It is a no-op if nk is not currently a known relay
// server.
func (r *relayManager) handleRelayServerRemove(nk key.NodePublic) {
relayManagerInputEvent(r, nil, &r.serverRemoveCh, nk)
}
// relayManagerInputEvent initializes [relayManager] if necessary, starts
// relayManager.runLoop() if it is not running, and writes 'event' on 'eventCh'.
//