wgengine/magicsock,control/controlclient: do not overwrite discokey with old key (#18606)
When a client starts up without being able to connect to control, it sends its discoKey to other nodes it wants to communicate with over TSMP. This disco key will be a newer key than the one control knows about. If the client that can connect to control gets a full netmap, ensure that the disco key for the node not connected to control is not overwritten with the stale key control knows about. This is implemented through keeping track of mapSession and use that for the discokey injection if it is available. This ensures that we are not constantly resetting the wireguard connection when getting the wrong keys from control. This is implemented as: - If the key is received via TSMP: - Set lastSeen for the peer to now() - Set online for the peer to false - When processing new keys, only accept keys where either: - Peer is online - lastSeen is newer than existing last seen If mapSession is not available, as in we are not yet connected to control, punt down the disco key injection to magicsock. Ideally, we will want to have mapSession be long lived at some point in the near future so we only need to inject keys in one location and then also use that for testing and loading the cache, but that is a yak for another PR. Updates #12639 Signed-off-by: Claus Lensbøl <claus@tailscale.com>
This commit is contained in:
@@ -46,6 +46,7 @@ import (
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/tka"
|
||||
"tailscale.com/tstime"
|
||||
"tailscale.com/types/events"
|
||||
"tailscale.com/types/key"
|
||||
"tailscale.com/types/logger"
|
||||
"tailscale.com/types/netmap"
|
||||
@@ -106,8 +107,9 @@ type Direct struct {
|
||||
netinfo *tailcfg.NetInfo
|
||||
endpoints []tailcfg.Endpoint
|
||||
tkaHead string
|
||||
lastPingURL string // last PingRequest.URL received, for dup suppression
|
||||
connectionHandleForTest string // sent in MapRequest.ConnectionHandleForTest
|
||||
lastPingURL string // last PingRequest.URL received, for dup suppression
|
||||
connectionHandleForTest string // sent in MapRequest.ConnectionHandleForTest
|
||||
streamingMapSession *mapSession // the one streaming mapSession instance
|
||||
|
||||
controlClientID int64 // Random ID used to differentiate clients for consumers of messages.
|
||||
}
|
||||
@@ -348,6 +350,38 @@ func NewDirect(opts Options) (*Direct, error) {
|
||||
c.clientVersionPub = eventbus.Publish[tailcfg.ClientVersion](c.busClient)
|
||||
c.autoUpdatePub = eventbus.Publish[AutoUpdate](c.busClient)
|
||||
c.controlTimePub = eventbus.Publish[ControlTime](c.busClient)
|
||||
discoKeyPub := eventbus.Publish[events.PeerDiscoKeyUpdate](c.busClient)
|
||||
eventbus.SubscribeFunc(c.busClient, func(update events.DiscoKeyAdvertisement) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
c.logf("controlclient direct: got TSMP disco key advertisement from %v via eventbus", update.Src)
|
||||
if c.streamingMapSession != nil {
|
||||
nm := c.streamingMapSession.netmap()
|
||||
peer, ok := nm.PeerByTailscaleIP(update.Src)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
c.logf("controlclient direct: updating discoKey for %v via mapSession", update.Src)
|
||||
|
||||
// If we update without error, return. If the err indicates that the
|
||||
// mapSession has gone away, we want to fall back to pushing the key
|
||||
// further down the chain.
|
||||
if err := c.streamingMapSession.updateDiscoForNode(
|
||||
peer.ID(), update.Key, time.Now(), false); err == nil ||
|
||||
!errors.Is(err, ErrChangeQueueClosed) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// We need to push the update further down the chain. Either because we do
|
||||
// not have a mapSession (we are not connected to control) or because the
|
||||
// mapSession queue has closed.
|
||||
c.logf("controlclient direct: updating discoKey for %v via magicsock", update.Src)
|
||||
discoKeyPub.Publish(events.PeerDiscoKeyUpdate{
|
||||
Src: update.Src,
|
||||
Key: update.Key,
|
||||
})
|
||||
})
|
||||
|
||||
return c, nil
|
||||
}
|
||||
@@ -821,21 +855,28 @@ func (c *Direct) PollNetMap(ctx context.Context, nu NetmapUpdater) error {
|
||||
return c.sendMapRequest(ctx, true, nu)
|
||||
}
|
||||
|
||||
// rememberLastNetmapUpdater is a container that remembers the last netmap
|
||||
// update it observed. It is used by tests and [NetmapFromMapResponseForDebug].
|
||||
// It will report only the first netmap seen.
|
||||
type rememberLastNetmapUpdater struct {
|
||||
last *netmap.NetworkMap
|
||||
done chan any
|
||||
}
|
||||
|
||||
func (nu *rememberLastNetmapUpdater) UpdateFullNetmap(nm *netmap.NetworkMap) {
|
||||
nu.last = nm
|
||||
nu.done <- nil
|
||||
}
|
||||
|
||||
// FetchNetMapForTest fetches the netmap once.
|
||||
func (c *Direct) FetchNetMapForTest(ctx context.Context) (*netmap.NetworkMap, error) {
|
||||
var nu rememberLastNetmapUpdater
|
||||
nu.done = make(chan any)
|
||||
err := c.sendMapRequest(ctx, false, &nu)
|
||||
if err == nil && nu.last == nil {
|
||||
return nil, errors.New("[unexpected] sendMapRequest success without callback")
|
||||
}
|
||||
<-nu.done
|
||||
return nu.last, err
|
||||
}
|
||||
|
||||
@@ -1080,8 +1121,18 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
|
||||
return nil
|
||||
}
|
||||
|
||||
if c.streamingMapSession != nil {
|
||||
panic("mapSession is already set")
|
||||
}
|
||||
|
||||
sess := newMapSession(persist.PrivateNodeKey(), nu, c.controlKnobs)
|
||||
defer sess.Close()
|
||||
c.streamingMapSession = sess
|
||||
defer func() {
|
||||
sess.Close()
|
||||
c.mu.Lock()
|
||||
c.streamingMapSession = nil
|
||||
c.mu.Unlock()
|
||||
}()
|
||||
sess.cancel = cancel
|
||||
sess.logf = c.logf
|
||||
sess.vlogf = vlogf
|
||||
@@ -1235,7 +1286,7 @@ func NetmapFromMapResponseForDebug(ctx context.Context, pr persist.PersistView,
|
||||
return nil, errors.New("PersistView invalid")
|
||||
}
|
||||
|
||||
nu := &rememberLastNetmapUpdater{}
|
||||
nu := &rememberLastNetmapUpdater{done: make(chan any)}
|
||||
sess := newMapSession(pr.PrivateNodeKey(), nu, nil)
|
||||
defer sess.Close()
|
||||
|
||||
@@ -1243,6 +1294,7 @@ func NetmapFromMapResponseForDebug(ctx context.Context, pr persist.PersistView,
|
||||
return nil, fmt.Errorf("HandleNonKeepAliveMapResponse: %w", err)
|
||||
}
|
||||
|
||||
<-nu.done
|
||||
return sess.netmap(), nil
|
||||
}
|
||||
|
||||
@@ -1303,10 +1355,10 @@ var jsonEscapedZero = []byte(`\u0000`)
|
||||
const justKeepAliveStr = `{"KeepAlive":true}`
|
||||
|
||||
// decodeMsg is responsible for uncompressing msg and unmarshaling into v.
|
||||
func (sess *mapSession) decodeMsg(compressedMsg []byte, v *tailcfg.MapResponse) error {
|
||||
func (ms *mapSession) decodeMsg(compressedMsg []byte, v *tailcfg.MapResponse) error {
|
||||
// Fast path for common case of keep-alive message.
|
||||
// See tailscale/tailscale#17343.
|
||||
if sess.keepAliveZ != nil && bytes.Equal(compressedMsg, sess.keepAliveZ) {
|
||||
if ms.keepAliveZ != nil && bytes.Equal(compressedMsg, ms.keepAliveZ) {
|
||||
v.KeepAlive = true
|
||||
return nil
|
||||
}
|
||||
@@ -1315,7 +1367,7 @@ func (sess *mapSession) decodeMsg(compressedMsg []byte, v *tailcfg.MapResponse)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sess.ztdDecodesForTest++
|
||||
ms.ztdDecodesForTest++
|
||||
|
||||
if DevKnob.DumpNetMaps() {
|
||||
var buf bytes.Buffer
|
||||
@@ -1330,7 +1382,7 @@ func (sess *mapSession) decodeMsg(compressedMsg []byte, v *tailcfg.MapResponse)
|
||||
return fmt.Errorf("response: %v", err)
|
||||
}
|
||||
if v.KeepAlive && string(b) == justKeepAliveStr {
|
||||
sess.keepAliveZ = compressedMsg
|
||||
ms.keepAliveZ = compressedMsg
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user