From 27f1d4c15ddf725b83db1a34ed443b9165ce9e7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Claus=20Lensb=C3=B8l?= Date: Tue, 14 Apr 2026 08:43:07 -0400 Subject: [PATCH] control/controlclient: improve filter on netmap updates (#19308) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous filters would allow for a handful of subtle issues such as updating the last seen date when the key or online status had not changed, and making online keys unconditionally make an engine update. These have been fixed along side making no change updates from TSMP into a no-op for the engine so we don't have to reconfigure. A bunch of additional testing has been added as well. Updates #12639 Signed-off-by: Claus Lensbøl --- control/controlclient/map.go | 38 ++++- control/controlclient/map_test.go | 243 ++++++++++++++++++++++++++---- 2 files changed, 240 insertions(+), 41 deletions(-) diff --git a/control/controlclient/map.go b/control/controlclient/map.go index c8cbdbce5..84a7c2e8b 100644 --- a/control/controlclient/map.go +++ b/control/controlclient/map.go @@ -298,7 +298,12 @@ func (ms *mapSession) handleNonKeepAliveMapResponse(ctx context.Context, resp *t ms.patchifyPeersChanged(resp) - ms.removeUnwantedDiscoUpdates(resp) + ms.removeUnwantedDiscoUpdates(resp, viaTSMP) + + // TSMP learned key was rejected, no need to do any more work in the engine. + if viaTSMP && len(resp.PeersChangedPatch) == 0 { + return nil + } ms.removeUnwantedDiscoUpdatesFromFullNetmapUpdate(resp) ms.updateStateFromResponse(resp) @@ -407,7 +412,7 @@ type updateStats struct { // removeUnwantedDiscoUpdates goes over the patchified updates and reject items // where the node is offline and has last been seen before the recorded last seen. -func (ms *mapSession) removeUnwantedDiscoUpdates(resp *tailcfg.MapResponse) { +func (ms *mapSession) removeUnwantedDiscoUpdates(resp *tailcfg.MapResponse, viaTSMP bool) { ms.peersMu.RLock() defer ms.peersMu.RUnlock() @@ -422,17 +427,33 @@ func (ms *mapSession) removeUnwantedDiscoUpdates(resp *tailcfg.MapResponse) { continue } + existingNode, ok := ms.peers[change.NodeID] // Accept if: - // - Node is online. - if *change.Online { + // - Cannot find the peer, don't have enough data. + if !ok { acceptedDiscoUpdates = append(acceptedDiscoUpdates, change) continue } - existingNode, ok := ms.peers[change.NodeID] + // Reject if: + // - key was learned via tsmp AND, + // - existing node is online AND, + // - key did not change. + // Here to avoid a deeper reconfig in the case where we get a TSMP key + // exchange while that node is already in a connected state (from the view + // of the control plane). This is meant to keep the node stable, avoiding a + // reconfiguration of the node deeper down in the engine. + // With this, we are avoiding updating the LastSeen and Online fields from + // TSMP updates when that is not relevant, overall making the connection + // state change less, and updating the engine less. + if viaTSMP && existingNode.Online().Get() && + *change.DiscoKey == existingNode.DiscoKey() { + continue + } + // Accept if: - // - Cannot find the peer, don't have enough data - if !ok { + // - Node is online. + if *change.Online { acceptedDiscoUpdates = append(acceptedDiscoUpdates, change) continue } @@ -497,8 +518,9 @@ func (ms *mapSession) removeUnwantedDiscoUpdatesFromFullNetmapUpdate(resp *tailc continue } - // Overwrite the key in the full netmap update. + // Overwrite the key and last seen in the full netmap update. peer.DiscoKey = existingNode.DiscoKey() + *peer.LastSeen = existingNode.LastSeen().Get() } } diff --git a/control/controlclient/map_test.go b/control/controlclient/map_test.go index f057345d9..679c48c17 100644 --- a/control/controlclient/map_test.go +++ b/control/controlclient/map_test.go @@ -14,6 +14,7 @@ import ( "strings" "sync/atomic" "testing" + "testing/synctest" "time" "github.com/google/go-cmp/cmp" @@ -630,81 +631,149 @@ func TestUpdateDiscoForNode(t *testing.T) { name string initialOnline bool initialLastSeen time.Time + updateDiscoKey bool updateOnline bool updateLastSeen time.Time wantUpdate bool + wantKeyChanged bool }{ { name: "newer_key_not_online", initialOnline: true, initialLastSeen: time.Unix(1, 0), + updateDiscoKey: true, updateOnline: false, updateLastSeen: time.Now(), wantUpdate: true, + wantKeyChanged: true, }, { name: "newer_key_online", initialOnline: true, initialLastSeen: time.Unix(1, 0), + updateDiscoKey: true, updateOnline: true, updateLastSeen: time.Now(), wantUpdate: true, + wantKeyChanged: true, }, { name: "older_key_not_online", initialOnline: false, initialLastSeen: time.Now(), + updateDiscoKey: true, updateOnline: false, updateLastSeen: time.Unix(1, 0), wantUpdate: false, + wantKeyChanged: false, }, { name: "older_key_online", initialOnline: false, initialLastSeen: time.Now(), + updateDiscoKey: true, updateOnline: true, updateLastSeen: time.Unix(1, 0), wantUpdate: true, + wantKeyChanged: true, + }, + { + name: "same_newer_key_not_online", + initialOnline: true, + initialLastSeen: time.Unix(1, 0), + updateDiscoKey: false, + updateOnline: false, + updateLastSeen: time.Now(), + wantUpdate: false, + wantKeyChanged: false, + }, + { + name: "same_newer_key_online", + initialOnline: true, + initialLastSeen: time.Unix(1, 0), + updateDiscoKey: false, + updateOnline: true, + updateLastSeen: time.Now(), + wantUpdate: false, + wantKeyChanged: false, + }, + { + name: "same_older_key_not_online", + initialOnline: false, + initialLastSeen: time.Now(), + updateDiscoKey: false, + updateOnline: false, + updateLastSeen: time.Unix(1, 0), + wantUpdate: false, + wantKeyChanged: false, + }, + { + name: "same_older_key_online", + initialOnline: false, + initialLastSeen: time.Now(), + updateDiscoKey: false, + updateOnline: true, + updateLastSeen: time.Unix(1, 0), + wantUpdate: true, + wantKeyChanged: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - nu := &rememberLastNetmapUpdater{ - done: make(chan any, 1), - } - ms := newTestMapSession(t, nu) + synctest.Test(t, func(*testing.T) { + nu := &rememberLastNetmapUpdater{ + done: make(chan any, 1), + } + ms := newTestMapSession(t, nu) + defer ms.Close() - oldKey := key.NewDisco() + oldKey := key.NewDisco() - // Insert existing node - node := tailcfg.Node{ - ID: 1, - Key: key.NewNode().Public(), - DiscoKey: oldKey.Public(), - Online: &tt.initialOnline, - LastSeen: &tt.initialLastSeen, - } + // Insert existing node + node := tailcfg.Node{ + ID: 1, + Key: key.NewNode().Public(), + DiscoKey: oldKey.Public(), + Online: &tt.initialOnline, + LastSeen: &tt.initialLastSeen, + } - if nm := ms.netmapForResponse(&tailcfg.MapResponse{ - Peers: []*tailcfg.Node{&node}, - }); len(nm.Peers) != 1 { - t.Fatalf("node not inserted") - } + if nm := ms.netmapForResponse(&tailcfg.MapResponse{ + Peers: []*tailcfg.Node{&node}, + }); len(nm.Peers) != 1 { + t.Fatalf("node not inserted") + } - newKey := key.NewDisco() - ms.updateDiscoForNode(node.ID, node.Key, newKey.Public(), tt.updateLastSeen, tt.updateOnline) - <-nu.done + newKey := oldKey.Public() + if tt.updateDiscoKey { + newKey = key.NewDisco().Public() + } + ms.updateDiscoForNode(node.ID, node.Key, newKey, tt.updateLastSeen, tt.updateOnline) - peer, ok := ms.peers[node.ID] - if !ok { - t.Fatal("node not found") - } + // We have an early escape that would not trigger the netmap updater. + synctest.Wait() + select { + case <-nu.done: + if !tt.wantUpdate { + t.Errorf("did not expect update, got: %v", nu.last) + } + default: + if tt.wantUpdate { + t.Errorf("expected update, did not get any") + } + } - updated := peer.DiscoKey().Compare(newKey.Public()) == 0 - if updated != tt.wantUpdate { - t.Fatalf("Disco key update: %t, wanted update: %t", updated, tt.wantUpdate) - } + peer, ok := ms.peers[node.ID] + if !ok { + t.Fatal("node not found") + } + + keyChanged := peer.DiscoKey().Compare(oldKey.Public()) != 0 + if keyChanged != tt.wantKeyChanged { + t.Errorf("Disco key update: %t, wanted update: %t", keyChanged, tt.wantKeyChanged) + } + }) }) } } @@ -831,6 +900,14 @@ func TestUpdateDiscoForNodeCallbackWithFullNetmap(t *testing.T) { updateLastSeen: now, expectNewDisco: true, }, + { + name: "local-lastseen-preserved-after-first-reconnect", + initialOnline: false, + initialLastSeen: now, + updateOnline: false, + updateLastSeen: now, + expectNewDisco: false, + }, } for _, tt := range tests { @@ -1115,17 +1192,20 @@ func TestPeerChangeDiff(t *testing.T) { a: &tailcfg.Node{ID: 1, CapMap: tailcfg.NodeCapMap{tailcfg.CapabilityAdmin: nil}}, b: &tailcfg.Node{ID: 1, CapMap: tailcfg.NodeCapMap{tailcfg.CapabilityAdmin: nil, tailcfg.CapabilityDebug: nil}}, want: &tailcfg.PeerChange{NodeID: 1, CapMap: tailcfg.NodeCapMap{tailcfg.CapabilityAdmin: nil, tailcfg.CapabilityDebug: nil}}, - }, { + }, + { name: "patch-capmap-remove-key", a: &tailcfg.Node{ID: 1, CapMap: tailcfg.NodeCapMap{tailcfg.CapabilityAdmin: nil}}, b: &tailcfg.Node{ID: 1, CapMap: tailcfg.NodeCapMap{}}, want: &tailcfg.PeerChange{NodeID: 1, CapMap: tailcfg.NodeCapMap{}}, - }, { + }, + { name: "patch-capmap-remove-as-nil", a: &tailcfg.Node{ID: 1, CapMap: tailcfg.NodeCapMap{tailcfg.CapabilityAdmin: nil}}, b: &tailcfg.Node{ID: 1}, want: &tailcfg.PeerChange{NodeID: 1, CapMap: tailcfg.NodeCapMap{}}, - }, { + }, + { name: "patch-capmap-add-key-to-empty-map", a: &tailcfg.Node{ID: 1}, b: &tailcfg.Node{ID: 1, CapMap: tailcfg.NodeCapMap{tailcfg.CapabilityAdmin: nil}}, @@ -1366,7 +1446,6 @@ func TestUpgradeNode(t *testing.T) { } }) } - } func BenchmarkMapSessionDelta(b *testing.B) { @@ -1838,3 +1917,101 @@ func TestPeerIDAndKeyByTailscaleIP(t *testing.T) { } }) } + +func TestRemoveUnwantedDiscoUpdates(t *testing.T) { + tests := []struct { + name string + viaTSMP bool + existingOnline bool + sameKey bool + newerLastSeen bool + wantAccepted bool + }{ + { + name: "tsmp_online_peer_same_key", + viaTSMP: true, + existingOnline: true, + sameKey: true, + newerLastSeen: true, + wantAccepted: false, + }, + { + name: "not_tsmp_online_peer_same_key", + viaTSMP: false, + existingOnline: true, + sameKey: true, + newerLastSeen: true, + wantAccepted: true, + }, + { + name: "tsmp_offline_peer_same_key", + viaTSMP: true, + existingOnline: false, + sameKey: true, + newerLastSeen: true, + wantAccepted: true, + }, + { + name: "tsmp_online_peer_diff_key", + viaTSMP: true, + existingOnline: true, + sameKey: false, + newerLastSeen: true, + wantAccepted: true, + }, + { + name: "tsmp_online_peer_same_key_old_lastseen", + viaTSMP: true, + existingOnline: true, + sameKey: true, + newerLastSeen: false, + wantAccepted: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ms := newTestMapSession(t, &rememberLastNetmapUpdater{done: make(chan any, 1)}) + + existingKey := key.NewDisco().Public() + existingOnline := tt.existingOnline + initialLastSeen := time.Unix(1, 0) + + ms.updateStateFromResponse(&tailcfg.MapResponse{ + Peers: []*tailcfg.Node{{ + ID: 1, + Key: key.NewNode().Public(), + DiscoKey: existingKey, + Online: &existingOnline, + LastSeen: &initialLastSeen, + }}, + }) + + changeKey := existingKey + if !tt.sameKey { + changeKey = key.NewDisco().Public() + } + changeOnline := false // must be false to reach the new guard + updateLastSeen := time.Unix(2, 0) + if !tt.newerLastSeen { + updateLastSeen = time.Unix(0, 0) + } + + resp := &tailcfg.MapResponse{ + PeersChangedPatch: []*tailcfg.PeerChange{{ + NodeID: 1, + DiscoKey: &changeKey, + Online: &changeOnline, + LastSeen: &updateLastSeen, + }}, + } + + ms.removeUnwantedDiscoUpdates(resp, tt.viaTSMP) + + got := len(resp.PeersChangedPatch) > 0 + if got != tt.wantAccepted { + t.Errorf("accepted=%v, want %v", got, tt.wantAccepted) + } + }) + } +}