From 9a7f1439037ce4dcfccb72881afc5af06ecbfdc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Claus=20Lensb=C3=B8l?= Date: Tue, 7 Apr 2026 09:11:11 -0400 Subject: [PATCH] wgengine/userspace: add extra check for tsmp learned keys in engine (#19223) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If an entry in the tsmpLearnedDisco does not match the disco key of the key currently being processed, overwrite the key, and leave the entry in the map for later processing. In reality, this should not happen, but is put in as a safety measure with logging of the situation so we can replicate the behaviour and correct it should it happen. Updates #12639 Signed-off-by: Claus Lensbøl --- wgengine/userspace.go | 32 ++++++++++++-- wgengine/userspace_test.go | 87 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 4 deletions(-) diff --git a/wgengine/userspace.go b/wgengine/userspace.go index 5b81206d0..274682270 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -1133,14 +1133,36 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, // If the key changed, mark the connection for reconfiguration. pub := p.PublicKey + if old, ok := prevEP[pub]; ok && old != p.DiscoKey { // If the disco key was learned via TSMP, we do not need to reset the // wireguard config as the new key was received over an existing wireguard // connection. - if discoTSMP, okTSMP := e.tsmpLearnedDisco[p.PublicKey]; okTSMP && - discoTSMP == p.DiscoKey { - delete(e.tsmpLearnedDisco, p.PublicKey) - e.logf("wgengine: Skipping reconfig (TSMP key): %s changed from %q to %q", pub.ShortString(), old, p.DiscoKey) + if discoTSMP, okTSMP := e.tsmpLearnedDisco[p.PublicKey]; okTSMP { + if discoTSMP == p.DiscoKey { + // Key matches, remove entry from map. + e.logf("wgengine: Skipping reconfig (TSMP key): %s changed from %q to %q", + pub.ShortString(), old, p.DiscoKey) + delete(e.tsmpLearnedDisco, p.PublicKey) + } else { + // The new disco key does not match what we received via + // TSMP for this peer. This is unexpected, so log it. + // If it does happen, overwrite the previously-saved + // disco key with the new one for now: We expect another + // update must be pending in that case, so keep the map + // entry. + // The reason why this should never happen is that only a single + // request is coming through the netmap pipeline at a time, and there + // should realistically ever only be a single entry in the map. This + // is really a belt and suspenders solution to find usage that is + // inconsistent with our expectations. + e.logf("wgengine: [unexpected] Reconfig: using TSMP key for %s (control stale): tsmp=%q control=%q old=%q", + pub.ShortString(), discoTSMP, p.DiscoKey, old) + metricTSMPLearnedKeyMismatch.Add(1) + p.DiscoKey = discoTSMP + } + + // Skip session clear no matter what. continue } @@ -1875,6 +1897,8 @@ var ( metricTSMPDiscoKeyAdvertisementSent = clientmetric.NewCounter("magicsock_tsmp_disco_key_advertisement_sent") metricTSMPDiscoKeyAdvertisementError = clientmetric.NewCounter("magicsock_tsmp_disco_key_advertisement_error") + + metricTSMPLearnedKeyMismatch = clientmetric.NewCounter("magicsock_tsmp_learned_key_mismatch") ) func (e *userspaceEngine) InstallCaptureHook(cb packet.CaptureCallback) { diff --git a/wgengine/userspace_test.go b/wgengine/userspace_test.go index 210c528b3..558df4ced 100644 --- a/wgengine/userspace_test.go +++ b/wgengine/userspace_test.go @@ -237,6 +237,93 @@ func TestUserspaceEngineTSMPLearned(t *testing.T) { } } +func TestUserspaceEngineTSMPLearnedMismatch(t *testing.T) { + bus := eventbustest.NewBus(t) + + ht := health.NewTracker(bus) + reg := new(usermetric.Registry) + e, err := NewFakeUserspaceEngine(t.Logf, 0, ht, reg, bus) + if err != nil { + t.Fatal(err) + } + t.Cleanup(e.Close) + ue := e.(*userspaceEngine) + + discoChangedChan := make(chan map[key.NodePublic]bool, 1) + ue.testDiscoChangedHook = func(m map[key.NodePublic]bool) { + discoChangedChan <- m + } + + routerCfg := &router.Config{} + var metricValue int64 = 0 + + keyChanges := []struct { + tsmp bool + inMap bool + wrongKey bool + }{ + {tsmp: false, inMap: false, wrongKey: false}, + {tsmp: true, inMap: false, wrongKey: true}, + {tsmp: false, inMap: false, wrongKey: false}, + } + + nkHex := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + for _, change := range keyChanges { + oldDisco := key.NewDisco() + nm := &netmap.NetworkMap{ + Peers: nodeViews([]*tailcfg.Node{ + { + ID: 1, + Key: nkFromHex(nkHex), + DiscoKey: oldDisco.Public(), + }, + }), + } + nk, err := key.ParseNodePublicUntyped(mem.S(nkHex)) + if err != nil { + t.Fatal(err) + } + e.SetNetworkMap(nm) + + newDisco := key.NewDisco() + cfg := &wgcfg.Config{ + Peers: []wgcfg.Peer{ + { + PublicKey: nk, + DiscoKey: newDisco.Public(), + }, + }, + } + + tsmpKey := newDisco.Public() + if change.tsmp { + if change.wrongKey { + tsmpKey = key.NewDisco().Public() + } + ue.PatchDiscoKey(nk, tsmpKey) + } + err = e.Reconfig(cfg, routerCfg, &dns.Config{}) + if err != nil { + t.Fatal(err) + } + + changeMap := <-discoChangedChan + + if _, ok := changeMap[nk]; ok != change.inMap { + t.Fatalf("expect key %v in map %v to be %t, got %t", nk, changeMap, + change.inMap, ok) + } + + metric := metricTSMPLearnedKeyMismatch.Value() + delta := metric - metricValue + metricValue = metric + + if change.wrongKey && delta != 1 { + t.Fatalf("expected a delta of 1, got %d", delta) + } + } +} + func TestUserspaceEnginePortReconfig(t *testing.T) { flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/2855") const defaultPort = 49983