diff --git a/wgengine/userspace.go b/wgengine/userspace.go index 5b81206d0..274682270 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -1133,14 +1133,36 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, // If the key changed, mark the connection for reconfiguration. pub := p.PublicKey + if old, ok := prevEP[pub]; ok && old != p.DiscoKey { // If the disco key was learned via TSMP, we do not need to reset the // wireguard config as the new key was received over an existing wireguard // connection. - if discoTSMP, okTSMP := e.tsmpLearnedDisco[p.PublicKey]; okTSMP && - discoTSMP == p.DiscoKey { - delete(e.tsmpLearnedDisco, p.PublicKey) - e.logf("wgengine: Skipping reconfig (TSMP key): %s changed from %q to %q", pub.ShortString(), old, p.DiscoKey) + if discoTSMP, okTSMP := e.tsmpLearnedDisco[p.PublicKey]; okTSMP { + if discoTSMP == p.DiscoKey { + // Key matches, remove entry from map. + e.logf("wgengine: Skipping reconfig (TSMP key): %s changed from %q to %q", + pub.ShortString(), old, p.DiscoKey) + delete(e.tsmpLearnedDisco, p.PublicKey) + } else { + // The new disco key does not match what we received via + // TSMP for this peer. This is unexpected, so log it. + // If it does happen, overwrite the previously-saved + // disco key with the new one for now: We expect another + // update must be pending in that case, so keep the map + // entry. + // The reason why this should never happen is that only a single + // request is coming through the netmap pipeline at a time, and there + // should realistically ever only be a single entry in the map. This + // is really a belt and suspenders solution to find usage that is + // inconsistent with our expectations. + e.logf("wgengine: [unexpected] Reconfig: using TSMP key for %s (control stale): tsmp=%q control=%q old=%q", + pub.ShortString(), discoTSMP, p.DiscoKey, old) + metricTSMPLearnedKeyMismatch.Add(1) + p.DiscoKey = discoTSMP + } + + // Skip session clear no matter what. continue } @@ -1875,6 +1897,8 @@ var ( metricTSMPDiscoKeyAdvertisementSent = clientmetric.NewCounter("magicsock_tsmp_disco_key_advertisement_sent") metricTSMPDiscoKeyAdvertisementError = clientmetric.NewCounter("magicsock_tsmp_disco_key_advertisement_error") + + metricTSMPLearnedKeyMismatch = clientmetric.NewCounter("magicsock_tsmp_learned_key_mismatch") ) func (e *userspaceEngine) InstallCaptureHook(cb packet.CaptureCallback) { diff --git a/wgengine/userspace_test.go b/wgengine/userspace_test.go index 210c528b3..558df4ced 100644 --- a/wgengine/userspace_test.go +++ b/wgengine/userspace_test.go @@ -237,6 +237,93 @@ func TestUserspaceEngineTSMPLearned(t *testing.T) { } } +func TestUserspaceEngineTSMPLearnedMismatch(t *testing.T) { + bus := eventbustest.NewBus(t) + + ht := health.NewTracker(bus) + reg := new(usermetric.Registry) + e, err := NewFakeUserspaceEngine(t.Logf, 0, ht, reg, bus) + if err != nil { + t.Fatal(err) + } + t.Cleanup(e.Close) + ue := e.(*userspaceEngine) + + discoChangedChan := make(chan map[key.NodePublic]bool, 1) + ue.testDiscoChangedHook = func(m map[key.NodePublic]bool) { + discoChangedChan <- m + } + + routerCfg := &router.Config{} + var metricValue int64 = 0 + + keyChanges := []struct { + tsmp bool + inMap bool + wrongKey bool + }{ + {tsmp: false, inMap: false, wrongKey: false}, + {tsmp: true, inMap: false, wrongKey: true}, + {tsmp: false, inMap: false, wrongKey: false}, + } + + nkHex := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + for _, change := range keyChanges { + oldDisco := key.NewDisco() + nm := &netmap.NetworkMap{ + Peers: nodeViews([]*tailcfg.Node{ + { + ID: 1, + Key: nkFromHex(nkHex), + DiscoKey: oldDisco.Public(), + }, + }), + } + nk, err := key.ParseNodePublicUntyped(mem.S(nkHex)) + if err != nil { + t.Fatal(err) + } + e.SetNetworkMap(nm) + + newDisco := key.NewDisco() + cfg := &wgcfg.Config{ + Peers: []wgcfg.Peer{ + { + PublicKey: nk, + DiscoKey: newDisco.Public(), + }, + }, + } + + tsmpKey := newDisco.Public() + if change.tsmp { + if change.wrongKey { + tsmpKey = key.NewDisco().Public() + } + ue.PatchDiscoKey(nk, tsmpKey) + } + err = e.Reconfig(cfg, routerCfg, &dns.Config{}) + if err != nil { + t.Fatal(err) + } + + changeMap := <-discoChangedChan + + if _, ok := changeMap[nk]; ok != change.inMap { + t.Fatalf("expect key %v in map %v to be %t, got %t", nk, changeMap, + change.inMap, ok) + } + + metric := metricTSMPLearnedKeyMismatch.Value() + delta := metric - metricValue + metricValue = metric + + if change.wrongKey && delta != 1 { + t.Fatalf("expected a delta of 1, got %d", delta) + } + } +} + func TestUserspaceEnginePortReconfig(t *testing.T) { flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/2855") const defaultPort = 49983