ipn/ipnlocal,wgengine/magicsock: add basic counters for cached peer connectivity (#19699)

Add new clientmetric counters for establishing contact with peers while using
cached network map data. To do this, instrument the magicsock.Conn with a bit
to indicate whether its peer data came from a cached netmap. If so, there are
two conditions we will count as establishing connectivity to a peer:

  - Receipt of a CallMeMaybe from a peer via disco.
  - Establishing a valid endpoint address for a peer.

In vmtest, add Env.ClientMetrics to scrape metrics from the specified node.
Use this to check that counters were updated in caching tests.

Updates https://github.com/tailscale/projects/issues/13
Updates #12639

Change-Id: Ie8cf3244ac8af4f5bcfe4d0d944078da2ba08990
Signed-off-by: M. J. Fromberger <fromberger@tailscale.com>
This commit is contained in:
M. J. Fromberger
2026-05-12 12:01:05 -07:00
committed by GitHub
parent 120bfcf1cc
commit 9f48567bf1
5 changed files with 155 additions and 20 deletions
+35 -4
View File
@@ -923,6 +923,20 @@ func TestMullvadExitNode(t *testing.T) {
check(checkOff2Step, "exit-off (again)", clientWAN)
}
// checkClientMetrics compares the metrics scraped from a node against the
// expectations in want. For every name in want it reports a test error (without
// stopping the test) when the metric is absent from metrics or when its Value
// differs from the expected one. Metrics not named in want are not examined.
// The label is prefixed to each error message to identify what was checked.
func checkClientMetrics(t *testing.T, label string, metrics vmtest.ClientMetrics, want map[string]int64) {
	t.Helper()
	for key, expected := range want {
		entry, found := metrics[key]
		switch {
		case !found:
			t.Errorf("%s: required metric %q not found", label, key)
		case entry.Value != expected:
			t.Errorf("%s: metric %q: got %v, want %v", label, key, entry.Value, expected)
		}
	}
}
// TestCachedNetmapAfterRestart verifies that two nodes with netmap
// caching enabled (NodeAttrCacheNetworkMaps) can re-establish a direct
// WireGuard tunnel after both are restarted while the control server is
@@ -1020,13 +1034,23 @@ func TestDirectConnectionWithCachedNetmapOnOneNode(t *testing.T) {
vmtest.OS(vmtest.Gokrazy),
tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil})
checkInitialMetrics := env.AddStep("Check initial client metrics")
cutControlStep := env.AddStep("Cut control server access")
restartStep := env.AddStep("Restart tailscaled on a")
tsmpPingStep := env.AddStep("Ping a → b TSMP (cached netmap, no control)")
DiscoPingStep := env.AddStep("Ping a → b Disco (want Direct)")
discoPingStep := env.AddStep("Ping a → b Disco (want Direct)")
checkFinalMetrics := env.AddStep("Check final client metrics")
env.Start()
// Before: Verify that we have not recorded any cached contacts.
checkInitialMetrics.Begin()
checkClientMetrics(t, "Node A", env.ClientMetrics(a), map[string]int64{
"magicsock_cached_peer_contact_derp": 0,
"magicsock_cached_peer_contact_direct": 0,
})
checkInitialMetrics.End(nil)
cutControlStep.Begin()
a.DropControlTraffic()
env.ControlServer().SetOnMapRequest(func(nk key.NodePublic) {
@@ -1047,10 +1071,17 @@ func TestDirectConnectionWithCachedNetmapOnOneNode(t *testing.T) {
}
tsmpPingStep.End(nil)
DiscoPingStep.Begin()
discoPingStep.Begin()
if err := env.PingExpect(a, b, vmtest.PingRouteDirect, 30*time.Second); err != nil {
DiscoPingStep.End(err)
discoPingStep.End(err)
t.Fatal(err)
}
DiscoPingStep.End(nil)
discoPingStep.End(nil)
// After: Verify that we recorded a direct contact on the disconnected node.
checkFinalMetrics.Begin()
checkClientMetrics(t, "Node A", env.ClientMetrics(a), map[string]int64{
"magicsock_cached_peer_contact_direct": 1,
})
checkFinalMetrics.End(nil)
}