7 changed files with 519 additions and 11 deletions
@@ -0,0 +1,47 @@
+// Copyright (c) Tailscale Inc & contributors
+// SPDX-License-Identifier: BSD-3-Clause
+
+package main
+
+import (
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+)
+
+func init() {
+	restartTailscaled = restartTailscaledLinux
+}
+
+// restartTailscaledLinux finds the tailscaled process by walking /proc and
+// sends it SIGKILL. On gokrazy, the supervisor will restart tailscaled within
+// a few seconds. The PID of the process that was killed is returned.
+func restartTailscaledLinux() (int, error) {
+	ents, err := os.ReadDir("/proc")
+	if err != nil {
+		return 0, err
+	}
+	for _, e := range ents {
+		pid, err := strconv.Atoi(e.Name())
+		if err != nil {
+			continue
+		}
+		comm, err := os.ReadFile("/proc/" + e.Name() + "/comm")
+		if err != nil {
+			continue
+		}
+		if strings.TrimSpace(string(comm)) != "tailscaled" {
+			continue
+		}
+		proc, err := os.FindProcess(pid)
+		if err != nil {
+			return 0, err
+		}
+		if err := proc.Kill(); err != nil {
+			return 0, fmt.Errorf("killing tailscaled pid %d: %w", pid, err)
+		}
+		return pid, nil
+	}
+	return 0, fmt.Errorf("tailscaled process not found in /proc")
+}
@@ -391,6 +391,18 @@ func main() {
 		}
 		wgServerUp(w, r)
 	})
+	ttaMux.HandleFunc("/restart-tailscaled", func(w http.ResponseWriter, r *http.Request) {
+		if restartTailscaled == nil {
+			http.Error(w, "restart-tailscaled not supported on this platform", http.StatusNotImplemented)
+			return
+		}
+		pid, err := restartTailscaled()
+		if err != nil {
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+			return
+		}
+		fmt.Fprintf(w, "killed tailscaled pid %d (supervisor will respawn)\n", pid)
+	})
 	ttaMux.HandleFunc("/logs", func(w http.ResponseWriter, r *http.Request) {
 		logBuf.mu.Lock()
 		defer logBuf.mu.Unlock()
@@ -600,6 +612,11 @@ var addFirewall func() error // set by fw_linux.go
 // non-Linux.
 var wgServerUp func(w http.ResponseWriter, r *http.Request)

+// restartTailscaled sends SIGKILL to the local tailscaled process so the
+// gokrazy supervisor restarts it. It is set by restart_tailscaled_linux.go
+// and is nil on non-Linux.
+var restartTailscaled func() (pid int, err error)
+
 // logBuffer is a bytes.Buffer that is safe for concurrent use
 // intended to capture early logs from the process, even if
 // gokrazy's syslog streaming isn't working or yet working.
@@ -7093,11 +7093,28 @@ func (b *LocalBackend) DebugRotateDiscoKey() error {

 	b.mu.Lock()
 	cc := b.cc
+	wantRunning := b.pm.CurrentPrefs().WantRunning()
 	b.mu.Unlock()
 	if cc != nil {
 		cc.SetDiscoPublicKey(newDiscoKey)
 	}

+	// Bounce WantRunning to fully reset wireguard-go state for all peers.
+	if wantRunning {
+		if _, err := b.EditPrefs(&ipn.MaskedPrefs{
+			Prefs:          ipn.Prefs{WantRunning: false},
+			WantRunningSet: true,
+		}); err != nil {
+			return err
+		}
+		if _, err := b.EditPrefs(&ipn.MaskedPrefs{
+			Prefs:          ipn.Prefs{WantRunning: true},
+			WantRunningSet: true,
+		}); err != nil {
+			return err
+		}
+	}
+
 	return nil
 }

@@ -7105,6 +7122,25 @@ func (b *LocalBackend) DebugPeerRelayServers() set.Set[netip.Addr] {
 	return b.MagicConn().PeerRelays()
 }

+// DebugPeerDiscoKeys returns the disco public keys this node has learned for
+// each of its peers from the most recent network map. Intended for tests
+// (the production [ipnstate.PeerStatus] purposefully does not surface disco
+// keys; surfacing them via the [ipnstate.Status] API would also pollute
+// every PeerStatus consumer with a non-comparable struct field).
+func (b *LocalBackend) DebugPeerDiscoKeys() map[key.NodePublic]key.DiscoPublic {
+	nm := b.currentNode().NetMap()
+	if nm == nil {
+		return nil
+	}
+	m := make(map[key.NodePublic]key.DiscoPublic, len(nm.Peers))
+	for _, p := range nm.Peers {
+		if dk := p.DiscoKey(); !dk.IsZero() {
+			m[p.Key()] = dk
+		}
+	}
+	return m
+}
+
 // ControlKnobs returns the node's control knobs.
 func (b *LocalBackend) ControlKnobs() *controlknobs.Knobs {
 	return b.sys.ControlKnobs()
@@ -232,6 +232,12 @@ func (h *Handler) serveDebug(w http.ResponseWriter, r *http.Request) {
 		if err == nil {
 			return
 		}
+	case "peer-disco-keys":
+		w.Header().Set("Content-Type", "application/json")
+		err = json.NewEncoder(w).Encode(h.b.DebugPeerDiscoKeys())
+		if err == nil {
+			return
+		}
 	case "rotate-disco-key":
 		err = h.b.DebugRotateDiscoKey()
 	case "statedir":
@@ -69,6 +69,22 @@ type Server struct {
 	// belong to the same user.
 	AllNodesSameUser bool

+	// AllOnline, if true, marks every peer entry in MapResponses as
+	// Online=true. This is a coarse stand-in for the per-node
+	// online/offline tracking that production control servers do based
+	// on streaming map sessions: certain disco-key handling fast paths
+	// in [tailscale.com/control/controlclient] and
+	// [tailscale.com/wgengine/userspace] only fire when the peer is
+	// reported online, so without this flag they are silently skipped
+	// in tests, which can mask bugs and slow down recovery from disco
+	// rotations. See [tailscale.com/control/controlclient/map.go]
+	// removeUnwantedDiscoUpdates and
+	// removeUnwantedDiscoUpdatesFromFullNetmapUpdate for callers that
+	// branch on Online.
+	//
+	// Finer-grained per-node online tracking can be added later.
+	AllOnline bool
+
 	// DefaultNodeCapabilities overrides the capability map sent to each client.
 	DefaultNodeCapabilities *tailcfg.NodeCapMap

@@ -1405,6 +1421,9 @@ func (s *Server) MapResponse(req *tailcfg.MapRequest) (res *tailcfg.MapResponse,
 			p.PrimaryRoutes = routes
 			p.AllowedIPs = append(p.AllowedIPs, routes...)
 		}
+		if s.AllOnline {
+			p.Online = new(true)
+		}
 		res.Peers = append(res.Peers, p)
 	}

@@ -19,6 +19,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/base64"
+	"encoding/json"
 	"flag"
 	"fmt"
 	"io"
@@ -43,6 +44,7 @@ import (
 	"tailscale.com/ipn"
 	"tailscale.com/ipn/ipnstate"
 	"tailscale.com/tailcfg"
+	"tailscale.com/tstest"
 	"tailscale.com/tstest/integration/testcontrol"
 	"tailscale.com/tstest/natlab/vnet"
 	"tailscale.com/types/key"
@@ -85,6 +87,7 @@ type Env struct {
 	qemuProcs []*exec.Cmd // launched QEMU processes

 	sameTailnetUser bool // all nodes register as the same Tailnet user
+	allOnline       bool // mark every peer as Online=true in MapResponses

 	// Shared resource initialization (sync.Once for things multiple nodes share).
 	vnetOnce      sync.Once
@@ -346,6 +349,16 @@ func SameTailnetUser() EnvOption {
 	return envOptFunc(func(e *Env) { e.sameTailnetUser = true })
 }

+// AllOnline returns an [EnvOption] that makes the test control server mark
+// every peer as Online=true in MapResponses (testcontrol.Server.AllOnline).
+// Several disco-key handling fast paths in the controlclient and wgengine
+// only fire when the peer is reported online; without this option those
+// paths are silently skipped, which can mask bugs and slow down recovery
+// from disco-key rotations.
+func AllOnline() EnvOption {
+	return envOptFunc(func(e *Env) { e.allOnline = true })
+}
+
 // AddNetwork creates a new virtual network. Arguments follow the same pattern as
 // vnet.Config.AddNetwork (string IPs, NAT types, NetworkService values).
 func (e *Env) AddNetwork(opts ...any) *vnet.Network {
@@ -414,6 +427,11 @@ func (e *Env) AddNode(name string, opts ...any) *Node {

 // LanIP returns the LAN IPv4 address of this node on the given network.
 // This is only valid after Env.Start() has been called.
+// Name returns the node's name as set in [Env.AddNode].
+func (n *Node) Name() string {
+	return n.name
+}
+
 func (n *Node) LanIP(net *vnet.Network) netip.Addr {
 	return n.vnetNode.LanIP(net)
 }
@@ -864,33 +882,172 @@ func (e *Env) ApproveRoutes(n *Node, routes ...string) {
 	}
 }

-// ping pings from one node to another's Tailscale IP, retrying until it succeeds
-// or the timeout expires. This establishes the WireGuard tunnel between the nodes.
+// ping does a disco ping from one node to another's Tailscale IP, retrying
+// for up to 30 seconds, fataling on failure. It is used internally to wake
+// up magicsock peer state before a test runs; tests that want to assert
+// connectivity should use [Env.Ping] with the appropriate ping type and
+// timeout.
 func (e *Env) ping(from, to *Node) {
-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	e.t.Helper()
+	if err := e.Ping(from, to, tailcfg.PingDisco, 30*time.Second); err != nil {
+		e.t.Fatal(err)
+	}
+}
+
+// Ping pings from one node to another's Tailscale IP using the given ping
+// type, retrying until it succeeds or timeout expires. It returns the error
+// from the last attempt if the timeout expires. Unlike the internal ping
+// helper, it does not fatal the test on failure; callers can check the error
+// to assert on timing.
+//
+// [tailcfg.PingTSMP] actually flows packets across the WireGuard tunnel and is
+// the right choice for asserting end-to-end connectivity.
+// [tailcfg.PingDisco] only exchanges disco messages between magicsock layers
+// and is useful for warming up peer state without requiring a working tunnel.
+func (e *Env) Ping(from, to *Node, ptype tailcfg.PingType, timeout time.Duration) error {
+	e.t.Helper()
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
 	defer cancel()

 	toSt, err := to.agent.Status(ctx)
 	if err != nil {
-		e.t.Fatalf("ping: can't get %s status: %v", to.name, err)
+		return fmt.Errorf("ping: can't get %s status: %w", to.name, err)
 	}
 	if len(toSt.Self.TailscaleIPs) == 0 {
-		e.t.Fatalf("ping: %s has no Tailscale IPs", to.name)
+		return fmt.Errorf("ping: %s has no Tailscale IPs", to.name)
 	}
 	targetIP := toSt.Self.TailscaleIPs[0]

+	var lastErr error
 	for {
-		pingCtx, pingCancel := context.WithTimeout(ctx, 3*time.Second)
-		pr, err := from.agent.PingWithOpts(pingCtx, targetIP, tailcfg.PingDisco, local.PingOpts{})
+		// Per-attempt timeout: cap at 3s but never exceed the remaining budget.
+		attemptTimeout := 3 * time.Second
+		if d := time.Until(deadline(ctx)); d < attemptTimeout {
+			attemptTimeout = d
+		}
+		if attemptTimeout <= 0 {
+			break
+		}
+		pingCtx, pingCancel := context.WithTimeout(ctx, attemptTimeout)
+		pr, err := from.agent.PingWithOpts(pingCtx, targetIP, ptype, local.PingOpts{})
 		pingCancel()
 		if err == nil && pr.Err == "" {
-			e.logVerbosef("ping: %s -> %s OK", from.name, targetIP)
-			return
+			e.logVerbosef("ping(%s): %s -> %s OK", ptype, from.name, targetIP)
+			return nil
+		}
+		switch {
+		case err != nil:
+			lastErr = err
+		case pr.Err != "":
+			lastErr = fmt.Errorf("%s", pr.Err)
 		}
 		if ctx.Err() != nil {
-			e.t.Fatalf("ping: %s -> %s timed out", from.name, targetIP)
+			break
 		}
-		time.Sleep(time.Second)
+		time.Sleep(500 * time.Millisecond)
+	}
+	if lastErr == nil {
+		lastErr = ctx.Err()
+	}
+	return fmt.Errorf("ping(%s): %s -> %s (%s) timed out after %v: %w", ptype, from.name, to.name, targetIP, timeout, lastErr)
+}
+
+// deadline returns ctx's deadline, or a zero Time if it has none.
+func deadline(ctx context.Context) time.Time {
+	d, _ := ctx.Deadline()
+	return d
+}
+
+// PeerDiscoKey returns n's view of the given peer's disco key. It returns a
+// non-nil error if the LocalAPI request fails (e.g. tailscaled briefly
+// unavailable during a restart). It returns (zero, false, nil) if n is
+// reachable but has no record of the given peer in its current netmap.
+//
+// PeerDiscoKey is suitable for use inside a [tstest.WaitFor] poll loop: it
+// does not fatal the test on transient errors.
+//
+// The disco key is fetched from the debug-only "peer-disco-keys" LocalAPI
+// action ([ipnlocal.LocalBackend.DebugPeerDiscoKeys]) rather than via
+// [ipnstate.Status], to keep the production PeerStatus struct free of disco
+// keys (and free of non-comparable fields like [key.DiscoPublic] that break
+// reflect-based test helpers).
+func (e *Env) PeerDiscoKey(n *Node, peer key.NodePublic) (key.DiscoPublic, bool, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+	got, err := n.agent.DebugResultJSON(ctx, "peer-disco-keys")
+	if err != nil {
+		return key.DiscoPublic{}, false, err
+	}
+	// DebugResultJSON returns the result as a generic any (the body is
+	// re-decoded into any), so the map comes back keyed by string text-
+	// encoded node keys. Re-marshal+unmarshal into a typed map for cleaner
+	// lookup. (Roundtripping through JSON is fine for a test helper.)
+	raw, err := json.Marshal(got)
+	if err != nil {
+		return key.DiscoPublic{}, false, fmt.Errorf("re-marshal: %w", err)
+	}
+	var m map[key.NodePublic]key.DiscoPublic
+	if err := json.Unmarshal(raw, &m); err != nil {
+		return key.DiscoPublic{}, false, fmt.Errorf("unmarshal peer-disco-keys: %w", err)
+	}
+	d, ok := m[peer]
+	return d, ok, nil
+}
+
+// RotateDiscoKey asks tailscaled on n to rotate its discovery (magicsock) key
+// in place via the LocalAPI debug action. The node key, control connection,
+// and other tailscaled state are unaffected. It fatals the test on error.
+func (e *Env) RotateDiscoKey(n *Node) {
+	e.t.Helper()
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+	if err := n.agent.DebugAction(ctx, "rotate-disco-key"); err != nil {
+		e.t.Fatalf("RotateDiscoKey(%s): %v", n.name, err)
+	}
+}
+
+// RestartTailscaled signals tailscaled on n to die so that its supervisor
+// (gokrazy) restarts it. It then waits for tailscaled to come back to the
+// "Running" backend state. It fatals the test on error.
+//
+// Restarting tailscaled is currently only supported on gokrazy nodes.
+func (e *Env) RestartTailscaled(n *Node) {
+	e.t.Helper()
+	if !n.os.IsGokrazy {
+		e.t.Fatalf("RestartTailscaled(%s): only supported on gokrazy nodes (have %q)", n.name, n.os.Name)
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, "GET", "http://unused/restart-tailscaled", nil)
+	if err != nil {
+		e.t.Fatalf("RestartTailscaled(%s): %v", n.name, err)
+	}
+	res, err := n.agent.HTTPClient.Do(req)
+	if err != nil {
+		e.t.Fatalf("RestartTailscaled(%s): %v", n.name, err)
+	}
+	body, _ := io.ReadAll(res.Body)
+	res.Body.Close()
+	if res.StatusCode != 200 {
+		e.t.Fatalf("RestartTailscaled(%s): %s: %s", n.name, res.Status, body)
+	}
+	e.t.Logf("[%s] %s", n.name, strings.TrimSpace(string(body)))
+
+	// Wait for tailscaled to come back. Status calls will fail while the unix
+	// socket is gone, then return Starting/NeedsLogin briefly before settling
+	// on Running.
+	if err := tstest.WaitFor(45*time.Second, func() error {
+		st, err := n.agent.Status(ctx)
+		if err != nil {
+			return err
+		}
+		if st.BackendState != "Running" {
+			return fmt.Errorf("backend state = %q", st.BackendState)
+		}
+		return nil
+	}); err != nil {
+		e.t.Fatalf("RestartTailscaled(%s): waiting for Running: %v", n.name, err)
 	}
 }

@@ -1094,6 +1251,9 @@ func (e *Env) initVnet() {
 		if e.sameTailnetUser {
 			e.server.ControlServer().AllNodesSameUser = true
 		}
+		if e.allOnline {
+			e.server.ControlServer().AllOnline = true
+		}
 	})
 }

@@ -9,11 +9,14 @@ import (
 	"net/netip"
 	"strings"
 	"testing"
+	"time"

 	"tailscale.com/tailcfg"
+	"tailscale.com/tstest"
 	"tailscale.com/tstest/integration/testcontrol"
 	"tailscale.com/tstest/natlab/vmtest"
 	"tailscale.com/tstest/natlab/vnet"
+	"tailscale.com/types/key"
 )

 func TestMacOSAndLinuxCanPing(t *testing.T) {
@@ -534,6 +537,226 @@ func TestExitNode(t *testing.T) {
 	}
 }

+// TestDiscoKeyChange verifies that when one node's disco key rotates without
+// its WireGuard node key changing, peers detect the change, tear down stale
+// WireGuard session state for that peer, and re-establish the tunnel in both
+// directions. This exercises the disco-key-change handling that the
+// bradfitz/rm_lazy_wg branch relies on for traffic to and from a peer whose
+// magicsock state has been reset.
+//
+// Topology: two gokrazy nodes A and B, each on its own One2OneNAT network so
+// every connection between them is a direct UDP path with no port-mapping or
+// filtering. With NAT effects out of the way, what we measure here is the
+// speed of disco-key-change reconciliation in wgengine/magicsock alone. The
+// test control server is also configured with [testcontrol.Server.AllOnline]
+// (via [vmtest.AllOnline]) so the controlclient/wgengine fast paths that
+// branch on Online actually fire — without that flag the test exercises
+// only the offline-peer code paths, which mask separate latent issues and
+// are several seconds slower.
+//
+// The test runs four B-side rotations followed by a TSMP ping in the
+// requested direction:
+//
+//	rotate (LocalAPI rotate-disco-key) → ping B → A
+//	rotate (LocalAPI rotate-disco-key) → ping A → B
+//	restart  (SIGKILL tailscaled)      → ping B → A
+//	restart  (SIGKILL tailscaled)      → ping A → B
+//
+// Plus an initial A→B TSMP ping with a generous 30s budget to bring up the
+// WireGuard tunnel before the rotations begin (so the post-rotation pings
+// measure stale-state recovery, not first-time setup). All pings are TSMP
+// because TSMP traverses the actual WireGuard data plane; PingDisco only
+// exercises the magicsock disco layer and would mask any stale WG session
+// problems.
+//
+// Two rotation methods are exercised:
+//
+//   - LocalAPI rotate-disco-key (debug action): rolls B's magicsock disco
+//     private key in place, then bounces WantRunning to force wgengine to
+//     drop wireguard-go session keys for every peer (RotateDiscoKey alone
+//     only touches local disco state; without the WantRunning bounce, B
+//     keeps using stale per-peer session keys against A and A drops
+//     everything until B's WG rekey timer eventually fires).
+//   - SIGKILL of tailscaled (via TTA's /kill-tailscaled): the gokrazy
+//     supervisor respawns tailscaled, fully resetting B's magicsock and
+//     wgengine state in addition to rotating the disco key.
+//
+// Each post-rotation ping currently gets a 15-second budget. On a
+// hypothetical perfect build it should take well under a second. In
+// practice today there are two unavoidable multi-second waits:
+//
+//   - The rotate-then-a→b phase on main takes ~10s for LazyWG. After
+//     B's WantRunning bounce, B's wgengine resets its sentActivityAt/
+//     recvActivityAt maps and trims A out of the wireguard-go config
+//     as an "idle peer"; B only re-adds A on inbound activity, by
+//     which point A's first few TSMP packets have been silently
+//     dropped at B's tundev. The bradfitz/rm_lazy_wg branch removes
+//     that trimming entirely (verified locally), so this phase will
+//     drop to <100ms once that branch lands.
+//
+//   - The restart phases take ~5s for the wireguard-go handshake retry
+//     timer. After SIGKILL+respawn the first WG handshake init from
+//     the restarted node sometimes goes into the void (likely the
+//     brief peer-removed window in the receiver's two-step
+//     [wgengine.userspaceEngine.maybeReconfigWireguardLocked] reconfig
+//     during which the peer is absent from wireguard-go), and wg-go's
+//     [device.RekeyTimeout] of 5s + jitter is the next opportunity to
+//     retry. That retry succeeds and the staged TSMP packet flushes.
+//     This is intrinsic to the protocol's retransmit policy.
+//
+// Once LazyWG is removed and the first-handshake-after-reconfig race
+// is fixed, this budget should be tightened to 5s (or less).
+//
+// All four rotations also assert that B's WireGuard node key is unchanged.
+func TestDiscoKeyChange(t *testing.T) {
+	// AllOnline makes the test control server mark every peer as Online=true
+	// in its MapResponses. Several disco-key handling fast paths
+	// (controlclient.removeUnwantedDiscoUpdates,
+	// removeUnwantedDiscoUpdatesFromFullNetmapUpdate, and the wgengine
+	// tsmpLearnedDisco fast path) only fire for online peers. Production
+	// control servers always populate Online; without this flag the test
+	// would only exercise the offline-peer paths.
+	env := vmtest.New(t, vmtest.AllOnline())
+
+	// One2OneNAT so each node has a 1:1 mapping to a public WAN IP with no
+	// port-translation or address-port filtering. This makes A↔B traffic
+	// behave like two unfirewalled hosts on the public internet, so any
+	// slowness we observe in this test cannot be blamed on NAT traversal.
+	aNet := env.AddNetwork("1.0.0.1", "192.168.1.1/24", vnet.One2OneNAT)
+	bNet := env.AddNetwork("2.0.0.1", "192.168.2.1/24", vnet.One2OneNAT)
+
+	a := env.AddNode("a", aNet, vmtest.OS(vmtest.Gokrazy))
+	b := env.AddNode("b", bNet, vmtest.OS(vmtest.Gokrazy))
+
+	type phase struct {
+		name      string
+		rotate    func()
+		pingFrom  *vmtest.Node
+		pingTo    *vmtest.Node
+		applyStep *vmtest.Step
+		verify    *vmtest.Step
+		wait      *vmtest.Step
+		ping      *vmtest.Step
+	}
+	phases := []*phase{
+		{name: "rotate (LocalAPI), b → a", pingFrom: b, pingTo: a, rotate: func() { env.RotateDiscoKey(b) }},
+		{name: "rotate (LocalAPI), a → b", pingFrom: a, pingTo: b, rotate: func() { env.RotateDiscoKey(b) }},
+		{name: "restart, b → a", pingFrom: b, pingTo: a, rotate: func() { env.RestartTailscaled(b) }},
+		{name: "restart, a → b", pingFrom: a, pingTo: b, rotate: func() { env.RestartTailscaled(b) }},
+	}
+
+	pingABStep := env.AddStep("Ping a → b TSMP (establish tunnel)")
+	for _, p := range phases {
+		p.applyStep = env.AddStep("Apply: " + p.name)
+		p.verify = env.AddStep("Verify b: same node key, new disco key (" + p.name + ")")
+		p.wait = env.AddStep("Wait for a to see b's new disco key (" + p.name + ")")
+		p.ping = env.AddStep("Ping " + p.pingFrom.Name() + " → " + p.pingTo.Name() + " TSMP (" + p.name + ")")
+	}
+
+	env.Start()
+
+	pingABStep.Begin()
+	if err := env.Ping(a, b, tailcfg.PingTSMP, 30*time.Second); err != nil {
+		pingABStep.End(err)
+		t.Fatal(err)
+	}
+	pingABStep.End(nil)
+
+	bStInitial := env.Status(b)
+	bNodeKey := bStInitial.Self.PublicKey
+	cs := env.ControlServer()
+	bCtlNode := cs.Node(bNodeKey)
+	if bCtlNode == nil {
+		t.Fatalf("control server has no node for b's key %v", bNodeKey)
+	}
+	prevDisco := bCtlNode.DiscoKey
+	if prevDisco.IsZero() {
+		t.Fatalf("control server has no disco key for b before rotation")
+	}
+	t.Logf("[b] initial: nodekey=%s discokey=%s", bNodeKey.ShortString(), prevDisco.ShortString())
+
+	for _, p := range phases {
+		p.applyStep.Begin()
+		p.rotate()
+		p.applyStep.End(nil)
+		prevDisco = checkDiscoRotated(t, env, a, b, p.pingFrom, p.pingTo, bNodeKey, prevDisco, p.name,
+			p.verify, p.wait, p.ping)
+	}
+}
+
+// checkDiscoRotated verifies that after some action that should have rotated
+// b's disco key, control has learned the new key, b's node key is unchanged,
+// a's local view picks up the new disco key, and pingFrom can ping pingTo
+// (TSMP) within the budget. It returns b's new disco key and fatals on
+// failure.
+//
+// The TSMP ping budget is 15 seconds rather than the few hundred ms it
+// ought to take. See the top-level test docstring for a full breakdown:
+// it has to absorb LazyWG's trim+re-add for the rotate-a→b phase (~10s)
+// and wireguard-go's RekeyTimeout retry for the SIGKILL+restart phases
+// (~5s). Tighten this once both are addressed.
+func checkDiscoRotated(t *testing.T, env *vmtest.Env, a, b, pingFrom, pingTo *vmtest.Node, bNodeKey key.NodePublic, oldDisco key.DiscoPublic, label string, verifyStep, waitStep, pingStep *vmtest.Step) key.DiscoPublic {
+	t.Helper()
+	cs := env.ControlServer()
+
+	verifyStep.Begin()
+	bSt := env.Status(b)
+	if got := bSt.Self.PublicKey; got != bNodeKey {
+		err := fmt.Errorf("[%s] b's node key changed: %v -> %v", label, bNodeKey, got)
+		verifyStep.End(err)
+		t.Fatal(err)
+	}
+	var newDisco key.DiscoPublic
+	if err := tstest.WaitFor(15*time.Second, func() error {
+		n := cs.Node(bNodeKey)
+		if n == nil {
+			return fmt.Errorf("control server has no node for b")
+		}
+		if n.DiscoKey.IsZero() || n.DiscoKey == oldDisco {
+			return fmt.Errorf("control still has old disco key %v for b", n.DiscoKey)
+		}
+		newDisco = n.DiscoKey
+		return nil
+	}); err != nil {
+		verifyStep.End(err)
+		t.Fatalf("[%s] %v", label, err)
+	}
+	t.Logf("[b] after %s: nodekey=%s discokey=%s", label, bNodeKey.ShortString(), newDisco.ShortString())
+	verifyStep.End(nil)
+
+	waitStep.Begin()
+	if err := tstest.WaitFor(30*time.Second, func() error {
+		d, ok, err := env.PeerDiscoKey(a, bNodeKey)
+		if err != nil {
+			return err
+		}
+		if !ok {
+			return fmt.Errorf("a doesn't yet have b in its status")
+		}
+		if d != newDisco {
+			return fmt.Errorf("a still sees b's old disco %v, want %v", d.ShortString(), newDisco.ShortString())
+		}
+		return nil
+	}); err != nil {
+		waitStep.End(err)
+		env.DumpStatus(a)
+		t.Fatalf("[%s] %v", label, err)
+	}
+	waitStep.End(nil)
+
+	pingStep.Begin()
+	t0 := time.Now()
+	if err := env.Ping(pingFrom, pingTo, tailcfg.PingTSMP, 15*time.Second); err != nil {
+		pingStep.End(err)
+		env.DumpStatus(a)
+		env.DumpStatus(b)
+		t.Fatalf("[%s] %v", label, err)
+	}
+	t.Logf("[%s] ping %s -> %s succeeded in %v", label, pingFrom.Name(), pingTo.Name(), time.Since(t0).Round(100*time.Millisecond))
+	pingStep.End(nil)
+	return newDisco
+}
+
 // TestMullvadExitNode verifies that a Tailscale client whose netmap contains
 // a plain-WireGuard exit node (the way Mullvad exit nodes are wired up by
 // the control plane) can route internet traffic through it, with the source