tailscale/tstest/natlab/vmtest/vmtest.go

// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause

// Package vmtest provides a high-level framework for running integration tests
// across multiple QEMU virtual machines connected by natlab's vnet virtual
// network infrastructure. It supports mixed OS types (gokrazy, Ubuntu, Debian)
// and multi-NIC configurations for scenarios like subnet routing.
//
// Prerequisites:
//   - qemu-system-x86_64 (KVM is used automatically on Linux when /dev/kvm is accessible)
//   - A built gokrazy natlabapp image (auto-built on first run via "make natlab" in gokrazy/)
//
// Run tests with:
//
//	go test ./tstest/natlab/vmtest/ --run-vm-tests -v
package vmtest

import (
	"bytes"
	"context"
	"encoding/base64"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"net"
	"net/http"
	"net/netip"
	"net/url"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/google/gopacket/layers"
	dto "github.com/prometheus/client_model/go"
	"github.com/prometheus/common/expfmt"
	"go4.org/mem"
	"golang.org/x/sync/errgroup"
	"tailscale.com/client/local"
	"tailscale.com/ipn"
	"tailscale.com/ipn/ipnstate"
	"tailscale.com/tailcfg"
	"tailscale.com/tstest"
	"tailscale.com/tstest/integration/testcontrol"
	"tailscale.com/tstest/natlab/vnet"
	"tailscale.com/types/key"
	"tailscale.com/util/mak"
)

// Command-line flags for the VM test framework.
//
// runVMTests gates every test: New skips the test unless it is set.
// verboseVMDebug enables the extra output emitted through Env.logVerbosef.
// testVersion, when non-empty, is resolved in Env.Start and selects a released
// Tailscale version instead of building from the source tree.
var (
	runVMTests     = flag.Bool("run-vm-tests", false, "run tests that require QEMU VMs")
	verboseVMDebug = flag.Bool("verbose-vm-debug", false, "enable verbose debug logging for VM tests")
	testVersion    = flag.String("test-version", "", `if non-empty, download tailscale & tailscaled at the given release version (e.g. "1.97.255", "unstable", or "stable") instead of building from the source tree`)
)

// Env is a test environment that manages virtual networks and QEMU VMs.
// Create one with New, add networks and nodes, then call Start.
//
// An Env contains mutexes and sync.Once values and must therefore not be
// copied; it is always handled through the *Env returned by New.
type Env struct {
	t       testing.TB   // test handle used for logging, failures, skips and cleanups
	cfg     vnet.Config  // virtual network configuration built up by AddNetwork/AddNode
	server  *vnet.Server // vnet server; source of agent clients and the control server
	nodes   []*Node      // every node, in the order AddNode was called
	tempDir string       // the test's t.TempDir()

	sockDir       string // short-path dir for Unix sockets (macOS has 104-byte limit)
	sockAddr      string // shared Unix socket path for all QEMU netdevs
	dgramSockAddr string // Unix dgram socket path for macOS VMs (tailmac)
	binDir        string // directory for compiled binaries

	// testVersion is the resolved Tailscale release version to use (empty if
	// building from source). When non-empty, tailscale and tailscaled binaries
	// are downloaded from pkgs.tailscale.com instead of compiled from the tree.
	testVersion string

	// gokrazy-specific paths
	gokrazyBase   string // path to gokrazy base qcow2 image
	gokrazyKernel string // path to gokrazy kernel

	// tailmac-specific paths (macOS VMs)
	tailmacDir        string // path to tailmac bin/ directory containing Host.app
	macosSnapshot     string // path to cached macOS VM snapshot directory
	macosSnapshotOnce sync.Once

	qemuProcs []*exec.Cmd // launched QEMU processes

	// Options set through EnvOption values passed to New.
	sameTailnetUser bool // all nodes register as the same Tailnet user
	allOnline       bool // mark every peer as Online=true in MapResponses
	peerRelayGrants bool // grant peer-relay capabilities on the wildcard packet filter

	// Shared resource initialization (sync.Once for things multiple nodes share).
	vnetOnce      sync.Once
	gokrazyOnce   sync.Once
	qemuSockOnce  sync.Once
	dgramSockOnce sync.Once
	compileMu     sync.Mutex
	compileOnce   map[string]*sync.Once // keyed by goos_goarch
	imageOnce     map[string]*sync.Once // keyed by OSImage.Name

	// Web UI support.
	//
	// ctx is assigned in Start. stepsMu is held by Step while it reads and
	// writes stepsByKey and steps; note that AddStep and Steps access steps
	// without taking it.
	ctx        context.Context // cancelled when test ends
	eventBus   *EventBus
	testStatus *TestStatus
	stepsMu    sync.Mutex
	stepsByKey map[string]*Step
	steps      []*Step

	// nodeStatusMu guards nodeStatus and the NodeStatus values it points to.
	nodeStatusMu sync.Mutex
	nodeStatus   map[string]*NodeStatus // keyed by node name
}

// logVerbosef forwards format and args to the test log, but only when the
// --verbose-vm-debug flag is set; otherwise it is a no-op.
func (e *Env) logVerbosef(format string, args ...any) {
	if !*verboseVMDebug {
		return
	}
	e.t.Helper()
	e.t.Logf(format, args...)
}

// vmPlatform defines how a VM type boots. Each OS image type (gokrazy,
// cloud, macOS) implements this interface; Node.platform selects the
// implementation for a given node.
type vmPlatform interface {
	// planSteps registers steps with the web UI in a dry-run pass.
	// Env.Start calls it once per node, sequentially, before any boot call.
	planSteps(e *Env, n *Node)

	// boot does everything needed to get this node running: ensure images,
	// compile binaries, set up sockets, launch VM. Called concurrently
	// (Env.Start runs one boot per node in an errgroup), and a non-nil
	// error fails the test.
	boot(ctx context.Context, e *Env, n *Node) error
}

// platform picks the vmPlatform implementation matching the node's OS image.
// macOS takes precedence over gokrazy; anything else is booted as a QEMU
// cloud image.
func (n *Node) platform() vmPlatform {
	switch {
	case n.os.IsMacOS:
		return macPlatform{}
	case n.os.IsGokrazy:
		return gokrazyPlatform{}
	default:
		return qemuCloudPlatform{}
	}
}

// AddStep declares an expected stage of the test and appends it to the
// environment's step list. The web UI shows all steps from the start and
// tracks their progress. The returned *Step's Begin/End methods drive the
// progress display.
//
// Unlike Step, AddStep does not register the step by key and does not take
// the steps mutex.
func (e *Env) AddStep(name string) *Step {
	step := new(Step)
	step.name = name
	step.index = len(e.steps)
	step.env = e

	e.steps = append(e.steps, step)
	return step
}

// Step looks up the step registered under key, creating and appending a new
// one when none exists yet. The whole operation runs under stepsMu, so it is
// safe for concurrent use. Both planSteps (dry-run) and boot (real-run) call
// this to obtain the same Step object.
func (e *Env) Step(key string) *Step {
	e.stepsMu.Lock()
	defer e.stepsMu.Unlock()

	existing, found := e.stepsByKey[key]
	if found {
		return existing
	}

	// The map is allocated lazily on first insertion.
	if e.stepsByKey == nil {
		e.stepsByKey = make(map[string]*Step)
	}
	created := &Step{name: key, index: len(e.steps), env: e}
	e.stepsByKey[key] = created
	e.steps = append(e.steps, created)
	return created
}

// Steps returns the environment's step list in declaration order. The
// returned slice is the internal one (not a copy) and is read without
// taking the steps mutex.
func (e *Env) Steps() []*Step {
	all := e.steps
	return all
}

// publishStepChange announces on the event bus that step s changed status.
// The event message is the step's status icon followed by its name.
func (e *Env) publishStepChange(s *Step) {
	ev := VMEvent{Type: EventStepChanged, Step: s}
	ev.Message = fmt.Sprintf("%s %s", s.Status().Icon(), s.name)
	e.eventBus.Publish(ev)
}

// initNodeStatus (re)creates the NodeStatus entry of every node so the web UI
// has something to render. Each NIC starts with DHCP state "waiting" and the
// Tailscale state starts as "--". Start calls it before booting any VM.
func (e *Env) initNodeStatus() {
	e.nodeStatusMu.Lock()
	defer e.nodeStatusMu.Unlock()

	for _, node := range e.nodes {
		nics := make([]NICStatus, 0, len(node.nets))
		for idx := range node.nets {
			nics = append(nics, NICStatus{
				NetName: e.nicLabel(node, idx),
				DHCP:    "waiting",
			})
		}

		status := new(NodeStatus)
		status.Name = node.name
		status.OS = node.os.Name
		status.NICs = nics
		status.JoinsTailnet = node.joinTailnet
		status.Tailscale = "--"
		e.nodeStatus[node.name] = status
	}
}

// nicLabel produces a short human-readable label for the i-th NIC of n: the
// NIC's LAN IP when the vnet node exists and reports a valid address for that
// network, and "NIC <i>" otherwise.
func (e *Env) nicLabel(n *Node, i int) string {
	if n.vnetNode == nil {
		return fmt.Sprintf("NIC %d", i)
	}
	if addr := n.vnetNode.LanIP(n.nets[i]); addr.IsValid() {
		return addr.String()
	}
	return fmt.Sprintf("NIC %d", i)
}

// getNodeStatus returns a copy of the status recorded for the named node.
// For an unknown node it returns a placeholder carrying only the name and a
// Tailscale state of "--".
func (e *Env) getNodeStatus(name string) NodeStatus {
	e.nodeStatusMu.Lock()
	defer e.nodeStatusMu.Unlock()

	if known, ok := e.nodeStatus[name]; ok && known != nil {
		return *known
	}
	return NodeStatus{Name: name, Tailscale: "--"}
}

// setNodeDHCP updates the DHCP status for a specific NIC on a node.
//
// Unknown node names and out-of-range NIC indexes are ignored. That includes
// negative indexes, such as the -1 that nicIndexForMAC returns when a MAC is
// not found, which would otherwise panic when indexing the NIC slice.
func (e *Env) setNodeDHCP(name string, nicIdx int, status string) {
	e.nodeStatusMu.Lock()
	defer e.nodeStatusMu.Unlock()

	ns := e.nodeStatus[name]
	if ns == nil || nicIdx < 0 || nicIdx >= len(ns.NICs) {
		return
	}
	ns.NICs[nicIdx].DHCP = status
}

// setNodeTailscale records status as the Tailscale state of the named node
// (if the node is known) and then publishes an EventTailscale event so the
// web UI updates via WebSocket. The event is published after the status
// mutex has been released, and is published even for unknown nodes.
func (e *Env) setNodeTailscale(name, status string) {
	func() {
		e.nodeStatusMu.Lock()
		defer e.nodeStatusMu.Unlock()
		if ns := e.nodeStatus[name]; ns != nil {
			ns.Tailscale = status
		}
	}()

	ev := VMEvent{
		NodeName: name,
		Type:     EventTailscale,
		Message:  "Tailscale: " + status,
		Detail:   status,
	}
	e.eventBus.Publish(ev)
}

// appendConsoleLine appends line to the named node's console buffer, keeping
// only the most recent maxConsoleLines lines. Unknown nodes are ignored.
func (e *Env) appendConsoleLine(name, line string) {
	e.nodeStatusMu.Lock()
	defer e.nodeStatusMu.Unlock()

	ns := e.nodeStatus[name]
	if ns == nil {
		return
	}
	ns.Console = append(ns.Console, line)
	if excess := len(ns.Console) - maxConsoleLines; excess > 0 {
		// Drop the oldest lines from the front.
		ns.Console = ns.Console[excess:]
	}
}

// nicIndexForMAC reports the 0-based index of the NIC with the given MAC on
// the node called name, or -1 when no node of that name has such a NIC.
func (e *Env) nicIndexForMAC(name string, mac vnet.MAC) int {
	for _, node := range e.nodes {
		if node.name == name {
			for idx := range node.nets {
				if mac == node.vnetNode.NICMac(idx) {
					return idx
				}
			}
		}
	}
	return -1
}

// nodeNameByNum maps a vnet node number to the name of the matching node.
// When no node has that number it falls back to the synthetic "node<num>".
func (e *Env) nodeNameByNum(num int) string {
	for i := range e.nodes {
		if candidate := e.nodes[i]; candidate.num == num {
			return candidate.name
		}
	}
	return fmt.Sprintf("node%d", num)
}

// New creates a new test environment. It skips the test if --run-vm-tests is
// not set. opts may contain [EnvOption] values returned by helpers like
// [SameTailnetUser]; they are applied in order to the freshly built Env.
//
// New registers two cleanups on t: one removing the socket directory, and one
// that marks the test status finished and publishes a final EventTestStatus
// event.
func New(t testing.TB, opts ...EnvOption) *Env {
	if enabled := *runVMTests; !enabled {
		t.Skip("skipping VM test; set --run-vm-tests to run")
	}

	tempDir := t.TempDir()

	// Sockets get their own directory under the default temp location:
	// Unix socket paths are length-limited (104 bytes on macOS) and the
	// test's TempDir path easily exceeds that.
	sockDir, err := os.MkdirTemp("", "vmtest")
	if err != nil {
		t.Fatalf("creating socket tempdir: %v", err)
	}
	t.Cleanup(func() { os.RemoveAll(sockDir) })

	env := new(Env)
	env.t = t
	env.tempDir = tempDir
	env.sockDir = sockDir
	env.binDir = filepath.Join(tempDir, "bin")
	env.eventBus = newEventBus()
	env.testStatus = newTestStatus()
	env.nodeStatus = make(map[string]*NodeStatus)

	for _, opt := range opts {
		opt.applyTo(env)
	}

	t.Cleanup(func() {
		failed := t.Failed()
		env.testStatus.finish(failed)
		env.eventBus.Publish(VMEvent{
			Type:    EventTestStatus,
			Message: env.testStatus.State(),
			Detail:  formatDuration(env.testStatus.Elapsed()),
		})
	})
	return env
}

// EnvOption configures an [Env] in [New]. Its only method is unexported, so
// options can be implemented only inside this package.
type EnvOption interface {
	applyTo(*Env)
}

// envOptFunc adapts a plain function to the EnvOption interface.
type envOptFunc func(*Env)

// applyTo implements EnvOption by calling f on e.
func (f envOptFunc) applyTo(e *Env) { f(e) }

// SameTailnetUser returns an [EnvOption] that makes every node register with
// the test control server as the same Tailnet user. Cross-node features that
// require a same-user relationship (Taildrop, for example) need this.
func SameTailnetUser() EnvOption {
	var opt envOptFunc = func(e *Env) {
		e.sameTailnetUser = true
	}
	return opt
}

// AllOnline returns an [EnvOption] that makes the test control server mark
// every peer as Online=true in MapResponses (testcontrol.Server.AllOnline).
// Several disco-key handling fast paths in the controlclient and wgengine
// fire only for peers reported online; without this option they are silently
// skipped, which can mask bugs and slow down recovery from disco-key
// rotations.
func AllOnline() EnvOption {
	var opt envOptFunc = func(e *Env) {
		e.allOnline = true
	}
	return opt
}

// PeerRelayGrants returns an [EnvOption] that makes the test control server
// grant [tailcfg.PeerCapabilityRelay] and [tailcfg.PeerCapabilityRelayTarget]
// on the wildcard packet filter (testcontrol.Server.PeerRelayGrants). Without
// those capabilities magicsock considers no peer a candidate peer-relay
// server, so a node with [ipn.Prefs.RelayServerPort] set cannot actually be
// used as a relay by its peers.
func PeerRelayGrants() EnvOption {
	var opt envOptFunc = func(e *Env) {
		e.peerRelayGrants = true
	}
	return opt
}

// AddNetwork creates a new virtual network by forwarding opts unchanged to
// vnet.Config.AddNetwork, so the arguments follow that method's conventions
// (string IPs, NAT types, NetworkService values).
func (e *Env) AddNetwork(opts ...any) *vnet.Network {
	network := e.cfg.AddNetwork(opts...)
	return network
}

// Node represents a virtual machine in the test environment. Nodes are
// created with [Env.AddNode]; the agent field is populated by [Env.Start]
// for nodes that have a test agent.
type Node struct {
	name string
	num  int // assigned during AddNode

	os               OSImage
	nets             []*vnet.Network
	vnetNode         *vnet.Node // primary vnet node (set during Start)
	agent            *vnet.NodeAgentClient
	joinTailnet      bool
	noAgent          bool // true to skip TTA agent setup (e.g. macOS VMs without TTA)
	advertiseRoutes  string
	snatSubnetRoutes *bool // nil means default (true)
	webServerPort    int
	sshPort          int // host port for SSH debug access (cloud VMs only)
}

// AddNode creates a new VM node. The name is used for identification and as
// the webserver greeting. Each option may be a *vnet.Network (attaches the
// node to that network), one of this package's node options, or any other
// value, which is handed through to vnet.Config.AddNode (for example
// vnet.TailscaledEnv).
//
// Nodes default to the Gokrazy image and join the tailnet. Requesting a macOS
// image on a host that is not darwin/arm64 skips the test.
func (e *Env) AddNode(name string, opts ...any) *Node {
	node := &Node{name: name, os: Gokrazy, joinTailnet: true}
	e.nodes = append(e.nodes, node)

	// Options understood here are consumed; networks and unknown options
	// are collected for vnet.
	var passthrough []any
	for _, raw := range opts {
		switch opt := raw.(type) {
		case *vnet.Network:
			node.nets = append(node.nets, opt)
			passthrough = append(passthrough, opt)
		case nodeOptOS:
			node.os = OSImage(opt)
		case nodeOptNoTailscale:
			node.joinTailnet = false
			passthrough = append(passthrough, vnet.DontJoinTailnet)
		case nodeOptNoAgent:
			node.noAgent = true
		case nodeOptAdvertiseRoutes:
			node.advertiseRoutes = string(opt)
		case nodeOptSNATSubnetRoutes:
			snat := bool(opt)
			node.snatSubnetRoutes = &snat
		case nodeOptWebServer:
			node.webServerPort = int(opt)
		default:
			// Pass through to vnet (TailscaledEnv, NodeOption, MAC, etc.)
			passthrough = append(passthrough, opt)
		}
	}

	// macOS VMs need Apple's Virtualization.framework (via tailmac), which
	// means a macOS arm64 host. Skip right away instead of failing later
	// in setup.
	hostCanRunMacVMs := runtime.GOOS == "darwin" && runtime.GOARCH == "arm64"
	if node.os.IsMacOS && !hostCanRunMacVMs {
		e.t.Skipf("macOS VM tests require a macOS arm64 host (got %s/%s)", runtime.GOOS, runtime.GOARCH)
	}

	node.vnetNode = e.cfg.AddNode(passthrough...)
	node.num = node.vnetNode.Num()
	return node
}

// Name returns the node's name as set in [Env.AddNode].
func (n *Node) Name() string {
	return n.name
}

// LanIP returns the LAN IPv4 address of this node on the given network, as
// reported by the underlying vnet node.
// This is only valid after Env.Start() has been called.
func (n *Node) LanIP(net *vnet.Network) netip.Addr {
	return n.vnetNode.LanIP(net)
}

// DropControlTraffic blackholes control traffic for this node only, on every
// network the node is attached to, keyed by the node's LAN address on each
// network.
func (n *Node) DropControlTraffic() {
	for i := range n.nets {
		network := n.nets[i]
		addr := n.vnetNode.LanIP(network)
		network.BlackholeControlForAddr(addr)
	}
}

// NodeOption types for configuring nodes. Each is recognized by the type
// switch in Env.AddNode and produced by the exported constructor named in its
// comment.

type nodeOptOS OSImage               // OS: operating system image
type nodeOptNoTailscale struct{}     // DontJoinTailnet: do not join the tailnet
type nodeOptNoAgent struct{}         // NoAgent: skip TTA agent setup
type nodeOptAdvertiseRoutes string   // AdvertiseRoutes: comma-separated CIDRs
type nodeOptSNATSubnetRoutes bool    // SNATSubnetRoutes: explicit SNAT setting
type nodeOptWebServer int            // WebServer: webserver port

// OS returns a NodeOption that selects img as the node's operating system
// image.
func OS(img OSImage) nodeOptOS {
	opt := nodeOptOS(img)
	return opt
}

// DontJoinTailnet returns a NodeOption that keeps the node off the tailnet:
// Start will not run "tailscale up" on it.
func DontJoinTailnet() nodeOptNoTailscale {
	var opt nodeOptNoTailscale
	return opt
}

// NoAgent returns a NodeOption that skips TTA agent setup. Such a node has no
// test agent, so agent-dependent operations (Status, ExecOnNode, etc.) won't
// work on it. Useful for VMs that only need to boot and respond to ICMP.
func NoAgent() nodeOptNoAgent {
	var opt nodeOptNoAgent
	return opt
}

// AdvertiseRoutes returns a NodeOption that makes the node advertise the
// given routes (comma-separated CIDRs) when it joins the tailnet.
func AdvertiseRoutes(routes string) nodeOptAdvertiseRoutes {
	opt := nodeOptAdvertiseRoutes(routes)
	return opt
}

// SNATSubnetRoutes returns a NodeOption that sets whether the node should
// source-NAT traffic to advertised subnet routes. The default is true.
// Passing false preserves original source IPs, which site-to-site
// configurations need.
func SNATSubnetRoutes(v bool) nodeOptSNATSubnetRoutes {
	opt := nodeOptSNATSubnetRoutes(v)
	return opt
}

// WebServer returns a NodeOption that starts a webserver on the given port.
// The webserver responds with "Hello world I am <nodename> from <sourceIP>" on all requests.
func WebServer(port int) nodeOptWebServer {
	opt := nodeOptWebServer(port)
	return opt
}

// Start initializes the virtual network, boots all VMs in parallel, and waits
// for all TTA agents to connect. It should be called after all AddNetwork/AddNode calls.
//
// In order, Start:
//  1. creates the environment context (10 minute timeout, cancelled at test
//     cleanup), initializes node status and possibly starts the web UI;
//  2. resolves --test-version when it is set;
//  3. lets every platform register its steps, followed by the per-node agent
//     and "Tailscale up" steps, followed by any steps the user declared with
//     AddStep before Start;
//  4. boots all nodes concurrently;
//  5. for every node with an agent, waits for the agent, enables the host
//     firewall if requested, and, for tailnet nodes, runs "tailscale up",
//     checks the backend is Running and waits for requested capabilities;
//  6. starts the per-node webservers.
//
// Any failure is fatal to the test.
func (e *Env) Start() {
	t := e.t
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
	t.Cleanup(cancel)
	e.ctx = ctx

	e.initNodeStatus()
	e.maybeStartWebServer()

	if err := os.MkdirAll(e.binDir, 0755); err != nil {
		t.Fatal(err)
	}

	if *testVersion != "" {
		v, err := resolveTestVersion(ctx, *testVersion)
		if err != nil {
			t.Fatalf("resolving --test-version=%q: %v", *testVersion, err)
		}
		e.testVersion = v
		t.Logf("using Tailscale release version %s (from --test-version=%q)", v, *testVersion)
	}

	// Dry-run: let each platform register its steps with the web UI.
	// User-declared steps are set aside and re-appended afterwards so that
	// they are ordered after the framework's own steps.
	userSteps := e.steps
	e.steps = nil
	for _, n := range e.nodes {
		n.platform().planSteps(e, n)
	}
	for _, n := range e.nodes {
		if !n.noAgent {
			e.Step("Wait for agent: " + n.name)
		}
		if n.joinTailnet {
			e.Step("Tailscale up: " + n.name)
		}
	}
	for _, s := range userSteps {
		s.index = len(e.steps)
		e.steps = append(e.steps, s)
	}

	// Boot all nodes in parallel. Each platform handles its own
	// dependencies (image prep, binary compilation, socket setup)
	// via sync.Once, so independent work overlaps naturally.
	var bootEg errgroup.Group
	for _, n := range e.nodes {
		bootEg.Go(func() error {
			return n.platform().boot(ctx, e, n)
		})
	}
	if err := bootEg.Wait(); err != nil {
		t.Fatalf("boot: %v", err)
	}

	// Set up agent clients and wait for all agents to connect.
	for _, n := range e.nodes {
		if n.noAgent {
			continue
		}
		e.initVnet() // ensure vnet is ready for agent clients
		n.agent = e.server.NodeAgentClient(n.vnetNode)
		n.vnetNode.SetClient(n.agent)
	}

	var agentEg errgroup.Group
	for _, n := range e.nodes {
		if n.noAgent {
			continue
		}
		agentEg.Go(func() error {
			aStep := e.Step("Wait for agent: " + n.name)
			aStep.Begin()
			t.Logf("[%s] waiting for agent...", n.name)
			if n.joinTailnet {
				st, err := n.agent.Status(ctx)
				if err != nil {
					return fmt.Errorf("[%s] agent status: %w", n.name, err)
				}
				t.Logf("[%s] agent connected, backend state: %s", n.name, st.BackendState)
			} else {
				if err := e.waitForAgentConn(ctx, n); err != nil {
					return fmt.Errorf("[%s] agent connect: %w", n.name, err)
				}
				t.Logf("[%s] agent connected (no tailscale)", n.name)
			}
			aStep.End(nil)

			if n.vnetNode.HostFirewall() {
				if err := n.agent.EnableHostFirewall(ctx); err != nil {
					return fmt.Errorf("[%s] enable firewall: %w", n.name, err)
				}
			}

			if n.joinTailnet {
				tsStep := e.Step("Tailscale up: " + n.name)
				tsStep.Begin()
				if err := e.tailscaleUp(ctx, n); err != nil {
					return fmt.Errorf("[%s] tailscale up: %w", n.name, err)
				}
				st2, err := n.agent.Status(ctx)
				if err != nil {
					return fmt.Errorf("[%s] status after up: %w", n.name, err)
				}
				if st2.BackendState != "Running" {
					return fmt.Errorf("[%s] state = %q, want Running", n.name, st2.BackendState)
				}
				// Self is a pointer and is dereferenced below; report a
				// missing self node as an error rather than panicking
				// inside this goroutine.
				if st2.Self == nil {
					return fmt.Errorf("[%s] status after up: self is nil", n.name)
				}

				// Apply any capabilities for the node to the map.
				// SetNodeCapMap pushes an updated map response immediately, then wait
				// until the node reports the capability in its status.
				if cm := n.vnetNode.WantCapMap(); cm != nil {
					e.server.ControlServer().SetNodeCapMap(st2.Self.PublicKey, cm)
					if err := tstest.WaitFor(15*time.Second, func() error {
						st, err := n.agent.Status(ctx)
						if err != nil {
							return err
						}
						if st.Self == nil {
							return fmt.Errorf("self is nil")
						}
						for c := range cm {
							if !st.Self.HasCap(c) {
								return fmt.Errorf("cap %v not yet received", c)
							}
						}
						return nil
					}); err != nil {
						return fmt.Errorf("[%s] waiting for capabilities: %w", n.name, err)
					}
				}

				ips := fmt.Sprintf("%v", st2.Self.TailscaleIPs)
				e.setNodeTailscale(n.name, "Running "+ips)
				t.Logf("[%s] up with %v", n.name, st2.Self.TailscaleIPs)
				tsStep.End(nil)
			}

			return nil
		})
	}
	if err := agentEg.Wait(); err != nil {
		t.Fatal(err)
	}

	// Start webservers.
	for _, n := range e.nodes {
		if n.webServerPort > 0 {
			if err := e.startWebServer(ctx, n); err != nil {
				t.Fatalf("startWebServer(%s): %v", n.name, err)
			}
		}
	}
}

// tailscaleUp runs "tailscale up" on the node via TTA.
//
// The request always asks for accept-routes=true. advertise-routes is added
// when the node has routes configured, and snat-subnet-routes only when the
// node option was set explicitly. The query string is built with url.Values
// so that values are escaped (a routes list containing a space, for
// instance, would otherwise produce a malformed request).
//
// It returns an error if the request fails or TTA answers with a non-200
// status; in the latter case the response body is included in the error.
func (e *Env) tailscaleUp(ctx context.Context, n *Node) error {
	q := url.Values{"accept-routes": {"true"}}
	if n.advertiseRoutes != "" {
		q.Set("advertise-routes", n.advertiseRoutes)
	}
	if n.snatSubnetRoutes != nil {
		q.Set("snat-subnet-routes", strconv.FormatBool(*n.snatSubnetRoutes))
	}
	reqURL := "http://unused/up?" + q.Encode()

	req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
	if err != nil {
		return err
	}
	res, err := n.agent.HTTPClient.Do(req)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	// The body is only used for the error message below, so a read error
	// is deliberately ignored.
	body, _ := io.ReadAll(res.Body)
	if res.StatusCode != 200 {
		return fmt.Errorf("tailscale up: %s: %s", res.Status, body)
	}
	return nil
}

// startWebServer tells TTA on the node to start a webserver on the node's
// configured port, passing the node name along.
//
// The query string is built with url.Values so that node names containing
// spaces or reserved characters such as '&' or '#' are escaped rather than
// corrupting the request.
//
// It returns an error if the request fails or TTA answers with a non-200
// status; in the latter case the response body is included in the error.
func (e *Env) startWebServer(ctx context.Context, n *Node) error {
	q := url.Values{
		"port": {strconv.Itoa(n.webServerPort)},
		"name": {n.name},
	}
	reqURL := "http://unused/start-webserver?" + q.Encode()

	req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
	if err != nil {
		return err
	}
	res, err := n.agent.HTTPClient.Do(req)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	if res.StatusCode != 200 {
		// Best effort: the body only enriches the error message.
		body, _ := io.ReadAll(res.Body)
		return fmt.Errorf("start-webserver: %s: %s", res.Status, body)
	}
	e.t.Logf("[%s] webserver started on port %d", n.name, n.webServerPort)
	return nil
}

// SetExitNode sets the client node's exit node to use for internet traffic.
// If exitNode is nil, the client's exit node is cleared (i.e., turned off).
// Otherwise exitNode must be a tailnet node with an approved 0.0.0.0/0 (and
// ::/0) route, typically configured via [AdvertiseRoutes] and
// [Env.ApproveRoutes].
//
// The exit node is selected by its first Tailscale IP as reported by its
// agent; the ExitNodeID preference is always cleared. Any failure, including
// a status without a self node or without Tailscale IPs, is fatal to the
// test.
func (e *Env) SetExitNode(client, exitNode *Node) {
	e.t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// target names the exit node in failure messages; formatting the
	// *Node itself would dump the whole struct.
	target := "<none>"
	var ip netip.Addr
	if exitNode != nil {
		target = exitNode.name
		st, err := exitNode.agent.Status(ctx)
		if err != nil {
			e.t.Fatalf("SetExitNode: status for %s: %v", exitNode.name, err)
		}
		if st.Self == nil {
			e.t.Fatalf("SetExitNode: status for %s has no self node", exitNode.name)
		}
		if len(st.Self.TailscaleIPs) == 0 {
			e.t.Fatalf("SetExitNode: %s has no Tailscale IPs", exitNode.name)
		}
		ip = st.Self.TailscaleIPs[0]
	}

	if _, err := client.agent.EditPrefs(ctx, &ipn.MaskedPrefs{
		Prefs: ipn.Prefs{
			ExitNodeID: "",
			ExitNodeIP: ip,
		},
		ExitNodeIDSet: true,
		ExitNodeIPSet: true,
	}); err != nil {
		e.t.Fatalf("SetExitNode(%s -> %s): %v", client.name, target, err)
	}
	if exitNode == nil {
		e.t.Logf("[%s] cleared exit node", client.name)
		return
	}
	e.t.Logf("[%s] using exit node %s (%v)", client.name, exitNode.name, ip)
}

// SetExitNodeIP sets the client's ExitNodeIP preference directly, by IP, and
// clears ExitNodeID. It is the helper to use for plain-WireGuard exit nodes
// (Mullvad-style) that aren't on the tailnet; pass an invalid netip.Addr{} to
// clear the exit node. For tailnet exit nodes whose Tailscale IP is
// discoverable via TTA, use [Env.SetExitNode] instead. It fatals the test on
// error.
func (e *Env) SetExitNodeIP(client *Node, ip netip.Addr) {
	e.t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	edit := &ipn.MaskedPrefs{ExitNodeIDSet: true, ExitNodeIPSet: true}
	edit.Prefs.ExitNodeID = ""
	edit.Prefs.ExitNodeIP = ip

	_, err := client.agent.EditPrefs(ctx, edit)
	if err != nil {
		e.t.Fatalf("SetExitNodeIP(%s, %v): %v", client.name, ip, err)
	}

	if ip.IsValid() {
		e.t.Logf("[%s] using exit-node IP %v", client.name, ip)
		return
	}
	e.t.Logf("[%s] cleared exit node", client.name)
}

// ControlServer exposes the underlying test control server for tests that
// need to inject custom peers, masquerade pairs, etc. The returned server's
// Node store is shared with the running tailnet, so changes take effect on
// the next netmap update sent to peers.
func (e *Env) ControlServer() *testcontrol.Server {
	control := e.server.ControlServer()
	return control
}

// BringUpMullvadWGServer brings up a userspace WireGuard server on n,
// configured as a single-peer "Mullvad-style" exit-node target. The
// server runs inside n's TTA process on a Linux TUN named "wg0".
//
// gw is the WG interface address (e.g. 10.64.0.1/24). The server listens
// on listenPort, accepts only the single peer whose public key is peerPub
// at peerAllowedIP, and MASQUERADEs egress traffic from masqSrc so that
// decrypted packets from the peer egress with n's WAN IP.
//
// It returns the freshly generated public key of the WG server, which
// the caller must pin as the peer key on the [tailcfg.Node] it injects
// into the netmap to advertise this server as a plain-WireGuard exit
// node. The key is read from the "PUBKEY=<base64>" line of TTA's response
// and must decode to exactly 32 bytes. It fatals the test on error.
func (e *Env) BringUpMullvadWGServer(n *Node, gw netip.Prefix, listenPort uint16, peerPub key.NodePublic, peerAllowedIP, masqSrc netip.Prefix) key.NodePublic {
	e.t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	peerPubRaw := peerPub.Raw32()
	v := url.Values{
		"addr":            {gw.String()},
		"listen-port":     {strconv.Itoa(int(listenPort))},
		"peer-pub-b64":    {base64.StdEncoding.EncodeToString(peerPubRaw[:])},
		"peer-allowed-ip": {peerAllowedIP.String()},
		"masq-src":        {masqSrc.String()},
	}
	reqURL := "http://unused/wg-server-up?" + v.Encode()
	req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
	if err != nil {
		e.t.Fatalf("BringUpMullvadWGServer: %v", err)
	}
	res, err := n.agent.HTTPClient.Do(req)
	if err != nil {
		e.t.Fatalf("BringUpMullvadWGServer(%s): %v", n.name, err)
	}
	defer res.Body.Close()
	// A read error is not reported on its own: a truncated body surfaces
	// below as a missing or bad PUBKEY.
	body, _ := io.ReadAll(res.Body)
	if res.StatusCode != 200 {
		e.t.Fatalf("BringUpMullvadWGServer(%s): %s: %s", n.name, res.Status, body)
	}

	// The first line starting with "PUBKEY=" carries the server key.
	var pubB64 string
	for _, line := range strings.Split(string(body), "\n") {
		if s, ok := strings.CutPrefix(strings.TrimSpace(line), "PUBKEY="); ok {
			pubB64 = s
			break
		}
	}
	if pubB64 == "" {
		e.t.Fatalf("BringUpMullvadWGServer(%s): no PUBKEY in response: %q", n.name, body)
	}
	pubRaw, err := base64.StdEncoding.DecodeString(pubB64)
	if err != nil {
		e.t.Fatalf("BringUpMullvadWGServer(%s): bad PUBKEY %q: %v", n.name, pubB64, err)
	}
	// Checked separately from the decode error so that a wrong-length key
	// is not reported with a nil error.
	if len(pubRaw) != 32 {
		e.t.Fatalf("BringUpMullvadWGServer(%s): bad PUBKEY %q: decoded to %d bytes, want 32", n.name, pubB64, len(pubRaw))
	}
	return key.NodePublicFromRaw32(mem.B(pubRaw))
}

// Status fetches the tailscale status of node n from its TTA agent, allowing
// up to 30 seconds for the call. It fatals the test on error.
func (e *Env) Status(n *Node) *ipnstate.Status {
	e.t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	st, err := n.agent.Status(ctx)
	if err == nil {
		return st
	}
	e.t.Fatalf("Status(%s): %v", n.name, err)
	return st
}

// ClientMetrics fetches the daemon metrics of node n through its agent and
// returns them keyed by metric name. A fetch or parse failure is fatal to the
// test.
//
// Only metric families with exactly one sample and of type counter or gauge
// are kept; anything else is logged and skipped. Values are truncated to
// int64.
func (e *Env) ClientMetrics(n *Node) ClientMetrics {
	e.t.Helper()
	raw, err := n.Agent().DaemonMetrics(e.t.Context())
	if err != nil {
		e.t.Fatalf("Node %q DaemonMetrics: %v", n.Name(), err)
	}

	// The agent reports metrics in Prometheus exposition format.
	var parser expfmt.TextParser
	families, err := parser.TextToMetricFamilies(bytes.NewReader(raw))
	if err != nil {
		e.t.Fatalf("Node %q parse client metrics: %v", n.Name(), err)
	}

	// Tailscale client metrics are all unlabelled integer-valued counters
	// and gauges, so the full generality of the Prometheus data model is
	// not needed here.
	metrics := make(ClientMetrics)
	for _, fam := range families {
		name := fam.GetName()
		if _, dup := metrics[name]; dup {
			e.t.Logf("Node %q: duplicate client metric %q (ignored)", n.Name(), name)
			continue
		}
		if count := len(fam.Metric); count != 1 {
			e.t.Logf("Node %q: got %d values for client metric %q, want 1 (ignored)", n.Name(), count, name)
			continue
		}

		sample := fam.Metric[0]
		entry := ClientMetric{Name: name}
		switch kind := fam.GetType(); kind {
		case dto.MetricType_COUNTER:
			entry.Type = "counter"
			entry.Value = int64(sample.GetCounter().GetValue())
		case dto.MetricType_GAUGE:
			entry.Type = "gauge"
			entry.Value = int64(sample.GetGauge().GetValue())
		default:
			e.t.Logf("Node %q unexpected client metric %q type %q (ignored)", n.Name(), name, kind.String())
			continue
		}
		metrics[name] = entry
	}
	return metrics
}

// ClientMetrics is a view of the client metrics exported by a node, as
// returned by [Env.ClientMetrics].
// The keys of the map are the metric names.
type ClientMetrics map[string]ClientMetric

// ClientMetric is a view of a single node client metric: its name, its kind
// and its integer value.
type ClientMetric struct {
	Name  string // as published to the clientmetrics package
	Type  string // either "gauge" or "counter"
	Value int64  // the gauge or counter value
}

// SetAcceptRoutes sets the node's RouteAll preference (the --accept-routes
// flag) to on, which controls whether the node installs subnet routes
// advertised by peers. It fatals the test on error.
func (e *Env) SetAcceptRoutes(n *Node, on bool) {
	e.t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	edit := &ipn.MaskedPrefs{RouteAllSet: true}
	edit.Prefs.RouteAll = on

	_, err := n.agent.EditPrefs(ctx, edit)
	if err != nil {
		e.t.Fatalf("SetAcceptRoutes(%s, %v): %v", n.name, on, err)
	}
	e.t.Logf("[%s] accept-routes=%v", n.name, on)
}

// ApproveRoutes tells the test control server to approve subnet routes
// for the given node. The routes should be CIDR strings.
//
// Before approving, it turns on accept-routes on every other tailnet node so
// that they install the routes as soon as the updated netmap arrives. After
// approving, it waits (up to 15 seconds per peer and route) for each of those
// peers to see every route, and finally disco-pings n from each peer to
// establish WireGuard tunnels. Any failure is fatal to the test.
func (e *Env) ApproveRoutes(n *Node, routes ...string) {
	e.t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Get the node's public key from its status.
	st, err := n.agent.Status(ctx)
	if err != nil {
		e.t.Fatalf("ApproveRoutes: status for %s: %v", n.name, err)
	}
	if st.Self == nil {
		e.t.Fatalf("ApproveRoutes: status for %s has no self node", n.name)
	}
	nodeKey := st.Self.PublicKey

	var prefixes []netip.Prefix
	for _, r := range routes {
		p, err := netip.ParsePrefix(r)
		if err != nil {
			e.t.Fatalf("ApproveRoutes: bad route %q: %v", r, err)
		}
		prefixes = append(prefixes, p)
	}

	// The other tailnet members; every step below applies to exactly
	// these nodes.
	var peers []*Node
	for _, other := range e.nodes {
		if other != n && other.joinTailnet {
			peers = append(peers, other)
		}
	}

	// Enable --accept-routes on all other tailscale nodes BEFORE setting the
	// routes on the control server. This way, when the map update arrives with
	// the new peer routes, peers will immediately install them.
	for _, other := range peers {
		if _, err := other.agent.EditPrefs(ctx, &ipn.MaskedPrefs{
			Prefs:       ipn.Prefs{RouteAll: true},
			RouteAllSet: true,
		}); err != nil {
			e.t.Fatalf("ApproveRoutes: set accept-routes on %s: %v", other.name, err)
		}
	}

	// Approve the routes on the control server. SetSubnetRoutes notifies all
	// peers via updatePeerChanged, so they'll re-fetch their MapResponse.
	e.server.ControlServer().SetSubnetRoutes(nodeKey, prefixes)

	// Wait for each peer to see the routes.
	for _, r := range routes {
		for _, other := range peers {
			if !e.waitForPeerRoute(other, r, 15*time.Second) {
				e.DumpStatus(other)
				e.t.Fatalf("ApproveRoutes: %s never saw route %s", other.name, r)
			}
		}
	}
	e.t.Logf("approved routes %v on %s", routes, n.name)

	// Ping the advertiser from each peer to establish WireGuard tunnels.
	for _, other := range peers {
		e.ping(other, n)
	}
}

// ping disco-pings to's Tailscale IP from node from, retrying for up to 30
// seconds and fataling the test on failure. It exists to wake up magicsock
// peer state before a test runs; tests that want to assert connectivity
// should call [Env.Ping] with the appropriate ping type and timeout.
func (e *Env) ping(from, to *Node) {
	e.t.Helper()
	err := e.Ping(from, to, tailcfg.PingDisco, 30*time.Second)
	if err == nil {
		return
	}
	e.t.Fatal(err)
}

// Ping pings from one node to another's Tailscale IP using the given ping
// type, retrying until it succeeds or timeout expires. It returns the error
// from the last attempt if the timeout expires. Unlike the internal ping
// helper, it does not fatal the test on failure; callers can check the error
// to assert on timing.
//
// The target is the first Tailscale IP the destination's agent reports. Each
// attempt is limited to 3 seconds or the remaining budget, whichever is
// smaller, with a 500ms pause between attempts; the pause ends early when the
// overall timeout expires so that Ping does not overrun it.
//
// [tailcfg.PingTSMP] actually flows packets across the WireGuard tunnel and is
// the right choice for asserting end-to-end connectivity.
// [tailcfg.PingDisco] only exchanges disco messages between magicsock layers
// and is useful for warming up peer state without requiring a working tunnel.
func (e *Env) Ping(from, to *Node, ptype tailcfg.PingType, timeout time.Duration) error {
	e.t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	toSt, err := to.agent.Status(ctx)
	if err != nil {
		return fmt.Errorf("ping: can't get %s status: %w", to.name, err)
	}
	if toSt.Self == nil {
		return fmt.Errorf("ping: %s status has no self node", to.name)
	}
	if len(toSt.Self.TailscaleIPs) == 0 {
		return fmt.Errorf("ping: %s has no Tailscale IPs", to.name)
	}
	targetIP := toSt.Self.TailscaleIPs[0]

	var lastErr error
	for {
		// Per-attempt timeout: cap at 3s but never exceed the remaining budget.
		attemptTimeout := 3 * time.Second
		if d := time.Until(deadline(ctx)); d < attemptTimeout {
			attemptTimeout = d
		}
		if attemptTimeout <= 0 {
			break
		}
		pingCtx, pingCancel := context.WithTimeout(ctx, attemptTimeout)
		pr, err := from.agent.PingWithOpts(pingCtx, targetIP, ptype, local.PingOpts{})
		pingCancel()
		if err == nil && pr.Err == "" {
			e.logVerbosef("ping(%s): %s -> %s OK", ptype, from.name, targetIP)
			return nil
		}
		switch {
		case err != nil:
			lastErr = err
		case pr.Err != "":
			lastErr = fmt.Errorf("%s", pr.Err)
		}
		if ctx.Err() != nil {
			break
		}
		// Pause before retrying, but wake up as soon as the overall
		// budget is exhausted; the check at the top of the loop then
		// ends the retries.
		select {
		case <-ctx.Done():
		case <-time.After(500 * time.Millisecond):
		}
	}
	if lastErr == nil {
		lastErr = ctx.Err()
	}
	return fmt.Errorf("ping(%s): %s -> %s (%s) timed out after %v: %w", ptype, from.name, to.name, targetIP, timeout, lastErr)
}

// deadline reports the time at which ctx will be cancelled. A context with no
// deadline yields the zero [time.Time].
func deadline(ctx context.Context) time.Time {
	if t, ok := ctx.Deadline(); ok {
		return t
	}
	return time.Time{}
}

// PeerDiscoKey reports the disco key that node n currently associates with
// the given peer.
//
// The three results are (key, found, err):
//   - err is non-nil if the LocalAPI request fails (e.g. tailscaled briefly
//     unavailable during a restart) or its result cannot be decoded;
//   - found is false, with a nil error, if n answered but has no entry for
//     peer in the returned map.
//
// Because transient failures are returned rather than fataled, PeerDiscoKey
// is suitable for use inside a [tstest.WaitFor] poll loop. Each call is
// bounded by a 10-second timeout.
//
// The disco key is fetched from the debug-only "peer-disco-keys" LocalAPI
// action ([ipnlocal.LocalBackend.DebugPeerDiscoKeys]) rather than via
// [ipnstate.Status], to keep the production PeerStatus struct free of disco
// keys (and free of non-comparable fields like [key.DiscoPublic] that break
// reflect-based test helpers).
func (e *Env) PeerDiscoKey(n *Node, peer key.NodePublic) (key.DiscoPublic, bool, error) {
	var none key.DiscoPublic

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	res, err := n.agent.DebugResultJSON(ctx, "peer-disco-keys")
	if err != nil {
		return none, false, err
	}

	// DebugResultJSON hands back an untyped value, so the map arrives keyed
	// by the text form of each node key. Round-trip it through JSON to get a
	// properly typed map; the extra work is fine for a test helper.
	encoded, err := json.Marshal(res)
	if err != nil {
		return none, false, fmt.Errorf("re-marshal: %w", err)
	}
	var byNode map[key.NodePublic]key.DiscoPublic
	if err := json.Unmarshal(encoded, &byNode); err != nil {
		return none, false, fmt.Errorf("unmarshal peer-disco-keys: %w", err)
	}

	disco, found := byNode[peer]
	return disco, found, nil
}

// RotateDiscoKey triggers the "rotate-disco-key" LocalAPI debug action on n,
// asking tailscaled to rotate its discovery (magicsock) key in place. The
// node key, control connection, and other tailscaled state are unaffected.
// The call is bounded by a 30-second timeout and fatals the test on error.
func (e *Env) RotateDiscoKey(n *Node) {
	e.t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	err := n.agent.DebugAction(ctx, "rotate-disco-key")
	if err == nil {
		return
	}
	e.t.Fatalf("RotateDiscoKey(%s): %v", n.name, err)
}

// RestartTailscaled asks the agent on n (via its /restart-tailscaled
// endpoint) to make tailscaled die so that its supervisor (gokrazy) restarts
// it, then blocks until tailscaled reports the "Running" backend state again.
// It fatals the test on any error or if Running is not reached in time.
//
// Restarting tailscaled is currently only supported on gokrazy nodes; calling
// it for any other OS fatals immediately.
func (e *Env) RestartTailscaled(n *Node) {
	e.t.Helper()
	if !n.os.IsGokrazy {
		e.t.Fatalf("RestartTailscaled(%s): only supported on gokrazy nodes (have %q)", n.name, n.os.Name)
	}

	// One 60s budget covers both the restart request and the status polls.
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://unused/restart-tailscaled", nil)
	if err != nil {
		e.t.Fatalf("RestartTailscaled(%s): %v", n.name, err)
	}
	res, err := n.agent.HTTPClient.Do(req)
	if err != nil {
		e.t.Fatalf("RestartTailscaled(%s): %v", n.name, err)
	}
	// The body is only used for diagnostics, so a read error is not fatal.
	reply, _ := io.ReadAll(res.Body)
	res.Body.Close()
	if res.StatusCode != http.StatusOK {
		e.t.Fatalf("RestartTailscaled(%s): %s: %s", n.name, res.Status, reply)
	}
	e.t.Logf("[%s] %s", n.name, strings.TrimSpace(string(reply)))

	// Status calls fail while the unix socket is gone, then briefly report
	// Starting/NeedsLogin before settling on Running.
	running := func() error {
		st, err := n.agent.Status(ctx)
		switch {
		case err != nil:
			return err
		case st.BackendState != "Running":
			return fmt.Errorf("backend state = %q", st.BackendState)
		}
		return nil
	}
	if err := tstest.WaitFor(45*time.Second, running); err != nil {
		e.t.Fatalf("RestartTailscaled(%s): waiting for Running: %v", n.name, err)
	}
}

// AddRoute adds a kernel static route on the given node, pointing prefix at
// via. It uses TTA's /add-route handler, so it works on any node where TTA
// is running (which is all of them — DontJoinTailnet only skips
// `tailscale up`; the agent runs regardless). Currently Linux-only in TTA.
//
// prefix and via are passed as the "prefix" and "via" query parameters and
// are query-escaped, so the handler receives them verbatim. The request is
// bounded by a 10-second timeout.
//
// It fatals the test on error or on any non-200 response.
func (e *Env) AddRoute(n *Node, prefix, via string) {
	e.t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Escape the values rather than splicing them in raw: a stray '&', '#',
	// '%' or space would otherwise change how the query string is parsed.
	reqURL := fmt.Sprintf("http://unused/add-route?prefix=%s&via=%s",
		url.QueryEscape(prefix), url.QueryEscape(via))
	req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
	if err != nil {
		e.t.Fatalf("AddRoute: %v", err)
	}
	resp, err := n.agent.HTTPClient.Do(req)
	if err != nil {
		e.t.Fatalf("AddRoute(%s, %s → %s): %v", n.name, prefix, via, err)
	}
	defer resp.Body.Close()
	// The body is only used for the failure message, so a read error is
	// deliberately not treated as fatal.
	body, _ := io.ReadAll(resp.Body)
	if resp.StatusCode != 200 {
		e.t.Fatalf("AddRoute(%s, %s → %s): %s: %s", n.name, prefix, via, resp.Status, body)
	}
}

// SSHExec runs cmd as root on a cloud VM over SSH, connecting to 127.0.0.1 on
// the node's debug SSH port with the private key at /tmp/vmtest_key. Host key
// checking is disabled and the connection attempt is limited to 5 seconds.
//
// It only works for cloud VMs that have the debug NIC and SSH key configured;
// a node whose SSH debug port is zero yields an error without running ssh.
//
// It returns the command's combined stdout and stderr, along with any error
// from running ssh.
func (e *Env) SSHExec(n *Node, cmd string) (string, error) {
	if n.sshPort == 0 {
		return "", fmt.Errorf("node %s has no SSH debug port", n.name)
	}
	args := []string{
		"-o", "StrictHostKeyChecking=no",
		"-o", "UserKnownHostsFile=/dev/null",
		"-o", "ConnectTimeout=5",
		"-i", "/tmp/vmtest_key",
		"-p", fmt.Sprintf("%d", n.sshPort),
		"root@127.0.0.1",
		cmd,
	}
	output, err := exec.Command("ssh", args...).CombinedOutput()
	return string(output), err
}

// DumpStatus writes a snapshot of n's tailscale status to the test log: one
// line for the node itself (Tailscale IPs, backend state, AllowedIPs and
// PrimaryRoutes) followed by one line per peer (AllowedIPs, online state,
// relay and current address). Useful for debugging routing issues.
//
// The status fetch is bounded by a 10-second timeout. If it fails, the error
// is logged and nothing else is printed; DumpStatus never fails the test.
func (e *Env) DumpStatus(n *Node) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	st, err := n.agent.Status(ctx)
	if err != nil {
		e.t.Logf("[%s] DumpStatus error: %v", n.name, err)
		return
	}

	self := st.Self
	var allowed, primary []string
	if ips := self.AllowedIPs; ips != nil {
		for i, cnt := 0, ips.Len(); i < cnt; i++ {
			allowed = append(allowed, ips.At(i).String())
		}
	}
	if routes := self.PrimaryRoutes; routes != nil {
		for i, cnt := 0, routes.Len(); i < cnt; i++ {
			primary = append(primary, routes.At(i).String())
		}
	}
	e.t.Logf("[%s] self: %v, backend=%s, AllowedIPs=%v, PrimaryRoutes=%v", n.name, self.TailscaleIPs, st.BackendState, allowed, primary)

	for _, p := range st.Peer {
		var peerAllowed []string
		if ips := p.AllowedIPs; ips != nil {
			for i, cnt := 0, ips.Len(); i < cnt; i++ {
				peerAllowed = append(peerAllowed, ips.At(i).String())
			}
		}
		e.t.Logf("[%s] peer %s (%s): AllowedIPs=%v, Online=%v, Relay=%q, CurAddr=%q",
			n.name, p.HostName, p.TailscaleIPs,
			peerAllowed, p.Online, p.Relay, p.CurAddr)
	}
}

// waitForPeerRoute repeatedly fetches n's status, once per second, until some
// peer lists prefix among its AllowedIPs or timeout elapses. It reports
// whether the route was seen.
//
// prefix is compared against the string form of each AllowedIPs entry. A
// failed status fetch ends the wait immediately with false.
func (e *Env) waitForPeerRoute(n *Node, prefix string, timeout time.Duration) bool {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	for {
		st, err := n.agent.Status(ctx)
		if err != nil {
			return false
		}
		for _, p := range st.Peer {
			ips := p.AllowedIPs
			if ips == nil {
				continue
			}
			for i, cnt := 0, ips.Len(); i < cnt; i++ {
				if ips.At(i).String() == prefix {
					return true
				}
			}
		}
		if ctx.Err() != nil {
			return false
		}
		time.Sleep(time.Second)
	}
}

// HTTPGet fetches targetURL from the given node's network vantage point and
// returns the response body. The request is proxied through TTA's /http-get
// handler, with targetURL passed (query-escaped) as its "url" parameter.
//
// It makes up to three attempts, each bounded by a 6-second timeout. An
// attempt is retried if the request to TTA fails, if the response body cannot
// be read in full, or if the response status is 502 Bad Gateway or 503
// Service Unavailable (after a 2-second pause). Any other status is treated
// as final and its body is returned as-is. If every attempt fails, the test
// is fataled.
func (e *Env) HTTPGet(from *Node, targetURL string) string {
	// Escape targetURL so that any '&', '#', '+' or '%' inside it stays part
	// of the single "url" parameter instead of being reinterpreted as part of
	// the request to TTA.
	reqURL := "http://unused/http-get?url=" + url.QueryEscape(targetURL)

	// fetch performs one attempt. The per-attempt context is released only
	// after the body has been consumed: net/http ties reading the response
	// body to the request context, so cancelling earlier can truncate it.
	fetch := func() (status int, body []byte, err error) {
		ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
		defer cancel()
		req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
		if err != nil {
			e.t.Fatalf("HTTPGet: %v", err)
		}
		res, err := from.agent.HTTPClient.Do(req)
		if err != nil {
			return 0, nil, err
		}
		defer res.Body.Close()
		body, err = io.ReadAll(res.Body)
		if err != nil {
			// A partial body must not be handed back as if it were complete.
			return 0, nil, fmt.Errorf("reading body: %w", err)
		}
		return res.StatusCode, body, nil
	}

	for attempt := range 3 {
		status, body, err := fetch()
		if err != nil {
			e.logVerbosef("HTTPGet attempt %d from %s: %v", attempt+1, from.name, err)
			continue
		}
		if status == http.StatusBadGateway || status == http.StatusServiceUnavailable {
			e.t.Logf("HTTPGet attempt %d from %s: status %d, body: %s", attempt+1, from.name, status, string(body))
			time.Sleep(2 * time.Second)
			continue
		}
		return string(body)
	}
	e.t.Fatalf("HTTPGet from %s to %s: all attempts failed", from.name, targetURL)
	return ""
}

// setNodeScreenshot records dataURI as the most recent screenshot for the
// named node. It holds nodeStatusMu while updating and does nothing if there
// is no status entry for name.
func (e *Env) setNodeScreenshot(name, dataURI string) {
	e.nodeStatusMu.Lock()
	defer e.nodeStatusMu.Unlock()

	ns := e.nodeStatus[name]
	if ns == nil {
		return
	}
	ns.Screenshot = dataURI
}

// setNodeScreenshotPort records the Host.app screenshot server port for the
// named node. It holds nodeStatusMu while updating and does nothing if there
// is no status entry for name.
func (e *Env) setNodeScreenshotPort(name string, port int) {
	e.nodeStatusMu.Lock()
	defer e.nodeStatusMu.Unlock()

	ns := e.nodeStatus[name]
	if ns == nil {
		return
	}
	ns.ScreenshotPort = port
}

// nodeScreenshotPort looks up the Host.app screenshot server port recorded
// for the named node. It returns 0 if there is no status entry for name. The
// lookup is done under nodeStatusMu.
func (e *Env) nodeScreenshotPort(name string) int {
	port := 0
	e.nodeStatusMu.Lock()
	if ns := e.nodeStatus[name]; ns != nil {
		port = ns.ScreenshotPort
	}
	e.nodeStatusMu.Unlock()
	return port
}

// initVnet creates the vnet server from e.cfg and wires it into the test
// environment. The work is guarded by e.vnetOnce, so only the first call does
// anything. It:
//
//   - fatals the test if the server cannot be created, and otherwise
//     registers a cleanup that closes it;
//   - installs a DHCP callback that mirrors each Discover/Offer/Request/Ack
//     into the node's DHCP status and publishes a matching VMEvent (other
//     message types are ignored);
//   - copies the sameTailnetUser, allOnline and peerRelayGrants options onto
//     the control server when they are set.
func (e *Env) initVnet() {
	e.vnetOnce.Do(func() {
		srv, err := vnet.New(&e.cfg)
		e.server = srv
		if err != nil {
			e.t.Fatalf("vnet.New: %v", err)
		}
		e.t.Cleanup(func() { e.server.Close() })

		onDHCP := func(mac vnet.MAC, nodeNum int, msgType layers.DHCPMsgType, ip netip.Addr) {
			node := e.nodeNameByNum(nodeNum)
			nic := e.nicIndexForMAC(node, mac)
			addr := ip.String()

			// Work out the status text and event for this message type,
			// then record and publish them in one place below.
			var status string
			ev := VMEvent{NodeName: node, NIC: nic}
			switch msgType {
			case layers.DHCPMsgTypeDiscover:
				status = "Discover sent"
				ev.Type, ev.Message = EventDHCPDiscover, "DHCP Discover sent"
			case layers.DHCPMsgTypeOffer:
				status = "Offered " + addr
				ev.Type, ev.Message, ev.Detail = EventDHCPOffer, "DHCP Offer received", addr
			case layers.DHCPMsgTypeRequest:
				status = "Requesting " + addr
				ev.Type, ev.Message, ev.Detail = EventDHCPRequest, "DHCP Request sent", addr
			case layers.DHCPMsgTypeAck:
				status = "Got " + addr
				ev.Type, ev.Message, ev.Detail = EventDHCPAck, "DHCP Ack: got "+addr, addr
			default:
				return
			}
			e.setNodeDHCP(node, nic, status)
			e.eventBus.Publish(ev)
		}
		e.server.SetDHCPCallback(onDHCP)

		if e.sameTailnetUser {
			e.server.ControlServer().AllNodesSameUser = true
		}
		if e.allOnline {
			e.server.ControlServer().AllOnline = true
		}
		if e.peerRelayGrants {
			e.server.ControlServer().PeerRelayGrants = true
		}
	})
}

// ensureQEMUSocket sets up the Unix stream socket that QEMU VMs connect to.
// Guarded by e.qemuSockOnce, so only the first call does anything.
//
// It makes sure the vnet server exists, listens on "vnet.sock" inside
// e.sockDir (recording the path in e.sockAddr), and starts a goroutine that
// hands every accepted connection to the vnet server using the QEMU
// protocol. The listener is closed at test cleanup, which also ends the
// accept goroutine. A listen failure fatals the test.
func (e *Env) ensureQEMUSocket() {
	e.qemuSockOnce.Do(func() {
		e.initVnet()
		e.sockAddr = filepath.Join(e.sockDir, "vnet.sock")

		ln, err := net.Listen("unix", e.sockAddr)
		if err != nil {
			e.t.Fatalf("listen unix: %v", err)
		}
		e.t.Cleanup(func() { ln.Close() })

		acceptLoop := func() {
			for {
				conn, err := ln.Accept()
				if err != nil {
					// Accept fails once the listener is closed.
					return
				}
				go e.server.ServeUnixConn(conn.(*net.UnixConn), vnet.ProtocolQEMU)
			}
		}
		go acceptLoop()
	})
}

// ensureDgramSocket sets up the Unix datagram socket used by macOS VMs.
// Guarded by e.dgramSockOnce, so only the first call does anything.
//
// It makes sure the vnet server exists, binds "dgram.sock" inside e.sockDir
// (recording the path in e.dgramSockAddr), and serves the socket from a new
// goroutine using the Unix datagram protocol. The socket is closed at test
// cleanup. Failure to resolve or bind the address fatals the test.
func (e *Env) ensureDgramSocket() {
	e.dgramSockOnce.Do(func() {
		e.initVnet()
		e.dgramSockAddr = filepath.Join(e.sockDir, "dgram.sock")

		addr, err := net.ResolveUnixAddr("unixgram", e.dgramSockAddr)
		if err != nil {
			e.t.Fatalf("resolve dgram addr: %v", err)
		}
		conn, err := net.ListenUnixgram("unixgram", addr)
		if err != nil {
			e.t.Fatalf("listen unixgram: %v", err)
		}
		e.t.Cleanup(func() { conn.Close() })

		go e.server.ServeUnixConn(conn, vnet.ProtocolUnixDGRAM)
	})
}

// ensureCompiled makes sure the binaries for goos/goarch have been compiled
// and registered with the vnet file server. Each platform has its own
// sync.Once (looked up or created under compileMu), so concurrent callers
// for the same platform trigger a single compile.
//
// The compile is reported as a test step; a compile failure ends the step
// with the error and fatals the test.
func (e *Env) ensureCompiled(ctx context.Context, goos, goarch string) {
	platform := goos + "_" + goarch

	e.compileMu.Lock()
	once, found := e.compileOnce[platform]
	if !found {
		once = new(sync.Once)
		mak.Set(&e.compileOnce, platform, once)
	}
	e.compileMu.Unlock()

	once.Do(func() {
		step := e.Step(fmt.Sprintf("Compile %s_%s binaries", goos, goarch))
		step.Begin()
		err := e.compileBinariesForOS(ctx, goos, goarch)
		step.End(err)
		if err != nil {
			e.t.Fatalf("compileBinariesForOS(%s, %s): %v", goos, goarch, err)
		}
		e.registerBinaries(goos, goarch)
	})
}

// imageErrKey identifies one (environment, OS image name) preparation.
type imageErrKey struct {
	env  *Env
	name string
}

// imageErrs remembers failed image preparations, keyed by imageErrKey (values
// are non-nil errors). It lets every caller of [Env.ensureImage] for a given
// OS observe the failure, not only the caller that happened to execute the
// sync.Once body.
var imageErrs sync.Map

// ensureImage prepares the cloud image for os and returns any error from the
// preparation. Safe for concurrent use; the image is prepared only once per
// OS name, using the ctx of whichever call gets there first.
//
// The outcome is sticky: if preparation failed, every call for that OS name
// (including callers that were blocked waiting for it, and later callers)
// returns the same error. The preparation is reported as a test step.
func (e *Env) ensureImage(ctx context.Context, os OSImage) error {
	e.compileMu.Lock()
	once, ok := e.imageOnce[os.Name]
	if !ok {
		once = new(sync.Once)
		mak.Set(&e.imageOnce, os.Name, once)
	}
	e.compileMu.Unlock()

	k := imageErrKey{env: e, name: os.Name}
	once.Do(func() {
		step := e.Step(fmt.Sprintf("Prepare %s image", os.Name))
		step.Begin()
		err := ensureImage(ctx, os)
		step.End(err)
		if err != nil {
			// Keep the error somewhere that outlives this call; a local
			// variable would only be visible to the caller running Do.
			imageErrs.Store(k, err)
		}
	})
	// once.Do returns only after the preparation has finished, so any stored
	// failure is visible here for every caller.
	if v, failed := imageErrs.Load(k); failed {
		return v.(error)
	}
	return nil
}

// registerBinaries loads the tta, tailscale and tailscaled binaries from
// e.binDir/<goos>_<goarch>/ and registers each with the vnet file server
// under the name "<goos>_<goarch>/<binary>". It makes sure the vnet server
// exists first, and fatals the test if a binary cannot be read.
//
// Documented by the original author as safe for concurrent use.
func (e *Env) registerBinaries(goos, goarch string) {
	e.initVnet()

	platform := goos + "_" + goarch
	srcDir := filepath.Join(e.binDir, platform)
	for _, bin := range [...]string{"tta", "tailscale", "tailscaled"} {
		contents, err := os.ReadFile(filepath.Join(srcDir, bin))
		if err != nil {
			e.t.Fatalf("reading compiled %s/%s: %v", platform, bin, err)
		}
		e.server.RegisterFile(platform+"/"+bin, contents)
	}
}

// waitForAgentConn blocks until n's TTA agent answers an HTTP GET of its root
// endpoint, which does not require tailscaled to be up. Any response counts
// as success, whatever its status.
//
// Each probe is limited to 2 seconds and failed probes are spaced 500ms
// apart. It returns ctx's error once ctx is done, or the error from building
// the request should that ever fail.
func (e *Env) waitForAgentConn(ctx context.Context, n *Node) error {
	for {
		probeCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
		req, reqErr := http.NewRequestWithContext(probeCtx, http.MethodGet, "http://unused/", nil)
		if reqErr != nil {
			cancel()
			return reqErr
		}
		res, doErr := n.agent.HTTPClient.Do(req)
		cancel()

		switch {
		case doErr == nil:
			res.Body.Close()
			return nil
		case ctx.Err() != nil:
			return ctx.Err()
		}
		time.Sleep(500 * time.Millisecond)
	}
}

// Agent returns the node's TTA agent client, the handle through which the
// test framework talks to the agent running on the node. It returns nil if
// the node has no agent (NoAgent is set), so callers must check for nil
// before use, as [Env.LANPing] does.
func (n *Node) Agent() *vnet.NodeAgentClient {
	return n.agent
}

// LANPing pings a LAN IP from the given node using TTA's /ping endpoint,
// passing targetIP (query-escaped) as the "host" parameter. A 200 response
// counts as success.
//
// It retries for up to 2 minutes, which is enough for a macOS VM to boot
// and acquire a DHCP lease. Each attempt is limited to 5 seconds and failed
// attempts are spaced 2 seconds apart; only every tenth failure is logged.
//
// It fatals the test if the node has no agent or if no attempt succeeds
// before the 2 minutes are up.
func (e *Env) LANPing(from *Node, targetIP netip.Addr) {
	if from.agent == nil {
		e.t.Fatalf("LANPing: node %s has no agent (NoAgent set?)", from.name)
	}
	e.t.Logf("LANPing: %s -> %s", from.name, targetIP)

	// Escape the address: a zoned IPv6 address contains '%', which must not
	// be interpreted as the start of a percent-escape.
	reqURL := "http://unused/ping?host=" + url.QueryEscape(targetIP.String())

	// probe performs one attempt. The per-attempt context stays alive until
	// the body has been read, because net/http ties reading the response
	// body to the request context.
	probe := func() (status int, body []byte, err error) {
		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()
		req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
		if err != nil {
			e.t.Fatalf("LANPing: %v", err)
		}
		res, err := from.agent.HTTPClient.Do(req)
		if err != nil {
			return 0, nil, err
		}
		defer res.Body.Close()
		// The body is only used for diagnostics, so a read error is not
		// treated as a failed attempt; the status code decides.
		body, _ = io.ReadAll(res.Body)
		return res.StatusCode, body, nil
	}

	giveUpAt := time.Now().Add(2 * time.Minute)
	for attempt := 0; time.Now().Before(giveUpAt); attempt++ {
		status, body, err := probe()
		switch {
		case err != nil:
			if attempt%10 == 0 {
				e.t.Logf("LANPing attempt %d: %v", attempt+1, err)
			}
		case status == http.StatusOK:
			e.t.Logf("LANPing: %s -> %s succeeded on attempt %d", from.name, targetIP, attempt+1)
			return
		default:
			if attempt%10 == 0 {
				e.t.Logf("LANPing attempt %d: status %d, body: %s", attempt+1, status, string(body))
			}
		}
		time.Sleep(2 * time.Second)
	}
	e.t.Fatalf("LANPing: %s -> %s timed out after 2 minutes", from.name, targetIP)
}

// SendTaildropFile sends a file via Taildrop from one node to another, by
// POSTing content to TTA's /taildrop-send endpoint on the sending node. The
// destination is the first Tailscale IP of to, so the to node must be on the
// tailnet.
//
// name is the file name to send under; it is query-escaped, so names
// containing spaces or characters such as '&', '#' or '%' are passed through
// intact. The whole operation is bounded by a 60-second timeout.
//
// It fatals on error or on any non-200 response.
func (e *Env) SendTaildropFile(from, to *Node, name string, content []byte) {
	e.t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	st, err := to.agent.Status(ctx)
	if err != nil {
		e.t.Fatalf("SendTaildropFile: status for %s: %v", to.name, err)
	}
	if len(st.Self.TailscaleIPs) == 0 {
		e.t.Fatalf("SendTaildropFile: %s has no Tailscale IPs", to.name)
	}
	target := st.Self.TailscaleIPs[0].String()

	// Escape both values: name is caller-supplied, and splicing it in raw
	// would let it alter or break the query string.
	reqURL := fmt.Sprintf("http://unused/taildrop-send?to=%s&name=%s",
		url.QueryEscape(target), url.QueryEscape(name))
	req, err := http.NewRequestWithContext(ctx, "POST", reqURL, bytes.NewReader(content))
	if err != nil {
		e.t.Fatalf("SendTaildropFile: %v", err)
	}
	res, err := from.agent.HTTPClient.Do(req)
	if err != nil {
		e.t.Fatalf("SendTaildropFile(%s -> %s): %v", from.name, to.name, err)
	}
	defer res.Body.Close()
	// The body is only used for logging and the failure message, so a read
	// error is deliberately not treated as fatal.
	body, _ := io.ReadAll(res.Body)
	if res.StatusCode != 200 {
		e.t.Fatalf("SendTaildropFile(%s -> %s): %s: %s", from.name, to.name, res.Status, body)
	}
	if msg := strings.TrimSpace(string(body)); msg != "" {
		e.t.Logf("[%s] %s", from.name, msg)
	}
	e.t.Logf("[%s] sent Taildrop %q (%d bytes) to %s", from.name, name, len(content), to.name)
}

// RecvTaildropFile waits for an incoming Taildrop file on the node, via TTA's
// /taildrop-recv endpoint, and returns the filename (taken from the
// Taildrop-Filename response header) and the file contents (the response
// body). The provided context bounds the wait; in addition, RecvTaildropFile
// imposes its own 90s upper bound.
//
// It fatals on error or timeout, on any non-200 response, and if the body
// cannot be read in full, so a truncated file is never returned as if it
// were complete.
func (e *Env) RecvTaildropFile(ctx context.Context, n *Node) (name string, content []byte) {
	e.t.Helper()
	ctx, cancel := context.WithTimeout(ctx, 90*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, "GET", "http://unused/taildrop-recv", nil)
	if err != nil {
		e.t.Fatalf("RecvTaildropFile: %v", err)
	}
	res, err := n.agent.HTTPClient.Do(req)
	if err != nil {
		e.t.Fatalf("RecvTaildropFile(%s): %v", n.name, err)
	}
	defer res.Body.Close()
	body, readErr := io.ReadAll(res.Body)
	// Report a bad status first: it is the more useful diagnosis, and
	// whatever part of the body was read helps explain it.
	if res.StatusCode != 200 {
		e.t.Fatalf("RecvTaildropFile(%s): %s: %s", n.name, res.Status, body)
	}
	if readErr != nil {
		e.t.Fatalf("RecvTaildropFile(%s): reading body: %v", n.name, readErr)
	}
	name = res.Header.Get("Taildrop-Filename")
	e.t.Logf("[%s] received Taildrop %q (%d bytes)", n.name, name, len(body))
	return name, body
}

var (
	// buildGokrazy makes the "make natlab" build run at most once per test
	// process.
	buildGokrazy sync.Once
	// buildGokrazyErr is the outcome of that build. It is written only
	// inside buildGokrazy.Do and read only after Do has returned, so every
	// caller sees a failed build, not just the one that ran it.
	buildGokrazyErr error
)

// ensureGokrazy builds the gokrazy base image (once per test process) and
// locates the kernel. The build is fast (~4s) so we always rebuild to ensure
// the baked-in binaries (tta, tailscale, tailscaled) match the current source.
//
// On success it records the image path in e.gokrazyBase and the kernel path
// in e.gokrazyKernel; later calls on the same Env return immediately. Neither
// field is set unless both steps succeed, so a failed call is retried in full
// (apart from the build itself) the next time. A build failure is sticky for
// the life of the process: every subsequent call returns the same error.
func (e *Env) ensureGokrazy(ctx context.Context) error {
	if e.gokrazyBase != "" {
		return nil // already found
	}

	modRoot, err := findModRoot()
	if err != nil {
		return err
	}

	buildGokrazy.Do(func() {
		e.t.Logf("building gokrazy natlab image...")
		cmd := exec.CommandContext(ctx, "make", "natlab")
		cmd.Dir = filepath.Join(modRoot, "gokrazy")
		cmd.Stderr = os.Stderr
		cmd.Stdout = os.Stdout
		if err := cmd.Run(); err != nil {
			buildGokrazyErr = fmt.Errorf("make natlab: %w", err)
		}
	})
	if buildGokrazyErr != nil {
		return buildGokrazyErr
	}

	kernel, err := findKernelPath(filepath.Join(modRoot, "go.mod"))
	if err != nil {
		return fmt.Errorf("finding kernel: %w", err)
	}

	// Publish both paths together, and only now: gokrazyBase doubles as the
	// "already done" marker checked at the top.
	e.gokrazyKernel = kernel
	e.gokrazyBase = filepath.Join(modRoot, "gokrazy/natlabapp.qcow2")
	return nil
}

// compileBinariesForOS produces the tta, tailscale, and tailscaled binaries
// for the given GOOS/GOARCH in e.binDir/<goos>_<goarch>/, creating that
// directory if needed.
//
// tta is always built from the local source tree (the test agent must match
// the test framework). tailscale and tailscaled are normally built from
// source too; when --test-version is set and goos is "linux", they are
// instead copied from the downloaded release for that version.
//
// All builds (and the release staging, if any) run concurrently with
// CGO_ENABLED=0. The first error encountered is returned.
func (e *Env) compileBinariesForOS(ctx context.Context, goos, goarch string) error {
	modRoot, err := findModRoot()
	if err != nil {
		return err
	}

	platform := goos + "_" + goarch
	outDir := filepath.Join(e.binDir, platform)
	if err := os.MkdirAll(outDir, 0755); err != nil {
		return err
	}

	// Release binaries are only used on Linux: pkgs.tailscale.com only
	// publishes Linux tarballs, so every other GOOS builds from source.
	fromRelease := goos == "linux" && e.testVersion != ""

	type target struct{ name, pkg string }
	targets := []target{{"tta", "./cmd/tta"}}
	if !fromRelease {
		targets = append(targets,
			target{"tailscale", "./cmd/tailscale"},
			target{"tailscaled", "./cmd/tailscaled"})
	}

	// goBuild cross-compiles a single target into outDir.
	goBuild := func(tgt target) error {
		dst := filepath.Join(outDir, tgt.name)
		e.t.Logf("compiling %s/%s...", platform, tgt.name)
		cmd := exec.CommandContext(ctx, "go", "build", "-o", dst, tgt.pkg)
		cmd.Dir = modRoot
		cmd.Env = append(os.Environ(), "GOOS="+goos, "GOARCH="+goarch, "CGO_ENABLED=0")
		out, err := cmd.CombinedOutput()
		if err != nil {
			return fmt.Errorf("building %s/%s: %v\n%s", platform, tgt.name, err, out)
		}
		e.t.Logf("compiled %s/%s", platform, tgt.name)
		return nil
	}

	// stageRelease copies the release tailscale and tailscaled into outDir.
	stageRelease := func() error {
		srcDir, err := ensureVersionBinaries(ctx, e.testVersion, goarch, e.t.Logf)
		if err != nil {
			return err
		}
		for _, name := range []string{"tailscale", "tailscaled"} {
			src, dst := filepath.Join(srcDir, name), filepath.Join(outDir, name)
			if err := copyFile(src, dst, 0755); err != nil {
				return fmt.Errorf("staging %s/%s: %w", platform, name, err)
			}
		}
		e.t.Logf("staged version %s tailscale & tailscaled for %s", e.testVersion, platform)
		return nil
	}

	var eg errgroup.Group
	for _, tgt := range targets {
		eg.Go(func() error { return goBuild(tgt) })
	}
	if fromRelease {
		eg.Go(stageRelease)
	}
	return eg.Wait()
}

// copyFile writes the contents of the file at src to dst, giving dst the
// permission bits perm. The write itself is delegated to writeAtomic. It
// returns the error from opening src or from writeAtomic.
func copyFile(src, dst string, perm os.FileMode) error {
	f, err := os.Open(src)
	if err != nil {
		return err
	}
	defer f.Close()

	return writeAtomic(dst, f, perm)
}

// findModRoot returns the root of the Go module (where go.mod is), as
// reported by "go env GOMOD" run in the current working directory.
//
// It returns an error if the go command fails or if the working directory is
// not inside a module (GOMOD empty or the null device).
func findModRoot() (string, error) {
	// Use Output, not CombinedOutput: the go command can print notices on
	// stderr (for example when it downloads a toolchain), and those must not
	// end up inside the parsed path.
	out, err := exec.Command("go", "env", "GOMOD").Output()
	if err != nil {
		return "", fmt.Errorf("go env GOMOD: %w", err)
	}
	gomod := strings.TrimSpace(string(out))
	if gomod == "" || gomod == os.DevNull {
		return "", fmt.Errorf("not in a Go module")
	}
	return filepath.Dir(gomod), nil
}

// findKernelPath finds the gokrazy kernel vmlinuz path from go.mod.
//
// goMod is the path of the go.mod file to inspect. The function looks for the
// first line whose module path starts with
// "github.com/tailscale/gokrazy-kernel", takes the version that follows it,
// and returns <GOMODCACHE>/<module>@<version>/vmlinuz, where GOMODCACHE comes
// from "go env GOMODCACHE". It does not check that the file exists.
//
// It returns an error if go.mod cannot be read, if the go command fails, or
// if no such requirement is found. (The original notes that this mirrors the
// logic in nat_test.go.)
func findKernelPath(goMod string) (string, error) {
	b, err := os.ReadFile(goMod)
	if err != nil {
		return "", err
	}

	// Use Output, not CombinedOutput: anything the go command prints on
	// stderr (for example a toolchain download notice) must not end up
	// inside the module cache path.
	out, err := exec.Command("go", "env", "GOMODCACHE").Output()
	if err != nil {
		return "", fmt.Errorf("go env GOMODCACHE: %w", err)
	}
	goModCache := strings.TrimSpace(string(out))

	// Scan go.mod for the gokrazy-kernel requirement: "<module> <version>".
	const kernelMod = "github.com/tailscale/gokrazy-kernel"
	for _, line := range strings.Split(string(b), "\n") {
		fields := strings.Fields(line)
		if len(fields) >= 2 && strings.HasPrefix(fields[0], kernelMod) {
			return filepath.Join(goModCache, fields[0]+"@"+fields[1], "vmlinuz"), nil
		}
	}
	return "", fmt.Errorf("gokrazy-kernel not found in %s", goMod)
}

// PingRoute describes what connection type was used to transfer a Disco ping.
// Values are produced by classifyPing from a ping result and are what
// [Env.PingExpect] compares against.
type PingRoute string

// The possible PingRoute values, as assigned by classifyPing:
//   - PingRouteDirect: the result names an endpoint that is not a private
//     address (this is also the fallback when the endpoint does not parse as
//     an ip:port).
//   - PingRouteDERP: the result has an empty endpoint.
//   - PingRouteLocal: the result's endpoint is a private address.
//   - PingRouteNil: there was no ping result at all (nil).
const (
	PingRouteDirect PingRoute = "direct"
	PingRouteDERP   PingRoute = "derp"
	PingRouteLocal  PingRoute = "local"
	PingRouteNil    PingRoute = "nil"
)

// classifyPing maps a ping result to the kind of route the ping took. It is
// only really relevant for disco pings.
//
// A nil result is PingRouteNil and an empty endpoint is PingRouteDERP.
// Otherwise the endpoint is parsed as an ip:port: a private address is
// PingRouteLocal, and anything else (including an endpoint that fails to
// parse) is PingRouteDirect.
func classifyPing(pr *ipnstate.PingResult) PingRoute {
	switch {
	case pr == nil:
		return PingRouteNil
	case pr.Endpoint == "":
		return PingRouteDERP
	}
	if ap, err := netip.ParseAddrPort(pr.Endpoint); err == nil && ap.Addr().IsPrivate() {
		return PingRouteLocal
	}
	return PingRouteDirect
}

// PingExpect disco-pings to's first Tailscale IP from from, over and over,
// until a successful ping is classified as wantRoute or timeout elapses. It
// uses disco pings because that is the only ping type whose result can be
// classified by connection type.
//
// Each attempt is limited to 3 seconds and attempts are spaced 500ms apart;
// the overall wait is also cut short if the test's context is cancelled.
// Every classified route is logged. On timeout the returned error names the
// last route seen (empty if no ping succeeded). Failure to look up to's
// status or IPs is returned as an error straight away.
func (e *Env) PingExpect(from, to *Node, wantRoute PingRoute, timeout time.Duration) error {
	e.t.Helper()
	ctx, cancel := context.WithTimeout(e.t.Context(), timeout)
	defer cancel()

	st, err := to.agent.Status(ctx)
	if err != nil {
		return fmt.Errorf("ping: can't get %s status: %w", to.name, err)
	}
	if len(st.Self.TailscaleIPs) == 0 {
		return fmt.Errorf("ping: %s has no Tailscale IPs", to.name)
	}
	dst := st.Self.TailscaleIPs[0]

	var last PingRoute
	for ctx.Err() == nil {
		attemptCtx, attemptCancel := context.WithTimeout(ctx, 3*time.Second)
		res, pingErr := from.agent.PingWithOpts(attemptCtx, dst, tailcfg.PingDisco, local.PingOpts{})
		attemptCancel()

		if pingErr == nil && res.Err == "" {
			route := classifyPing(res)
			e.t.Logf("Saw ping type %q", route)
			if route == wantRoute {
				return nil
			}
			last = route
		}

		// Pause before the next attempt, but wake early if time is up.
		select {
		case <-time.After(500 * time.Millisecond):
		case <-ctx.Done():
		}
	}
	return fmt.Errorf("ping route = %q, want %q (after %v)", last, wantRoute, timeout)
}

// NumNodes reports how many nodes are currently configured in the
// environment.
func (e *Env) NumNodes() int {
	return len(e.nodes)
}