tstest/natlab/vmtest: add TestPeerRelay

Add a VM-based natlab test that exercises the peer-relay feature
(feature/relayserver) end-to-end across three Tailscale nodes whose
network topology makes a direct A<->B UDP path impossible: both peers
are behind HardNAT (FreeBSD/pfSense-style endpoint-dependent NAT) with
no port-mapping services, while the relay node is behind One2OneNAT so
its STUN-discovered WAN endpoint is reachable from both peers. The
test enables the relay server via EditPrefs, then waits for an a->b
PingDisco whose PingResult.PeerRelay is set (proving magicsock chose
the peer-relay path, not DERP), and finally asserts that the relay's
DebugPeerRelaySessions LocalAPI reports the session.

The existing TestPeerRelayPing in tstest/integration runs three
tailscaled processes on the loopback interface with no NATs; this new
vmtest covers peer relay through real per-VM kernels and NATs.

To wire control-server capabilities into vmtest, also add a
PeerRelayGrants() EnvOption (sibling of AllOnline,
SameTailnetUser) that flips testcontrol.Server.PeerRelayGrants so the
wildcard packet filter grants tailcfg.PeerCapabilityRelay and
PeerCapabilityRelayTarget; without those caps magicsock won't consider
any peer a candidate relay.

Updates #13038

Change-Id: Ib3440b83ec442da0d3b89ffa48ceea9398ea9062
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick
2026-05-14 01:28:15 +00:00
committed by Brad Fitzpatrick
parent 9437a634e6
commit 93440604e0
2 changed files with 142 additions and 0 deletions
+14
View File
@@ -91,6 +91,7 @@ type Env struct {
sameTailnetUser bool // all nodes register as the same Tailnet user
allOnline bool // mark every peer as Online=true in MapResponses
peerRelayGrants bool // grant peer-relay capabilities on the wildcard packet filter
// Shared resource initialization (sync.Once for things multiple nodes share).
vnetOnce sync.Once
@@ -373,6 +374,16 @@ func AllOnline() EnvOption {
return envOptFunc(func(e *Env) { e.allOnline = true })
}
// PeerRelayGrants returns an [EnvOption] that makes the test control server
// grant [tailcfg.PeerCapabilityRelay] and [tailcfg.PeerCapabilityRelayTarget]
// on the wildcard packet filter (testcontrol.Server.PeerRelayGrants). Without
// those capabilities, magicsock does not consider any peer a candidate
// peer-relay server, so a node that has [ipn.Prefs.RelayServerPort] set
// cannot actually be used as a relay by its peers.
func PeerRelayGrants() EnvOption {
return envOptFunc(func(e *Env) { e.peerRelayGrants = true })
}
// AddNetwork creates a new virtual network. Arguments follow the same pattern as
// vnet.Config.AddNetwork (string IPs, NAT types, NetworkService values).
func (e *Env) AddNetwork(opts ...any) *vnet.Network {
@@ -1365,6 +1376,9 @@ func (e *Env) initVnet() {
if e.allOnline {
e.server.ControlServer().AllOnline = true
}
if e.peerRelayGrants {
e.server.ControlServer().PeerRelayGrants = true
}
})
}
+128
View File
@@ -5,6 +5,7 @@ package vmtest_test
import (
"bytes"
"context"
"fmt"
"net/netip"
"runtime"
@@ -13,6 +14,8 @@ import (
"time"
"tailscale.com/client/local"
"tailscale.com/ipn"
"tailscale.com/net/udprelay/status"
"tailscale.com/tailcfg"
"tailscale.com/tstest"
"tailscale.com/tstest/integration/testcontrol"
@@ -20,6 +23,7 @@ import (
"tailscale.com/tstest/natlab/vnet"
"tailscale.com/types/key"
"tailscale.com/types/netmap"
"tailscale.com/util/set"
)
// skipIfNotMacOSArm64 skips the test when the host isn't a macOS arm64 host.
@@ -1060,3 +1064,127 @@ func TestDirectConnectionWithCachedNetmapOnOneNode(t *testing.T) {
})
checkFinalMetrics.End(nil)
}
// TestPeerRelay verifies that two Tailscale nodes whose direct UDP path is
// impossible at the network layer (both behind HardNAT, with no port-mapping
// services on either of their networks) can still communicate via a third
// Tailscale node configured as a peer-relay server.
//
// Topology:
//
// a (gokrazy, HardNAT) — aNet WAN 1.0.0.1
// b (gokrazy, HardNAT) — bNet WAN 2.0.0.1
// relay (gokrazy, One2OneNAT) — relayNet WAN 3.0.0.1
//
// HardNAT in natlab is endpoint-dependent (each (src, dst) tuple gets a fresh
// outbound port, and the inbound table keys on (wanPort, src)). Without
// NAT-PMP/UPnP a→b and b→a direct UDP paths cannot be established. The relay
// uses One2OneNAT so its STUN-discovered WAN endpoint is reachable from both
// peers. The test then asserts that magicsock chose the peer-relay path
// (not DERP) and that the relay reports the session.
func TestPeerRelay(t *testing.T) {
env := vmtest.New(t, vmtest.PeerRelayGrants())
aNet := env.AddNetwork("1.0.0.1", "192.168.1.1/24", vnet.HardNAT)
bNet := env.AddNetwork("2.0.0.1", "192.168.2.1/24", vnet.HardNAT)
relayNet := env.AddNetwork("3.0.0.1", "192.168.3.1/24", vnet.One2OneNAT)
a := env.AddNode("a", aNet, vmtest.OS(vmtest.Gokrazy))
b := env.AddNode("b", bNet, vmtest.OS(vmtest.Gokrazy))
relay := env.AddNode("relay", relayNet, vmtest.OS(vmtest.Gokrazy))
enableRelayStep := env.AddStep("Enable peer-relay server on relay")
pingStep := env.AddStep("Disco ping a → b (want peer-relay path)")
sessionsStep := env.AddStep("Check DebugPeerRelaySessions on relay")
env.Start()
// Turn on the relay server. Port 0 picks an unused port.
enableRelayStep.Begin()
editCtx, editCancel := context.WithTimeout(t.Context(), 30*time.Second)
_, err := relay.Agent().EditPrefs(editCtx, &ipn.MaskedPrefs{
Prefs: ipn.Prefs{RelayServerPort: new(uint16(0))},
RelayServerPortSet: true,
})
editCancel()
if err != nil {
enableRelayStep.Fatalf("EditPrefs(relay, RelayServerPort=0): %v", err)
}
enableRelayStep.End(nil)
// Wait for the relay to start, peers to learn about it via netmap,
// and the a→b disco ping to traverse it.
// PingResult.PeerRelay is set by magicsock to "ip:port:vni:N" when the
// disco probe rode a peer relay (vs Endpoint for direct UDP or
// DERPRegionID for DERP).
pingStep.Begin()
bIP := env.Status(b).Self.TailscaleIPs[0]
var lastDetail string
err = tstest.WaitFor(60*time.Second, func() error {
ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second)
defer cancel()
pr, err := a.Agent().PingWithOpts(ctx, bIP, tailcfg.PingDisco, local.PingOpts{})
if err != nil {
return fmt.Errorf("ping: %w", err)
}
if pr.Err != "" {
return fmt.Errorf("ping err: %s", pr.Err)
}
if pr.PeerRelay == "" {
lastDetail = fmt.Sprintf("endpoint=%q derp=%d", pr.Endpoint, pr.DERPRegionID)
return fmt.Errorf("ping did not use a peer relay; %s", lastDetail)
}
t.Logf("a → b disco ping rode peer-relay %s", pr.PeerRelay)
return nil
})
if err != nil {
env.DumpStatus(a)
env.DumpStatus(b)
env.DumpStatus(relay)
pingStep.Fatalf("waiting for peer-relay path a → b: %v (last: %s)", err, lastDetail)
}
pingStep.End(nil)
// The relay's local debug-peer-relay-sessions LocalAPI should now
// report a single session for the a↔b disco probe. Cross-check the
// session's client disco keys against control's view of a and b, and
// confirm both sides recorded non-zero packet/byte counts (the disco
// ping + pong each take one underlay packet through the relay).
sessionsStep.Begin()
ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second)
defer cancel()
srv, err := relay.Agent().DebugPeerRelaySessions(ctx)
if err != nil {
sessionsStep.Fatalf("DebugPeerRelaySessions: %v", err)
}
if srv.UDPPort == nil {
sessionsStep.Fatalf("relay UDPPort is nil; want set")
}
if got, want := len(srv.Sessions), 1; got != want {
sessionsStep.Fatalf("relay sessions = %d; want %d: %+v", got, want, srv.Sessions)
}
cs := env.ControlServer()
wantShorts := set.Of(
cs.Node(env.Status(a).Self.PublicKey).DiscoKey.ShortString(),
cs.Node(env.Status(b).Self.PublicKey).DiscoKey.ShortString(),
)
session := srv.Sessions[0]
gotShorts := set.Of(session.Client1.ShortDisco, session.Client2.ShortDisco)
if !gotShorts.Equal(wantShorts) {
sessionsStep.Fatalf("session disco shorts = %v; want %v", gotShorts, wantShorts)
}
for _, ci := range []status.ClientInfo{session.Client1, session.Client2} {
if !ci.Endpoint.IsValid() {
sessionsStep.Fatalf("session client %s: invalid Endpoint", ci.ShortDisco)
}
if ci.PacketsTx == 0 {
sessionsStep.Fatalf("session client %s: PacketsTx = 0; want >0", ci.ShortDisco)
}
if ci.BytesTx == 0 {
sessionsStep.Fatalf("session client %s: BytesTx = 0; want >0", ci.ShortDisco)
}
}
t.Logf("relay session VNI=%d %s <-> %s on UDP port %d",
session.VNI, session.Client1.ShortDisco, session.Client2.ShortDisco, *srv.UDPPort)
sessionsStep.End(nil)
}