From 9657a9321795dc7aa837da347b453099767c76d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Claus=20Lensb=C3=B8l?= Date: Thu, 5 Mar 2026 16:00:36 -0500 Subject: [PATCH] tstest/natlab: add test for no control and rotated disco key (#18261) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updates #12639 Signed-off-by: Claus Lensbøl --- tstest/integration/nat/nat_test.go | 56 ++++++++++++++++++++++-- tstest/natlab/vnet/conf.go | 68 +++++++++++++++++++++++++++--- tstest/natlab/vnet/vnet.go | 44 +++++++++++-------- wgengine/magicsock/magicsock.go | 1 + 4 files changed, 140 insertions(+), 29 deletions(-) diff --git a/tstest/integration/nat/nat_test.go b/tstest/integration/nat/nat_test.go index 2322e243a..1f62436ff 100644 --- a/tstest/integration/nat/nat_test.go +++ b/tstest/integration/nat/nat_test.go @@ -25,6 +25,7 @@ import ( "golang.org/x/mod/modfile" "golang.org/x/sync/errgroup" "tailscale.com/client/tailscale" + "tailscale.com/envknob" "tailscale.com/ipn/ipnstate" "tailscale.com/syncs" "tailscale.com/tailcfg" @@ -133,6 +134,24 @@ func easyAnd6(c *vnet.Config) *vnet.Node { vnet.EasyNAT)) } +// easyNoControlDiscoRotate sets up a node with easy NAT, cuts traffic to +// control after connecting, and then rotates the disco key to simulate a newly +// started node (from a disco perspective). +func easyNoControlDiscoRotate(c *vnet.Config) *vnet.Node { + n := c.NumNodes() + 1 + nw := c.AddNetwork( + fmt.Sprintf("2.%d.%d.%d", n, n, n), // public IP + fmt.Sprintf("192.168.%d.1/24", n), + vnet.EasyNAT) + nw.SetPostConnectControlBlackhole(true) + return c.AddNode( + vnet.TailscaledEnv{ + Key: "TS_USE_CACHED_NETMAP", + Value: "true", + }, + vnet.RotateDisco, vnet.PreICMPPing, nw) +} + func v6AndBlackholedIPv4(c *vnet.Config) *vnet.Node { n := c.NumNodes() + 1 nw := c.AddNetwork( @@ -364,7 +383,9 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute { var clients []*vnet.NodeAgentClient for _, n := range nodes { - clients = append(clients, nt.vnet.NodeAgentClient(n)) + client := nt.vnet.NodeAgentClient(n) + n.SetClient(client) + clients = append(clients, client) } sts := make([]*ipnstate.Status, len(nodes)) @@ -415,7 +436,27 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute { return "" } - pingRes, err := ping(ctx, t, clients[0], sts[1].Self.TailscaleIPs[0]) + preICMPPing := false + for _, node := range c.Nodes() { + node.Network().PostConnectedToControl() + if err := node.PostConnectedToControl(ctx); err != nil { + t.Fatalf("post control error: %s", err) + } + if node.PreICMPPing() { + preICMPPing = true + } + } + + // Should we send traffic across the nodes before starting disco? + // For nodes that rotated disco keys after control going away. + if preICMPPing { + _, err := ping(ctx, t, clients[0], sts[1].Self.TailscaleIPs[0], tailcfg.PingICMP) + if err != nil { + t.Fatalf("ICMP ping failure: %v", err) + } + } + + pingRes, err := ping(ctx, t, clients[0], sts[1].Self.TailscaleIPs[0], tailcfg.PingDisco) if err != nil { t.Fatalf("ping failure: %v", err) } @@ -450,12 +491,12 @@ const ( routeNil pingRoute = "nil" // *ipnstate.PingResult is nil ) -func ping(ctx context.Context, t testing.TB, c *vnet.NodeAgentClient, target netip.Addr) (*ipnstate.PingResult, error) { +func ping(ctx context.Context, t testing.TB, c *vnet.NodeAgentClient, target netip.Addr, pType tailcfg.PingType) (*ipnstate.PingResult, error) { var lastRes *ipnstate.PingResult for n := range 10 { t.Logf("ping attempt %d to %v ...", n+1, target) pingCtx, cancel := context.WithTimeout(ctx, 2*time.Second) - pr, err := c.PingWithOpts(pingCtx, target, tailcfg.PingDisco, tailscale.PingOpts{}) + pr, err := c.PingWithOpts(pingCtx, target, pType, tailscale.PingOpts{}) cancel() if err != nil { t.Logf("ping attempt %d error: %v", n+1, err) @@ -529,6 +570,13 @@ func TestEasyEasy(t *testing.T) { nt.want(routeDirect) } +func TestTwoEasyNoControlDiscoRotate(t *testing.T) { + envknob.Setenv("TS_USE_CACHED_NETMAP", "1") + nt := newNatTest(t) + nt.runTest(easyNoControlDiscoRotate, easyNoControlDiscoRotate) + nt.want(routeDirect) +} + // Issue tailscale/corp#26438: use learned DERP route as send path of last // resort // diff --git a/tstest/natlab/vnet/conf.go b/tstest/natlab/vnet/conf.go index 3f83e35c0..33a9bd7e5 100644 --- a/tstest/natlab/vnet/conf.go +++ b/tstest/natlab/vnet/conf.go @@ -5,6 +5,7 @@ package vnet import ( "cmp" + "context" "fmt" "iter" "net/netip" @@ -114,6 +115,10 @@ func (c *Config) AddNode(opts ...any) *Node { switch o { case HostFirewall: n.hostFW = true + case RotateDisco: + n.rotateDisco = true + case PreICMPPing: + n.preICMPPing = true case VerboseSyslog: n.verboseSyslog = true default: @@ -137,6 +142,8 @@ type NodeOption string const ( HostFirewall NodeOption = "HostFirewall" + RotateDisco NodeOption = "RotateDisco" + PreICMPPing NodeOption = "PreICMPPing" VerboseSyslog NodeOption = "VerboseSyslog" ) @@ -197,12 +204,15 @@ func (c *Config) AddNetwork(opts ...any) *Network { // Node is the configuration of a node in the virtual network. type Node struct { - err error - num int // 1-based node number - n *node // nil until NewServer called + err error + num int // 1-based node number + n *node // nil until NewServer called + client *NodeAgentClient env []TailscaledEnv hostFW bool + rotateDisco bool + preICMPPing bool verboseSyslog bool // TODO(bradfitz): this is halfway converted to supporting multiple NICs @@ -243,6 +253,33 @@ func (n *Node) SetVerboseSyslog(v bool) { n.verboseSyslog = v } +func (n *Node) SetClient(c *NodeAgentClient) { + n.client = c +} + +// PostConnectedToControl should be called after the clients have connected to +// control to modify the client behaviour after getting the network maps. +// Currently, the only implemented behavior is rotating disco keys. +func (n *Node) PostConnectedToControl(ctx context.Context) error { + if n.rotateDisco { + if err := n.client.DebugAction(ctx, "rotate-disco-key"); err != nil { + return err + } + } + return nil +} + +// PreICMPPing reports whether node should send an ICMP Ping sent before +// the disco ping. This is important for the nodes having rotated their +// disco keys while control is down. Disco pings deliberately does not +// trigger a TSMPDiscoKeyAdvertisement, making the need for other traffic (here +// simlulated as an ICMP ping) needed first. Any traffic could trigger this key +// exchange, the ICMP Ping is used as a handy existing way of sending some +// non-disco traffic. +func (n *Node) PreICMPPing() bool { + return n.preICMPPing +} + // IsV6Only reports whether this node is only connected to IPv6 networks. func (n *Node) IsV6Only() bool { for _, net := range n.nets { @@ -275,10 +312,12 @@ type Network struct { wanIP6 netip.Prefix // global unicast router in host bits; CIDR is /64 delegated to LAN - wanIP4 netip.Addr // IPv4 WAN IP, if any - lanIP4 netip.Prefix - nodes []*Node - breakWAN4 bool // whether to break WAN IPv4 connectivity + wanIP4 netip.Addr // IPv4 WAN IP, if any + lanIP4 netip.Prefix + nodes []*Node + breakWAN4 bool // whether to break WAN IPv4 connectivity + postConnectBlackholeControl bool // whether to break control connectivity after nodes have connected + network *network svcs set.Set[NetworkService] @@ -310,6 +349,12 @@ func (n *Network) SetBlackholedIPv4(v bool) { n.breakWAN4 = v } +// SetPostConnectControlBlackhole sets wether the network should blackhole all +// traffic to the control server after the clients have connected. +func (n *Network) SetPostConnectControlBlackhole(v bool) { + n.postConnectBlackholeControl = v +} + func (n *Network) CanV4() bool { return n.lanIP4.IsValid() || n.wanIP4.IsValid() } @@ -325,6 +370,13 @@ func (n *Network) CanTakeMoreNodes() bool { return len(n.nodes) < 150 } +// PostConnectedToControl should be called after the clients have connected to +// the control server to modify network behaviors. Currently the only +// implemented behavior is to conditionally blackhole traffic to control. +func (n *Network) PostConnectedToControl() { + n.network.SetControlBlackholed(n.postConnectBlackholeControl) +} + // NetworkService is a service that can be added to a network. type NetworkService string @@ -390,6 +442,8 @@ func (s *Server) initFromConfig(c *Config) error { } netOfConf[conf] = n s.networks.Add(n) + + conf.network = n if conf.wanIP4.IsValid() { if conf.wanIP4.Is6() { return fmt.Errorf("invalid IPv6 address in wanIP") diff --git a/tstest/natlab/vnet/vnet.go b/tstest/natlab/vnet/vnet.go index 357fe213c..43d370c61 100644 --- a/tstest/natlab/vnet/vnet.go +++ b/tstest/natlab/vnet/vnet.go @@ -506,23 +506,24 @@ func (nw networkWriter) write(b []byte) { } type network struct { - s *Server - num int // 1-based - mac MAC // of router - portmap bool - lanInterfaceID int - wanInterfaceID int - v4 bool // network supports IPv4 - v6 bool // network support IPv6 - wanIP6 netip.Prefix // router's WAN IPv6, if any, as a /64. - wanIP4 netip.Addr // router's LAN IPv4, if any - lanIP4 netip.Prefix // router's LAN IP + CIDR (e.g. 192.168.2.1/24) - breakWAN4 bool // break WAN IPv4 connectivity - latency time.Duration // latency applied to interface writes - lossRate float64 // probability of dropping a packet (0.0 to 1.0) - nodesByIP4 map[netip.Addr]*node // by LAN IPv4 - nodesByMAC map[MAC]*node - logf func(format string, args ...any) + s *Server + num int // 1-based + mac MAC // of router + portmap bool + lanInterfaceID int + wanInterfaceID int + v4 bool // network supports IPv4 + v6 bool // network support IPv6 + wanIP6 netip.Prefix // router's WAN IPv6, if any, as a /64. + wanIP4 netip.Addr // router's LAN IPv4, if any + lanIP4 netip.Prefix // router's LAN IP + CIDR (e.g. 192.168.2.1/24) + breakWAN4 bool // break WAN IPv4 connectivity + blackholeControl bool // blackhole control connectivity + latency time.Duration // latency applied to interface writes + lossRate float64 // probability of dropping a packet (0.0 to 1.0) + nodesByIP4 map[netip.Addr]*node // by LAN IPv4 + nodesByMAC map[MAC]*node + logf func(format string, args ...any) ns *stack.Stack linkEP *channel.Endpoint @@ -578,6 +579,12 @@ func (n *network) MACOfIP(ip netip.Addr) (_ MAC, ok bool) { return MAC{}, false } +// SetControlBlackholed sets wether traffic to control should be blackholed for the +// network. +func (n *network) SetControlBlackholed(v bool) { + n.blackholeControl = v +} + type node struct { mac MAC num int // 1-based node number @@ -1263,7 +1270,8 @@ func (n *network) HandleEthernetPacketForRouter(ep EthernetPacket) { } if toForward && n.s.shouldInterceptTCP(packet) { - if flow.dst.Is4() && n.breakWAN4 { + if (flow.dst.Is4() && n.breakWAN4) || + (n.blackholeControl && fakeControl.Match(flow.dst)) { // Blackhole the packet. return } diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 169369f4b..1f02d84c7 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -4266,6 +4266,7 @@ func (c *Conn) HandleDiscoKeyAdvertisement(node tailcfg.NodeView, update packet. // If the key did not change, count it and return. if oldDiscoKey.Compare(discoKey) == 0 { metricTSMPDiscoKeyAdvertisementUnchanged.Add(1) + c.logf("magicsock: disco key did not change for node %v", nodeKey.ShortString()) return } c.discoInfoForKnownPeerLocked(discoKey)