tstest/natlab/vmtest: add test loading netmap cache from disk (#19598)

For testing the loading of netmap cache from disk, the cache needs to
exist. The simple solution is to start two nodes and connect them to
control, with the netmap caching capability set. Then cut the connection
to control, restart the nodes, and ping between them.

This tests that we can start from a cache and get to running state, but
also that we are able to establish a connection between the nodes.

For now this is not testing how the nodes are able to talk to each other
(DERP vs direct).

Updates #19597

Signed-off-by: Claus Lensbøl <claus@tailscale.com>
This commit is contained in:
Claus Lensbøl
2026-05-01 09:46:19 -04:00
committed by GitHub
parent 89a78dc9b7
commit ff9c3f0e00
3 changed files with 117 additions and 0 deletions
+25
View File
@@ -586,6 +586,31 @@ func (e *Env) Start() {
if st2.BackendState != "Running" {
return fmt.Errorf("[%s] state = %q, want Running", n.name, st2.BackendState)
}
// Apply any capabilities for the node to the map.
// SetNodeCapMap pushes an updated map response immediately; we then
// wait until the node reports every requested capability in its status.
if cm := n.vnetNode.WantCapMap(); cm != nil {
e.server.ControlServer().SetNodeCapMap(st2.Self.PublicKey, cm)
if err := tstest.WaitFor(15*time.Second, func() error {
st, err := n.agent.Status(ctx)
if err != nil {
return err
}
if st.Self == nil {
return fmt.Errorf("self is nil")
}
for c := range cm {
if !st.Self.HasCap(c) {
return fmt.Errorf("cap %v not yet received", c)
}
}
return nil
}); err != nil {
return fmt.Errorf("[%s] waiting for capabilities: %w", n.name, err)
}
}
ips := fmt.Sprintf("%v", st2.Self.TailscaleIPs)
e.setNodeTailscale(n.name, "Running "+ips)
t.Logf("[%s] up with %v", n.name, st2.Self.TailscaleIPs)
+73
View File
@@ -11,12 +11,14 @@ import (
"testing"
"time"
"tailscale.com/client/local"
"tailscale.com/tailcfg"
"tailscale.com/tstest"
"tailscale.com/tstest/integration/testcontrol"
"tailscale.com/tstest/natlab/vmtest"
"tailscale.com/tstest/natlab/vnet"
"tailscale.com/types/key"
"tailscale.com/types/netmap"
)
func TestMacOSAndLinuxCanPing(t *testing.T) {
@@ -905,3 +907,74 @@ func TestMullvadExitNode(t *testing.T) {
env.SetExitNodeIP(client, netip.Addr{})
check(checkOff2Step, "exit-off (again)", clientWAN)
}
// TestCachedNetmapAfterRestart verifies that two nodes with netmap
// caching enabled (NodeAttrCacheNetworkMaps) can reach each other again
// after both are restarted while the control server is unreachable.
// After restart the nodes must rely only on their on-disk cached netmaps
// to reconnect.
//
// The test asserts that each node reports a cached netmap and that a TSMP
// ping from a to b succeeds. It does not check whether the path between
// the nodes is direct or relayed via DERP.
func TestCachedNetmapAfterRestart(t *testing.T) {
	env := vmtest.New(t)

	aNet := env.AddNetwork("1.0.0.1", "192.168.1.1/24", vnet.EasyNAT)
	bNet := env.AddNetwork("2.0.0.1", "192.168.2.1/24", vnet.EasyNAT)
	// NOTE(review): presumably arms the networks to blackhole control
	// traffic once PostConnectedToControl is called below — confirm
	// against the vnet package.
	aNet.SetPostConnectControlBlackhole(true)
	bNet.SetPostConnectControlBlackhole(true)

	a := env.AddNode("a", aNet,
		vmtest.OS(vmtest.Gokrazy),
		tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil})
	b := env.AddNode("b", bNet,
		vmtest.OS(vmtest.Gokrazy),
		tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil})

	connectStep := env.AddStep("Establish initial TSMP tunnel")
	cutControlStep := env.AddStep("Cut control server access")
	restartStep := env.AddStep("Restart tailscaled on both nodes")
	netmapCheckStep := env.AddStep("Check netmap loaded is cached")
	pingStep := env.AddStep("Ping a → b TSMP (cached netmap, no control)")

	env.Start()

	// Confirm the nodes can talk to each other while control is still
	// reachable, before anything is cut or restarted.
	connectStep.Begin()
	if err := env.Ping(a, b, tailcfg.PingTSMP, 30*time.Second); err != nil {
		connectStep.End(err)
		t.Fatal(err)
	}
	connectStep.End(nil)

	cutControlStep.Begin()
	aNet.PostConnectedToControl()
	bNet.PostConnectedToControl()
	// The registered callback panics, so any map request that still
	// reaches the control server from here on aborts the test loudly.
	env.ControlServer().SetOnMapRequest(func(nk key.NodePublic) {
		panic(fmt.Sprintf("got connection from %v", nk))
	})
	cutControlStep.End(nil)

	restartStep.Begin()
	env.RestartTailscaled(a)
	env.RestartTailscaled(b)
	restartStep.End(nil)

	// Each node must report that the netmap it is currently using is
	// marked as cached.
	netmapCheckStep.Begin()
	for _, node := range []*vmtest.Node{a, b} {
		nm, err := local.GetDebugResultJSON[netmap.NetworkMap](t.Context(), node.Agent().Client, "current-netmap")
		if err != nil {
			// Build the error once so the step record and the test
			// failure carry the same wrapped error.
			err = fmt.Errorf("[%s] fetching netmap: %w", node.Name(), err)
			netmapCheckStep.End(err)
			t.Fatal(err)
		}
		if !nm.Cached {
			err := fmt.Errorf("[%s] expected netmap.Cached = true, got: %t", node.Name(), nm.Cached)
			netmapCheckStep.End(err)
			t.Fatal(err)
		}
	}
	netmapCheckStep.End(nil)

	// With control cut off, a successful ping shows the nodes can reach
	// each other without a fresh netmap from control.
	pingStep.Begin()
	if err := env.Ping(a, b, tailcfg.PingTSMP, 30*time.Second); err != nil {
		pingStep.End(err)
		t.Fatal(err)
	}
	pingStep.End(nil)
}