From d6b626f5bbd45464c8914041f696433801681524 Mon Sep 17 00:00:00 2001 From: Naman Sood Date: Thu, 2 Apr 2026 14:44:40 -0400 Subject: [PATCH] tstest: add test for connectivity to off-tailnet CGNAT endpoints This test is currently known-broken, but work is underway to fix it. tailscale/corp#36270 tracks this work. Updates tailscale/corp#36270 Fixes tailscale/corp#36272 Signed-off-by: Naman Sood --- cmd/tta/tta.go | 35 +++++ tstest/integration/nat/nat_test.go | 206 +++++++++++++++++++++++------ tstest/natlab/vnet/conf.go | 29 ++-- 3 files changed, 224 insertions(+), 46 deletions(-) diff --git a/cmd/tta/tta.go b/cmd/tta/tta.go index dbdbf5ddf..28bb48b23 100644 --- a/cmd/tta/tta.go +++ b/cmd/tta/tta.go @@ -38,6 +38,17 @@ import ( "tailscale.com/version/distro" ) +// connContextKeyType is the type of connContextKey, which isn't of type +// `string` to avoid collisions while being used as a context key. +type connContextKeyType string + +const ( + // connContextKey is the key for looking up the TCP connection + // corresponding to an HTTP request coming in from testing + // infrastructure. + connContextKey connContextKeyType = "conn-context-key" +) + var ( driverAddr = flag.String("driver", "test-driver.tailscale:8008", "address of the test driver; by default we use the DNS name test-driver.tailscale which is special cased in the emulated network's DNS server") ) @@ -55,9 +66,13 @@ func serveCmd(w http.ResponseWriter, cmd string, args ...string) { w.Header().Set("Content-Type", "text/plain; charset=utf-8") if err != nil { w.Header().Set("Exec-Err", err.Error()) + if exiterr, ok := err.(*exec.ExitError); ok { + w.Header().Set("Exec-Exit-Code", strconv.Itoa(exiterr.ExitCode())) + } w.WriteHeader(500) log.Printf("Err on serveCmd for %q %v, %d bytes of output: %v", cmd, args, len(out), err) } else { + w.Header().Set("Exec-Exit-Code", "0") log.Printf("Did serveCmd for %q %v, %d bytes of output", cmd, args, len(out)) } w.Write(out) @@ -139,8 +154,12 @@ func main() { } ttaMux.ServeHTTP(w, r) }) + var hs http.Server hs.Handler = &serveMux + hs.ConnContext = func(ctx context.Context, c net.Conn) context.Context { + return context.WithValue(ctx, connContextKey, c) + } revSt := revDialState{ needConnCh: make(chan bool, 1), debug: debug, @@ -164,6 +183,22 @@ func main() { ttaMux.HandleFunc("/up", func(w http.ResponseWriter, r *http.Request) { serveCmd(w, "tailscale", "up", "--login-server=http://control.tailscale") }) + ttaMux.HandleFunc("/ip", func(w http.ResponseWriter, r *http.Request) { + conn, ok := r.Context().Value(connContextKey).(net.Conn) + if !ok { + w.WriteHeader(http.StatusInternalServerError) + return + } + w.Write([]byte(conn.LocalAddr().String())) + }) + ttaMux.HandleFunc("/ping", func(w http.ResponseWriter, r *http.Request) { + // Send 4 packets and wait a maximum of 1 second for each. The deadline + // is required for ping to return a non-zero exit code on no response. + // The busybox in question here is the breakglass busybox inside the + // natlab QEMU image - the host running the test does not need to have + // busybox installed at that path, or at all. + serveCmd(w, "/usr/local/bin/busybox", "ping", "-c", "4", "-W", "1", r.URL.Query().Get("host")) + }) ttaMux.HandleFunc("/fw", addFirewallHandler) ttaMux.HandleFunc("/logs", func(w http.ResponseWriter, r *http.Request) { logBuf.mu.Lock() diff --git a/tstest/integration/nat/nat_test.go b/tstest/integration/nat/nat_test.go index 2ac16bf58..98116f173 100644 --- a/tstest/integration/nat/nat_test.go +++ b/tstest/integration/nat/nat_test.go @@ -17,6 +17,7 @@ import ( "os" "os/exec" "path/filepath" + "strconv" "strings" "sync" "testing" @@ -75,7 +76,7 @@ func newNatTest(tb testing.TB) *natTest { cmd.Stderr = os.Stderr cmd.Stdout = os.Stdout if err := cmd.Run(); err != nil { - tb.Fatalf("Error running 'make natlab' in gokrazy directory") + tb.Fatalf("Error running 'make natlab' in gokrazy directory: %v", err) } if _, err := os.Stat(nt.base); err != nil { tb.Skipf("still can't find VM image: %v", err) @@ -278,7 +279,7 @@ func hardPMP(c *vnet.Config) *vnet.Node { fmt.Sprintf("10.7.%d.1/24", n), vnet.HardNAT, vnet.NATPMP)) } -func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute { +func (nt *natTest) setupTest(ctx context.Context, addNode ...addNodeFunc) (nodes []*vnet.Node, clients []*vnet.NodeAgentClient, cleanup func()) { if len(addNode) < 1 || len(addNode) > 2 { nt.tb.Fatalf("runTest: invalid number of nodes %v; want 1 or 2", len(addNode)) } @@ -286,7 +287,6 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute { var c vnet.Config c.SetPCAPFile(*pcapFile) - nodes := []*vnet.Node{} for _, fn := range addNode { node := fn(&c) if node == nil { @@ -376,16 +376,11 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute { }) } - ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) - defer cancel() - - var clients []*vnet.NodeAgentClient for _, n := range nodes { client := nt.vnet.NodeAgentClient(n) n.SetClient(client) clients = append(clients, client) } - sts := make([]*ipnstate.Status, len(nodes)) var eg errgroup.Group for i, c := range clients { @@ -405,21 +400,26 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute { t.Logf("%v firewalled", node) } - if err := up(ctx, c); err != nil { - return fmt.Errorf("%v up: %w", node, err) - } - t.Logf("%v up!", node) + if node.ShouldJoinTailnet() { + if err := up(ctx, c); err != nil { + return fmt.Errorf("%v up: %w", node, err) + } + t.Logf("%v up!", node) - st, err = c.Status(ctx) - if err != nil { - return fmt.Errorf("%v status: %w", node, err) - } - sts[i] = st + st, err = c.Status(ctx) + if err != nil { + return fmt.Errorf("%v status: %w", node, err) + } - if st.BackendState != "Running" { - return fmt.Errorf("%v state = %q", node, st.BackendState) + if st.BackendState != "Running" { + return fmt.Errorf("%v state = %q", node, st.BackendState) + } + + t.Logf("%v AllowedIPs: %v", node, st.Self.Addrs) + t.Logf("%v up with %v", node, st.Self.TailscaleIPs) + } else { + t.Logf("%v skipping joining tailnet", node) } - t.Logf("%v up with %v", node, sts[i].Self.TailscaleIPs) return nil }) } @@ -427,14 +427,72 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute { t.Fatalf("initial setup: %v", err) } - defer nt.vnet.Close() + return nodes, clients, nt.vnet.Close +} + +func (nt *natTest) runHostConnectivityTest(addNode ...addNodeFunc) bool { + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + nodes, clients, cleanup := nt.setupTest(ctx, addNode...) + defer cleanup() + + if len(nodes) != 2 { + nt.tb.Logf("ping can only be done among exactly two nodes") + return false + } + var fromClient, toClient *vnet.NodeAgentClient + for i, n := range nodes { + if n.ShouldJoinTailnet() && fromClient == nil { + fromClient = clients[i] + } else { + toClient = clients[i] + } + } + got, err := sendHostNetworkPing(ctx, nt.tb, fromClient, toClient) + if err != nil { + nt.tb.Fatalf("ping host: %v", err) + } + nt.tb.Logf("ping success: %v", got) + return got +} + +func (nt *natTest) runTailscaleConnectivityTest(addNode ...addNodeFunc) pingRoute { + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + nodes, clients, cleanup := nt.setupTest(ctx, addNode...) + defer cleanup() + t := nt.tb if len(nodes) < 2 { return "" } + for _, n := range nodes { + if !n.ShouldJoinTailnet() { + t.Logf("%v did not join tailnet", n) + return "" + } + } + + sts := make([]*ipnstate.Status, len(nodes)) + var eg errgroup.Group + for i, c := range clients { + eg.Go(func() error { + node := nodes[i] + st, err := c.Status(ctx) + if err != nil { + return fmt.Errorf("%v: %w", node, err) + } + sts[i] = st + return nil + }) + } + if err := eg.Wait(); err != nil { + t.Fatalf("get node statuses: %v", err) + } preICMPPing := false - for _, node := range c.Nodes() { + for _, node := range nodes { node.Network().PostConnectedToControl() if err := node.PostConnectedToControl(ctx); err != nil { t.Fatalf("post control error: %s", err) @@ -455,7 +513,7 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute { pingRes, err := ping(ctx, t, clients[0], sts[1].Self.TailscaleIPs[0], tailcfg.PingDisco) if err != nil { - t.Fatalf("ping failure: %v", err) + t.Logf("ping failure: %v", err) } nt.gotRoute = classifyPing(pingRes) t.Logf("ping route: %v", nt.gotRoute) @@ -539,6 +597,60 @@ func up(ctx context.Context, c *vnet.NodeAgentClient) error { return nil } +func getClientIP(ctx context.Context, c *vnet.NodeAgentClient) (netip.Addr, error) { + getIPReq, err := http.NewRequestWithContext(ctx, "GET", "http://unused/ip", nil) + if err != nil { + return netip.Addr{}, err + } + res, err := c.HTTPClient.Do(getIPReq) + if err != nil { + return netip.Addr{}, err + } + defer res.Body.Close() + if res.StatusCode != http.StatusOK { + return netip.Addr{}, fmt.Errorf("client returned http status %q", res.Status) + } + ipBytes, err := io.ReadAll(res.Body) + if err != nil { + return netip.Addr{}, err + } + addrPort, err := netip.ParseAddrPort(string(ipBytes)) + if err != nil { + return netip.Addr{}, err + } + return addrPort.Addr(), nil +} + +// sendHostNetworkPing pings toClient from fromClient, and returns whether +// toClient responded to the ping. +func sendHostNetworkPing(ctx context.Context, tb testing.TB, fromClient, toClient *vnet.NodeAgentClient) (bool, error) { + toIP, err := getClientIP(ctx, toClient) + if err != nil { + return false, fmt.Errorf("get ip: %w", err) + } + req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("http://unused/ping?host=%s", toIP.String()), nil) + if err != nil { + return false, err + } + res, err := fromClient.HTTPClient.Do(req) + if err != nil { + return false, err + } + defer res.Body.Close() + got, err := io.ReadAll(res.Body) + if err != nil { + tb.Logf("error while reading http body: %v", err) + } else { + tb.Logf("got response from ping: %q", got) + } + ec, err := strconv.Atoi(res.Header.Get("Exec-Exit-Code")) + if err != nil { + return false, fmt.Errorf("parse exit code: %w", err) + } + tb.Logf("got ec: %v", ec) + return ec == 0, nil +} + type nodeType struct { name string fn addNodeFunc @@ -552,6 +664,7 @@ var types = []nodeType{ {"hardPMP", hardPMP}, {"one2one", one2one}, {"sameLAN", sameLAN}, + {"cgnat", cgnatNoTailnet}, } // want sets the expected ping route for the test. @@ -563,17 +676,36 @@ func (nt *natTest) want(r pingRoute) { func TestEasyEasy(t *testing.T) { nt := newNatTest(t) - nt.runTest(easy, easy) + nt.runTailscaleConnectivityTest(easy, easy) nt.want(routeDirect) } func TestTwoEasyNoControlDiscoRotate(t *testing.T) { envknob.Setenv("TS_USE_CACHED_NETMAP", "1") nt := newNatTest(t) - nt.runTest(easyNoControlDiscoRotate, easyNoControlDiscoRotate) + nt.runTailscaleConnectivityTest(easyNoControlDiscoRotate, easyNoControlDiscoRotate) nt.want(routeDirect) } +func cgnatNoTailnet(c *vnet.Config) *vnet.Node { + n := c.NumNodes() + 1 + return c.AddNode(c.AddNetwork( + fmt.Sprintf("100.65.%d.1/16", n), + fmt.Sprintf("2.%d.%d.%d", n, n, n), // public IP + vnet.EasyNAT), + vnet.DontJoinTailnet) +} + +func TestNonTailscaleCGNATEndpoint(t *testing.T) { + if !*knownBroken { + t.Skip("skipping known-broken test; set --known-broken to run; see https://github.com/tailscale/corp/issues/36270") + } + nt := newNatTest(t) + if !nt.runHostConnectivityTest(cgnatNoTailnet, sameLAN) { + t.Fatalf("could not ping") + } +} + // Issue tailscale/corp#26438: use learned DERP route as send path of last // resort // @@ -590,13 +722,13 @@ func TestTwoEasyNoControlDiscoRotate(t *testing.T) { // packet over a particular DERP from that peer. func TestFallbackDERPRegionForPeer(t *testing.T) { nt := newNatTest(t) - nt.runTest(hard, hardNoDERPOrEndoints) + nt.runTailscaleConnectivityTest(hard, hardNoDERPOrEndoints) nt.want(routeDERP) } func TestSingleJustIPv6(t *testing.T) { nt := newNatTest(t) - nt.runTest(just6) + nt.runTailscaleConnectivityTest(just6) } var knownBroken = flag.Bool("known-broken", false, "run known-broken tests") @@ -610,24 +742,24 @@ func TestSingleDualBrokenIPv4(t *testing.T) { t.Skip("skipping known-broken test; set --known-broken to run; see https://github.com/tailscale/tailscale/issues/13346") } nt := newNatTest(t) - nt.runTest(v6AndBlackholedIPv4) + nt.runTailscaleConnectivityTest(v6AndBlackholedIPv4) } func TestJustIPv6(t *testing.T) { nt := newNatTest(t) - nt.runTest(just6, just6) + nt.runTailscaleConnectivityTest(just6, just6) nt.want(routeDirect) } func TestEasy4AndJust6(t *testing.T) { nt := newNatTest(t) - nt.runTest(easyAnd6, just6) + nt.runTailscaleConnectivityTest(easyAnd6, just6) nt.want(routeDirect) } func TestSameLAN(t *testing.T) { nt := newNatTest(t) - nt.runTest(easy, sameLAN) + nt.runTailscaleConnectivityTest(easy, sameLAN) nt.want(routeLocal) } @@ -637,25 +769,25 @@ func TestSameLAN(t *testing.T) { // * client machine has a stateful host firewall (e.g. ufw) func TestBPFDisco(t *testing.T) { nt := newNatTest(t) - nt.runTest(easyPMPFWPlusBPF, hard) + nt.runTailscaleConnectivityTest(easyPMPFWPlusBPF, hard) nt.want(routeDirect) } func TestHostFWNoBPF(t *testing.T) { nt := newNatTest(t) - nt.runTest(easyPMPFWNoBPF, hard) + nt.runTailscaleConnectivityTest(easyPMPFWNoBPF, hard) nt.want(routeDERP) } func TestHostFWPair(t *testing.T) { nt := newNatTest(t) - nt.runTest(easyFW, easyFW) + nt.runTailscaleConnectivityTest(easyFW, easyFW) nt.want(routeDirect) } func TestOneHostFW(t *testing.T) { nt := newNatTest(t) - nt.runTest(easy, easyFW) + nt.runTailscaleConnectivityTest(easy, easyFW) nt.want(routeDirect) } @@ -677,7 +809,7 @@ func TestPair(t *testing.T) { } nt := newNatTest(t) - nt.runTest(find(t1), find(t2)) + nt.runTailscaleConnectivityTest(find(t1), find(t2)) } var runGrid = flag.Bool("run-grid", false, "run grid test") @@ -713,7 +845,7 @@ func TestGrid(t *testing.T) { if route == "" { nt := newNatTest(t) - route = nt.runTest(a.fn, b.fn) + route = nt.runTailscaleConnectivityTest(a.fn, b.fn) if err := os.WriteFile(filename, []byte(string(route)), 0666); err != nil { t.Fatalf("writeFile: %v", err) } diff --git a/tstest/natlab/vnet/conf.go b/tstest/natlab/vnet/conf.go index eec8a4731..ccccb8e21 100644 --- a/tstest/natlab/vnet/conf.go +++ b/tstest/natlab/vnet/conf.go @@ -119,6 +119,8 @@ func (c *Config) AddNode(opts ...any) *Node { n.rotateDisco = true case PreICMPPing: n.preICMPPing = true + case DontJoinTailnet: + n.dontJoinTailnet = true case VerboseSyslog: n.verboseSyslog = true default: @@ -141,10 +143,11 @@ func (c *Config) AddNode(opts ...any) *Node { type NodeOption string const ( - HostFirewall NodeOption = "HostFirewall" - RotateDisco NodeOption = "RotateDisco" - PreICMPPing NodeOption = "PreICMPPing" - VerboseSyslog NodeOption = "VerboseSyslog" + HostFirewall NodeOption = "HostFirewall" + RotateDisco NodeOption = "RotateDisco" + PreICMPPing NodeOption = "PreICMPPing" + DontJoinTailnet NodeOption = "DontJoinTailnet" + VerboseSyslog NodeOption = "VerboseSyslog" ) // TailscaledEnv is а option that can be passed to Config.AddNode @@ -209,11 +212,12 @@ type Node struct { n *node // nil until NewServer called client *NodeAgentClient - env []TailscaledEnv - hostFW bool - rotateDisco bool - preICMPPing bool - verboseSyslog bool + env []TailscaledEnv + hostFW bool + rotateDisco bool + preICMPPing bool + verboseSyslog bool + dontJoinTailnet bool // TODO(bradfitz): this is halfway converted to supporting multiple NICs // but not done. We need a MAC-per-Network. @@ -280,6 +284,13 @@ func (n *Node) PreICMPPing() bool { return n.preICMPPing } +// ShouldJoinTailnet reports whether node should join the test tailnet. Machines in +// the virtual universe that aren't on the tailnet are useful for testing that +// Tailscale does not break connectivity to resources outside the tailnet. +func (n *Node) ShouldJoinTailnet() bool { + return !n.dontJoinTailnet +} + // IsV6Only reports whether this node is only connected to IPv6 networks. func (n *Node) IsV6Only() bool { for _, net := range n.nets {