tstest: add test for connectivity to off-tailnet CGNAT endpoints

This test is currently known-broken, but work is underway to fix it.
tailscale/corp#36270 tracks this work.

Updates tailscale/corp#36270
Fixes tailscale/corp#36272

Signed-off-by: Naman Sood <mail@nsood.in>
main
Naman Sood 2 weeks ago committed by GitHub
parent ffaebd71fb
commit d6b626f5bb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 35
      cmd/tta/tta.go
  2. 206
      tstest/integration/nat/nat_test.go
  3. 29
      tstest/natlab/vnet/conf.go

@ -38,6 +38,17 @@ import (
"tailscale.com/version/distro"
)
// connContextKeyType is the type of connContextKey. It is a distinct named
// type (rather than a bare `string`) so that the key cannot collide with
// context keys set by other packages.
type connContextKeyType string
const (
// connContextKey is the context key under which the server stores the
// net.Conn (the underlying TCP connection) that carried an HTTP request
// coming in from the testing infrastructure.
connContextKey connContextKeyType = "conn-context-key"
)
var (
driverAddr = flag.String("driver", "test-driver.tailscale:8008", "address of the test driver; by default we use the DNS name test-driver.tailscale which is special cased in the emulated network's DNS server")
)
@ -55,9 +66,13 @@ func serveCmd(w http.ResponseWriter, cmd string, args ...string) {
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
if err != nil {
w.Header().Set("Exec-Err", err.Error())
if exiterr, ok := err.(*exec.ExitError); ok {
w.Header().Set("Exec-Exit-Code", strconv.Itoa(exiterr.ExitCode()))
}
w.WriteHeader(500)
log.Printf("Err on serveCmd for %q %v, %d bytes of output: %v", cmd, args, len(out), err)
} else {
w.Header().Set("Exec-Exit-Code", "0")
log.Printf("Did serveCmd for %q %v, %d bytes of output", cmd, args, len(out))
}
w.Write(out)
@ -139,8 +154,12 @@ func main() {
}
ttaMux.ServeHTTP(w, r)
})
var hs http.Server
hs.Handler = &serveMux
hs.ConnContext = func(ctx context.Context, c net.Conn) context.Context {
return context.WithValue(ctx, connContextKey, c)
}
revSt := revDialState{
needConnCh: make(chan bool, 1),
debug: debug,
@ -164,6 +183,22 @@ func main() {
ttaMux.HandleFunc("/up", func(w http.ResponseWriter, r *http.Request) {
serveCmd(w, "tailscale", "up", "--login-server=http://control.tailscale")
})
ttaMux.HandleFunc("/ip", func(w http.ResponseWriter, r *http.Request) {
conn, ok := r.Context().Value(connContextKey).(net.Conn)
if !ok {
w.WriteHeader(http.StatusInternalServerError)
return
}
w.Write([]byte(conn.LocalAddr().String()))
})
ttaMux.HandleFunc("/ping", func(w http.ResponseWriter, r *http.Request) {
// Send 4 packets and wait a maximum of 1 second for each. The deadline
// is required for ping to return a non-zero exit code on no response.
// The busybox in question here is the breakglass busybox inside the
// natlab QEMU image - the host running the test does not need to have
// busybox installed at that path, or at all.
serveCmd(w, "/usr/local/bin/busybox", "ping", "-c", "4", "-W", "1", r.URL.Query().Get("host"))
})
ttaMux.HandleFunc("/fw", addFirewallHandler)
ttaMux.HandleFunc("/logs", func(w http.ResponseWriter, r *http.Request) {
logBuf.mu.Lock()

@ -17,6 +17,7 @@ import (
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"sync"
"testing"
@ -75,7 +76,7 @@ func newNatTest(tb testing.TB) *natTest {
cmd.Stderr = os.Stderr
cmd.Stdout = os.Stdout
if err := cmd.Run(); err != nil {
tb.Fatalf("Error running 'make natlab' in gokrazy directory")
tb.Fatalf("Error running 'make natlab' in gokrazy directory: %v", err)
}
if _, err := os.Stat(nt.base); err != nil {
tb.Skipf("still can't find VM image: %v", err)
@ -278,7 +279,7 @@ func hardPMP(c *vnet.Config) *vnet.Node {
fmt.Sprintf("10.7.%d.1/24", n), vnet.HardNAT, vnet.NATPMP))
}
func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute {
func (nt *natTest) setupTest(ctx context.Context, addNode ...addNodeFunc) (nodes []*vnet.Node, clients []*vnet.NodeAgentClient, cleanup func()) {
if len(addNode) < 1 || len(addNode) > 2 {
nt.tb.Fatalf("runTest: invalid number of nodes %v; want 1 or 2", len(addNode))
}
@ -286,7 +287,6 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute {
var c vnet.Config
c.SetPCAPFile(*pcapFile)
nodes := []*vnet.Node{}
for _, fn := range addNode {
node := fn(&c)
if node == nil {
@ -376,16 +376,11 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute {
})
}
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
defer cancel()
var clients []*vnet.NodeAgentClient
for _, n := range nodes {
client := nt.vnet.NodeAgentClient(n)
n.SetClient(client)
clients = append(clients, client)
}
sts := make([]*ipnstate.Status, len(nodes))
var eg errgroup.Group
for i, c := range clients {
@ -405,21 +400,26 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute {
t.Logf("%v firewalled", node)
}
if err := up(ctx, c); err != nil {
return fmt.Errorf("%v up: %w", node, err)
}
t.Logf("%v up!", node)
if node.ShouldJoinTailnet() {
if err := up(ctx, c); err != nil {
return fmt.Errorf("%v up: %w", node, err)
}
t.Logf("%v up!", node)
st, err = c.Status(ctx)
if err != nil {
return fmt.Errorf("%v status: %w", node, err)
}
sts[i] = st
st, err = c.Status(ctx)
if err != nil {
return fmt.Errorf("%v status: %w", node, err)
}
if st.BackendState != "Running" {
return fmt.Errorf("%v state = %q", node, st.BackendState)
if st.BackendState != "Running" {
return fmt.Errorf("%v state = %q", node, st.BackendState)
}
t.Logf("%v AllowedIPs: %v", node, st.Self.Addrs)
t.Logf("%v up with %v", node, st.Self.TailscaleIPs)
} else {
t.Logf("%v skipping joining tailnet", node)
}
t.Logf("%v up with %v", node, sts[i].Self.TailscaleIPs)
return nil
})
}
@ -427,14 +427,72 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute {
t.Fatalf("initial setup: %v", err)
}
defer nt.vnet.Close()
return nodes, clients, nt.vnet.Close
}
// runHostConnectivityTest sets up the given nodes and checks plain
// host-network connectivity between them by running an ICMP ping on one
// node's agent against the other node's address.
//
// The ping is sent from the first node that joins the tailnet to the other
// node. It reports whether the target answered the ping. If the setup does
// not yield exactly two nodes it logs and returns false; if no node joins
// the tailnet (so there is nothing to ping from), or the ping could not be
// carried out at all, the test is failed immediately.
func (nt *natTest) runHostConnectivityTest(addNode ...addNodeFunc) bool {
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	nodes, clients, cleanup := nt.setupTest(ctx, addNode...)
	defer cleanup()

	if len(nodes) != 2 {
		nt.tb.Logf("ping can only be done among exactly two nodes")
		return false
	}

	// The first tailnet member is the ping source; the remaining node is
	// the target.
	var fromClient, toClient *vnet.NodeAgentClient
	for i, n := range nodes {
		if n.ShouldJoinTailnet() && fromClient == nil {
			fromClient = clients[i]
		} else {
			toClient = clients[i]
		}
	}
	// With two nodes, toClient is always set once fromClient is, so only
	// the source can be missing. Fail clearly instead of dereferencing a
	// nil client further down.
	if fromClient == nil {
		nt.tb.Fatalf("ping host: no node joins the tailnet, so there is no node to ping from")
	}

	got, err := sendHostNetworkPing(ctx, nt.tb, fromClient, toClient)
	if err != nil {
		nt.tb.Fatalf("ping host: %v", err)
	}
	nt.tb.Logf("ping success: %v", got)
	return got
}
func (nt *natTest) runTailscaleConnectivityTest(addNode ...addNodeFunc) pingRoute {
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
defer cancel()
nodes, clients, cleanup := nt.setupTest(ctx, addNode...)
defer cleanup()
t := nt.tb
if len(nodes) < 2 {
return ""
}
for _, n := range nodes {
if !n.ShouldJoinTailnet() {
t.Logf("%v did not join tailnet", n)
return ""
}
}
sts := make([]*ipnstate.Status, len(nodes))
var eg errgroup.Group
for i, c := range clients {
eg.Go(func() error {
node := nodes[i]
st, err := c.Status(ctx)
if err != nil {
return fmt.Errorf("%v: %w", node, err)
}
sts[i] = st
return nil
})
}
if err := eg.Wait(); err != nil {
t.Fatalf("get node statuses: %v", err)
}
preICMPPing := false
for _, node := range c.Nodes() {
for _, node := range nodes {
node.Network().PostConnectedToControl()
if err := node.PostConnectedToControl(ctx); err != nil {
t.Fatalf("post control error: %s", err)
@ -455,7 +513,7 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute {
pingRes, err := ping(ctx, t, clients[0], sts[1].Self.TailscaleIPs[0], tailcfg.PingDisco)
if err != nil {
t.Fatalf("ping failure: %v", err)
t.Logf("ping failure: %v", err)
}
nt.gotRoute = classifyPing(pingRes)
t.Logf("ping route: %v", nt.gotRoute)
@ -539,6 +597,60 @@ func up(ctx context.Context, c *vnet.NodeAgentClient) error {
return nil
}
// getClientIP queries the /ip endpoint of the node agent behind c and
// returns the IP address it reports. The response body is expected to be an
// "address:port" pair; only the address part is returned.
func getClientIP(ctx context.Context, c *vnet.NodeAgentClient) (netip.Addr, error) {
	var zero netip.Addr

	req, err := http.NewRequestWithContext(ctx, "GET", "http://unused/ip", nil)
	if err != nil {
		return zero, err
	}

	resp, err := c.HTTPClient.Do(req)
	if err != nil {
		return zero, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return zero, fmt.Errorf("client returned http status %q", resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return zero, err
	}

	ap, err := netip.ParseAddrPort(string(body))
	if err != nil {
		return zero, err
	}
	return ap.Addr(), nil
}
// sendHostNetworkPing pings toClient from fromClient, and returns whether
// toClient responded to the ping.
//
// It first asks toClient's agent for its IP address, then asks fromClient's
// agent to ping that address. The ping's outcome is taken from the
// Exec-Exit-Code response header: exit code 0 means the target answered.
// A non-nil error means the ping could not be carried out or its result
// could not be determined, as opposed to the target simply not answering.
func sendHostNetworkPing(ctx context.Context, tb testing.TB, fromClient, toClient *vnet.NodeAgentClient) (bool, error) {
	toIP, err := getClientIP(ctx, toClient)
	if err != nil {
		return false, fmt.Errorf("get ip: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("http://unused/ping?host=%s", toIP.String()), nil)
	if err != nil {
		return false, err
	}
	res, err := fromClient.HTTPClient.Do(req)
	if err != nil {
		return false, err
	}
	defer res.Body.Close()

	// The body is the ping command's output; it is only logged to help
	// with debugging, so a read failure is not fatal.
	got, err := io.ReadAll(res.Body)
	if err != nil {
		tb.Logf("error while reading http body: %v", err)
	} else {
		tb.Logf("got response from ping: %q", got)
	}

	// The exit code header is absent when the command could not be run at
	// all (as opposed to running and exiting non-zero). Report the HTTP
	// status and the agent's Exec-Err header in that case, rather than an
	// opaque integer parse error on an empty string.
	ecHeader := res.Header.Get("Exec-Exit-Code")
	if ecHeader == "" {
		return false, fmt.Errorf("ping response has no exit code (http status %q, exec error %q)", res.Status, res.Header.Get("Exec-Err"))
	}
	ec, err := strconv.Atoi(ecHeader)
	if err != nil {
		return false, fmt.Errorf("parse exit code: %w", err)
	}
	tb.Logf("got ec: %v", ec)
	return ec == 0, nil
}
type nodeType struct {
name string
fn addNodeFunc
@ -552,6 +664,7 @@ var types = []nodeType{
{"hardPMP", hardPMP},
{"one2one", one2one},
{"sameLAN", sameLAN},
{"cgnat", cgnatNoTailnet},
}
// want sets the expected ping route for the test.
@ -563,17 +676,36 @@ func (nt *natTest) want(r pingRoute) {
func TestEasyEasy(t *testing.T) {
nt := newNatTest(t)
nt.runTest(easy, easy)
nt.runTailscaleConnectivityTest(easy, easy)
nt.want(routeDirect)
}
func TestTwoEasyNoControlDiscoRotate(t *testing.T) {
envknob.Setenv("TS_USE_CACHED_NETMAP", "1")
nt := newNatTest(t)
nt.runTest(easyNoControlDiscoRotate, easyNoControlDiscoRotate)
nt.runTailscaleConnectivityTest(easyNoControlDiscoRotate, easyNoControlDiscoRotate)
nt.want(routeDirect)
}
// cgnatNoTailnet adds a node that does not join the tailnet. The node sits
// behind an EasyNAT network whose LAN address is in 100.65.0.0/16, with the
// node's ordinal used both in the LAN address and in the public IP.
func cgnatNoTailnet(c *vnet.Config) *vnet.Node {
	idx := c.NumNodes() + 1
	lanIP := fmt.Sprintf("100.65.%d.1/16", idx)
	wanIP := fmt.Sprintf("2.%d.%d.%d", idx, idx, idx) // public IP
	network := c.AddNetwork(lanIP, wanIP, vnet.EasyNAT)
	return c.AddNode(network, vnet.DontJoinTailnet)
}
// TestNonTailscaleCGNATEndpoint checks that a node can ping a host that is
// not on the tailnet but has an address in the 100.65.0.0/16 range. The
// test only runs when the known-broken flag is set.
func TestNonTailscaleCGNATEndpoint(t *testing.T) {
	runBroken := *knownBroken
	if !runBroken {
		t.Skip("skipping known-broken test; set --known-broken to run; see https://github.com/tailscale/corp/issues/36270")
	}

	nt := newNatTest(t)
	if reachable := nt.runHostConnectivityTest(cgnatNoTailnet, sameLAN); !reachable {
		t.Fatalf("could not ping")
	}
}
// Issue tailscale/corp#26438: use learned DERP route as send path of last
// resort
//
@ -590,13 +722,13 @@ func TestTwoEasyNoControlDiscoRotate(t *testing.T) {
// packet over a particular DERP from that peer.
func TestFallbackDERPRegionForPeer(t *testing.T) {
nt := newNatTest(t)
nt.runTest(hard, hardNoDERPOrEndoints)
nt.runTailscaleConnectivityTest(hard, hardNoDERPOrEndoints)
nt.want(routeDERP)
}
func TestSingleJustIPv6(t *testing.T) {
nt := newNatTest(t)
nt.runTest(just6)
nt.runTailscaleConnectivityTest(just6)
}
var knownBroken = flag.Bool("known-broken", false, "run known-broken tests")
@ -610,24 +742,24 @@ func TestSingleDualBrokenIPv4(t *testing.T) {
t.Skip("skipping known-broken test; set --known-broken to run; see https://github.com/tailscale/tailscale/issues/13346")
}
nt := newNatTest(t)
nt.runTest(v6AndBlackholedIPv4)
nt.runTailscaleConnectivityTest(v6AndBlackholedIPv4)
}
func TestJustIPv6(t *testing.T) {
nt := newNatTest(t)
nt.runTest(just6, just6)
nt.runTailscaleConnectivityTest(just6, just6)
nt.want(routeDirect)
}
func TestEasy4AndJust6(t *testing.T) {
nt := newNatTest(t)
nt.runTest(easyAnd6, just6)
nt.runTailscaleConnectivityTest(easyAnd6, just6)
nt.want(routeDirect)
}
func TestSameLAN(t *testing.T) {
nt := newNatTest(t)
nt.runTest(easy, sameLAN)
nt.runTailscaleConnectivityTest(easy, sameLAN)
nt.want(routeLocal)
}
@ -637,25 +769,25 @@ func TestSameLAN(t *testing.T) {
// * client machine has a stateful host firewall (e.g. ufw)
func TestBPFDisco(t *testing.T) {
nt := newNatTest(t)
nt.runTest(easyPMPFWPlusBPF, hard)
nt.runTailscaleConnectivityTest(easyPMPFWPlusBPF, hard)
nt.want(routeDirect)
}
func TestHostFWNoBPF(t *testing.T) {
nt := newNatTest(t)
nt.runTest(easyPMPFWNoBPF, hard)
nt.runTailscaleConnectivityTest(easyPMPFWNoBPF, hard)
nt.want(routeDERP)
}
func TestHostFWPair(t *testing.T) {
nt := newNatTest(t)
nt.runTest(easyFW, easyFW)
nt.runTailscaleConnectivityTest(easyFW, easyFW)
nt.want(routeDirect)
}
func TestOneHostFW(t *testing.T) {
nt := newNatTest(t)
nt.runTest(easy, easyFW)
nt.runTailscaleConnectivityTest(easy, easyFW)
nt.want(routeDirect)
}
@ -677,7 +809,7 @@ func TestPair(t *testing.T) {
}
nt := newNatTest(t)
nt.runTest(find(t1), find(t2))
nt.runTailscaleConnectivityTest(find(t1), find(t2))
}
var runGrid = flag.Bool("run-grid", false, "run grid test")
@ -713,7 +845,7 @@ func TestGrid(t *testing.T) {
if route == "" {
nt := newNatTest(t)
route = nt.runTest(a.fn, b.fn)
route = nt.runTailscaleConnectivityTest(a.fn, b.fn)
if err := os.WriteFile(filename, []byte(string(route)), 0666); err != nil {
t.Fatalf("writeFile: %v", err)
}

@ -119,6 +119,8 @@ func (c *Config) AddNode(opts ...any) *Node {
n.rotateDisco = true
case PreICMPPing:
n.preICMPPing = true
case DontJoinTailnet:
n.dontJoinTailnet = true
case VerboseSyslog:
n.verboseSyslog = true
default:
@ -141,10 +143,11 @@ func (c *Config) AddNode(opts ...any) *Node {
type NodeOption string
const (
HostFirewall NodeOption = "HostFirewall"
RotateDisco NodeOption = "RotateDisco"
PreICMPPing NodeOption = "PreICMPPing"
VerboseSyslog NodeOption = "VerboseSyslog"
HostFirewall NodeOption = "HostFirewall"
RotateDisco NodeOption = "RotateDisco"
PreICMPPing NodeOption = "PreICMPPing"
DontJoinTailnet NodeOption = "DontJoinTailnet"
VerboseSyslog NodeOption = "VerboseSyslog"
)
// TailscaledEnv is an option that can be passed to Config.AddNode
@ -209,11 +212,12 @@ type Node struct {
n *node // nil until NewServer called
client *NodeAgentClient
env []TailscaledEnv
hostFW bool
rotateDisco bool
preICMPPing bool
verboseSyslog bool
env []TailscaledEnv
hostFW bool
rotateDisco bool
preICMPPing bool
verboseSyslog bool
dontJoinTailnet bool
// TODO(bradfitz): this is halfway converted to supporting multiple NICs
// but not done. We need a MAC-per-Network.
@ -280,6 +284,13 @@ func (n *Node) PreICMPPing() bool {
return n.preICMPPing
}
// ShouldJoinTailnet reports whether the node is meant to join the test
// tailnet, i.e. whether it was added without the DontJoinTailnet option.
// Off-tailnet machines in the virtual universe are useful for verifying
// that Tailscale does not break connectivity to resources outside the
// tailnet.
func (n *Node) ShouldJoinTailnet() bool {
	optedOut := n.dontJoinTailnet
	return !optedOut
}
// IsV6Only reports whether this node is only connected to IPv6 networks.
func (n *Node) IsV6Only() bool {
for _, net := range n.nets {

Loading…
Cancel
Save