From f289f7e77c66c4870897eb21f1b3a0ba6e83bb14 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Wed, 22 Apr 2026 03:39:15 +0000 Subject: [PATCH] tstest/natlab/vmtest,cmd/tta: add TestSiteToSite Verifies that site-to-site Tailscale subnet routing with --snat-subnet-routes=false preserves the original source IP end-to-end. Topology: two sites, each with a Linux subnet router on a NATted WAN plus an internal LAN, and a non-Tailscale backend on each LAN. Backends are given static routes pointing to their local subnet router for the remote site's prefix; an HTTP GET from backend-a to backend-b over Tailscale returns a body containing backend-a's LAN IP. Adds the supporting vmtest.SNATSubnetRoutes NodeOption and plumbs snat-subnet-routes through TTA's /up handler. The webserver started by vmtest.WebServer now also echoes the remote IP, for the preservation assertion. Adds a /add-route TTA endpoint (Linux-only for now) and a vmtest Env.AddRoute helper so the test can install the backend static routes through TTA rather than needing a host SSH key and debug NIC. ensureGokrazy now always rebuilds the natlab qcow2 (once per test process, via sync.Once) so the test picks up the new TTA and webserver behavior. This is pulled out of a larger pending change that adds FreeBSD site-to-site subnet routing support; figured we should have at least the Linux test covering what works today. 
Updates #5573 Change-Id: I881c55b0f118ac9094546b5fbe68dddf179bb042 Signed-off-by: Brad Fitzpatrick --- cmd/tta/tta.go | 21 +++++++- tstest/natlab/vmtest/vmtest.go | 83 +++++++++++++++++++++++------ tstest/natlab/vmtest/vmtest_test.go | 82 ++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 17 deletions(-) diff --git a/cmd/tta/tta.go b/cmd/tta/tta.go index cf5dc4162..a94727503 100644 --- a/cmd/tta/tta.go +++ b/cmd/tta/tta.go @@ -25,6 +25,7 @@ import ( "os" "os/exec" "regexp" + "runtime" "strconv" "strings" "sync" @@ -202,6 +203,9 @@ func main() { if routes := r.URL.Query().Get("advertise-routes"); routes != "" { args = append(args, "--advertise-routes="+routes) } + if snat := r.URL.Query().Get("snat-subnet-routes"); snat != "" { + args = append(args, "--snat-subnet-routes="+snat) + } serveCmd(w, "tailscale", args...) }) ttaMux.HandleFunc("/ip", func(w http.ResponseWriter, r *http.Request) { @@ -222,6 +226,20 @@ func main() { serveCmd(w, "ping", "-c", "4", "-W", "1", host) } }) + ttaMux.HandleFunc("/add-route", func(w http.ResponseWriter, r *http.Request) { + prefix := r.URL.Query().Get("prefix") + via := r.URL.Query().Get("via") + if prefix == "" || via == "" { + http.Error(w, "missing prefix or via", http.StatusBadRequest) + return + } + switch runtime.GOOS { + case "linux": + serveCmd(w, "ip", "route", "add", prefix, "via", via) + default: + http.Error(w, "add-route not supported on "+runtime.GOOS, http.StatusNotImplemented) + } + }) ttaMux.HandleFunc("/start-webserver", func(w http.ResponseWriter, r *http.Request) { port := r.URL.Query().Get("port") name := r.URL.Query().Get("name") @@ -236,7 +254,8 @@ func main() { go func() { mux := http.NewServeMux() mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { - fmt.Fprintf(w, "Hello world I am %s", name) + host, _, _ := net.SplitHostPort(r.RemoteAddr) + fmt.Fprintf(w, "Hello world I am %s from %s", name, host) }) if err := http.ListenAndServe(":"+port, mux); err != nil { 
log.Printf("webserver on :%s failed: %v", port, err) diff --git a/tstest/natlab/vmtest/vmtest.go b/tstest/natlab/vmtest/vmtest.go index e6c89467f..1f58d02f9 100644 --- a/tstest/natlab/vmtest/vmtest.go +++ b/tstest/natlab/vmtest/vmtest.go @@ -27,6 +27,7 @@ import ( "os/exec" "path/filepath" "strings" + "sync" "testing" "time" @@ -95,14 +96,15 @@ type Node struct { name string num int // assigned during AddNode - os OSImage - nets []*vnet.Network - vnetNode *vnet.Node // primary vnet node (set during Start) - agent *vnet.NodeAgentClient - joinTailnet bool - advertiseRoutes string - webServerPort int - sshPort int // host port for SSH debug access (cloud VMs only) + os OSImage + nets []*vnet.Network + vnetNode *vnet.Node // primary vnet node (set during Start) + agent *vnet.NodeAgentClient + joinTailnet bool + advertiseRoutes string + snatSubnetRoutes *bool // nil means default (true) + webServerPort int + sshPort int // host port for SSH debug access (cloud VMs only) } // AddNode creates a new VM node. The name is used for identification and as the @@ -130,6 +132,9 @@ func (e *Env) AddNode(name string, opts ...any) *Node { vnetOpts = append(vnetOpts, vnet.DontJoinTailnet) case nodeOptAdvertiseRoutes: n.advertiseRoutes = string(o) + case nodeOptSNATSubnetRoutes: + v := bool(o) + n.snatSubnetRoutes = &v case nodeOptWebServer: n.webServerPort = int(o) default: @@ -154,6 +159,7 @@ func (n *Node) LanIP(net *vnet.Network) netip.Addr { type nodeOptOS OSImage type nodeOptNoTailscale struct{} type nodeOptAdvertiseRoutes string +type nodeOptSNATSubnetRoutes bool type nodeOptWebServer int // OS returns a NodeOption that sets the node's operating system image. @@ -168,8 +174,14 @@ func AdvertiseRoutes(routes string) nodeOptAdvertiseRoutes { return nodeOptAdvertiseRoutes(routes) } +// SNATSubnetRoutes returns a NodeOption that sets whether the node should +// source NAT traffic to advertised subnet routes. The default is true. 
+// Setting this to false preserves original source IPs, which is needed +// for site-to-site configurations. +func SNATSubnetRoutes(v bool) nodeOptSNATSubnetRoutes { return nodeOptSNATSubnetRoutes(v) } + // WebServer returns a NodeOption that starts a webserver on the given port. -// The webserver responds with "Hello world I am <name>" on all requests. +// The webserver responds with "Hello world I am <name> from <ip>" on all requests. func WebServer(port int) nodeOptWebServer { return nodeOptWebServer(port) } // Start initializes the virtual network, builds/downloads images, compiles @@ -332,6 +344,13 @@ func (e *Env) tailscaleUp(ctx context.Context, n *Node) error { if n.advertiseRoutes != "" { url += "&advertise-routes=" + n.advertiseRoutes } + if n.snatSubnetRoutes != nil { + if *n.snatSubnetRoutes { + url += "&snat-subnet-routes=true" + } else { + url += "&snat-subnet-routes=false" + } + } req, err := http.NewRequestWithContext(ctx, "GET", url, nil) if err != nil { return err @@ -462,6 +481,32 @@ func (e *Env) ping(from, to *Node) { } } +// AddRoute adds a kernel static route on the given node, pointing prefix at +// via. It uses TTA's /add-route handler, so it works on any node where TTA +// is running (which is all of them — DontJoinTailnet only skips +// `tailscale up`; the agent runs regardless). Currently Linux-only in TTA. +// +// Fatals on error. 
+func (e *Env) AddRoute(n *Node, prefix, via string) { + e.t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + reqURL := fmt.Sprintf("http://unused/add-route?prefix=%s&via=%s", prefix, via) + req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil) + if err != nil { + e.t.Fatalf("AddRoute: %v", err) + } + resp, err := n.agent.HTTPClient.Do(req) + if err != nil { + e.t.Fatalf("AddRoute(%s, %s → %s): %v", n.name, prefix, via, err) + } + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + if resp.StatusCode != 200 { + e.t.Fatalf("AddRoute(%s, %s → %s): %s: %s", n.name, prefix, via, resp.Status, body) + } +} + // SSHExec runs a command on a cloud VM via its debug SSH NIC. // Only works for cloud VMs that have the debug NIC and SSH key configured. // Returns stdout and any error. @@ -575,7 +620,11 @@ func (e *Env) HTTPGet(from *Node, targetURL string) string { return "" } -// ensureGokrazy finds or builds the gokrazy base image and kernel. +var buildGokrazy sync.Once + +// ensureGokrazy builds the gokrazy base image (once per test process) and +// locates the kernel. The build is fast (~4s) so we always rebuild to ensure +// the baked-in binaries (tta, tailscale, tailscaled) match the current source. 
func (e *Env) ensureGokrazy(ctx context.Context) error { if e.gokrazyBase != "" { return nil // already found @@ -586,21 +635,23 @@ func (e *Env) ensureGokrazy(ctx context.Context) error { return err } - e.gokrazyBase = filepath.Join(modRoot, "gokrazy/natlabapp.qcow2") - if _, err := os.Stat(e.gokrazyBase); err != nil { - if !os.IsNotExist(err) { - return err - } + var buildErr error + buildGokrazy.Do(func() { e.t.Logf("building gokrazy natlab image...") cmd := exec.CommandContext(ctx, "make", "natlab") cmd.Dir = filepath.Join(modRoot, "gokrazy") cmd.Stderr = os.Stderr cmd.Stdout = os.Stdout if err := cmd.Run(); err != nil { - return fmt.Errorf("make natlab: %w", err) + buildErr = fmt.Errorf("make natlab: %w", err) } + }) + if buildErr != nil { + return buildErr } + e.gokrazyBase = filepath.Join(modRoot, "gokrazy/natlabapp.qcow2") + kernel, err := findKernelPath(filepath.Join(modRoot, "go.mod")) if err != nil { return fmt.Errorf("finding kernel: %w", err) diff --git a/tstest/natlab/vmtest/vmtest_test.go b/tstest/natlab/vmtest/vmtest_test.go index 91c8359f1..89e5a022f 100644 --- a/tstest/natlab/vmtest/vmtest_test.go +++ b/tstest/natlab/vmtest/vmtest_test.go @@ -45,3 +45,85 @@ func testSubnetRouterForOS(t testing.TB, srOS vmtest.OSImage) { t.Fatalf("got %q", body) } } + +func TestSiteToSite(t *testing.T) { + testSiteToSite(t, vmtest.Ubuntu2404) +} + +// testSiteToSite runs a site-to-site subnet routing test with +// --snat-subnet-routes=false, verifying that original source IPs are preserved +// across Tailscale subnet routes. +// +// Topology: +// +// Site A: backend-a (10.1.0.0/24) ← → sr-a (WAN + LAN-A) +// Site B: backend-b (10.2.0.0/24) ← → sr-b (WAN + LAN-B) +// +// Both subnet routers are on Tailscale with --snat-subnet-routes=false. +// The test sends HTTP from backend-a to backend-b through the subnet routers +// and verifies that backend-b sees backend-a's LAN IP (not the subnet router's). 
+func testSiteToSite(t *testing.T, srOS vmtest.OSImage) { + env := vmtest.New(t) + + // WAN networks for each site (each behind NAT). + wanA := env.AddNetwork("2.1.1.1", "192.168.1.1/24", vnet.EasyNAT) + wanB := env.AddNetwork("3.1.1.1", "192.168.2.1/24", vnet.EasyNAT) + + // Internal LAN for each site. + lanA := env.AddNetwork("10.1.0.1/24") + lanB := env.AddNetwork("10.2.0.1/24") + + // Subnet routers: each on its WAN + LAN, advertising the local LAN, + // with SNAT disabled to preserve source IPs. + srA := env.AddNode("sr-a", wanA, lanA, + vmtest.OS(srOS), + vmtest.AdvertiseRoutes("10.1.0.0/24"), + vmtest.SNATSubnetRoutes(false)) + srB := env.AddNode("sr-b", wanB, lanB, + vmtest.OS(srOS), + vmtest.AdvertiseRoutes("10.2.0.0/24"), + vmtest.SNATSubnetRoutes(false)) + + // Backend servers on each site's LAN (not on Tailscale). + // Use Ubuntu because TTA's /add-route handler is currently Linux-only. + backendA := env.AddNode("backend-a", lanA, + vmtest.OS(vmtest.Ubuntu2404), + vmtest.DontJoinTailnet(), + vmtest.WebServer(8080)) + backendB := env.AddNode("backend-b", lanB, + vmtest.OS(vmtest.Ubuntu2404), + vmtest.DontJoinTailnet(), + vmtest.WebServer(8080)) + + env.Start() + env.ApproveRoutes(srA, "10.1.0.0/24") + env.ApproveRoutes(srB, "10.2.0.0/24") + + // Add static routes on the backends so that traffic to the remote site's + // subnet goes through the local subnet router. This mirrors how a real + // site-to-site deployment is configured. + srALanIP := srA.LanIP(lanA).String() + srBLanIP := srB.LanIP(lanB).String() + t.Logf("sr-a LAN IP: %s, sr-b LAN IP: %s", srALanIP, srBLanIP) + t.Logf("backend-a LAN IP: %s, backend-b LAN IP: %s", backendA.LanIP(lanA), backendB.LanIP(lanB)) + + env.AddRoute(backendA, "10.2.0.0/24", srALanIP) + env.AddRoute(backendB, "10.1.0.0/24", srBLanIP) + + // Make an HTTP request from backend-a to backend-b through the subnet routers. + // TTA's /http-get falls back to direct dial on non-Tailscale nodes. 
+ backendBIP := backendB.LanIP(lanB) + body := env.HTTPGet(backendA, fmt.Sprintf("http://%s:8080/", backendBIP)) + t.Logf("response: %s", body) + + if !strings.Contains(body, "Hello world I am backend-b") { + t.Fatalf("expected response from backend-b, got %q", body) + } + + // Verify the source IP was preserved. With --snat-subnet-routes=false, + // backend-b should see backend-a's LAN IP as the source, not sr-b's LAN IP. + backendAIP := backendA.LanIP(lanA).String() + if !strings.Contains(body, "from "+backendAIP) { + t.Fatalf("source IP not preserved: expected %q in response, got %q", backendAIP, body) + } +}