tstest/natlab/vmtest,cmd/tta: add TestSiteToSite

Verifies that site-to-site Tailscale subnet routing with
--snat-subnet-routes=false preserves the original source IP
end-to-end.

Topology: two sites, each with a Linux subnet router on a NATted WAN
plus an internal LAN, and a non-Tailscale backend on each LAN. Backends
are given static routes pointing to their local subnet router for the
remote site's prefix; an HTTP GET from backend-a to backend-b over
Tailscale returns a body containing backend-a's LAN IP.

Adds the supporting vmtest.SNATSubnetRoutes NodeOption and plumbs
snat-subnet-routes through TTA's /up handler. The webserver started by
vmtest.WebServer now also echoes the remote IP, for the preservation
assertion.

Adds a /add-route TTA endpoint (Linux-only for now) and a vmtest
Env.AddRoute helper so the test can install the backend static routes
through TTA rather than needing a host SSH key and debug NIC.

ensureGokrazy now always rebuilds the natlab qcow2 (once per test
process, via sync.Once) so the test picks up the new TTA and webserver
behavior.

This is pulled out of a larger pending change that adds FreeBSD
site-to-site subnet routing support; we should have at least the
Linux test covering what works today.

Updates #5573

Change-Id: I881c55b0f118ac9094546b5fbe68dddf179bb042
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick
2026-04-22 03:39:15 +00:00
committed by Brad Fitzpatrick
parent 81fbcc1ac8
commit f289f7e77c
3 changed files with 169 additions and 17 deletions
+20 -1
View File
@@ -25,6 +25,7 @@ import (
"os"
"os/exec"
"regexp"
"runtime"
"strconv"
"strings"
"sync"
@@ -202,6 +203,9 @@ func main() {
if routes := r.URL.Query().Get("advertise-routes"); routes != "" {
args = append(args, "--advertise-routes="+routes)
}
if snat := r.URL.Query().Get("snat-subnet-routes"); snat != "" {
args = append(args, "--snat-subnet-routes="+snat)
}
serveCmd(w, "tailscale", args...)
})
ttaMux.HandleFunc("/ip", func(w http.ResponseWriter, r *http.Request) {
@@ -222,6 +226,20 @@ func main() {
serveCmd(w, "ping", "-c", "4", "-W", "1", host)
}
})
// /add-route installs a static route. The destination ("prefix") and next hop
// ("via") are both read from the query string and both are required.
ttaMux.HandleFunc("/add-route", func(w http.ResponseWriter, r *http.Request) {
	query := r.URL.Query()
	dst, gw := query.Get("prefix"), query.Get("via")
	if dst == "" || gw == "" {
		http.Error(w, "missing prefix or via", http.StatusBadRequest)
		return
	}
	// Only Linux is wired up so far; every other OS gets a 501.
	if runtime.GOOS != "linux" {
		http.Error(w, "add-route not supported on "+runtime.GOOS, http.StatusNotImplemented)
		return
	}
	serveCmd(w, "ip", "route", "add", dst, "via", gw)
})
ttaMux.HandleFunc("/start-webserver", func(w http.ResponseWriter, r *http.Request) {
port := r.URL.Query().Get("port")
name := r.URL.Query().Get("name")
@@ -236,7 +254,8 @@ func main() {
go func() {
mux := http.NewServeMux()
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "Hello world I am %s", name)
host, _, _ := net.SplitHostPort(r.RemoteAddr)
fmt.Fprintf(w, "Hello world I am %s from %s", name, host)
})
if err := http.ListenAndServe(":"+port, mux); err != nil {
log.Printf("webserver on :%s failed: %v", port, err)
+67 -16
View File
@@ -27,6 +27,7 @@ import (
"os/exec"
"path/filepath"
"strings"
"sync"
"testing"
"time"
@@ -95,14 +96,15 @@ type Node struct {
name string
num int // assigned during AddNode
os OSImage
nets []*vnet.Network
vnetNode *vnet.Node // primary vnet node (set during Start)
agent *vnet.NodeAgentClient
joinTailnet bool
advertiseRoutes string
webServerPort int
sshPort int // host port for SSH debug access (cloud VMs only)
os OSImage
nets []*vnet.Network
vnetNode *vnet.Node // primary vnet node (set during Start)
agent *vnet.NodeAgentClient
joinTailnet bool
advertiseRoutes string
snatSubnetRoutes *bool // nil means default (true)
webServerPort int
sshPort int // host port for SSH debug access (cloud VMs only)
}
// AddNode creates a new VM node. The name is used for identification and as the
@@ -130,6 +132,9 @@ func (e *Env) AddNode(name string, opts ...any) *Node {
vnetOpts = append(vnetOpts, vnet.DontJoinTailnet)
case nodeOptAdvertiseRoutes:
n.advertiseRoutes = string(o)
case nodeOptSNATSubnetRoutes:
v := bool(o)
n.snatSubnetRoutes = &v
case nodeOptWebServer:
n.webServerPort = int(o)
default:
@@ -154,6 +159,7 @@ func (n *Node) LanIP(net *vnet.Network) netip.Addr {
// nodeOptOS wraps an OSImage as a node option value.
// NOTE(review): presumably the value returned by OS — confirm against its definition.
type nodeOptOS OSImage

// nodeOptNoTailscale is an empty marker option type.
// NOTE(review): presumably the value returned by DontJoinTailnet — confirm.
type nodeOptNoTailscale struct{}

// nodeOptAdvertiseRoutes is the option value returned by AdvertiseRoutes;
// AddNode copies it into the node's advertiseRoutes string.
type nodeOptAdvertiseRoutes string

// nodeOptSNATSubnetRoutes is the option value returned by SNATSubnetRoutes;
// AddNode stores a pointer to its bool value in the node's snatSubnetRoutes.
type nodeOptSNATSubnetRoutes bool

// nodeOptWebServer is the option value returned by WebServer; AddNode copies
// it into the node's webServerPort.
type nodeOptWebServer int
// OS returns a NodeOption that sets the node's operating system image.
@@ -168,8 +174,14 @@ func AdvertiseRoutes(routes string) nodeOptAdvertiseRoutes {
return nodeOptAdvertiseRoutes(routes)
}
// SNATSubnetRoutes returns a NodeOption controlling whether the node
// source-NATs traffic destined for its advertised subnet routes. When the
// option is not supplied the default (true) applies. Pass false to keep the
// original source IPs intact, which site-to-site configurations require.
func SNATSubnetRoutes(v bool) nodeOptSNATSubnetRoutes {
	return nodeOptSNATSubnetRoutes(v)
}
// WebServer returns a NodeOption that starts a webserver on the given port.
// The webserver responds with "Hello world I am <nodename>" on all requests.
// The webserver responds with "Hello world I am <nodename> from <sourceIP>" on all requests.
func WebServer(port int) nodeOptWebServer {
	// The option is just the port number wrapped in its own type.
	return nodeOptWebServer(port)
}
// Start initializes the virtual network, builds/downloads images, compiles
@@ -332,6 +344,13 @@ func (e *Env) tailscaleUp(ctx context.Context, n *Node) error {
if n.advertiseRoutes != "" {
url += "&advertise-routes=" + n.advertiseRoutes
}
if n.snatSubnetRoutes != nil {
if *n.snatSubnetRoutes {
url += "&snat-subnet-routes=true"
} else {
url += "&snat-subnet-routes=false"
}
}
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return err
@@ -462,6 +481,32 @@ func (e *Env) ping(from, to *Node) {
}
}
// AddRoute installs a kernel static route on node n that sends traffic for
// prefix to the next hop via. The request goes to TTA's /add-route handler
// through the node's agent HTTP client, so it works on any node where TTA is
// running (which is all of them — DontJoinTailnet only skips `tailscale up`;
// the agent runs regardless). TTA currently implements this for Linux only.
//
// Any failure (building the request, performing it, or a non-200 reply) is
// fatal to the test.
func (e *Env) AddRoute(n *Node, prefix, via string) {
	e.t.Helper()

	const timeout = 10 * time.Second
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	// The host part is a placeholder, as the name says.
	target := fmt.Sprintf("http://unused/add-route?prefix=%s&via=%s", prefix, via)
	req, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, target, nil)
	if reqErr != nil {
		e.t.Fatalf("AddRoute: %v", reqErr)
	}

	res, doErr := n.agent.HTTPClient.Do(req)
	if doErr != nil {
		e.t.Fatalf("AddRoute(%s, %s → %s): %v", n.name, prefix, via, doErr)
	}
	defer res.Body.Close()

	// The body is only used to enrich the failure message, so a read error
	// is deliberately ignored.
	payload, _ := io.ReadAll(res.Body)
	if res.StatusCode == http.StatusOK {
		return
	}
	e.t.Fatalf("AddRoute(%s, %s → %s): %s: %s", n.name, prefix, via, res.Status, payload)
}
// SSHExec runs a command on a cloud VM via its debug SSH NIC.
// Only works for cloud VMs that have the debug NIC and SSH key configured.
// Returns stdout and any error.
@@ -575,7 +620,11 @@ func (e *Env) HTTPGet(from *Node, targetURL string) string {
return ""
}
// ensureGokrazy finds or builds the gokrazy base image and kernel.
var buildGokrazy sync.Once
// ensureGokrazy builds the gokrazy base image (once per test process) and
// locates the kernel. The build is fast (~4s) so we always rebuild to ensure
// the baked-in binaries (tta, tailscale, tailscaled) match the current source.
func (e *Env) ensureGokrazy(ctx context.Context) error {
if e.gokrazyBase != "" {
return nil // already found
@@ -586,21 +635,23 @@ func (e *Env) ensureGokrazy(ctx context.Context) error {
return err
}
e.gokrazyBase = filepath.Join(modRoot, "gokrazy/natlabapp.qcow2")
if _, err := os.Stat(e.gokrazyBase); err != nil {
if !os.IsNotExist(err) {
return err
}
var buildErr error
buildGokrazy.Do(func() {
e.t.Logf("building gokrazy natlab image...")
cmd := exec.CommandContext(ctx, "make", "natlab")
cmd.Dir = filepath.Join(modRoot, "gokrazy")
cmd.Stderr = os.Stderr
cmd.Stdout = os.Stdout
if err := cmd.Run(); err != nil {
return fmt.Errorf("make natlab: %w", err)
buildErr = fmt.Errorf("make natlab: %w", err)
}
})
if buildErr != nil {
return buildErr
}
e.gokrazyBase = filepath.Join(modRoot, "gokrazy/natlabapp.qcow2")
kernel, err := findKernelPath(filepath.Join(modRoot, "go.mod"))
if err != nil {
return fmt.Errorf("finding kernel: %w", err)
+82
View File
@@ -45,3 +45,85 @@ func testSubnetRouterForOS(t testing.TB, srOS vmtest.OSImage) {
t.Fatalf("got %q", body)
}
}
func TestSiteToSite(t *testing.T) {
testSiteToSite(t, vmtest.Ubuntu2404)
}
// testSiteToSite exercises site-to-site subnet routing with
// --snat-subnet-routes=false and checks that the client's original source IP
// survives the trip across both Tailscale subnet routers.
//
// Topology:
//
//	Site A: backend-a (10.1.0.0/24) ← → sr-a (WAN + LAN-A)
//	Site B: backend-b (10.2.0.0/24) ← → sr-b (WAN + LAN-B)
//
// sr-a and sr-b join Tailscale with SNAT disabled and run the srOS image; the
// backends stay off the tailnet. backend-a issues an HTTP request to
// backend-b, and the response must name backend-a's LAN IP as the peer
// address (not the subnet router's).
func testSiteToSite(t *testing.T, srOS vmtest.OSImage) {
	const (
		siteAPrefix = "10.1.0.0/24"
		siteBPrefix = "10.2.0.0/24"
	)

	env := vmtest.New(t)

	// One NATted WAN per site.
	wanA := env.AddNetwork("2.1.1.1", "192.168.1.1/24", vnet.EasyNAT)
	wanB := env.AddNetwork("3.1.1.1", "192.168.2.1/24", vnet.EasyNAT)

	// One internal LAN per site.
	lanA := env.AddNetwork("10.1.0.1/24")
	lanB := env.AddNetwork("10.2.0.1/24")

	// Each subnet router straddles its site's WAN and LAN, advertises the
	// local LAN prefix, and has SNAT turned off so source IPs pass through.
	srA := env.AddNode("sr-a", wanA, lanA,
		vmtest.OS(srOS),
		vmtest.AdvertiseRoutes(siteAPrefix),
		vmtest.SNATSubnetRoutes(false))
	srB := env.AddNode("sr-b", wanB, lanB,
		vmtest.OS(srOS),
		vmtest.AdvertiseRoutes(siteBPrefix),
		vmtest.SNATSubnetRoutes(false))

	// Backends live only on their site's LAN and never join the tailnet.
	// Their static routes are installed below via env.AddRoute.
	backendA := env.AddNode("backend-a", lanA,
		vmtest.OS(vmtest.Ubuntu2404),
		vmtest.DontJoinTailnet(),
		vmtest.WebServer(8080))
	backendB := env.AddNode("backend-b", lanB,
		vmtest.OS(vmtest.Ubuntu2404),
		vmtest.DontJoinTailnet(),
		vmtest.WebServer(8080))

	env.Start()
	env.ApproveRoutes(srA, siteAPrefix)
	env.ApproveRoutes(srB, siteBPrefix)

	// Point each backend at its local subnet router for the remote site's
	// prefix, the way a real site-to-site deployment would be configured.
	gwA := srA.LanIP(lanA).String()
	gwB := srB.LanIP(lanB).String()
	t.Logf("sr-a LAN IP: %s, sr-b LAN IP: %s", gwA, gwB)
	t.Logf("backend-a LAN IP: %s, backend-b LAN IP: %s", backendA.LanIP(lanA), backendB.LanIP(lanB))
	env.AddRoute(backendA, siteBPrefix, gwA)
	env.AddRoute(backendB, siteAPrefix, gwB)

	// Fetch backend-b's web page from backend-a; the request has to cross
	// both subnet routers. TTA's /http-get falls back to a direct dial on
	// nodes that are not on Tailscale.
	serverIP := backendB.LanIP(lanB)
	body := env.HTTPGet(backendA, fmt.Sprintf("http://%s:8080/", serverIP))
	t.Logf("response: %s", body)
	if !strings.Contains(body, "Hello world I am backend-b") {
		t.Fatalf("expected response from backend-b, got %q", body)
	}

	// With --snat-subnet-routes=false the web server must report backend-a's
	// LAN IP as the peer address rather than sr-b's LAN IP.
	clientIP := backendA.LanIP(lanA).String()
	if !strings.Contains(body, "from "+clientIP) {
		t.Fatalf("source IP not preserved: expected %q in response, got %q", clientIP, body)
	}
}