vmtest: add VM-based integration test framework

Add tstest/natlab/vmtest, a high-level framework for running multi-VM
integration tests with mixed OS types (gokrazy + Ubuntu/Debian cloud
images) connected via natlab's vnet virtual network.

The vmtest package provides:
  - Env type that orchestrates vnet, QEMU processes, and agent connections
  - OS image support (Gokrazy, Ubuntu2404, Debian12) with download/cache
  - QEMU launch per OS type (microvm for gokrazy, q35+KVM for cloud)
  - Cloud-init seed ISO generation with network-config for multi-NIC
  - Cross-compilation of test binaries for cloud VMs
  - Debug SSH NIC on cloud VMs for interactive debugging
  - Test helpers: ApproveRoutes, HTTPGet, TailscalePing, DumpStatus,
    WaitForPeerRoute, SSHExec

TTA enhancements (cmd/tta):
  - Parameterize /up (accept-routes, advertise-routes, snat-subnet-routes)
  - Add /set, /start-webserver, /http-get endpoints
  - /http-get uses local.Client.UserDial for Tailscale-routed requests
  - Fix /ping for non-gokrazy systems

TestSubnetRouter exercises a 3-VM subnet router scenario:
  client (gokrazy) → subnet-router (Ubuntu, dual-NIC) → backend (gokrazy)
  Verifies HTTP access to the backend webserver through the Tailscale
  subnet route. Passes in ~30 seconds.

Updates tailscale/tailscale#13038

Change-Id: I165b64af241d37f5f5870e796a52502fc56146fa
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
main
Brad Fitzpatrick 1 week ago committed by Brad Fitzpatrick
parent d948b78b23
commit ec0b23a21f
  1. 108
      cmd/tta/tta.go
  2. 2
      flake.nix
  3. 1
      go.mod
  4. 2
      go.mod.sri
  5. 2
      go.sum
  6. 2
      shell.nix
  7. 117
      tstest/natlab/vmtest/cloudinit.go
  8. 170
      tstest/natlab/vmtest/images.go
  9. 239
      tstest/natlab/vmtest/qemu.go
  10. 676
      tstest/natlab/vmtest/vmtest.go
  11. 38
      tstest/natlab/vmtest/vmtest_test.go
  12. 36
      tstest/natlab/vnet/vnet.go

@ -15,6 +15,7 @@ import (
"context"
"errors"
"flag"
"fmt"
"io"
"log"
"net"
@ -181,7 +182,27 @@ func main() {
return
})
ttaMux.HandleFunc("/up", func(w http.ResponseWriter, r *http.Request) {
serveCmd(w, "tailscale", "up", "--login-server=http://control.tailscale")
args := []string{"up", "--login-server=http://control.tailscale"}
if routes := r.URL.Query().Get("advertise-routes"); routes != "" {
args = append(args, "--advertise-routes="+routes)
}
if snat := r.URL.Query().Get("snat-subnet-routes"); snat != "" {
args = append(args, "--snat-subnet-routes="+snat)
}
if r.URL.Query().Get("accept-routes") == "true" {
args = append(args, "--accept-routes")
}
serveCmd(w, "tailscale", args...)
})
ttaMux.HandleFunc("/set", func(w http.ResponseWriter, r *http.Request) {
args := []string{"set"}
if r.URL.Query().Get("accept-routes") == "true" {
args = append(args, "--accept-routes")
}
if routes := r.URL.Query().Get("advertise-routes"); routes != "" {
args = append(args, "--advertise-routes="+routes)
}
serveCmd(w, "tailscale", args...)
})
ttaMux.HandleFunc("/ip", func(w http.ResponseWriter, r *http.Request) {
conn, ok := r.Context().Value(connContextKey).(net.Conn)
@ -192,12 +213,85 @@ func main() {
w.Write([]byte(conn.LocalAddr().String()))
})
ttaMux.HandleFunc("/ping", func(w http.ResponseWriter, r *http.Request) {
// Send 4 packets and wait a maximum of 1 second for each. The deadline
// is required for ping to return a non-zero exit code on no response.
// The busybox in question here is the breakglass busybox inside the
// natlab QEMU image - the host running the test does not need to have
// busybox installed at that path, or at all.
serveCmd(w, "/usr/local/bin/busybox", "ping", "-c", "4", "-W", "1", r.URL.Query().Get("host"))
host := r.URL.Query().Get("host")
if distro.Get() == distro.Gokrazy {
// The busybox in question here is the breakglass busybox inside the
// natlab QEMU image.
serveCmd(w, "/usr/local/bin/busybox", "ping", "-c", "4", "-W", "1", host)
} else {
serveCmd(w, "ping", "-c", "4", "-W", "1", host)
}
})
ttaMux.HandleFunc("/start-webserver", func(w http.ResponseWriter, r *http.Request) {
port := r.URL.Query().Get("port")
name := r.URL.Query().Get("name")
if port == "" {
http.Error(w, "missing port", http.StatusBadRequest)
return
}
if name == "" {
name = "unnamed"
}
log.Printf("Starting webserver on port %s as %q", port, name)
go func() {
mux := http.NewServeMux()
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "Hello world I am %s", name)
})
if err := http.ListenAndServe(":"+port, mux); err != nil {
log.Printf("webserver on :%s failed: %v", port, err)
}
}()
io.WriteString(w, "OK\n")
})
ttaMux.HandleFunc("/http-get", func(w http.ResponseWriter, r *http.Request) {
targetURL := r.URL.Query().Get("url")
if targetURL == "" {
http.Error(w, "missing url", http.StatusBadRequest)
return
}
log.Printf("HTTP GET %s", targetURL)
ctx, cancel := context.WithTimeout(r.Context(), 10*time.Second)
defer cancel()
req, err := http.NewRequestWithContext(ctx, "GET", targetURL, nil)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
// Use Tailscale's SOCKS5 proxy if available, so traffic to Tailscale
// subnet routes goes through the WireGuard tunnel instead of the
// host network stack (which may not have the routes, especially
// in userspace networking mode).
client := &http.Client{
Transport: &http.Transport{
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
// Try the Tailscale localapi proxy dialer first.
host, portStr, err := net.SplitHostPort(addr)
if err != nil {
var d net.Dialer
return d.DialContext(ctx, network, addr)
}
port, _ := strconv.ParseUint(portStr, 10, 16)
var lc local.Client
conn, err := lc.UserDial(ctx, network, host, uint16(port))
if err == nil {
return conn, nil
}
log.Printf("http-get: UserDial failed, falling back to direct: %v", err)
var d net.Dialer
return d.DialContext(ctx, network, addr)
},
},
}
resp, err := client.Do(req)
if err != nil {
http.Error(w, err.Error(), http.StatusBadGateway)
return
}
defer resp.Body.Close()
w.Header().Set("X-Upstream-Status", strconv.Itoa(resp.StatusCode))
w.WriteHeader(resp.StatusCode)
io.Copy(w, resp.Body)
})
ttaMux.HandleFunc("/fw", addFirewallHandler)
ttaMux.HandleFunc("/logs", func(w http.ResponseWriter, r *http.Request) {

@ -163,4 +163,4 @@
});
};
}
# nix-direnv cache busting line: sha256-PLt+IPqemF3agESg6jV8AzbiOpgL45mJ/AymcNUo7VU=
# nix-direnv cache busting line: sha256-GB5riRI9hkutLc2wBzv2jil+Tf6fogLxUw54HRSPNUk=

@ -67,6 +67,7 @@ require (
github.com/jellydator/ttlcache/v3 v3.1.0
github.com/jsimonetti/rtnetlink v1.4.0
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
github.com/kdomanski/iso9660 v0.4.0
github.com/klauspost/compress v1.18.2
github.com/kortschak/wol v0.0.0-20200729010619-da482cc4850a
github.com/mattn/go-colorable v0.1.13

@ -1 +1 @@
sha256-PLt+IPqemF3agESg6jV8AzbiOpgL45mJ/AymcNUo7VU=
sha256-GB5riRI9hkutLc2wBzv2jil+Tf6fogLxUw54HRSPNUk=

@ -753,6 +753,8 @@ github.com/karamaru-alpha/copyloopvar v1.0.8 h1:gieLARwuByhEMxRwM3GRS/juJqFbLraf
github.com/karamaru-alpha/copyloopvar v1.0.8/go.mod h1:u7CIfztblY0jZLOQZgH3oYsJzpC2A7S6u/lfgSXHy0k=
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
github.com/kdomanski/iso9660 v0.4.0 h1:BPKKdcINz3m0MdjIMwS0wx1nofsOjxOq8TOr45WGHFg=
github.com/kdomanski/iso9660 v0.4.0/go.mod h1:OxUSupHsO9ceI8lBLPJKWBTphLemjrCQY8LPXM7qSzU=
github.com/kenshaw/evdev v0.1.0 h1:wmtceEOFfilChgdNT+c/djPJ2JineVsQ0N14kGzFRUo=
github.com/kenshaw/evdev v0.1.0/go.mod h1:B/fErKCihUyEobz0mjn2qQbHgyJKFQAxkXSvkeeA/Wo=
github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4=

@ -16,4 +16,4 @@
) {
src = ./.;
}).shellNix
# nix-direnv cache busting line: sha256-PLt+IPqemF3agESg6jV8AzbiOpgL45mJ/AymcNUo7VU=
# nix-direnv cache busting line: sha256-GB5riRI9hkutLc2wBzv2jil+Tf6fogLxUw54HRSPNUk=

@ -0,0 +1,117 @@
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
package vmtest
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/kdomanski/iso9660"
)
// createCloudInitISO creates a cidata seed ISO for the given cloud VM node.
// The ISO contains meta-data, user-data, and network-config files.
// Cloud-init reads these during init-local (pre-network), which is critical
// for network-config to take effect before systemd-networkd-wait-online runs.
// It returns the path of the ISO file written under e.tempDir.
func (e *Env) createCloudInitISO(n *Node) (string, error) {
	// instance-id and local-hostname identify the VM to cloud-init.
	metaData := fmt.Sprintf("instance-id: %s\nlocal-hostname: %s\n", n.name, n.name)
	userData := e.generateUserData(n)
	// Network config: DHCP all ethernet interfaces.
	// The "optional: true" prevents systemd-networkd-wait-online from blocking.
	// The first vnet NIC gets the default route (metric 100).
	// Other interfaces get higher metrics to avoid routing conflicts.
	//
	// NOTE(review): the leading whitespace of the YAML lines in this raw
	// string may have been collapsed by the viewer this source was read
	// from — verify the nesting indentation against the real file.
	networkConfig := `version: 2
ethernets:
primary:
match:
macaddress: "` + n.vnetNode.NICMac(0).String() + `"
dhcp4: true
dhcp4-overrides:
route-metric: 100
optional: true
secondary:
match:
name: "en*"
dhcp4: true
dhcp4-overrides:
route-metric: 200
optional: true
`
	iw, err := iso9660.NewWriter()
	if err != nil {
		return "", fmt.Errorf("creating ISO writer: %w", err)
	}
	// Cleanup releases the writer's temporary staging data.
	defer iw.Cleanup()
	for name, content := range map[string]string{
		"meta-data":      metaData,
		"user-data":      userData,
		"network-config": networkConfig,
	} {
		if err := iw.AddFile(strings.NewReader(content), name); err != nil {
			return "", fmt.Errorf("adding %s to ISO: %w", name, err)
		}
	}
	isoPath := filepath.Join(e.tempDir, n.name+"-seed.iso")
	f, err := os.Create(isoPath)
	if err != nil {
		return "", err
	}
	defer f.Close()
	// "cidata" is the volume label cloud-init's NoCloud datasource looks for.
	if err := iw.WriteTo(f, "cidata"); err != nil {
		return "", fmt.Errorf("writing seed ISO: %w", err)
	}
	return isoPath, nil
}
// generateUserData creates the cloud-init user-data (#cloud-config) for a node.
//
// The config enables root SSH login (password "root", plus the host's key
// from /tmp/vmtest_key.pub if present), then uses runcmd to: drop the debug
// NIC's default route, download the tailscaled/tailscale/tta binaries from
// the vnet file-server VIP, optionally enable IP forwarding for subnet
// routers, and start tailscaled and tta in the background.
//
// NOTE(review): the leading whitespace inside the YAML string literals
// below may have been collapsed by the viewer this source was read from —
// verify the indentation against the real file before relying on it.
func (e *Env) generateUserData(n *Node) string {
	var ud strings.Builder
	ud.WriteString("#cloud-config\n")
	// Enable root SSH login for debugging via the debug NIC.
	ud.WriteString("ssh_pwauth: true\n")
	ud.WriteString("disable_root: false\n")
	ud.WriteString("users:\n")
	ud.WriteString(" - name: root\n")
	ud.WriteString(" lock_passwd: false\n")
	ud.WriteString(" plain_text_passwd: root\n")
	// Also inject the host's SSH key if available.
	if pubkey, err := os.ReadFile("/tmp/vmtest_key.pub"); err == nil {
		ud.WriteString(fmt.Sprintf(" ssh_authorized_keys:\n - %s\n", strings.TrimSpace(string(pubkey))))
	}
	ud.WriteString("runcmd:\n")
	// Remove the default route from the debug NIC (enp0s4) so traffic goes through vnet.
	// The debug NIC is only for SSH access from the host.
	ud.WriteString(" - [\"/bin/sh\", \"-c\", \"ip route del default via 10.0.2.2 dev enp0s4 2>/dev/null || true\"]\n")
	// Download binaries from the files.tailscale VIP (52.52.0.6).
	// Use the IP directly to avoid DNS resolution issues during early boot.
	for _, bin := range []string{"tailscaled", "tailscale", "tta"} {
		fmt.Fprintf(&ud, " - [\"/bin/sh\", \"-c\", \"curl -v --retry 10 --retry-delay 2 --retry-all-errors -o /usr/local/bin/%s http://52.52.0.6/%s 2>&1\"]\n", bin, bin)
	}
	ud.WriteString(" - [\"chmod\", \"+x\", \"/usr/local/bin/tailscaled\", \"/usr/local/bin/tailscale\", \"/usr/local/bin/tta\"]\n")
	// Enable IP forwarding for subnet routers.
	if n.advertiseRoutes != "" {
		ud.WriteString(" - [\"sysctl\", \"-w\", \"net.ipv4.ip_forward=1\"]\n")
		ud.WriteString(" - [\"sysctl\", \"-w\", \"net.ipv6.conf.all.forwarding=1\"]\n")
	}
	// Start tailscaled in the background with in-memory state (nothing
	// needs to persist in a throwaway test VM).
	ud.WriteString(" - [\"/bin/sh\", \"-c\", \"/usr/local/bin/tailscaled --state=mem: &\"]\n")
	ud.WriteString(" - [\"sleep\", \"2\"]\n")
	// Start tta (Tailscale Test Agent).
	ud.WriteString(" - [\"/bin/sh\", \"-c\", \"/usr/local/bin/tta &\"]\n")
	return ud.String()
}

@ -0,0 +1,170 @@
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
package vmtest
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"log"
"net/http"
"os"
"os/exec"
"path/filepath"
)
// OSImage describes a VM operating system image.
type OSImage struct {
	Name      string // short identifier; also used as the cache filename stem
	URL       string // download URL for the cloud image
	SHA256    string // expected SHA256 hash of the image; empty skips verification
	MemoryMB  int    // RAM for the VM
	IsGokrazy bool   // true for gokrazy images (different QEMU setup)
}

// Predefined OS images usable with the OS node option.
var (
	// Gokrazy is a minimal Tailscale appliance image built from the gokrazy/natlabapp directory.
	// It has no URL/SHA256 because it is built locally, not downloaded.
	Gokrazy = OSImage{
		Name:      "gokrazy",
		IsGokrazy: true,
		MemoryMB:  384,
	}
	// Ubuntu2404 is Ubuntu 24.04 LTS (Noble Numbat) cloud image.
	// No SHA256 is pinned — the "current" URL presumably tracks a moving target.
	Ubuntu2404 = OSImage{
		Name:     "ubuntu-24.04",
		URL:      "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img",
		MemoryMB: 1024,
	}
	// Debian12 is Debian 12 (Bookworm) generic cloud image.
	Debian12 = OSImage{
		Name:     "debian-12",
		URL:      "https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-generic-amd64.qcow2",
		MemoryMB: 1024,
	}
)
// imageCacheDir returns the directory for cached VM images.
// The VMTEST_CACHE_DIR environment variable overrides the default of
// $HOME/.cache/tailscale/vmtest/images.
func imageCacheDir() string {
	if override := os.Getenv("VMTEST_CACHE_DIR"); override != "" {
		return override
	}
	// Error deliberately ignored: an empty home just yields a relative path.
	home, _ := os.UserHomeDir()
	return filepath.Join(home, ".cache", "tailscale", "vmtest", "images")
}
// ensureImage downloads and caches the OS image if not already present.
//
// Gokrazy images are built locally rather than downloaded, so they're a
// no-op here. A cached cloud image that fails its SHA256 check is removed
// and re-downloaded; one with no declared hash is trusted as-is. Downloads
// go to a ".tmp" file that is renamed into place only after the (optional)
// hash verifies, so a partial download never poisons the cache.
func ensureImage(ctx context.Context, img OSImage) error {
	if img.IsGokrazy {
		return nil // gokrazy images are handled separately
	}
	cacheDir := imageCacheDir()
	if err := os.MkdirAll(cacheDir, 0755); err != nil {
		return err
	}
	// Use a filename based on the image name.
	cachedPath := filepath.Join(cacheDir, img.Name+".qcow2")
	if _, err := os.Stat(cachedPath); err == nil {
		// If we have a SHA256 to verify, check it.
		if img.SHA256 != "" {
			if err := verifySHA256(cachedPath, img.SHA256); err != nil {
				log.Printf("cached image %s failed SHA256 check, re-downloading: %v", img.Name, err)
				os.Remove(cachedPath)
			} else {
				return nil
			}
		} else {
			return nil // exists, no hash to verify
		}
	}
	log.Printf("downloading %s from %s...", img.Name, img.URL)
	req, err := http.NewRequestWithContext(ctx, "GET", img.URL, nil)
	if err != nil {
		return fmt.Errorf("downloading %s: %w", img.Name, err)
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("downloading %s: %w", img.Name, err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return fmt.Errorf("downloading %s: HTTP %s", img.Name, resp.Status)
	}
	tmpFile := cachedPath + ".tmp"
	f, err := os.Create(tmpFile)
	if err != nil {
		return err
	}
	// On any error path, close and delete the partial download. After a
	// successful rename both calls are harmless (Close of a closed file and
	// Remove of a now-missing path return errors that are ignored).
	defer func() {
		f.Close()
		os.Remove(tmpFile)
	}()
	// Hash while writing so the file doesn't need a second read pass.
	h := sha256.New()
	w := io.MultiWriter(f, h)
	if _, err := io.Copy(w, resp.Body); err != nil {
		return fmt.Errorf("downloading %s: %w", img.Name, err)
	}
	// Close explicitly (and check the error) before verifying/renaming.
	if err := f.Close(); err != nil {
		return err
	}
	if img.SHA256 != "" {
		got := hex.EncodeToString(h.Sum(nil))
		if got != img.SHA256 {
			return fmt.Errorf("SHA256 mismatch for %s: got %s, want %s", img.Name, got, img.SHA256)
		}
	}
	if err := os.Rename(tmpFile, cachedPath); err != nil {
		return err
	}
	log.Printf("downloaded %s", img.Name)
	return nil
}
// verifySHA256 checks that the file at path has the expected SHA256 hash.
func verifySHA256(path, expected string) error {
f, err := os.Open(path)
if err != nil {
return err
}
defer f.Close()
h := sha256.New()
if _, err := io.Copy(h, f); err != nil {
return err
}
got := hex.EncodeToString(h.Sum(nil))
if got != expected {
return fmt.Errorf("got %s, want %s", got, expected)
}
return nil
}
// cachedImagePath returns the filesystem path to the cached image for the given OS.
func cachedImagePath(img OSImage) string {
	dir := imageCacheDir()
	return filepath.Join(dir, img.Name+".qcow2")
}
// createOverlay creates a qcow2 overlay image on top of the given base
// image by shelling out to qemu-img. The overlay records base as its
// backing file, so writes land in the overlay and the base stays pristine.
func createOverlay(base, overlay string) error {
	cmd := exec.Command("qemu-img", "create",
		"-f", "qcow2",
		"-F", "qcow2",
		"-b", base,
		overlay)
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("qemu-img create overlay: %v: %s", err, out)
	}
	return nil
}

@ -0,0 +1,239 @@
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
package vmtest
import (
"bytes"
"encoding/json"
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"time"
"tailscale.com/tstest/natlab/vnet"
)
// startQEMU launches a QEMU process for the given node, dispatching on the
// node's OS type: gokrazy nodes use the microvm setup, everything else is
// treated as a cloud image.
func (e *Env) startQEMU(n *Node) error {
	if !n.os.IsGokrazy {
		return e.startCloudQEMU(n)
	}
	return e.startGokrazyQEMU(n)
}
// startGokrazyQEMU launches a QEMU process for a gokrazy node.
// This follows the same pattern as tstest/integration/nat/nat_test.go.
//
// The VM boots the shared gokrazy kernel directly (microvm machine type)
// with a per-node qcow2 overlay over the shared base image, so nodes
// cannot dirty each other's disks. Console output goes to
// <tempdir>/<name>.log via a virtconsole chardev.
func (e *Env) startGokrazyQEMU(n *Node) error {
	disk := filepath.Join(e.tempDir, fmt.Sprintf("%s.qcow2", n.name))
	if err := createOverlay(e.gokrazyBase, disk); err != nil {
		return err
	}
	// Per-node tailscaled environment variables are passed on the kernel
	// command line as "tailscaled.env=K=V" entries.
	var envBuf bytes.Buffer
	for _, env := range n.vnetNode.Env() {
		fmt.Fprintf(&envBuf, " tailscaled.env=%s=%s", env.Key, env.Value)
	}
	// Remote syslog goes to vnet's fake syslog sink (v4 or v6 flavor).
	sysLogAddr := net.JoinHostPort(vnet.FakeSyslogIPv4().String(), "995")
	if n.vnetNode.IsV6Only() {
		sysLogAddr = net.JoinHostPort(vnet.FakeSyslogIPv6().String(), "995")
	}
	logPath := filepath.Join(e.tempDir, n.name+".log")
	args := []string{
		"-M", "microvm,isa-serial=off",
		"-m", fmt.Sprintf("%dM", n.os.MemoryMB),
		"-nodefaults", "-no-user-config", "-nographic",
		"-kernel", e.gokrazyKernel,
		"-append", "console=hvc0 root=PARTUUID=60c24cc1-f3f9-427a-8199-76baa2d60001/PARTNROFF=1 ro init=/gokrazy/init panic=10 oops=panic pci=off nousb tsc=unstable clocksource=hpet gokrazy.remote_syslog.target=" + sysLogAddr + " tailscale-tta=1" + envBuf.String(),
		"-drive", "id=blk0,file=" + disk + ",format=qcow2",
		"-device", "virtio-blk-device,drive=blk0",
		"-device", "virtio-serial-device",
		"-device", "virtio-rng-device",
		"-chardev", "file,id=virtiocon0,path=" + logPath,
		"-device", "virtconsole,chardev=virtiocon0",
	}
	// Add network devices — one per NIC. Each NIC's backend is a stream
	// netdev dialing vnet's shared Unix socket; each carries its vnet-assigned
	// MAC address (presumably how vnet tells the NICs apart — see vnet docs).
	for i := range n.vnetNode.NumNICs() {
		mac := n.vnetNode.NICMac(i)
		netdevID := fmt.Sprintf("net%d", i)
		args = append(args,
			"-netdev", fmt.Sprintf("stream,id=%s,addr.type=unix,addr.path=%s", netdevID, e.sockAddr),
			"-device", fmt.Sprintf("virtio-net-device,netdev=%s,mac=%s", netdevID, mac),
		)
	}
	return e.launchQEMU(n.name, logPath, args)
}
// startCloudQEMU launches a QEMU process for a cloud image (Ubuntu, Debian, etc).
//
// The VM gets a per-node qcow2 overlay over the cached base image, a local
// cloud-init seed ISO, one virtio NIC per vnet NIC, and an extra user-mode
// "debug" NIC whose SSH hostfwd port is discovered via QMP and stored in
// n.sshPort.
func (e *Env) startCloudQEMU(n *Node) error {
	basePath := cachedImagePath(n.os)
	disk := filepath.Join(e.tempDir, fmt.Sprintf("%s.qcow2", n.name))
	if err := createOverlay(basePath, disk); err != nil {
		return err
	}
	// Create a seed ISO with cloud-init config (meta-data, user-data, network-config).
	// This MUST be a local ISO (not HTTP) so cloud-init reads network-config during
	// init-local, before systemd-networkd-wait-online blocks boot.
	seedISO, err := e.createCloudInitISO(n)
	if err != nil {
		return fmt.Errorf("creating cloud-init ISO: %w", err)
	}
	logPath := filepath.Join(e.tempDir, n.name+".log")
	qmpSock := filepath.Join(e.tempDir, n.name+"-qmp.sock")
	args := []string{
		"-machine", "q35,accel=kvm",
		"-m", fmt.Sprintf("%dM", n.os.MemoryMB),
		"-cpu", "host",
		"-smp", "2",
		"-display", "none",
		"-drive", fmt.Sprintf("file=%s,if=virtio", disk),
		"-drive", fmt.Sprintf("file=%s,if=virtio,media=cdrom,readonly=on", seedISO),
		// "ds=nocloud" in the SMBIOS serial points cloud-init at the
		// NoCloud datasource (the seed ISO above).
		"-smbios", "type=1,serial=ds=nocloud",
		"-serial", "file:" + logPath,
		// QMP control socket; used below to query the allocated SSH port.
		"-qmp", "unix:" + qmpSock + ",server,nowait",
	}
	// Add network devices — one per NIC.
	// romfile="" disables the iPXE option ROM entirely, saving ~5s per NIC at boot
	// and avoiding "duplicate fw_cfg file name" errors with multiple NICs.
	for i := range n.vnetNode.NumNICs() {
		mac := n.vnetNode.NICMac(i)
		netdevID := fmt.Sprintf("net%d", i)
		args = append(args,
			"-netdev", fmt.Sprintf("stream,id=%s,addr.type=unix,addr.path=%s", netdevID, e.sockAddr),
			"-device", fmt.Sprintf("virtio-net-pci,netdev=%s,mac=%s,romfile=", netdevID, mac),
		)
	}
	// Add a debug NIC with user-mode networking for SSH access from the host.
	// Use port 0 so the OS picks a free port; we query the actual port via QMP after launch.
	args = append(args,
		"-netdev", "user,id=debug0,hostfwd=tcp:127.0.0.1:0-:22",
		"-device", "virtio-net-pci,netdev=debug0,romfile=",
	)
	if err := e.launchQEMU(n.name, logPath, args); err != nil {
		return err
	}
	// Query QMP to find the actual SSH port that QEMU allocated.
	port, err := qmpQueryHostFwd(qmpSock)
	if err != nil {
		return fmt.Errorf("querying SSH port via QMP: %w", err)
	}
	n.sshPort = port
	e.t.Logf("[%s] SSH debug: ssh -p %d root@127.0.0.1 (password: root)", n.name, port)
	return nil
}
// launchQEMU starts a qemu-system-x86_64 process with the given args.
// VM console output goes to logPath (via QEMU's -serial or -chardev).
// QEMU's own stdout/stderr go to logPath.qemu for diagnostics.
//
// The process is killed and reaped in t.Cleanup; if the test failed, the
// last 50 lines of the VM console log are dumped for debugging.
func (e *Env) launchQEMU(name, logPath string, args []string) error {
	cmd := exec.Command("qemu-system-x86_64", args...)
	// Send stdout/stderr to the log file for any QEMU diagnostic messages.
	// Stdin must be /dev/null to prevent QEMU from trying to read.
	devNull, err := os.Open(os.DevNull)
	if err != nil {
		return fmt.Errorf("open /dev/null: %w", err)
	}
	cmd.Stdin = devNull
	qemuLog, err := os.Create(logPath + ".qemu")
	if err != nil {
		// Close the already-open /dev/null handle on every early-error path.
		devNull.Close()
		return err
	}
	cmd.Stdout = qemuLog
	cmd.Stderr = qemuLog
	if err := cmd.Start(); err != nil {
		devNull.Close()
		qemuLog.Close()
		return fmt.Errorf("qemu for %s: %w", name, err)
	}
	e.t.Logf("launched QEMU for %s (pid %d), log: %s", name, cmd.Process.Pid, logPath)
	e.qemuProcs = append(e.qemuProcs, cmd)
	e.t.Cleanup(func() {
		// Kill, then Wait to reap the child; only then release its files.
		cmd.Process.Kill()
		cmd.Wait()
		devNull.Close()
		qemuLog.Close()
		// Dump tail of VM log on failure for debugging.
		if e.t.Failed() {
			if data, err := os.ReadFile(logPath); err == nil {
				lines := bytes.Split(data, []byte("\n"))
				start := 0
				if len(lines) > 50 {
					start = len(lines) - 50
				}
				e.t.Logf("=== last 50 lines of %s log ===", name)
				for _, line := range lines[start:] {
					e.t.Logf("[%s] %s", name, line)
				}
			}
		}
	})
	return nil
}
// qmpQueryHostFwd connects to a QEMU QMP socket and queries the host port
// assigned to the first TCP host forward rule (the SSH debug port).
//
// QEMU creates the QMP socket asynchronously after launch, so this polls
// (50 × 100ms) for the socket to appear before giving up.
func qmpQueryHostFwd(sockPath string) (int, error) {
	// Wait for the QMP socket to appear.
	var conn net.Conn
	for range 50 {
		var err error
		conn, err = net.Dial("unix", sockPath)
		if err == nil {
			break
		}
		time.Sleep(100 * time.Millisecond)
	}
	if conn == nil {
		return 0, fmt.Errorf("QMP socket %s not available", sockPath)
	}
	defer conn.Close()
	// Bound the entire QMP conversation.
	conn.SetDeadline(time.Now().Add(5 * time.Second))
	// Read the QMP greeting.
	var greeting json.RawMessage
	dec := json.NewDecoder(conn)
	if err := dec.Decode(&greeting); err != nil {
		return 0, fmt.Errorf("reading QMP greeting: %w", err)
	}
	// Send qmp_capabilities to initialize; QMP requires this handshake
	// before accepting other commands.
	fmt.Fprintf(conn, `{"execute":"qmp_capabilities"}`+"\n")
	var capsResp json.RawMessage
	if err := dec.Decode(&capsResp); err != nil {
		return 0, fmt.Errorf("reading qmp_capabilities response: %w", err)
	}
	// Query "info usernet" via human-monitor-command, which returns the
	// HMP command's text output in the "return" field.
	fmt.Fprintf(conn, `{"execute":"human-monitor-command","arguments":{"command-line":"info usernet"}}`+"\n")
	var hmpResp struct {
		Return string `json:"return"`
	}
	if err := dec.Decode(&hmpResp); err != nil {
		return 0, fmt.Errorf("reading info usernet response: %w", err)
	}
	// Parse the port from output like:
	// TCP[HOST_FORWARD] 12 127.0.0.1 35323 10.0.2.15 22
	re := regexp.MustCompile(`TCP\[HOST_FORWARD\]\s+\d+\s+127\.0\.0\.1\s+(\d+)\s+`)
	m := re.FindStringSubmatch(hmpResp.Return)
	if m == nil {
		return 0, fmt.Errorf("no hostfwd port found in: %s", hmpResp.Return)
	}
	return strconv.Atoi(m[1])
}

@ -0,0 +1,676 @@
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
// Package vmtest provides a high-level framework for running integration tests
// across multiple QEMU virtual machines connected by natlab's vnet virtual
// network infrastructure. It supports mixed OS types (gokrazy, Ubuntu, Debian)
// and multi-NIC configurations for scenarios like subnet routing.
//
// Prerequisites:
// - qemu-system-x86_64 and KVM access (typically the "kvm" group; no root required)
// - A built gokrazy natlabapp image (auto-built on first run via "make natlab" in gokrazy/)
//
// Run tests with:
//
// go test ./tstest/natlab/vmtest/ --run-vm-tests -v
package vmtest
import (
"context"
"flag"
"fmt"
"io"
"net"
"net/http"
"net/netip"
"os"
"os/exec"
"path/filepath"
"slices"
"strings"
"testing"
"time"
"golang.org/x/sync/errgroup"
"tailscale.com/client/local"
"tailscale.com/ipn"
"tailscale.com/tailcfg"
"tailscale.com/tstest/natlab/vnet"
"tailscale.com/util/set"
)
// Command-line flags gating and tuning VM tests. New skips the test unless
// --run-vm-tests is set.
var (
	runVMTests     = flag.Bool("run-vm-tests", false, "run tests that require VMs with KVM")
	verboseVMDebug = flag.Bool("verbose-vm-debug", false, "enable verbose debug logging for VM tests")
)
// Env is a test environment that manages virtual networks and QEMU VMs.
// Create one with New, add networks and nodes, then call Start.
type Env struct {
	t       testing.TB
	cfg     vnet.Config  // accumulated network/node config; realized by Start
	server  *vnet.Server // the running vnet, created in Start
	nodes   []*Node      // all nodes added via AddNode
	tempDir string       // per-test scratch dir (disks, logs, sockets, ISOs)

	sockAddr string // shared Unix socket path for all QEMU netdevs
	binDir   string // directory for compiled binaries

	// gokrazy-specific paths
	gokrazyBase   string // path to gokrazy base qcow2 image
	gokrazyKernel string // path to gokrazy kernel

	qemuProcs []*exec.Cmd // launched QEMU processes
}
// logVerbosef logs a message only when --verbose-vm-debug is set.
func (e *Env) logVerbosef(format string, args ...any) {
	if !*verboseVMDebug {
		return
	}
	e.t.Helper()
	e.t.Logf(format, args...)
}
// New creates a new test environment. It skips the test if --run-vm-tests is not set.
func New(t testing.TB) *Env {
	if !*runVMTests {
		t.Skip("skipping VM test; set --run-vm-tests to run")
	}
	// Everything (disks, logs, sockets, binaries) lives under the test's
	// temp dir, so the testing framework cleans it all up.
	dir := t.TempDir()
	e := &Env{
		t:       t,
		tempDir: dir,
		binDir:  filepath.Join(dir, "bin"),
	}
	return e
}
// AddNetwork creates a new virtual network. Arguments follow the same pattern as
// vnet.Config.AddNetwork (string IPs, NAT types, NetworkService values).
// It only records configuration; the network is realized when Start calls vnet.New.
func (e *Env) AddNetwork(opts ...any) *vnet.Network {
	return e.cfg.AddNetwork(opts...)
}
// Node represents a virtual machine in the test environment.
// Fields are populated by AddNode; agent and sshPort are filled in by Start.
type Node struct {
	name            string
	num             int // assigned during AddNode
	os              OSImage
	nets            []*vnet.Network
	vnetNode        *vnet.Node // underlying vnet node (set during AddNode)
	agent           *vnet.NodeAgentClient
	joinTailnet     bool   // run "tailscale up" during Start (default true)
	advertiseRoutes string // comma-separated CIDRs for --advertise-routes
	webServerPort   int    // if > 0, Start launches a webserver on this port
	sshPort         int    // host port for SSH debug access (cloud VMs only)
}
// AddNode creates a new VM node. The name is used for identification and as the
// webserver greeting. Options can be *vnet.Network (for network attachment),
// NodeOption values, or vnet node options (like vnet.TailscaledEnv).
//
// Defaults: gokrazy OS, and the node joins the tailnet during Start.
func (e *Env) AddNode(name string, opts ...any) *Node {
	n := &Node{
		name:        name,
		os:          Gokrazy, // default
		joinTailnet: true,
	}
	e.nodes = append(e.nodes, n)
	// Separate network options from other options. vmtest-specific options
	// are consumed here; everything else is forwarded to vnet's AddNode in
	// the order given (order determines NIC numbering there).
	var vnetOpts []any
	for _, o := range opts {
		switch o := o.(type) {
		case *vnet.Network:
			n.nets = append(n.nets, o)
			vnetOpts = append(vnetOpts, o)
		case nodeOptOS:
			n.os = OSImage(o)
		case nodeOptNoTailscale:
			n.joinTailnet = false
			vnetOpts = append(vnetOpts, vnet.DontJoinTailnet)
		case nodeOptAdvertiseRoutes:
			n.advertiseRoutes = string(o)
		case nodeOptWebServer:
			n.webServerPort = int(o)
		default:
			// Pass through to vnet (TailscaledEnv, NodeOption, MAC, etc.)
			vnetOpts = append(vnetOpts, o)
		}
	}
	n.vnetNode = e.cfg.AddNode(vnetOpts...)
	n.num = n.vnetNode.Num()
	return n
}
// LanIP returns the LAN IPv4 address of this node on the given network.
// This is only valid after Env.Start() has been called.
func (n *Node) LanIP(net *vnet.Network) netip.Addr {
	// Delegates to the underlying vnet node.
	return n.vnetNode.LanIP(net)
}
// NodeOption types for configuring nodes. Each is a distinct defined type
// so AddNode can recognize it among its variadic any options.
type nodeOptOS OSImage
type nodeOptNoTailscale struct{}
type nodeOptAdvertiseRoutes string
type nodeOptWebServer int

// OS returns a NodeOption that sets the node's operating system image.
func OS(img OSImage) nodeOptOS { return nodeOptOS(img) }

// DontJoinTailnet returns a NodeOption that prevents the node from running tailscale up.
func DontJoinTailnet() nodeOptNoTailscale { return nodeOptNoTailscale{} }

// AdvertiseRoutes returns a NodeOption that configures the node to advertise
// the given routes (comma-separated CIDRs) when joining the tailnet.
func AdvertiseRoutes(routes string) nodeOptAdvertiseRoutes {
	return nodeOptAdvertiseRoutes(routes)
}

// WebServer returns a NodeOption that starts a webserver on the given port.
// The webserver responds with "Hello world I am <nodename>" on all requests.
func WebServer(port int) nodeOptWebServer { return nodeOptWebServer(port) }
// Start initializes the virtual network, builds/downloads images, compiles
// binaries, launches QEMU processes, and waits for all TTA agents to connect.
// It should be called after all AddNetwork/AddNode calls.
//
// Any failure is fatal to the test via e.t. Cleanup of the vnet server, the
// socket listener, and the QEMU processes is registered with t.Cleanup.
func (e *Env) Start() {
	t := e.t
	// One overall deadline for all of setup.
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
	t.Cleanup(cancel)
	if err := os.MkdirAll(e.binDir, 0755); err != nil {
		t.Fatal(err)
	}
	// Determine if we have any non-gokrazy "cloud" images (e.g. Ubuntu, Debian)
	// that require compiled binaries pushed into their image later. (Gokrazy
	// has them built-in, so doesn't need the compileBinaries step.)
	needBuildBinaries := slices.ContainsFunc(e.nodes, func(n *Node) bool { return !n.os.IsGokrazy })
	// Compile binaries and download/build images in parallel.
	// Any failure cancels the others via the errgroup context.
	eg, egCtx := errgroup.WithContext(ctx)
	if needBuildBinaries {
		eg.Go(func() error {
			return e.compileBinaries(egCtx)
		})
	}
	didOS := set.Set[string]{} // dedup by image name
	for _, n := range e.nodes {
		if didOS.Contains(n.os.Name) {
			continue
		}
		didOS.Add(n.os.Name)
		if n.os.IsGokrazy {
			eg.Go(func() error {
				return e.ensureGokrazy(egCtx)
			})
		} else {
			eg.Go(func() error {
				return ensureImage(egCtx, n.os)
			})
		}
	}
	if err := eg.Wait(); err != nil {
		t.Fatalf("setup: %v", err)
	}
	// Create the vnet server.
	var err error
	e.server, err = vnet.New(&e.cfg)
	if err != nil {
		t.Fatalf("vnet.New: %v", err)
	}
	t.Cleanup(func() { e.server.Close() })
	// Register compiled binaries with the file server VIP, from which cloud
	// VMs download them at boot (see generateUserData).
	if needBuildBinaries {
		for _, name := range []string{"tta", "tailscale", "tailscaled"} {
			data, err := os.ReadFile(filepath.Join(e.binDir, name))
			if err != nil {
				t.Fatalf("reading compiled %s: %v", name, err)
			}
			e.server.RegisterFile(name, data)
		}
	}
	// Cloud-init config is delivered via local seed ISOs (created in startCloudQEMU),
	// not via the cloud-init HTTP VIP, because network-config must be available
	// during init-local before systemd-networkd-wait-online blocks.
	// Start Unix socket listener. Every QEMU netdev dials this one socket.
	e.sockAddr = filepath.Join(e.tempDir, "vnet.sock")
	srv, err := net.Listen("unix", e.sockAddr)
	if err != nil {
		t.Fatalf("listen unix: %v", err)
	}
	t.Cleanup(func() { srv.Close() })
	go func() {
		for {
			c, err := srv.Accept()
			if err != nil {
				// Listener closed in t.Cleanup; stop accepting.
				return
			}
			go e.server.ServeUnixConn(c.(*net.UnixConn), vnet.ProtocolQEMU)
		}
	}()
	// Launch QEMU processes.
	for _, n := range e.nodes {
		if err := e.startQEMU(n); err != nil {
			t.Fatalf("startQEMU(%s): %v", n.name, err)
		}
	}
	// Set up agent clients and wait for all agents to connect.
	for _, n := range e.nodes {
		n.agent = e.server.NodeAgentClient(n.vnetNode)
		n.vnetNode.SetClient(n.agent)
	}
	// Wait for agents, then bring up tailscale — all nodes in parallel.
	var agentEg errgroup.Group
	for _, n := range e.nodes {
		agentEg.Go(func() error {
			t.Logf("[%s] waiting for agent...", n.name)
			st, err := n.agent.Status(ctx)
			if err != nil {
				return fmt.Errorf("[%s] agent status: %w", n.name, err)
			}
			t.Logf("[%s] agent connected, backend state: %s", n.name, st.BackendState)
			if n.vnetNode.HostFirewall() {
				if err := n.agent.EnableHostFirewall(ctx); err != nil {
					return fmt.Errorf("[%s] enable firewall: %w", n.name, err)
				}
			}
			if n.joinTailnet {
				if err := e.tailscaleUp(ctx, n); err != nil {
					return fmt.Errorf("[%s] tailscale up: %w", n.name, err)
				}
				// Re-fetch status to confirm the backend actually came up.
				st, err = n.agent.Status(ctx)
				if err != nil {
					return fmt.Errorf("[%s] status after up: %w", n.name, err)
				}
				if st.BackendState != "Running" {
					return fmt.Errorf("[%s] state = %q, want Running", n.name, st.BackendState)
				}
				t.Logf("[%s] up with %v", n.name, st.Self.TailscaleIPs)
			}
			return nil
		})
	}
	if err := agentEg.Wait(); err != nil {
		t.Fatal(err)
	}
	// Start webservers.
	for _, n := range e.nodes {
		if n.webServerPort > 0 {
			if err := e.startWebServer(ctx, n); err != nil {
				t.Fatalf("startWebServer(%s): %v", n.name, err)
			}
		}
	}
}
// tailscaleUp runs "tailscale up" on the node via TTA.
//
// The "unused" host is a placeholder: the agent's HTTPClient routes the
// request to the TTA process inside the VM regardless of the host.
func (e *Env) tailscaleUp(ctx context.Context, n *Node) error {
	upURL := "http://unused/up?accept-routes=true"
	if routes := n.advertiseRoutes; routes != "" {
		upURL += "&advertise-routes=" + routes
	}
	req, err := http.NewRequestWithContext(ctx, "GET", upURL, nil)
	if err != nil {
		return err
	}
	res, err := n.agent.HTTPClient.Do(req)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	// Read the body regardless of status so the error (if any) includes it.
	body, _ := io.ReadAll(res.Body)
	if res.StatusCode == 200 {
		return nil
	}
	return fmt.Errorf("tailscale up: %s: %s", res.Status, body)
}
// startWebServer tells TTA on the node to start a webserver.
//
// The webserver's port and name come from the node's configuration; the
// request is routed to the in-VM TTA process via the agent's HTTPClient.
func (e *Env) startWebServer(ctx context.Context, n *Node) error {
	reqURL := fmt.Sprintf("http://unused/start-webserver?port=%d&name=%s", n.webServerPort, n.name)
	req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
	if err != nil {
		return err
	}
	res, err := n.agent.HTTPClient.Do(req)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	if res.StatusCode != 200 {
		// Include the response body for easier debugging of TTA failures.
		body, _ := io.ReadAll(res.Body)
		return fmt.Errorf("start-webserver: %s: %s", res.Status, body)
	}
	e.t.Logf("[%s] webserver started on port %d", n.name, n.webServerPort)
	return nil
}
// ApproveRoutes tells the test control server to approve subnet routes
// for the given node. The routes should be CIDR strings.
//
// It fails the test (via t.Fatalf) on any error. After approval it blocks
// until every other tailnet node has seen the routes, then pings the
// advertising node from each peer to establish WireGuard tunnels.
func (e *Env) ApproveRoutes(n *Node, routes ...string) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	// The control server identifies the node by its public key, which we
	// learn from the node's own status.
	st, err := n.agent.Status(ctx)
	if err != nil {
		e.t.Fatalf("ApproveRoutes: status for %s: %v", n.name, err)
	}
	nodeKey := st.Self.PublicKey
	var prefixes []netip.Prefix
	for _, route := range routes {
		pfx, err := netip.ParsePrefix(route)
		if err != nil {
			e.t.Fatalf("ApproveRoutes: bad route %q: %v", route, err)
		}
		prefixes = append(prefixes, pfx)
	}
	// Flip on --accept-routes for every other tailnet node BEFORE the control
	// server learns of the routes. That way the map update carrying the new
	// peer routes is installed by peers the moment it arrives.
	for _, peer := range e.nodes {
		if peer == n || !peer.joinTailnet {
			continue
		}
		mp := &ipn.MaskedPrefs{
			Prefs:       ipn.Prefs{RouteAll: true},
			RouteAllSet: true,
		}
		if _, err := peer.agent.EditPrefs(ctx, mp); err != nil {
			e.t.Fatalf("ApproveRoutes: set accept-routes on %s: %v", peer.name, err)
		}
	}
	// Approve the routes on the control server. SetSubnetRoutes notifies all
	// peers via updatePeerChanged, so they'll re-fetch their MapResponse.
	e.server.ControlServer().SetSubnetRoutes(nodeKey, prefixes)
	// Block until each peer has actually seen each route.
	for _, route := range routes {
		for _, peer := range e.nodes {
			if peer == n || !peer.joinTailnet {
				continue
			}
			if !e.waitForPeerRoute(peer, route, 15*time.Second) {
				e.DumpStatus(peer)
				e.t.Fatalf("ApproveRoutes: %s never saw route %s", peer.name, route)
			}
		}
	}
	e.t.Logf("approved routes %v on %s", routes, n.name)
	// Establish WireGuard tunnels by pinging the advertiser from each peer.
	for _, peer := range e.nodes {
		if peer == n || !peer.joinTailnet {
			continue
		}
		e.ping(peer, n)
	}
}
// ping pings from one node to another's Tailscale IP, retrying until it succeeds
// or the timeout expires. This establishes the WireGuard tunnel between the nodes.
//
// Fails the test if the destination's status can't be read, if it has no
// Tailscale IPs, or if no ping succeeds within 30 seconds.
func (e *Env) ping(from, to *Node) {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	st, err := to.agent.Status(ctx)
	if err != nil {
		e.t.Fatalf("ping: can't get %s status: %v", to.name, err)
	}
	if len(st.Self.TailscaleIPs) == 0 {
		e.t.Fatalf("ping: %s has no Tailscale IPs", to.name)
	}
	dst := st.Self.TailscaleIPs[0]
	for {
		// Each individual disco ping gets a short deadline; the outer ctx
		// bounds the overall retry loop.
		attemptCtx, cancelAttempt := context.WithTimeout(ctx, 3*time.Second)
		pr, err := from.agent.PingWithOpts(attemptCtx, dst, tailcfg.PingDisco, local.PingOpts{})
		cancelAttempt()
		if err == nil && pr.Err == "" {
			e.logVerbosef("ping: %s -> %s OK", from.name, dst)
			return
		}
		if ctx.Err() != nil {
			e.t.Fatalf("ping: %s -> %s timed out", from.name, dst)
		}
		time.Sleep(time.Second)
	}
}
// SSHExec runs a command on a cloud VM via its debug SSH NIC.
// Only works for cloud VMs that have the debug NIC and SSH key configured.
// Returns stdout and any error.
//
// Note: stderr is folded into the returned string (CombinedOutput), and the
// private key is expected at /tmp/vmtest_key.
func (e *Env) SSHExec(n *Node, cmd string) (string, error) {
	if n.sshPort == 0 {
		return "", fmt.Errorf("node %s has no SSH debug port", n.name)
	}
	args := []string{
		"-o", "StrictHostKeyChecking=no",
		"-o", "UserKnownHostsFile=/dev/null",
		"-o", "ConnectTimeout=5",
		"-i", "/tmp/vmtest_key",
		"-p", fmt.Sprintf("%d", n.sshPort),
		"root@127.0.0.1",
		cmd,
	}
	out, err := exec.Command("ssh", args...).CombinedOutput()
	return string(out), err
}
// DumpStatus logs the tailscale status of a node, including its peers and their
// AllowedIPs. Useful for debugging routing issues.
//
// Errors fetching status are logged (not fatal), so this is safe to call from
// failure paths.
func (e *Env) DumpStatus(n *Node) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	st, err := n.agent.Status(ctx)
	if err != nil {
		e.t.Logf("[%s] DumpStatus error: %v", n.name, err)
		return
	}
	var allowed []string
	if v := st.Self.AllowedIPs; v != nil {
		for i := 0; i < v.Len(); i++ {
			allowed = append(allowed, v.At(i).String())
		}
	}
	var primary []string
	if v := st.Self.PrimaryRoutes; v != nil {
		for i := 0; i < v.Len(); i++ {
			primary = append(primary, v.At(i).String())
		}
	}
	e.t.Logf("[%s] self: %v, backend=%s, AllowedIPs=%v, PrimaryRoutes=%v", n.name, st.Self.TailscaleIPs, st.BackendState, allowed, primary)
	for _, peer := range st.Peer {
		var peerAllowed []string
		if v := peer.AllowedIPs; v != nil {
			for i := 0; i < v.Len(); i++ {
				peerAllowed = append(peerAllowed, v.At(i).String())
			}
		}
		e.t.Logf("[%s] peer %s (%s): AllowedIPs=%v, Online=%v, Relay=%q, CurAddr=%q",
			n.name, peer.HostName, peer.TailscaleIPs,
			peerAllowed, peer.Online, peer.Relay, peer.CurAddr)
	}
}
// waitForPeerRoute polls the node's status until it sees the given route prefix
// in a peer's AllowedIPs, or until timeout. Returns true if found.
//
// Transient Status errors are retried until the deadline rather than aborting
// the wait immediately; previously a single agent hiccup returned false early,
// contradicting the "poll until timeout" contract.
func (e *Env) waitForPeerRoute(n *Node, prefix string, timeout time.Duration) bool {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	for {
		st, err := n.agent.Status(ctx)
		if err == nil {
			for _, peer := range st.Peer {
				if peer.AllowedIPs == nil {
					continue
				}
				for i := range peer.AllowedIPs.Len() {
					if peer.AllowedIPs.At(i).String() == prefix {
						return true
					}
				}
			}
		}
		// Check the deadline after each poll (this also ends the loop when
		// Status failed because ctx expired).
		if ctx.Err() != nil {
			return false
		}
		time.Sleep(time.Second)
	}
}
// HTTPGet makes an HTTP GET request from the given node to the specified URL.
// The request is proxied through TTA's /http-get handler.
//
// The target URL is percent-escaped before being embedded as the "url" query
// parameter of the proxy request; previously a target containing '&', '#',
// '%', '+', '?', or a space would be truncated or corrupted by the outer
// query-string parsing. Retries up to 3 times on transport errors and on
// 502/503 (VM-side server or route not ready yet); any other status's body is
// returned as-is.
func (e *Env) HTTPGet(from *Node, targetURL string) string {
	// Minimal query-value escaping. strings.Replacer makes a single pass over
	// the input, so the "%" rule only fires on characters present in the
	// original URL, never on our own replacement text.
	escaped := strings.NewReplacer(
		"%", "%25",
		"&", "%26",
		"+", "%2B",
		"#", "%23",
		"?", "%3F",
		" ", "%20",
	).Replace(targetURL)
	for attempt := range 3 {
		ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
		reqURL := "http://unused/http-get?url=" + escaped
		req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
		if err != nil {
			cancel()
			e.t.Fatalf("HTTPGet: %v", err)
		}
		res, err := from.agent.HTTPClient.Do(req)
		cancel()
		if err != nil {
			e.logVerbosef("HTTPGet attempt %d from %s: %v", attempt+1, from.name, err)
			continue
		}
		body, _ := io.ReadAll(res.Body)
		res.Body.Close()
		// 502/503 usually mean the route or webserver isn't up yet; retry.
		if res.StatusCode == http.StatusBadGateway || res.StatusCode == http.StatusServiceUnavailable {
			e.t.Logf("HTTPGet attempt %d from %s: status %d, body: %s", attempt+1, from.name, res.StatusCode, string(body))
			time.Sleep(2 * time.Second)
			continue
		}
		return string(body)
	}
	e.t.Fatalf("HTTPGet from %s to %s: all attempts failed", from.name, targetURL)
	return ""
}
// ensureGokrazy finds or builds the gokrazy base image and kernel.
//
// On first call it locates gokrazy/natlabapp.qcow2 under the module root,
// building it with "make natlab" if missing, and resolves the gokrazy kernel
// vmlinuz path from go.mod. Subsequent calls are no-ops.
func (e *Env) ensureGokrazy(ctx context.Context) error {
	if e.gokrazyBase != "" {
		return nil // already found
	}
	modRoot, err := findModRoot()
	if err != nil {
		return err
	}
	e.gokrazyBase = filepath.Join(modRoot, "gokrazy/natlabapp.qcow2")
	_, statErr := os.Stat(e.gokrazyBase)
	switch {
	case statErr == nil:
		// Image already built; nothing to do.
	case os.IsNotExist(statErr):
		e.t.Logf("building gokrazy natlab image...")
		build := exec.CommandContext(ctx, "make", "natlab")
		build.Dir = filepath.Join(modRoot, "gokrazy")
		build.Stderr = os.Stderr
		build.Stdout = os.Stdout
		if err := build.Run(); err != nil {
			return fmt.Errorf("make natlab: %w", err)
		}
	default:
		return statErr
	}
	kernel, err := findKernelPath(filepath.Join(modRoot, "go.mod"))
	if err != nil {
		return fmt.Errorf("finding kernel: %w", err)
	}
	e.gokrazyKernel = kernel
	return nil
}
// compileBinaries cross-compiles tta, tailscale, and tailscaled for linux/amd64
// and places them in e.binDir.
//
// The three builds run concurrently; the first failure's combined build
// output is returned in the error.
func (e *Env) compileBinaries(ctx context.Context) error {
	modRoot, err := findModRoot()
	if err != nil {
		return err
	}
	type target struct{ name, pkg string }
	targets := []target{
		{"tta", "./cmd/tta"},
		{"tailscale", "./cmd/tailscale"},
		{"tailscaled", "./cmd/tailscaled"},
	}
	var eg errgroup.Group
	for _, tgt := range targets {
		eg.Go(func() error {
			e.t.Logf("compiling %s...", tgt.name)
			build := exec.CommandContext(ctx, "go", "build", "-o", filepath.Join(e.binDir, tgt.name), tgt.pkg)
			build.Dir = modRoot
			// Static linux/amd64 binaries, runnable in both gokrazy and
			// cloud VMs.
			build.Env = append(os.Environ(), "GOOS=linux", "GOARCH=amd64", "CGO_ENABLED=0")
			if out, err := build.CombinedOutput(); err != nil {
				return fmt.Errorf("building %s: %v\n%s", tgt.name, err, out)
			}
			e.t.Logf("compiled %s", tgt.name)
			return nil
		})
	}
	return eg.Wait()
}
// findModRoot returns the root of the Go module (where go.mod is).
//
// It asks the go tool for GOMOD. Output (not CombinedOutput) is used so that
// warnings the go tool prints to stderr can't corrupt the parsed path.
func findModRoot() (string, error) {
	out, err := exec.Command("go", "env", "GOMOD").Output()
	if err != nil {
		return "", fmt.Errorf("go env GOMOD: %w", err)
	}
	gomod := strings.TrimSpace(string(out))
	// GOMOD is empty or os.DevNull when not inside a module.
	if gomod == "" || gomod == os.DevNull {
		return "", fmt.Errorf("not in a Go module")
	}
	return filepath.Dir(gomod), nil
}
// findKernelPath finds the gokrazy kernel vmlinuz path from go.mod.
//
// It scans go.mod for the github.com/tailscale/gokrazy-kernel require line and
// joins the module cache directory for that version with "vmlinuz".
func findKernelPath(goMod string) (string, error) {
	b, err := os.ReadFile(goMod)
	if err != nil {
		return "", err
	}
	// Output (not CombinedOutput) so go-tool stderr warnings can't corrupt
	// the parsed cache path.
	goModCacheB, err := exec.Command("go", "env", "GOMODCACHE").Output()
	if err != nil {
		return "", err
	}
	goModCache := strings.TrimSpace(string(goModCacheB))
	const kernelMod = "github.com/tailscale/gokrazy-kernel"
	for _, line := range strings.Split(string(b), "\n") {
		parts := strings.Fields(strings.TrimSpace(line))
		// Require an exact module-path token followed by a version. A bare
		// prefix match would also hit replace directives ("… => …", yielding
		// a bogus "…@=>" path) or other modules whose path merely starts
		// with kernelMod.
		if len(parts) >= 2 && parts[0] == kernelMod && strings.HasPrefix(parts[1], "v") {
			return filepath.Join(goModCache, parts[0]+"@"+parts[1], "vmlinuz"), nil
		}
	}
	return "", fmt.Errorf("gokrazy-kernel not found in %s", goMod)
}

@ -0,0 +1,38 @@
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
package vmtest_test
import (
"fmt"
"strings"
"testing"
"tailscale.com/tstest/natlab/vmtest"
"tailscale.com/tstest/natlab/vnet"
)
// TestSubnetRouter brings up a three-VM topology:
//
//	client (gokrazy) -> subnet-router (Ubuntu, dual-NIC) -> backend (gokrazy)
//
// and verifies the client can fetch the backend's webserver through the
// approved 10.0.0.0/24 subnet route.
func TestSubnetRouter(t *testing.T) {
	env := vmtest.New(t)

	// Client-side LAN behind an easy NAT, plus an internal LAN that only the
	// subnet router and the backend sit on.
	lan := env.AddNetwork("2.1.1.1", "192.168.1.1/24", "2000:1::1/64", vnet.EasyNAT)
	internal := env.AddNetwork("10.0.0.1/24", "2000:2::1/64")

	client := env.AddNode("client", lan,
		vmtest.OS(vmtest.Gokrazy))
	router := env.AddNode("subnet-router", lan, internal,
		vmtest.OS(vmtest.Ubuntu2404),
		vmtest.AdvertiseRoutes("10.0.0.0/24"))
	backend := env.AddNode("backend", internal,
		vmtest.OS(vmtest.Gokrazy),
		vmtest.DontJoinTailnet(),
		vmtest.WebServer(8080))

	env.Start()
	env.ApproveRoutes(router, "10.0.0.0/24")

	got := env.HTTPGet(client, fmt.Sprintf("http://%s:8080/", backend.LanIP(internal)))
	if !strings.Contains(got, "Hello world I am backend") {
		t.Fatalf("got %q", got)
	}
}

@ -294,6 +294,24 @@ func stringifyTEI(tei stack.TransportEndpointID) string {
return fmt.Sprintf("%s -> %s", remoteHostPort, localHostPort)
}
// vipNameOf returns the VIP name for the given IP, or "" if it's not a VIP.
func vipNameOf(ip netip.Addr) string {
	for i := range vips {
		if vips[i].Match(ip) {
			return vips[i].name
		}
	}
	return ""
}
// nodeNameOf returns the node's name for the given IP on this network, or "" if unknown.
func (n *network) nodeNameOf(ip netip.Addr) string {
	node, ok := n.nodeByIP(ip)
	if !ok {
		return ""
	}
	return node.String()
}
func (n *network) acceptTCP(r *tcp.ForwarderRequest) {
reqDetails := r.ID()
@ -305,7 +323,17 @@ func (n *network) acceptTCP(r *tcp.ForwarderRequest) {
return
}
log.Printf("vnet-AcceptTCP: %v", stringifyTEI(reqDetails))
// Annotate the log with node/VIP names for readability.
srcHP := net.JoinHostPort(clientRemoteIP.String(), strconv.Itoa(int(reqDetails.RemotePort)))
srcStr := srcHP
if name := n.nodeNameOf(clientRemoteIP); name != "" {
srcStr = fmt.Sprintf("%s (%s)", srcHP, name)
}
dstStr := net.JoinHostPort(destIP.String(), strconv.Itoa(int(destPort)))
if name := vipNameOf(destIP); name != "" {
dstStr = fmt.Sprintf("%s (%s)", dstStr, name)
}
log.Printf("vnet-AcceptTCP: %s -> %s", srcStr, dstStr)
var wq waiter.Queue
ep, err := r.CreateEndpoint(&wq)
@ -1466,6 +1494,12 @@ func (n *network) HandleEthernetPacketForRouter(ep EthernetPacket) {
return
}
if toForward {
// Traffic to destinations we don't handle (e.g. VMs trying to reach
// the real internet for NTP, package updates, etc). Expected; drop silently.
return
}
n.logf("router got unknown packet: %v", packet)
}

Loading…
Cancel
Save