diff --git a/cmd/tta/tta.go b/cmd/tta/tta.go index 28bb48b23..cf5dc4162 100644 --- a/cmd/tta/tta.go +++ b/cmd/tta/tta.go @@ -15,6 +15,7 @@ import ( "context" "errors" "flag" + "fmt" "io" "log" "net" @@ -181,7 +182,27 @@ func main() { return }) ttaMux.HandleFunc("/up", func(w http.ResponseWriter, r *http.Request) { - serveCmd(w, "tailscale", "up", "--login-server=http://control.tailscale") + args := []string{"up", "--login-server=http://control.tailscale"} + if routes := r.URL.Query().Get("advertise-routes"); routes != "" { + args = append(args, "--advertise-routes="+routes) + } + if snat := r.URL.Query().Get("snat-subnet-routes"); snat != "" { + args = append(args, "--snat-subnet-routes="+snat) + } + if r.URL.Query().Get("accept-routes") == "true" { + args = append(args, "--accept-routes") + } + serveCmd(w, "tailscale", args...) + }) + ttaMux.HandleFunc("/set", func(w http.ResponseWriter, r *http.Request) { + args := []string{"set"} + if r.URL.Query().Get("accept-routes") == "true" { + args = append(args, "--accept-routes") + } + if routes := r.URL.Query().Get("advertise-routes"); routes != "" { + args = append(args, "--advertise-routes="+routes) + } + serveCmd(w, "tailscale", args...) }) ttaMux.HandleFunc("/ip", func(w http.ResponseWriter, r *http.Request) { conn, ok := r.Context().Value(connContextKey).(net.Conn) @@ -192,12 +213,85 @@ func main() { w.Write([]byte(conn.LocalAddr().String())) }) ttaMux.HandleFunc("/ping", func(w http.ResponseWriter, r *http.Request) { - // Send 4 packets and wait a maximum of 1 second for each. The deadline - // is required for ping to return a non-zero exit code on no response. - // The busybox in question here is the breakglass busybox inside the - // natlab QEMU image - the host running the test does not need to have - // busybox installed at that path, or at all. - serveCmd(w, "/usr/local/bin/busybox", "ping", "-c", "4", "-W", "1", r.URL.Query().Get("host")) + host := r.URL.Query().Get("host") + if distro.Get() == distro.Gokrazy { + // The busybox in question here is the breakglass busybox inside the + // natlab QEMU image. + serveCmd(w, "/usr/local/bin/busybox", "ping", "-c", "4", "-W", "1", host) + } else { + serveCmd(w, "ping", "-c", "4", "-W", "1", host) + } + }) + ttaMux.HandleFunc("/start-webserver", func(w http.ResponseWriter, r *http.Request) { + port := r.URL.Query().Get("port") + name := r.URL.Query().Get("name") + if port == "" { + http.Error(w, "missing port", http.StatusBadRequest) + return + } + if name == "" { + name = "unnamed" + } + log.Printf("Starting webserver on port %s as %q", port, name) + go func() { + mux := http.NewServeMux() + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "Hello world I am %s", name) + }) + if err := http.ListenAndServe(":"+port, mux); err != nil { + log.Printf("webserver on :%s failed: %v", port, err) + } + }() + io.WriteString(w, "OK\n") + }) + ttaMux.HandleFunc("/http-get", func(w http.ResponseWriter, r *http.Request) { + targetURL := r.URL.Query().Get("url") + if targetURL == "" { + http.Error(w, "missing url", http.StatusBadRequest) + return + } + log.Printf("HTTP GET %s", targetURL) + ctx, cancel := context.WithTimeout(r.Context(), 10*time.Second) + defer cancel() + req, err := http.NewRequestWithContext(ctx, "GET", targetURL, nil) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + // Use Tailscale's SOCKS5 proxy if available, so traffic to Tailscale + // subnet routes goes through the WireGuard tunnel instead of the + // host network stack (which may not have the routes, especially + // in userspace networking mode). + client := &http.Client{ + Transport: &http.Transport{ + DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { + // Try the Tailscale localapi proxy dialer first. + host, portStr, err := net.SplitHostPort(addr) + if err != nil { + var d net.Dialer + return d.DialContext(ctx, network, addr) + } + port, _ := strconv.ParseUint(portStr, 10, 16) + var lc local.Client + conn, err := lc.UserDial(ctx, network, host, uint16(port)) + if err == nil { + return conn, nil + } + log.Printf("http-get: UserDial failed, falling back to direct: %v", err) + var d net.Dialer + return d.DialContext(ctx, network, addr) + }, + }, + } + resp, err := client.Do(req) + if err != nil { + http.Error(w, err.Error(), http.StatusBadGateway) + return + } + defer resp.Body.Close() + w.Header().Set("X-Upstream-Status", strconv.Itoa(resp.StatusCode)) + w.WriteHeader(resp.StatusCode) + io.Copy(w, resp.Body) }) ttaMux.HandleFunc("/fw", addFirewallHandler) ttaMux.HandleFunc("/logs", func(w http.ResponseWriter, r *http.Request) { diff --git a/flake.nix b/flake.nix index acb8ea7be..d3d5b173d 100644 --- a/flake.nix +++ b/flake.nix @@ -163,4 +163,4 @@ }); }; } -# nix-direnv cache busting line: sha256-PLt+IPqemF3agESg6jV8AzbiOpgL45mJ/AymcNUo7VU= +# nix-direnv cache busting line: sha256-GB5riRI9hkutLc2wBzv2jil+Tf6fogLxUw54HRSPNUk= diff --git a/go.mod b/go.mod index 102b26b6f..ee582d571 100644 --- a/go.mod +++ b/go.mod @@ -67,6 +67,7 @@ require ( github.com/jellydator/ttlcache/v3 v3.1.0 github.com/jsimonetti/rtnetlink v1.4.0 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 + github.com/kdomanski/iso9660 v0.4.0 github.com/klauspost/compress v1.18.2 github.com/kortschak/wol v0.0.0-20200729010619-da482cc4850a github.com/mattn/go-colorable v0.1.13 diff --git a/go.mod.sri b/go.mod.sri index d14a565a6..044d0db74 100644 --- a/go.mod.sri +++ b/go.mod.sri @@ -1 +1 @@ -sha256-PLt+IPqemF3agESg6jV8AzbiOpgL45mJ/AymcNUo7VU= +sha256-GB5riRI9hkutLc2wBzv2jil+Tf6fogLxUw54HRSPNUk= diff --git a/go.sum b/go.sum index af3abb648..81b8e4e0c 100644 --- a/go.sum +++ b/go.sum @@ -753,6 +753,8 @@ github.com/karamaru-alpha/copyloopvar v1.0.8 h1:gieLARwuByhEMxRwM3GRS/juJqFbLraf github.com/karamaru-alpha/copyloopvar v1.0.8/go.mod h1:u7CIfztblY0jZLOQZgH3oYsJzpC2A7S6u/lfgSXHy0k= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= +github.com/kdomanski/iso9660 v0.4.0 h1:BPKKdcINz3m0MdjIMwS0wx1nofsOjxOq8TOr45WGHFg= +github.com/kdomanski/iso9660 v0.4.0/go.mod h1:OxUSupHsO9ceI8lBLPJKWBTphLemjrCQY8LPXM7qSzU= github.com/kenshaw/evdev v0.1.0 h1:wmtceEOFfilChgdNT+c/djPJ2JineVsQ0N14kGzFRUo= github.com/kenshaw/evdev v0.1.0/go.mod h1:B/fErKCihUyEobz0mjn2qQbHgyJKFQAxkXSvkeeA/Wo= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= diff --git a/shell.nix b/shell.nix index 6f396d981..90ef454d6 100644 --- a/shell.nix +++ b/shell.nix @@ -16,4 +16,4 @@ ) { src = ./.; }).shellNix -# nix-direnv cache busting line: sha256-PLt+IPqemF3agESg6jV8AzbiOpgL45mJ/AymcNUo7VU= +# nix-direnv cache busting line: sha256-GB5riRI9hkutLc2wBzv2jil+Tf6fogLxUw54HRSPNUk= diff --git a/tstest/natlab/vmtest/cloudinit.go b/tstest/natlab/vmtest/cloudinit.go new file mode 100644 index 000000000..41b9ae10e --- /dev/null +++ b/tstest/natlab/vmtest/cloudinit.go @@ -0,0 +1,117 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +package vmtest + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/kdomanski/iso9660" +) + +// createCloudInitISO creates a cidata seed ISO for the given cloud VM node. +// The ISO contains meta-data, user-data, and network-config files. +// Cloud-init reads these during init-local (pre-network), which is critical +// for network-config to take effect before systemd-networkd-wait-online runs. +func (e *Env) createCloudInitISO(n *Node) (string, error) { + metaData := fmt.Sprintf("instance-id: %s\nlocal-hostname: %s\n", n.name, n.name) + userData := e.generateUserData(n) + + // Network config: DHCP all ethernet interfaces. + // The "optional: true" prevents systemd-networkd-wait-online from blocking. + // The first vnet NIC gets the default route (metric 100). + // Other interfaces get higher metrics to avoid routing conflicts. + networkConfig := `version: 2 +ethernets: + primary: + match: + macaddress: "` + n.vnetNode.NICMac(0).String() + `" + dhcp4: true + dhcp4-overrides: + route-metric: 100 + optional: true + secondary: + match: + name: "en*" + dhcp4: true + dhcp4-overrides: + route-metric: 200 + optional: true +` + + iw, err := iso9660.NewWriter() + if err != nil { + return "", fmt.Errorf("creating ISO writer: %w", err) + } + defer iw.Cleanup() + + for name, content := range map[string]string{ + "meta-data": metaData, + "user-data": userData, + "network-config": networkConfig, + } { + if err := iw.AddFile(strings.NewReader(content), name); err != nil { + return "", fmt.Errorf("adding %s to ISO: %w", name, err) + } + } + + isoPath := filepath.Join(e.tempDir, n.name+"-seed.iso") + f, err := os.Create(isoPath) + if err != nil { + return "", err + } + defer f.Close() + if err := iw.WriteTo(f, "cidata"); err != nil { + return "", fmt.Errorf("writing seed ISO: %w", err) + } + return isoPath, nil +} + +// generateUserData creates the cloud-init user-data (#cloud-config) for a node. +func (e *Env) generateUserData(n *Node) string { + var ud strings.Builder + ud.WriteString("#cloud-config\n") + + // Enable root SSH login for debugging via the debug NIC. + ud.WriteString("ssh_pwauth: true\n") + ud.WriteString("disable_root: false\n") + ud.WriteString("users:\n") + ud.WriteString(" - name: root\n") + ud.WriteString(" lock_passwd: false\n") + ud.WriteString(" plain_text_passwd: root\n") + // Also inject the host's SSH key if available. + if pubkey, err := os.ReadFile("/tmp/vmtest_key.pub"); err == nil { + ud.WriteString(fmt.Sprintf(" ssh_authorized_keys:\n - %s\n", strings.TrimSpace(string(pubkey)))) + } + + ud.WriteString("runcmd:\n") + + // Remove the default route from the debug NIC (enp0s4) so traffic goes through vnet. + // The debug NIC is only for SSH access from the host. + ud.WriteString(" - [\"/bin/sh\", \"-c\", \"ip route del default via 10.0.2.2 dev enp0s4 2>/dev/null || true\"]\n") + + // Download binaries from the files.tailscale VIP (52.52.0.6). + // Use the IP directly to avoid DNS resolution issues during early boot. + for _, bin := range []string{"tailscaled", "tailscale", "tta"} { + fmt.Fprintf(&ud, " - [\"/bin/sh\", \"-c\", \"curl -v --retry 10 --retry-delay 2 --retry-all-errors -o /usr/local/bin/%s http://52.52.0.6/%s 2>&1\"]\n", bin, bin) + } + ud.WriteString(" - [\"chmod\", \"+x\", \"/usr/local/bin/tailscaled\", \"/usr/local/bin/tailscale\", \"/usr/local/bin/tta\"]\n") + + // Enable IP forwarding for subnet routers. + if n.advertiseRoutes != "" { + ud.WriteString(" - [\"sysctl\", \"-w\", \"net.ipv4.ip_forward=1\"]\n") + ud.WriteString(" - [\"sysctl\", \"-w\", \"net.ipv6.conf.all.forwarding=1\"]\n") + } + + // Start tailscaled in the background. + ud.WriteString(" - [\"/bin/sh\", \"-c\", \"/usr/local/bin/tailscaled --state=mem: &\"]\n") + ud.WriteString(" - [\"sleep\", \"2\"]\n") + + // Start tta (Tailscale Test Agent). + ud.WriteString(" - [\"/bin/sh\", \"-c\", \"/usr/local/bin/tta &\"]\n") + + return ud.String() +} diff --git a/tstest/natlab/vmtest/images.go b/tstest/natlab/vmtest/images.go new file mode 100644 index 000000000..54e7eb577 --- /dev/null +++ b/tstest/natlab/vmtest/images.go @@ -0,0 +1,170 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +package vmtest + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "log" + "net/http" + "os" + "os/exec" + "path/filepath" +) + +// OSImage describes a VM operating system image. +type OSImage struct { + Name string + URL string // download URL for the cloud image + SHA256 string // expected SHA256 hash of the image + MemoryMB int // RAM for the VM + IsGokrazy bool // true for gokrazy images (different QEMU setup) +} + +var ( + // Gokrazy is a minimal Tailscale appliance image built from the gokrazy/natlabapp directory. + Gokrazy = OSImage{ + Name: "gokrazy", + IsGokrazy: true, + MemoryMB: 384, + } + + // Ubuntu2404 is Ubuntu 24.04 LTS (Noble Numbat) cloud image. + Ubuntu2404 = OSImage{ + Name: "ubuntu-24.04", + URL: "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img", + MemoryMB: 1024, + } + + // Debian12 is Debian 12 (Bookworm) generic cloud image. + Debian12 = OSImage{ + Name: "debian-12", + URL: "https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-generic-amd64.qcow2", + MemoryMB: 1024, + } +) + +// imageCacheDir returns the directory for cached VM images. +func imageCacheDir() string { + if d := os.Getenv("VMTEST_CACHE_DIR"); d != "" { + return d + } + home, _ := os.UserHomeDir() + return filepath.Join(home, ".cache", "tailscale", "vmtest", "images") +} + +// ensureImage downloads and caches the OS image if not already present. +func ensureImage(ctx context.Context, img OSImage) error { + if img.IsGokrazy { + return nil // gokrazy images are handled separately + } + + cacheDir := imageCacheDir() + if err := os.MkdirAll(cacheDir, 0755); err != nil { + return err + } + + // Use a filename based on the image name. + cachedPath := filepath.Join(cacheDir, img.Name+".qcow2") + if _, err := os.Stat(cachedPath); err == nil { + // If we have a SHA256 to verify, check it. + if img.SHA256 != "" { + if err := verifySHA256(cachedPath, img.SHA256); err != nil { + log.Printf("cached image %s failed SHA256 check, re-downloading: %v", img.Name, err) + os.Remove(cachedPath) + } else { + return nil + } + } else { + return nil // exists, no hash to verify + } + } + + log.Printf("downloading %s from %s...", img.Name, img.URL) + + req, err := http.NewRequestWithContext(ctx, "GET", img.URL, nil) + if err != nil { + return fmt.Errorf("downloading %s: %w", img.Name, err) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return fmt.Errorf("downloading %s: %w", img.Name, err) + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + return fmt.Errorf("downloading %s: HTTP %s", img.Name, resp.Status) + } + + tmpFile := cachedPath + ".tmp" + f, err := os.Create(tmpFile) + if err != nil { + return err + } + defer func() { + f.Close() + os.Remove(tmpFile) + }() + + h := sha256.New() + w := io.MultiWriter(f, h) + + if _, err := io.Copy(w, resp.Body); err != nil { + return fmt.Errorf("downloading %s: %w", img.Name, err) + } + if err := f.Close(); err != nil { + return err + } + + if img.SHA256 != "" { + got := hex.EncodeToString(h.Sum(nil)) + if got != img.SHA256 { + return fmt.Errorf("SHA256 mismatch for %s: got %s, want %s", img.Name, got, img.SHA256) + } + } + + if err := os.Rename(tmpFile, cachedPath); err != nil { + return err + } + log.Printf("downloaded %s", img.Name) + return nil +} + +// verifySHA256 checks that the file at path has the expected SHA256 hash. +func verifySHA256(path, expected string) error { + f, err := os.Open(path) + if err != nil { + return err + } + defer f.Close() + h := sha256.New() + if _, err := io.Copy(h, f); err != nil { + return err + } + got := hex.EncodeToString(h.Sum(nil)) + if got != expected { + return fmt.Errorf("got %s, want %s", got, expected) + } + return nil +} + +// cachedImagePath returns the filesystem path to the cached image for the given OS. +func cachedImagePath(img OSImage) string { + return filepath.Join(imageCacheDir(), img.Name+".qcow2") +} + +// createOverlay creates a qcow2 overlay image on top of the given base image. +func createOverlay(base, overlay string) error { + out, err := exec.Command("qemu-img", "create", + "-f", "qcow2", + "-F", "qcow2", + "-b", base, + overlay).CombinedOutput() + if err != nil { + return fmt.Errorf("qemu-img create overlay: %v: %s", err, out) + } + return nil +} diff --git a/tstest/natlab/vmtest/qemu.go b/tstest/natlab/vmtest/qemu.go new file mode 100644 index 000000000..03d424010 --- /dev/null +++ b/tstest/natlab/vmtest/qemu.go @@ -0,0 +1,239 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +package vmtest + +import ( + "bytes" + "encoding/json" + "fmt" + "net" + "os" + "os/exec" + "path/filepath" + "regexp" + "strconv" + "time" + + "tailscale.com/tstest/natlab/vnet" +) + +// startQEMU launches a QEMU process for the given node. +func (e *Env) startQEMU(n *Node) error { + if n.os.IsGokrazy { + return e.startGokrazyQEMU(n) + } + return e.startCloudQEMU(n) +} + +// startGokrazyQEMU launches a QEMU process for a gokrazy node. +// This follows the same pattern as tstest/integration/nat/nat_test.go. +func (e *Env) startGokrazyQEMU(n *Node) error { + disk := filepath.Join(e.tempDir, fmt.Sprintf("%s.qcow2", n.name)) + if err := createOverlay(e.gokrazyBase, disk); err != nil { + return err + } + + var envBuf bytes.Buffer + for _, env := range n.vnetNode.Env() { + fmt.Fprintf(&envBuf, " tailscaled.env=%s=%s", env.Key, env.Value) + } + sysLogAddr := net.JoinHostPort(vnet.FakeSyslogIPv4().String(), "995") + if n.vnetNode.IsV6Only() { + sysLogAddr = net.JoinHostPort(vnet.FakeSyslogIPv6().String(), "995") + } + + logPath := filepath.Join(e.tempDir, n.name+".log") + + args := []string{ + "-M", "microvm,isa-serial=off", + "-m", fmt.Sprintf("%dM", n.os.MemoryMB), + "-nodefaults", "-no-user-config", "-nographic", + "-kernel", e.gokrazyKernel, + "-append", "console=hvc0 root=PARTUUID=60c24cc1-f3f9-427a-8199-76baa2d60001/PARTNROFF=1 ro init=/gokrazy/init panic=10 oops=panic pci=off nousb tsc=unstable clocksource=hpet gokrazy.remote_syslog.target=" + sysLogAddr + " tailscale-tta=1" + envBuf.String(), + "-drive", "id=blk0,file=" + disk + ",format=qcow2", + "-device", "virtio-blk-device,drive=blk0", + "-device", "virtio-serial-device", + "-device", "virtio-rng-device", + "-chardev", "file,id=virtiocon0,path=" + logPath, + "-device", "virtconsole,chardev=virtiocon0", + } + + // Add network devices — one per NIC. + for i := range n.vnetNode.NumNICs() { + mac := n.vnetNode.NICMac(i) + netdevID := fmt.Sprintf("net%d", i) + args = append(args, + "-netdev", fmt.Sprintf("stream,id=%s,addr.type=unix,addr.path=%s", netdevID, e.sockAddr), + "-device", fmt.Sprintf("virtio-net-device,netdev=%s,mac=%s", netdevID, mac), + ) + } + + return e.launchQEMU(n.name, logPath, args) +} + +// startCloudQEMU launches a QEMU process for a cloud image (Ubuntu, Debian, etc). +func (e *Env) startCloudQEMU(n *Node) error { + basePath := cachedImagePath(n.os) + disk := filepath.Join(e.tempDir, fmt.Sprintf("%s.qcow2", n.name)) + if err := createOverlay(basePath, disk); err != nil { + return err + } + + // Create a seed ISO with cloud-init config (meta-data, user-data, network-config). + // This MUST be a local ISO (not HTTP) so cloud-init reads network-config during + // init-local, before systemd-networkd-wait-online blocks boot. + seedISO, err := e.createCloudInitISO(n) + if err != nil { + return fmt.Errorf("creating cloud-init ISO: %w", err) + } + + logPath := filepath.Join(e.tempDir, n.name+".log") + qmpSock := filepath.Join(e.tempDir, n.name+"-qmp.sock") + + args := []string{ + "-machine", "q35,accel=kvm", + "-m", fmt.Sprintf("%dM", n.os.MemoryMB), + "-cpu", "host", + "-smp", "2", + "-display", "none", + "-drive", fmt.Sprintf("file=%s,if=virtio", disk), + "-drive", fmt.Sprintf("file=%s,if=virtio,media=cdrom,readonly=on", seedISO), + "-smbios", "type=1,serial=ds=nocloud", + "-serial", "file:" + logPath, + "-qmp", "unix:" + qmpSock + ",server,nowait", + } + + // Add network devices — one per NIC. + // romfile="" disables the iPXE option ROM entirely, saving ~5s per NIC at boot + // and avoiding "duplicate fw_cfg file name" errors with multiple NICs. + for i := range n.vnetNode.NumNICs() { + mac := n.vnetNode.NICMac(i) + netdevID := fmt.Sprintf("net%d", i) + args = append(args, + "-netdev", fmt.Sprintf("stream,id=%s,addr.type=unix,addr.path=%s", netdevID, e.sockAddr), + "-device", fmt.Sprintf("virtio-net-pci,netdev=%s,mac=%s,romfile=", netdevID, mac), + ) + } + + // Add a debug NIC with user-mode networking for SSH access from the host. + // Use port 0 so the OS picks a free port; we query the actual port via QMP after launch. + args = append(args, + "-netdev", "user,id=debug0,hostfwd=tcp:127.0.0.1:0-:22", + "-device", "virtio-net-pci,netdev=debug0,romfile=", + ) + + if err := e.launchQEMU(n.name, logPath, args); err != nil { + return err + } + + // Query QMP to find the actual SSH port that QEMU allocated. + port, err := qmpQueryHostFwd(qmpSock) + if err != nil { + return fmt.Errorf("querying SSH port via QMP: %w", err) + } + n.sshPort = port + e.t.Logf("[%s] SSH debug: ssh -p %d root@127.0.0.1 (password: root)", n.name, port) + return nil +} + +// launchQEMU starts a qemu-system-x86_64 process with the given args. +// VM console output goes to logPath (via QEMU's -serial or -chardev). +// QEMU's own stdout/stderr go to logPath.qemu for diagnostics. +func (e *Env) launchQEMU(name, logPath string, args []string) error { + cmd := exec.Command("qemu-system-x86_64", args...) + // Send stdout/stderr to the log file for any QEMU diagnostic messages. + // Stdin must be /dev/null to prevent QEMU from trying to read. + devNull, err := os.Open(os.DevNull) + if err != nil { + return fmt.Errorf("open /dev/null: %w", err) + } + cmd.Stdin = devNull + qemuLog, err := os.Create(logPath + ".qemu") + if err != nil { + devNull.Close() + return err + } + cmd.Stdout = qemuLog + cmd.Stderr = qemuLog + if err := cmd.Start(); err != nil { + devNull.Close() + qemuLog.Close() + return fmt.Errorf("qemu for %s: %w", name, err) + } + e.t.Logf("launched QEMU for %s (pid %d), log: %s", name, cmd.Process.Pid, logPath) + e.qemuProcs = append(e.qemuProcs, cmd) + e.t.Cleanup(func() { + cmd.Process.Kill() + cmd.Wait() + devNull.Close() + qemuLog.Close() + // Dump tail of VM log on failure for debugging. + if e.t.Failed() { + if data, err := os.ReadFile(logPath); err == nil { + lines := bytes.Split(data, []byte("\n")) + start := 0 + if len(lines) > 50 { + start = len(lines) - 50 + } + e.t.Logf("=== last 50 lines of %s log ===", name) + for _, line := range lines[start:] { + e.t.Logf("[%s] %s", name, line) + } + } + } + }) + return nil +} + +// qmpQueryHostFwd connects to a QEMU QMP socket and queries the host port +// assigned to the first TCP host forward rule (the SSH debug port). +func qmpQueryHostFwd(sockPath string) (int, error) { + // Wait for the QMP socket to appear. + var conn net.Conn + for range 50 { + var err error + conn, err = net.Dial("unix", sockPath) + if err == nil { + break + } + time.Sleep(100 * time.Millisecond) + } + if conn == nil { + return 0, fmt.Errorf("QMP socket %s not available", sockPath) + } + defer conn.Close() + conn.SetDeadline(time.Now().Add(5 * time.Second)) + + // Read the QMP greeting. + var greeting json.RawMessage + dec := json.NewDecoder(conn) + if err := dec.Decode(&greeting); err != nil { + return 0, fmt.Errorf("reading QMP greeting: %w", err) + } + + // Send qmp_capabilities to initialize. + fmt.Fprintf(conn, `{"execute":"qmp_capabilities"}`+"\n") + var capsResp json.RawMessage + if err := dec.Decode(&capsResp); err != nil { + return 0, fmt.Errorf("reading qmp_capabilities response: %w", err) + } + + // Query "info usernet" via human-monitor-command. + fmt.Fprintf(conn, `{"execute":"human-monitor-command","arguments":{"command-line":"info usernet"}}`+"\n") + var hmpResp struct { + Return string `json:"return"` + } + if err := dec.Decode(&hmpResp); err != nil { + return 0, fmt.Errorf("reading info usernet response: %w", err) + } + + // Parse the port from output like: + // TCP[HOST_FORWARD] 12 127.0.0.1 35323 10.0.2.15 22 + re := regexp.MustCompile(`TCP\[HOST_FORWARD\]\s+\d+\s+127\.0\.0\.1\s+(\d+)\s+`) + m := re.FindStringSubmatch(hmpResp.Return) + if m == nil { + return 0, fmt.Errorf("no hostfwd port found in: %s", hmpResp.Return) + } + return strconv.Atoi(m[1]) +} diff --git a/tstest/natlab/vmtest/vmtest.go b/tstest/natlab/vmtest/vmtest.go new file mode 100644 index 000000000..f028a7c6b --- /dev/null +++ b/tstest/natlab/vmtest/vmtest.go @@ -0,0 +1,676 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +// Package vmtest provides a high-level framework for running integration tests +// across multiple QEMU virtual machines connected by natlab's vnet virtual +// network infrastructure. It supports mixed OS types (gokrazy, Ubuntu, Debian) +// and multi-NIC configurations for scenarios like subnet routing. +// +// Prerequisites: +// - qemu-system-x86_64 and KVM access (typically the "kvm" group; no root required) +// - A built gokrazy natlabapp image (auto-built on first run via "make natlab" in gokrazy/) +// +// Run tests with: +// +// go test ./tstest/natlab/vmtest/ --run-vm-tests -v +package vmtest + +import ( + "context" + "flag" + "fmt" + "io" + "net" + "net/http" + "net/netip" + "os" + "os/exec" + "path/filepath" + "slices" + "strings" + "testing" + "time" + + "golang.org/x/sync/errgroup" + "tailscale.com/client/local" + "tailscale.com/ipn" + "tailscale.com/tailcfg" + "tailscale.com/tstest/natlab/vnet" + "tailscale.com/util/set" +) + +var ( + runVMTests = flag.Bool("run-vm-tests", false, "run tests that require VMs with KVM") + verboseVMDebug = flag.Bool("verbose-vm-debug", false, "enable verbose debug logging for VM tests") +) + +// Env is a test environment that manages virtual networks and QEMU VMs. +// Create one with New, add networks and nodes, then call Start. +type Env struct { + t testing.TB + cfg vnet.Config + server *vnet.Server + nodes []*Node + tempDir string + + sockAddr string // shared Unix socket path for all QEMU netdevs + binDir string // directory for compiled binaries + + // gokrazy-specific paths + gokrazyBase string // path to gokrazy base qcow2 image + gokrazyKernel string // path to gokrazy kernel + + qemuProcs []*exec.Cmd // launched QEMU processes +} + +// logVerbosef logs a message only when --verbose-vm-debug is set. +func (e *Env) logVerbosef(format string, args ...any) { + if *verboseVMDebug { + e.t.Helper() + e.t.Logf(format, args...) + } +} + +// New creates a new test environment. It skips the test if --run-vm-tests is not set. +func New(t testing.TB) *Env { + if !*runVMTests { + t.Skip("skipping VM test; set --run-vm-tests to run") + } + + tempDir := t.TempDir() + return &Env{ + t: t, + tempDir: tempDir, + binDir: filepath.Join(tempDir, "bin"), + } +} + +// AddNetwork creates a new virtual network. Arguments follow the same pattern as +// vnet.Config.AddNetwork (string IPs, NAT types, NetworkService values). +func (e *Env) AddNetwork(opts ...any) *vnet.Network { + return e.cfg.AddNetwork(opts...) +} + +// Node represents a virtual machine in the test environment. +type Node struct { + name string + num int // assigned during AddNode + + os OSImage + nets []*vnet.Network + vnetNode *vnet.Node // primary vnet node (set during Start) + agent *vnet.NodeAgentClient + joinTailnet bool + advertiseRoutes string + webServerPort int + sshPort int // host port for SSH debug access (cloud VMs only) +} + +// AddNode creates a new VM node. The name is used for identification and as the +// webserver greeting. Options can be *vnet.Network (for network attachment), +// NodeOption values, or vnet node options (like vnet.TailscaledEnv). +func (e *Env) AddNode(name string, opts ...any) *Node { + n := &Node{ + name: name, + os: Gokrazy, // default + joinTailnet: true, + } + e.nodes = append(e.nodes, n) + + // Separate network options from other options. + var vnetOpts []any + for _, o := range opts { + switch o := o.(type) { + case *vnet.Network: + n.nets = append(n.nets, o) + vnetOpts = append(vnetOpts, o) + case nodeOptOS: + n.os = OSImage(o) + case nodeOptNoTailscale: + n.joinTailnet = false + vnetOpts = append(vnetOpts, vnet.DontJoinTailnet) + case nodeOptAdvertiseRoutes: + n.advertiseRoutes = string(o) + case nodeOptWebServer: + n.webServerPort = int(o) + default: + // Pass through to vnet (TailscaledEnv, NodeOption, MAC, etc.) + vnetOpts = append(vnetOpts, o) + } + } + + n.vnetNode = e.cfg.AddNode(vnetOpts...) + n.num = n.vnetNode.Num() + return n +} + +// LanIP returns the LAN IPv4 address of this node on the given network. +// This is only valid after Env.Start() has been called. +func (n *Node) LanIP(net *vnet.Network) netip.Addr { + return n.vnetNode.LanIP(net) +} + +// NodeOption types for configuring nodes. + +type nodeOptOS OSImage +type nodeOptNoTailscale struct{} +type nodeOptAdvertiseRoutes string +type nodeOptWebServer int + +// OS returns a NodeOption that sets the node's operating system image. +func OS(img OSImage) nodeOptOS { return nodeOptOS(img) } + +// DontJoinTailnet returns a NodeOption that prevents the node from running tailscale up. +func DontJoinTailnet() nodeOptNoTailscale { return nodeOptNoTailscale{} } + +// AdvertiseRoutes returns a NodeOption that configures the node to advertise +// the given routes (comma-separated CIDRs) when joining the tailnet. +func AdvertiseRoutes(routes string) nodeOptAdvertiseRoutes { + return nodeOptAdvertiseRoutes(routes) +} + +// WebServer returns a NodeOption that starts a webserver on the given port. +// The webserver responds with "Hello world I am " on all requests. +func WebServer(port int) nodeOptWebServer { return nodeOptWebServer(port) } + +// Start initializes the virtual network, builds/downloads images, compiles +// binaries, launches QEMU processes, and waits for all TTA agents to connect. +// It should be called after all AddNetwork/AddNode calls. +func (e *Env) Start() { + t := e.t + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + t.Cleanup(cancel) + + if err := os.MkdirAll(e.binDir, 0755); err != nil { + t.Fatal(err) + } + + // Determine if we have any non-gokrazy "cloud" images (e.g. Ubuntu, Debian) + // that require compiled binaries pushed into their image later. (Gokrazy + // has them built-in, so doesn't need the compileBinaries step.) + needBuildBinaries := slices.ContainsFunc(e.nodes, func(n *Node) bool { return !n.os.IsGokrazy }) + + // Compile binaries and download/build images in parallel. + // Any failure cancels the others via the errgroup context. + eg, egCtx := errgroup.WithContext(ctx) + if needBuildBinaries { + eg.Go(func() error { + return e.compileBinaries(egCtx) + }) + } + didOS := set.Set[string]{} // dedup by image name + for _, n := range e.nodes { + if didOS.Contains(n.os.Name) { + continue + } + didOS.Add(n.os.Name) + if n.os.IsGokrazy { + eg.Go(func() error { + return e.ensureGokrazy(egCtx) + }) + } else { + eg.Go(func() error { + return ensureImage(egCtx, n.os) + }) + } + } + if err := eg.Wait(); err != nil { + t.Fatalf("setup: %v", err) + } + + // Create the vnet server. + var err error + e.server, err = vnet.New(&e.cfg) + if err != nil { + t.Fatalf("vnet.New: %v", err) + } + t.Cleanup(func() { e.server.Close() }) + + // Register compiled binaries with the file server VIP. + if needBuildBinaries { + for _, name := range []string{"tta", "tailscale", "tailscaled"} { + data, err := os.ReadFile(filepath.Join(e.binDir, name)) + if err != nil { + t.Fatalf("reading compiled %s: %v", name, err) + } + e.server.RegisterFile(name, data) + } + } + + // Cloud-init config is delivered via local seed ISOs (created in startCloudQEMU), + // not via the cloud-init HTTP VIP, because network-config must be available + // during init-local before systemd-networkd-wait-online blocks. + + // Start Unix socket listener. + e.sockAddr = filepath.Join(e.tempDir, "vnet.sock") + srv, err := net.Listen("unix", e.sockAddr) + if err != nil { + t.Fatalf("listen unix: %v", err) + } + t.Cleanup(func() { srv.Close() }) + + go func() { + for { + c, err := srv.Accept() + if err != nil { + return + } + go e.server.ServeUnixConn(c.(*net.UnixConn), vnet.ProtocolQEMU) + } + }() + + // Launch QEMU processes. + for _, n := range e.nodes { + if err := e.startQEMU(n); err != nil { + t.Fatalf("startQEMU(%s): %v", n.name, err) + } + } + + // Set up agent clients and wait for all agents to connect. + for _, n := range e.nodes { + n.agent = e.server.NodeAgentClient(n.vnetNode) + n.vnetNode.SetClient(n.agent) + } + + // Wait for agents, then bring up tailscale. + var agentEg errgroup.Group + for _, n := range e.nodes { + agentEg.Go(func() error { + t.Logf("[%s] waiting for agent...", n.name) + st, err := n.agent.Status(ctx) + if err != nil { + return fmt.Errorf("[%s] agent status: %w", n.name, err) + } + t.Logf("[%s] agent connected, backend state: %s", n.name, st.BackendState) + + if n.vnetNode.HostFirewall() { + if err := n.agent.EnableHostFirewall(ctx); err != nil { + return fmt.Errorf("[%s] enable firewall: %w", n.name, err) + } + } + + if n.joinTailnet { + if err := e.tailscaleUp(ctx, n); err != nil { + return fmt.Errorf("[%s] tailscale up: %w", n.name, err) + } + st, err = n.agent.Status(ctx) + if err != nil { + return fmt.Errorf("[%s] status after up: %w", n.name, err) + } + if st.BackendState != "Running" { + return fmt.Errorf("[%s] state = %q, want Running", n.name, st.BackendState) + } + t.Logf("[%s] up with %v", n.name, st.Self.TailscaleIPs) + } + + return nil + }) + } + if err := agentEg.Wait(); err != nil { + t.Fatal(err) + } + + // Start webservers. + for _, n := range e.nodes { + if n.webServerPort > 0 { + if err := e.startWebServer(ctx, n); err != nil { + t.Fatalf("startWebServer(%s): %v", n.name, err) + } + } + } +} + +// tailscaleUp runs "tailscale up" on the node via TTA. +func (e *Env) tailscaleUp(ctx context.Context, n *Node) error { + url := "http://unused/up?accept-routes=true" + if n.advertiseRoutes != "" { + url += "&advertise-routes=" + n.advertiseRoutes + } + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return err + } + res, err := n.agent.HTTPClient.Do(req) + if err != nil { + return err + } + defer res.Body.Close() + body, _ := io.ReadAll(res.Body) + if res.StatusCode != 200 { + return fmt.Errorf("tailscale up: %s: %s", res.Status, body) + } + return nil +} + +// startWebServer tells TTA on the node to start a webserver. +func (e *Env) startWebServer(ctx context.Context, n *Node) error { + url := fmt.Sprintf("http://unused/start-webserver?port=%d&name=%s", n.webServerPort, n.name) + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return err + } + res, err := n.agent.HTTPClient.Do(req) + if err != nil { + return err + } + defer res.Body.Close() + if res.StatusCode != 200 { + body, _ := io.ReadAll(res.Body) + return fmt.Errorf("start-webserver: %s: %s", res.Status, body) + } + e.t.Logf("[%s] webserver started on port %d", n.name, n.webServerPort) + return nil +} + +// ApproveRoutes tells the test control server to approve subnet routes +// for the given node. The routes should be CIDR strings. +func (e *Env) ApproveRoutes(n *Node, routes ...string) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + // Get the node's public key from its status. + st, err := n.agent.Status(ctx) + if err != nil { + e.t.Fatalf("ApproveRoutes: status for %s: %v", n.name, err) + } + nodeKey := st.Self.PublicKey + + var prefixes []netip.Prefix + for _, r := range routes { + p, err := netip.ParsePrefix(r) + if err != nil { + e.t.Fatalf("ApproveRoutes: bad route %q: %v", r, err) + } + prefixes = append(prefixes, p) + } + + // Enable --accept-routes on all other tailscale nodes BEFORE setting the + // routes on the control server. This way, when the map update arrives with + // the new peer routes, peers will immediately install them. + for _, other := range e.nodes { + if other == n || !other.joinTailnet { + continue + } + if _, err := other.agent.EditPrefs(ctx, &ipn.MaskedPrefs{ + Prefs: ipn.Prefs{RouteAll: true}, + RouteAllSet: true, + }); err != nil { + e.t.Fatalf("ApproveRoutes: set accept-routes on %s: %v", other.name, err) + } + } + + // Approve the routes on the control server. SetSubnetRoutes notifies all + // peers via updatePeerChanged, so they'll re-fetch their MapResponse. + e.server.ControlServer().SetSubnetRoutes(nodeKey, prefixes) + + // Wait for each peer to see the routes. + for _, r := range routes { + for _, other := range e.nodes { + if other == n || !other.joinTailnet { + continue + } + if !e.waitForPeerRoute(other, r, 15*time.Second) { + e.DumpStatus(other) + e.t.Fatalf("ApproveRoutes: %s never saw route %s", other.name, r) + } + } + } + e.t.Logf("approved routes %v on %s", routes, n.name) + + // Ping the advertiser from each peer to establish WireGuard tunnels. + for _, other := range e.nodes { + if other == n || !other.joinTailnet { + continue + } + e.ping(other, n) + } +} + +// ping pings from one node to another's Tailscale IP, retrying until it succeeds +// or the timeout expires. This establishes the WireGuard tunnel between the nodes. +func (e *Env) ping(from, to *Node) { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + toSt, err := to.agent.Status(ctx) + if err != nil { + e.t.Fatalf("ping: can't get %s status: %v", to.name, err) + } + if len(toSt.Self.TailscaleIPs) == 0 { + e.t.Fatalf("ping: %s has no Tailscale IPs", to.name) + } + targetIP := toSt.Self.TailscaleIPs[0] + + for { + pingCtx, pingCancel := context.WithTimeout(ctx, 3*time.Second) + pr, err := from.agent.PingWithOpts(pingCtx, targetIP, tailcfg.PingDisco, local.PingOpts{}) + pingCancel() + if err == nil && pr.Err == "" { + e.logVerbosef("ping: %s -> %s OK", from.name, targetIP) + return + } + if ctx.Err() != nil { + e.t.Fatalf("ping: %s -> %s timed out", from.name, targetIP) + } + time.Sleep(time.Second) + } +} + +// SSHExec runs a command on a cloud VM via its debug SSH NIC. +// Only works for cloud VMs that have the debug NIC and SSH key configured. +// Returns stdout and any error. +func (e *Env) SSHExec(n *Node, cmd string) (string, error) { + if n.sshPort == 0 { + return "", fmt.Errorf("node %s has no SSH debug port", n.name) + } + sshCmd := exec.Command("ssh", + "-o", "StrictHostKeyChecking=no", + "-o", "UserKnownHostsFile=/dev/null", + "-o", "ConnectTimeout=5", + "-i", "/tmp/vmtest_key", + "-p", fmt.Sprintf("%d", n.sshPort), + "root@127.0.0.1", + cmd) + out, err := sshCmd.CombinedOutput() + return string(out), err +} + +// DumpStatus logs the tailscale status of a node, including its peers and their +// AllowedIPs. Useful for debugging routing issues. +func (e *Env) DumpStatus(n *Node) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + st, err := n.agent.Status(ctx) + if err != nil { + e.t.Logf("[%s] DumpStatus error: %v", n.name, err) + return + } + var selfAllowed []string + if st.Self.AllowedIPs != nil { + for i := range st.Self.AllowedIPs.Len() { + selfAllowed = append(selfAllowed, st.Self.AllowedIPs.At(i).String()) + } + } + var selfPrimary []string + if st.Self.PrimaryRoutes != nil { + for i := range st.Self.PrimaryRoutes.Len() { + selfPrimary = append(selfPrimary, st.Self.PrimaryRoutes.At(i).String()) + } + } + e.t.Logf("[%s] self: %v, backend=%s, AllowedIPs=%v, PrimaryRoutes=%v", n.name, st.Self.TailscaleIPs, st.BackendState, selfAllowed, selfPrimary) + for _, peer := range st.Peer { + var aips []string + if peer.AllowedIPs != nil { + for i := range peer.AllowedIPs.Len() { + aips = append(aips, peer.AllowedIPs.At(i).String()) + } + } + e.t.Logf("[%s] peer %s (%s): AllowedIPs=%v, Online=%v, Relay=%q, CurAddr=%q", + n.name, peer.HostName, peer.TailscaleIPs, + aips, peer.Online, peer.Relay, peer.CurAddr) + } +} + +// waitForPeerRoute polls the node's status until it sees the given route prefix +// in a peer's AllowedIPs, or until timeout. Returns true if found. +func (e *Env) waitForPeerRoute(n *Node, prefix string, timeout time.Duration) bool { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + for { + st, err := n.agent.Status(ctx) + if err != nil { + return false + } + for _, peer := range st.Peer { + if peer.AllowedIPs != nil { + for i := range peer.AllowedIPs.Len() { + if peer.AllowedIPs.At(i).String() == prefix { + return true + } + } + } + } + if ctx.Err() != nil { + return false + } + time.Sleep(time.Second) + } +} + +// HTTPGet makes an HTTP GET request from the given node to the specified URL. +// The request is proxied through TTA's /http-get handler. +func (e *Env) HTTPGet(from *Node, targetURL string) string { + for attempt := range 3 { + ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second) + reqURL := "http://unused/http-get?url=" + targetURL + req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil) + if err != nil { + cancel() + e.t.Fatalf("HTTPGet: %v", err) + } + res, err := from.agent.HTTPClient.Do(req) + cancel() + if err != nil { + e.logVerbosef("HTTPGet attempt %d from %s: %v", attempt+1, from.name, err) + continue + } + body, _ := io.ReadAll(res.Body) + res.Body.Close() + if res.StatusCode == http.StatusBadGateway || res.StatusCode == http.StatusServiceUnavailable { + e.t.Logf("HTTPGet attempt %d from %s: status %d, body: %s", attempt+1, from.name, res.StatusCode, string(body)) + time.Sleep(2 * time.Second) + continue + } + return string(body) + } + e.t.Fatalf("HTTPGet from %s to %s: all attempts failed", from.name, targetURL) + return "" +} + +// ensureGokrazy finds or builds the gokrazy base image and kernel. +func (e *Env) ensureGokrazy(ctx context.Context) error { + if e.gokrazyBase != "" { + return nil // already found + } + + modRoot, err := findModRoot() + if err != nil { + return err + } + + e.gokrazyBase = filepath.Join(modRoot, "gokrazy/natlabapp.qcow2") + if _, err := os.Stat(e.gokrazyBase); err != nil { + if !os.IsNotExist(err) { + return err + } + e.t.Logf("building gokrazy natlab image...") + cmd := exec.CommandContext(ctx, "make", "natlab") + cmd.Dir = filepath.Join(modRoot, "gokrazy") + cmd.Stderr = os.Stderr + cmd.Stdout = os.Stdout + if err := cmd.Run(); err != nil { + return fmt.Errorf("make natlab: %w", err) + } + } + + kernel, err := findKernelPath(filepath.Join(modRoot, "go.mod")) + if err != nil { + return fmt.Errorf("finding kernel: %w", err) + } + e.gokrazyKernel = kernel + return nil +} + +// compileBinaries cross-compiles tta, tailscale, and tailscaled for linux/amd64 +// and places them in e.binDir. +func (e *Env) compileBinaries(ctx context.Context) error { + modRoot, err := findModRoot() + if err != nil { + return err + } + + binaries := []struct{ name, pkg string }{ + {"tta", "./cmd/tta"}, + {"tailscale", "./cmd/tailscale"}, + {"tailscaled", "./cmd/tailscaled"}, + } + + var eg errgroup.Group + for _, bin := range binaries { + eg.Go(func() error { + outPath := filepath.Join(e.binDir, bin.name) + e.t.Logf("compiling %s...", bin.name) + cmd := exec.CommandContext(ctx, "go", "build", "-o", outPath, bin.pkg) + cmd.Dir = modRoot + cmd.Env = append(os.Environ(), "GOOS=linux", "GOARCH=amd64", "CGO_ENABLED=0") + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("building %s: %v\n%s", bin.name, err, out) + } + e.t.Logf("compiled %s", bin.name) + return nil + }) + } + return eg.Wait() +} + +// findModRoot returns the root of the Go module (where go.mod is). +func findModRoot() (string, error) { + out, err := exec.Command("go", "env", "GOMOD").CombinedOutput() + if err != nil { + return "", fmt.Errorf("go env GOMOD: %w", err) + } + gomod := strings.TrimSpace(string(out)) + if gomod == "" || gomod == os.DevNull { + return "", fmt.Errorf("not in a Go module") + } + return filepath.Dir(gomod), nil +} + +// findKernelPath finds the gokrazy kernel vmlinuz path from go.mod. +func findKernelPath(goMod string) (string, error) { + // Import the same logic as nat_test.go. + b, err := os.ReadFile(goMod) + if err != nil { + return "", err + } + + goModCacheB, err := exec.Command("go", "env", "GOMODCACHE").CombinedOutput() + if err != nil { + return "", err + } + goModCache := strings.TrimSpace(string(goModCacheB)) + + // Parse go.mod to find gokrazy-kernel version. + for _, line := range strings.Split(string(b), "\n") { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "github.com/tailscale/gokrazy-kernel") { + parts := strings.Fields(line) + if len(parts) >= 2 { + return filepath.Join(goModCache, parts[0]+"@"+parts[1], "vmlinuz"), nil + } + } + } + return "", fmt.Errorf("gokrazy-kernel not found in %s", goMod) +} diff --git a/tstest/natlab/vmtest/vmtest_test.go b/tstest/natlab/vmtest/vmtest_test.go new file mode 100644 index 000000000..de8199139 --- /dev/null +++ b/tstest/natlab/vmtest/vmtest_test.go @@ -0,0 +1,38 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +package vmtest_test + +import ( + "fmt" + "strings" + "testing" + + "tailscale.com/tstest/natlab/vmtest" + "tailscale.com/tstest/natlab/vnet" +) + +func TestSubnetRouter(t *testing.T) { + env := vmtest.New(t) + + clientNet := env.AddNetwork("2.1.1.1", "192.168.1.1/24", "2000:1::1/64", vnet.EasyNAT) + internalNet := env.AddNetwork("10.0.0.1/24", "2000:2::1/64") + + client := env.AddNode("client", clientNet, + vmtest.OS(vmtest.Gokrazy)) + sr := env.AddNode("subnet-router", clientNet, internalNet, + vmtest.OS(vmtest.Ubuntu2404), + vmtest.AdvertiseRoutes("10.0.0.0/24")) + backend := env.AddNode("backend", internalNet, + vmtest.OS(vmtest.Gokrazy), + vmtest.DontJoinTailnet(), + vmtest.WebServer(8080)) + + env.Start() + env.ApproveRoutes(sr, "10.0.0.0/24") + + body := env.HTTPGet(client, fmt.Sprintf("http://%s:8080/", backend.LanIP(internalNet))) + if !strings.Contains(body, "Hello world I am backend") { + t.Fatalf("got %q", body) + } +} diff --git a/tstest/natlab/vnet/vnet.go b/tstest/natlab/vnet/vnet.go index 2365d03eb..43256dafe 100644 --- a/tstest/natlab/vnet/vnet.go +++ b/tstest/natlab/vnet/vnet.go @@ -294,6 +294,24 @@ func stringifyTEI(tei stack.TransportEndpointID) string { return fmt.Sprintf("%s -> %s", remoteHostPort, localHostPort) } +// vipNameOf returns the VIP name for the given IP, or "" if it's not a VIP. +func vipNameOf(ip netip.Addr) string { + for _, v := range vips { + if v.Match(ip) { + return v.name + } + } + return "" +} + +// nodeNameOf returns the node's name for the given IP on this network, or "" if unknown. +func (n *network) nodeNameOf(ip netip.Addr) string { + if node, ok := n.nodeByIP(ip); ok { + return node.String() + } + return "" +} + func (n *network) acceptTCP(r *tcp.ForwarderRequest) { reqDetails := r.ID() @@ -305,7 +323,17 @@ func (n *network) acceptTCP(r *tcp.ForwarderRequest) { return } - log.Printf("vnet-AcceptTCP: %v", stringifyTEI(reqDetails)) + // Annotate the log with node/VIP names for readability. + srcHP := net.JoinHostPort(clientRemoteIP.String(), strconv.Itoa(int(reqDetails.RemotePort))) + srcStr := srcHP + if name := n.nodeNameOf(clientRemoteIP); name != "" { + srcStr = fmt.Sprintf("%s (%s)", srcHP, name) + } + dstStr := net.JoinHostPort(destIP.String(), strconv.Itoa(int(destPort))) + if name := vipNameOf(destIP); name != "" { + dstStr = fmt.Sprintf("%s (%s)", dstStr, name) + } + log.Printf("vnet-AcceptTCP: %s -> %s", srcStr, dstStr) var wq waiter.Queue ep, err := r.CreateEndpoint(&wq) @@ -1466,6 +1494,12 @@ func (n *network) HandleEthernetPacketForRouter(ep EthernetPacket) { return } + if toForward { + // Traffic to destinations we don't handle (e.g. VMs trying to reach + // the real internet for NTP, package updates, etc). Expected; drop silently. + return + } + n.logf("router got unknown packet: %v", packet) }