vmtest: add VM-based integration test framework
Add tstest/natlab/vmtest, a high-level framework for running multi-VM
integration tests with mixed OS types (gokrazy + Ubuntu/Debian cloud
images) connected via natlab's vnet virtual network.
The vmtest package provides:
- Env type that orchestrates vnet, QEMU processes, and agent connections
- OS image support (Gokrazy, Ubuntu2404, Debian12) with download/cache
- QEMU launch per OS type (microvm for gokrazy, q35+KVM for cloud)
- Cloud-init seed ISO generation with network-config for multi-NIC
- Cross-compilation of test binaries for cloud VMs
- Debug SSH NIC on cloud VMs for interactive debugging
- Test helpers: ApproveRoutes, HTTPGet, TailscalePing, DumpStatus,
WaitForPeerRoute, SSHExec
TTA enhancements (cmd/tta):
- Parameterize /up (accept-routes, advertise-routes, snat-subnet-routes)
- Add /set, /start-webserver, /http-get endpoints
- /http-get uses local.Client.UserDial for Tailscale-routed requests
- Fix /ping for non-gokrazy systems
TestSubnetRouter exercises a 3-VM subnet router scenario:
client (gokrazy) → subnet-router (Ubuntu, dual-NIC) → backend (gokrazy)
Verifies HTTP access to the backend webserver through the Tailscale
subnet route. Passes in ~30 seconds.
Updates tailscale/tailscale#13038
Change-Id: I165b64af241d37f5f5870e796a52502fc56146fa
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
committed by
Brad Fitzpatrick
parent
d948b78b23
commit
ec0b23a21f
+101
-7
@@ -15,6 +15,7 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
@@ -181,7 +182,27 @@ func main() {
|
||||
return
|
||||
})
|
||||
ttaMux.HandleFunc("/up", func(w http.ResponseWriter, r *http.Request) {
|
||||
serveCmd(w, "tailscale", "up", "--login-server=http://control.tailscale")
|
||||
args := []string{"up", "--login-server=http://control.tailscale"}
|
||||
if routes := r.URL.Query().Get("advertise-routes"); routes != "" {
|
||||
args = append(args, "--advertise-routes="+routes)
|
||||
}
|
||||
if snat := r.URL.Query().Get("snat-subnet-routes"); snat != "" {
|
||||
args = append(args, "--snat-subnet-routes="+snat)
|
||||
}
|
||||
if r.URL.Query().Get("accept-routes") == "true" {
|
||||
args = append(args, "--accept-routes")
|
||||
}
|
||||
serveCmd(w, "tailscale", args...)
|
||||
})
|
||||
ttaMux.HandleFunc("/set", func(w http.ResponseWriter, r *http.Request) {
|
||||
args := []string{"set"}
|
||||
if r.URL.Query().Get("accept-routes") == "true" {
|
||||
args = append(args, "--accept-routes")
|
||||
}
|
||||
if routes := r.URL.Query().Get("advertise-routes"); routes != "" {
|
||||
args = append(args, "--advertise-routes="+routes)
|
||||
}
|
||||
serveCmd(w, "tailscale", args...)
|
||||
})
|
||||
ttaMux.HandleFunc("/ip", func(w http.ResponseWriter, r *http.Request) {
|
||||
conn, ok := r.Context().Value(connContextKey).(net.Conn)
|
||||
@@ -192,12 +213,85 @@ func main() {
|
||||
w.Write([]byte(conn.LocalAddr().String()))
|
||||
})
|
||||
ttaMux.HandleFunc("/ping", func(w http.ResponseWriter, r *http.Request) {
|
||||
// Send 4 packets and wait a maximum of 1 second for each. The deadline
|
||||
// is required for ping to return a non-zero exit code on no response.
|
||||
// The busybox in question here is the breakglass busybox inside the
|
||||
// natlab QEMU image - the host running the test does not need to have
|
||||
// busybox installed at that path, or at all.
|
||||
serveCmd(w, "/usr/local/bin/busybox", "ping", "-c", "4", "-W", "1", r.URL.Query().Get("host"))
|
||||
host := r.URL.Query().Get("host")
|
||||
if distro.Get() == distro.Gokrazy {
|
||||
// The busybox in question here is the breakglass busybox inside the
|
||||
// natlab QEMU image.
|
||||
serveCmd(w, "/usr/local/bin/busybox", "ping", "-c", "4", "-W", "1", host)
|
||||
} else {
|
||||
serveCmd(w, "ping", "-c", "4", "-W", "1", host)
|
||||
}
|
||||
})
|
||||
ttaMux.HandleFunc("/start-webserver", func(w http.ResponseWriter, r *http.Request) {
|
||||
port := r.URL.Query().Get("port")
|
||||
name := r.URL.Query().Get("name")
|
||||
if port == "" {
|
||||
http.Error(w, "missing port", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
if name == "" {
|
||||
name = "unnamed"
|
||||
}
|
||||
log.Printf("Starting webserver on port %s as %q", port, name)
|
||||
go func() {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||
fmt.Fprintf(w, "Hello world I am %s", name)
|
||||
})
|
||||
if err := http.ListenAndServe(":"+port, mux); err != nil {
|
||||
log.Printf("webserver on :%s failed: %v", port, err)
|
||||
}
|
||||
}()
|
||||
io.WriteString(w, "OK\n")
|
||||
})
|
||||
ttaMux.HandleFunc("/http-get", func(w http.ResponseWriter, r *http.Request) {
|
||||
targetURL := r.URL.Query().Get("url")
|
||||
if targetURL == "" {
|
||||
http.Error(w, "missing url", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
log.Printf("HTTP GET %s", targetURL)
|
||||
ctx, cancel := context.WithTimeout(r.Context(), 10*time.Second)
|
||||
defer cancel()
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", targetURL, nil)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
// Use Tailscale's SOCKS5 proxy if available, so traffic to Tailscale
|
||||
// subnet routes goes through the WireGuard tunnel instead of the
|
||||
// host network stack (which may not have the routes, especially
|
||||
// in userspace networking mode).
|
||||
client := &http.Client{
|
||||
Transport: &http.Transport{
|
||||
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
|
||||
// Try the Tailscale localapi proxy dialer first.
|
||||
host, portStr, err := net.SplitHostPort(addr)
|
||||
if err != nil {
|
||||
var d net.Dialer
|
||||
return d.DialContext(ctx, network, addr)
|
||||
}
|
||||
port, _ := strconv.ParseUint(portStr, 10, 16)
|
||||
var lc local.Client
|
||||
conn, err := lc.UserDial(ctx, network, host, uint16(port))
|
||||
if err == nil {
|
||||
return conn, nil
|
||||
}
|
||||
log.Printf("http-get: UserDial failed, falling back to direct: %v", err)
|
||||
var d net.Dialer
|
||||
return d.DialContext(ctx, network, addr)
|
||||
},
|
||||
},
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
w.Header().Set("X-Upstream-Status", strconv.Itoa(resp.StatusCode))
|
||||
w.WriteHeader(resp.StatusCode)
|
||||
io.Copy(w, resp.Body)
|
||||
})
|
||||
ttaMux.HandleFunc("/fw", addFirewallHandler)
|
||||
ttaMux.HandleFunc("/logs", func(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
@@ -163,4 +163,4 @@
|
||||
});
|
||||
};
|
||||
}
|
||||
# nix-direnv cache busting line: sha256-PLt+IPqemF3agESg6jV8AzbiOpgL45mJ/AymcNUo7VU=
|
||||
# nix-direnv cache busting line: sha256-GB5riRI9hkutLc2wBzv2jil+Tf6fogLxUw54HRSPNUk=
|
||||
|
||||
@@ -67,6 +67,7 @@ require (
|
||||
github.com/jellydator/ttlcache/v3 v3.1.0
|
||||
github.com/jsimonetti/rtnetlink v1.4.0
|
||||
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
|
||||
github.com/kdomanski/iso9660 v0.4.0
|
||||
github.com/klauspost/compress v1.18.2
|
||||
github.com/kortschak/wol v0.0.0-20200729010619-da482cc4850a
|
||||
github.com/mattn/go-colorable v0.1.13
|
||||
|
||||
+1
-1
@@ -1 +1 @@
|
||||
sha256-PLt+IPqemF3agESg6jV8AzbiOpgL45mJ/AymcNUo7VU=
|
||||
sha256-GB5riRI9hkutLc2wBzv2jil+Tf6fogLxUw54HRSPNUk=
|
||||
|
||||
@@ -753,6 +753,8 @@ github.com/karamaru-alpha/copyloopvar v1.0.8 h1:gieLARwuByhEMxRwM3GRS/juJqFbLraf
|
||||
github.com/karamaru-alpha/copyloopvar v1.0.8/go.mod h1:u7CIfztblY0jZLOQZgH3oYsJzpC2A7S6u/lfgSXHy0k=
|
||||
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
|
||||
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
|
||||
github.com/kdomanski/iso9660 v0.4.0 h1:BPKKdcINz3m0MdjIMwS0wx1nofsOjxOq8TOr45WGHFg=
|
||||
github.com/kdomanski/iso9660 v0.4.0/go.mod h1:OxUSupHsO9ceI8lBLPJKWBTphLemjrCQY8LPXM7qSzU=
|
||||
github.com/kenshaw/evdev v0.1.0 h1:wmtceEOFfilChgdNT+c/djPJ2JineVsQ0N14kGzFRUo=
|
||||
github.com/kenshaw/evdev v0.1.0/go.mod h1:B/fErKCihUyEobz0mjn2qQbHgyJKFQAxkXSvkeeA/Wo=
|
||||
github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4=
|
||||
|
||||
@@ -16,4 +16,4 @@
|
||||
) {
|
||||
src = ./.;
|
||||
}).shellNix
|
||||
# nix-direnv cache busting line: sha256-PLt+IPqemF3agESg6jV8AzbiOpgL45mJ/AymcNUo7VU=
|
||||
# nix-direnv cache busting line: sha256-GB5riRI9hkutLc2wBzv2jil+Tf6fogLxUw54HRSPNUk=
|
||||
|
||||
@@ -0,0 +1,117 @@
|
||||
// Copyright (c) Tailscale Inc & contributors
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package vmtest
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/kdomanski/iso9660"
|
||||
)
|
||||
|
||||
// createCloudInitISO creates a cidata seed ISO for the given cloud VM node.
|
||||
// The ISO contains meta-data, user-data, and network-config files.
|
||||
// Cloud-init reads these during init-local (pre-network), which is critical
|
||||
// for network-config to take effect before systemd-networkd-wait-online runs.
|
||||
func (e *Env) createCloudInitISO(n *Node) (string, error) {
|
||||
metaData := fmt.Sprintf("instance-id: %s\nlocal-hostname: %s\n", n.name, n.name)
|
||||
userData := e.generateUserData(n)
|
||||
|
||||
// Network config: DHCP all ethernet interfaces.
|
||||
// The "optional: true" prevents systemd-networkd-wait-online from blocking.
|
||||
// The first vnet NIC gets the default route (metric 100).
|
||||
// Other interfaces get higher metrics to avoid routing conflicts.
|
||||
networkConfig := `version: 2
|
||||
ethernets:
|
||||
primary:
|
||||
match:
|
||||
macaddress: "` + n.vnetNode.NICMac(0).String() + `"
|
||||
dhcp4: true
|
||||
dhcp4-overrides:
|
||||
route-metric: 100
|
||||
optional: true
|
||||
secondary:
|
||||
match:
|
||||
name: "en*"
|
||||
dhcp4: true
|
||||
dhcp4-overrides:
|
||||
route-metric: 200
|
||||
optional: true
|
||||
`
|
||||
|
||||
iw, err := iso9660.NewWriter()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("creating ISO writer: %w", err)
|
||||
}
|
||||
defer iw.Cleanup()
|
||||
|
||||
for name, content := range map[string]string{
|
||||
"meta-data": metaData,
|
||||
"user-data": userData,
|
||||
"network-config": networkConfig,
|
||||
} {
|
||||
if err := iw.AddFile(strings.NewReader(content), name); err != nil {
|
||||
return "", fmt.Errorf("adding %s to ISO: %w", name, err)
|
||||
}
|
||||
}
|
||||
|
||||
isoPath := filepath.Join(e.tempDir, n.name+"-seed.iso")
|
||||
f, err := os.Create(isoPath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
if err := iw.WriteTo(f, "cidata"); err != nil {
|
||||
return "", fmt.Errorf("writing seed ISO: %w", err)
|
||||
}
|
||||
return isoPath, nil
|
||||
}
|
||||
|
||||
// generateUserData creates the cloud-init user-data (#cloud-config) for a node.
|
||||
func (e *Env) generateUserData(n *Node) string {
|
||||
var ud strings.Builder
|
||||
ud.WriteString("#cloud-config\n")
|
||||
|
||||
// Enable root SSH login for debugging via the debug NIC.
|
||||
ud.WriteString("ssh_pwauth: true\n")
|
||||
ud.WriteString("disable_root: false\n")
|
||||
ud.WriteString("users:\n")
|
||||
ud.WriteString(" - name: root\n")
|
||||
ud.WriteString(" lock_passwd: false\n")
|
||||
ud.WriteString(" plain_text_passwd: root\n")
|
||||
// Also inject the host's SSH key if available.
|
||||
if pubkey, err := os.ReadFile("/tmp/vmtest_key.pub"); err == nil {
|
||||
ud.WriteString(fmt.Sprintf(" ssh_authorized_keys:\n - %s\n", strings.TrimSpace(string(pubkey))))
|
||||
}
|
||||
|
||||
ud.WriteString("runcmd:\n")
|
||||
|
||||
// Remove the default route from the debug NIC (enp0s4) so traffic goes through vnet.
|
||||
// The debug NIC is only for SSH access from the host.
|
||||
ud.WriteString(" - [\"/bin/sh\", \"-c\", \"ip route del default via 10.0.2.2 dev enp0s4 2>/dev/null || true\"]\n")
|
||||
|
||||
// Download binaries from the files.tailscale VIP (52.52.0.6).
|
||||
// Use the IP directly to avoid DNS resolution issues during early boot.
|
||||
for _, bin := range []string{"tailscaled", "tailscale", "tta"} {
|
||||
fmt.Fprintf(&ud, " - [\"/bin/sh\", \"-c\", \"curl -v --retry 10 --retry-delay 2 --retry-all-errors -o /usr/local/bin/%s http://52.52.0.6/%s 2>&1\"]\n", bin, bin)
|
||||
}
|
||||
ud.WriteString(" - [\"chmod\", \"+x\", \"/usr/local/bin/tailscaled\", \"/usr/local/bin/tailscale\", \"/usr/local/bin/tta\"]\n")
|
||||
|
||||
// Enable IP forwarding for subnet routers.
|
||||
if n.advertiseRoutes != "" {
|
||||
ud.WriteString(" - [\"sysctl\", \"-w\", \"net.ipv4.ip_forward=1\"]\n")
|
||||
ud.WriteString(" - [\"sysctl\", \"-w\", \"net.ipv6.conf.all.forwarding=1\"]\n")
|
||||
}
|
||||
|
||||
// Start tailscaled in the background.
|
||||
ud.WriteString(" - [\"/bin/sh\", \"-c\", \"/usr/local/bin/tailscaled --state=mem: &\"]\n")
|
||||
ud.WriteString(" - [\"sleep\", \"2\"]\n")
|
||||
|
||||
// Start tta (Tailscale Test Agent).
|
||||
ud.WriteString(" - [\"/bin/sh\", \"-c\", \"/usr/local/bin/tta &\"]\n")
|
||||
|
||||
return ud.String()
|
||||
}
|
||||
@@ -0,0 +1,170 @@
|
||||
// Copyright (c) Tailscale Inc & contributors
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package vmtest
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// OSImage describes an operating system image that a test VM can boot.
type OSImage struct {
	// Name identifies the image; it is also used as the cache file stem.
	Name string
	// URL is where the cloud image is downloaded from.
	URL string
	// SHA256 is the expected hex SHA256 of the image; empty skips the check.
	SHA256 string
	// MemoryMB is the amount of RAM given to the VM, in megabytes.
	MemoryMB int
	// IsGokrazy marks gokrazy images, which use a different QEMU setup.
	IsGokrazy bool
}

// Gokrazy is a minimal Tailscale appliance image built from the
// gokrazy/natlabapp directory.
var Gokrazy = OSImage{
	Name:      "gokrazy",
	MemoryMB:  384,
	IsGokrazy: true,
}

// Ubuntu2404 is the Ubuntu 24.04 LTS (Noble Numbat) cloud image.
var Ubuntu2404 = OSImage{
	Name:     "ubuntu-24.04",
	URL:      "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img",
	MemoryMB: 1024,
}

// Debian12 is the Debian 12 (Bookworm) generic cloud image.
var Debian12 = OSImage{
	Name:     "debian-12",
	URL:      "https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-generic-amd64.qcow2",
	MemoryMB: 1024,
}
|
||||
|
||||
// imageCacheDir returns the directory for cached VM images.
//
// The VMTEST_CACHE_DIR environment variable, when non-empty, is returned
// as-is. Otherwise the directory is .cache/tailscale/vmtest/images under the
// user's home directory. If the home directory cannot be determined, the
// system temp directory is used as the parent instead, so the result never
// silently becomes a path relative to the current working directory.
func imageCacheDir() string {
	if d := os.Getenv("VMTEST_CACHE_DIR"); d != "" {
		return d
	}
	home, err := os.UserHomeDir()
	if err != nil || home == "" {
		home = os.TempDir()
	}
	return filepath.Join(home, ".cache", "tailscale", "vmtest", "images")
}
|
||||
|
||||
// ensureImage downloads and caches the OS image if not already present.
|
||||
func ensureImage(ctx context.Context, img OSImage) error {
|
||||
if img.IsGokrazy {
|
||||
return nil // gokrazy images are handled separately
|
||||
}
|
||||
|
||||
cacheDir := imageCacheDir()
|
||||
if err := os.MkdirAll(cacheDir, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Use a filename based on the image name.
|
||||
cachedPath := filepath.Join(cacheDir, img.Name+".qcow2")
|
||||
if _, err := os.Stat(cachedPath); err == nil {
|
||||
// If we have a SHA256 to verify, check it.
|
||||
if img.SHA256 != "" {
|
||||
if err := verifySHA256(cachedPath, img.SHA256); err != nil {
|
||||
log.Printf("cached image %s failed SHA256 check, re-downloading: %v", img.Name, err)
|
||||
os.Remove(cachedPath)
|
||||
} else {
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
return nil // exists, no hash to verify
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("downloading %s from %s...", img.Name, img.URL)
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", img.URL, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("downloading %s: %w", img.Name, err)
|
||||
}
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("downloading %s: %w", img.Name, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != 200 {
|
||||
return fmt.Errorf("downloading %s: HTTP %s", img.Name, resp.Status)
|
||||
}
|
||||
|
||||
tmpFile := cachedPath + ".tmp"
|
||||
f, err := os.Create(tmpFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
f.Close()
|
||||
os.Remove(tmpFile)
|
||||
}()
|
||||
|
||||
h := sha256.New()
|
||||
w := io.MultiWriter(f, h)
|
||||
|
||||
if _, err := io.Copy(w, resp.Body); err != nil {
|
||||
return fmt.Errorf("downloading %s: %w", img.Name, err)
|
||||
}
|
||||
if err := f.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if img.SHA256 != "" {
|
||||
got := hex.EncodeToString(h.Sum(nil))
|
||||
if got != img.SHA256 {
|
||||
return fmt.Errorf("SHA256 mismatch for %s: got %s, want %s", img.Name, got, img.SHA256)
|
||||
}
|
||||
}
|
||||
|
||||
if err := os.Rename(tmpFile, cachedPath); err != nil {
|
||||
return err
|
||||
}
|
||||
log.Printf("downloaded %s", img.Name)
|
||||
return nil
|
||||
}
|
||||
|
||||
// verifySHA256 reports whether the file at path hashes to expected, a
// hex-encoded SHA256 digest. It returns nil on a match, the underlying I/O
// error if the file cannot be opened or read, and a "got ..., want ..." error
// on a mismatch.
func verifySHA256(path, expected string) error {
	file, err := os.Open(path)
	if err != nil {
		return err
	}
	defer file.Close()

	hasher := sha256.New()
	if _, err = io.Copy(hasher, file); err != nil {
		return err
	}

	if sum := hex.EncodeToString(hasher.Sum(nil)); sum != expected {
		return fmt.Errorf("got %s, want %s", sum, expected)
	}
	return nil
}
|
||||
|
||||
// cachedImagePath returns the filesystem path to the cached image for the given OS.
|
||||
func cachedImagePath(img OSImage) string {
|
||||
return filepath.Join(imageCacheDir(), img.Name+".qcow2")
|
||||
}
|
||||
|
||||
// createOverlay runs "qemu-img create" to make a qcow2 overlay image at
// overlay, backed by the qcow2 image at base. On failure the returned error
// includes qemu-img's combined stdout/stderr.
func createOverlay(base, overlay string) error {
	cmd := exec.Command("qemu-img", "create", "-f", "qcow2", "-F", "qcow2", "-b", base, overlay)
	if output, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("qemu-img create overlay: %v: %s", err, output)
	}
	return nil
}
|
||||
@@ -0,0 +1,239 @@
|
||||
// Copyright (c) Tailscale Inc & contributors
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package vmtest
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"tailscale.com/tstest/natlab/vnet"
|
||||
)
|
||||
|
||||
// startQEMU launches a QEMU process for the given node.
|
||||
func (e *Env) startQEMU(n *Node) error {
|
||||
if n.os.IsGokrazy {
|
||||
return e.startGokrazyQEMU(n)
|
||||
}
|
||||
return e.startCloudQEMU(n)
|
||||
}
|
||||
|
||||
// startGokrazyQEMU launches a QEMU process for a gokrazy node.
|
||||
// This follows the same pattern as tstest/integration/nat/nat_test.go.
|
||||
func (e *Env) startGokrazyQEMU(n *Node) error {
|
||||
disk := filepath.Join(e.tempDir, fmt.Sprintf("%s.qcow2", n.name))
|
||||
if err := createOverlay(e.gokrazyBase, disk); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var envBuf bytes.Buffer
|
||||
for _, env := range n.vnetNode.Env() {
|
||||
fmt.Fprintf(&envBuf, " tailscaled.env=%s=%s", env.Key, env.Value)
|
||||
}
|
||||
sysLogAddr := net.JoinHostPort(vnet.FakeSyslogIPv4().String(), "995")
|
||||
if n.vnetNode.IsV6Only() {
|
||||
sysLogAddr = net.JoinHostPort(vnet.FakeSyslogIPv6().String(), "995")
|
||||
}
|
||||
|
||||
logPath := filepath.Join(e.tempDir, n.name+".log")
|
||||
|
||||
args := []string{
|
||||
"-M", "microvm,isa-serial=off",
|
||||
"-m", fmt.Sprintf("%dM", n.os.MemoryMB),
|
||||
"-nodefaults", "-no-user-config", "-nographic",
|
||||
"-kernel", e.gokrazyKernel,
|
||||
"-append", "console=hvc0 root=PARTUUID=60c24cc1-f3f9-427a-8199-76baa2d60001/PARTNROFF=1 ro init=/gokrazy/init panic=10 oops=panic pci=off nousb tsc=unstable clocksource=hpet gokrazy.remote_syslog.target=" + sysLogAddr + " tailscale-tta=1" + envBuf.String(),
|
||||
"-drive", "id=blk0,file=" + disk + ",format=qcow2",
|
||||
"-device", "virtio-blk-device,drive=blk0",
|
||||
"-device", "virtio-serial-device",
|
||||
"-device", "virtio-rng-device",
|
||||
"-chardev", "file,id=virtiocon0,path=" + logPath,
|
||||
"-device", "virtconsole,chardev=virtiocon0",
|
||||
}
|
||||
|
||||
// Add network devices — one per NIC.
|
||||
for i := range n.vnetNode.NumNICs() {
|
||||
mac := n.vnetNode.NICMac(i)
|
||||
netdevID := fmt.Sprintf("net%d", i)
|
||||
args = append(args,
|
||||
"-netdev", fmt.Sprintf("stream,id=%s,addr.type=unix,addr.path=%s", netdevID, e.sockAddr),
|
||||
"-device", fmt.Sprintf("virtio-net-device,netdev=%s,mac=%s", netdevID, mac),
|
||||
)
|
||||
}
|
||||
|
||||
return e.launchQEMU(n.name, logPath, args)
|
||||
}
|
||||
|
||||
// startCloudQEMU launches a QEMU process for a cloud image (Ubuntu, Debian, etc).
|
||||
func (e *Env) startCloudQEMU(n *Node) error {
|
||||
basePath := cachedImagePath(n.os)
|
||||
disk := filepath.Join(e.tempDir, fmt.Sprintf("%s.qcow2", n.name))
|
||||
if err := createOverlay(basePath, disk); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create a seed ISO with cloud-init config (meta-data, user-data, network-config).
|
||||
// This MUST be a local ISO (not HTTP) so cloud-init reads network-config during
|
||||
// init-local, before systemd-networkd-wait-online blocks boot.
|
||||
seedISO, err := e.createCloudInitISO(n)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating cloud-init ISO: %w", err)
|
||||
}
|
||||
|
||||
logPath := filepath.Join(e.tempDir, n.name+".log")
|
||||
qmpSock := filepath.Join(e.tempDir, n.name+"-qmp.sock")
|
||||
|
||||
args := []string{
|
||||
"-machine", "q35,accel=kvm",
|
||||
"-m", fmt.Sprintf("%dM", n.os.MemoryMB),
|
||||
"-cpu", "host",
|
||||
"-smp", "2",
|
||||
"-display", "none",
|
||||
"-drive", fmt.Sprintf("file=%s,if=virtio", disk),
|
||||
"-drive", fmt.Sprintf("file=%s,if=virtio,media=cdrom,readonly=on", seedISO),
|
||||
"-smbios", "type=1,serial=ds=nocloud",
|
||||
"-serial", "file:" + logPath,
|
||||
"-qmp", "unix:" + qmpSock + ",server,nowait",
|
||||
}
|
||||
|
||||
// Add network devices — one per NIC.
|
||||
// romfile="" disables the iPXE option ROM entirely, saving ~5s per NIC at boot
|
||||
// and avoiding "duplicate fw_cfg file name" errors with multiple NICs.
|
||||
for i := range n.vnetNode.NumNICs() {
|
||||
mac := n.vnetNode.NICMac(i)
|
||||
netdevID := fmt.Sprintf("net%d", i)
|
||||
args = append(args,
|
||||
"-netdev", fmt.Sprintf("stream,id=%s,addr.type=unix,addr.path=%s", netdevID, e.sockAddr),
|
||||
"-device", fmt.Sprintf("virtio-net-pci,netdev=%s,mac=%s,romfile=", netdevID, mac),
|
||||
)
|
||||
}
|
||||
|
||||
// Add a debug NIC with user-mode networking for SSH access from the host.
|
||||
// Use port 0 so the OS picks a free port; we query the actual port via QMP after launch.
|
||||
args = append(args,
|
||||
"-netdev", "user,id=debug0,hostfwd=tcp:127.0.0.1:0-:22",
|
||||
"-device", "virtio-net-pci,netdev=debug0,romfile=",
|
||||
)
|
||||
|
||||
if err := e.launchQEMU(n.name, logPath, args); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Query QMP to find the actual SSH port that QEMU allocated.
|
||||
port, err := qmpQueryHostFwd(qmpSock)
|
||||
if err != nil {
|
||||
return fmt.Errorf("querying SSH port via QMP: %w", err)
|
||||
}
|
||||
n.sshPort = port
|
||||
e.t.Logf("[%s] SSH debug: ssh -p %d root@127.0.0.1 (password: root)", n.name, port)
|
||||
return nil
|
||||
}
|
||||
|
||||
// launchQEMU starts a qemu-system-x86_64 process with the given args.
|
||||
// VM console output goes to logPath (via QEMU's -serial or -chardev).
|
||||
// QEMU's own stdout/stderr go to logPath.qemu for diagnostics.
|
||||
func (e *Env) launchQEMU(name, logPath string, args []string) error {
|
||||
cmd := exec.Command("qemu-system-x86_64", args...)
|
||||
// Send stdout/stderr to the log file for any QEMU diagnostic messages.
|
||||
// Stdin must be /dev/null to prevent QEMU from trying to read.
|
||||
devNull, err := os.Open(os.DevNull)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open /dev/null: %w", err)
|
||||
}
|
||||
cmd.Stdin = devNull
|
||||
qemuLog, err := os.Create(logPath + ".qemu")
|
||||
if err != nil {
|
||||
devNull.Close()
|
||||
return err
|
||||
}
|
||||
cmd.Stdout = qemuLog
|
||||
cmd.Stderr = qemuLog
|
||||
if err := cmd.Start(); err != nil {
|
||||
devNull.Close()
|
||||
qemuLog.Close()
|
||||
return fmt.Errorf("qemu for %s: %w", name, err)
|
||||
}
|
||||
e.t.Logf("launched QEMU for %s (pid %d), log: %s", name, cmd.Process.Pid, logPath)
|
||||
e.qemuProcs = append(e.qemuProcs, cmd)
|
||||
e.t.Cleanup(func() {
|
||||
cmd.Process.Kill()
|
||||
cmd.Wait()
|
||||
devNull.Close()
|
||||
qemuLog.Close()
|
||||
// Dump tail of VM log on failure for debugging.
|
||||
if e.t.Failed() {
|
||||
if data, err := os.ReadFile(logPath); err == nil {
|
||||
lines := bytes.Split(data, []byte("\n"))
|
||||
start := 0
|
||||
if len(lines) > 50 {
|
||||
start = len(lines) - 50
|
||||
}
|
||||
e.t.Logf("=== last 50 lines of %s log ===", name)
|
||||
for _, line := range lines[start:] {
|
||||
e.t.Logf("[%s] %s", name, line)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
// qmpQueryHostFwd connects to a QEMU QMP socket and queries the host port
|
||||
// assigned to the first TCP host forward rule (the SSH debug port).
|
||||
func qmpQueryHostFwd(sockPath string) (int, error) {
|
||||
// Wait for the QMP socket to appear.
|
||||
var conn net.Conn
|
||||
for range 50 {
|
||||
var err error
|
||||
conn, err = net.Dial("unix", sockPath)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
if conn == nil {
|
||||
return 0, fmt.Errorf("QMP socket %s not available", sockPath)
|
||||
}
|
||||
defer conn.Close()
|
||||
conn.SetDeadline(time.Now().Add(5 * time.Second))
|
||||
|
||||
// Read the QMP greeting.
|
||||
var greeting json.RawMessage
|
||||
dec := json.NewDecoder(conn)
|
||||
if err := dec.Decode(&greeting); err != nil {
|
||||
return 0, fmt.Errorf("reading QMP greeting: %w", err)
|
||||
}
|
||||
|
||||
// Send qmp_capabilities to initialize.
|
||||
fmt.Fprintf(conn, `{"execute":"qmp_capabilities"}`+"\n")
|
||||
var capsResp json.RawMessage
|
||||
if err := dec.Decode(&capsResp); err != nil {
|
||||
return 0, fmt.Errorf("reading qmp_capabilities response: %w", err)
|
||||
}
|
||||
|
||||
// Query "info usernet" via human-monitor-command.
|
||||
fmt.Fprintf(conn, `{"execute":"human-monitor-command","arguments":{"command-line":"info usernet"}}`+"\n")
|
||||
var hmpResp struct {
|
||||
Return string `json:"return"`
|
||||
}
|
||||
if err := dec.Decode(&hmpResp); err != nil {
|
||||
return 0, fmt.Errorf("reading info usernet response: %w", err)
|
||||
}
|
||||
|
||||
// Parse the port from output like:
|
||||
// TCP[HOST_FORWARD] 12 127.0.0.1 35323 10.0.2.15 22
|
||||
re := regexp.MustCompile(`TCP\[HOST_FORWARD\]\s+\d+\s+127\.0\.0\.1\s+(\d+)\s+`)
|
||||
m := re.FindStringSubmatch(hmpResp.Return)
|
||||
if m == nil {
|
||||
return 0, fmt.Errorf("no hostfwd port found in: %s", hmpResp.Return)
|
||||
}
|
||||
return strconv.Atoi(m[1])
|
||||
}
|
||||
@@ -0,0 +1,676 @@
|
||||
// Copyright (c) Tailscale Inc & contributors
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
// Package vmtest provides a high-level framework for running integration tests
|
||||
// across multiple QEMU virtual machines connected by natlab's vnet virtual
|
||||
// network infrastructure. It supports mixed OS types (gokrazy, Ubuntu, Debian)
|
||||
// and multi-NIC configurations for scenarios like subnet routing.
|
||||
//
|
||||
// Prerequisites:
|
||||
// - qemu-system-x86_64 and KVM access (typically the "kvm" group; no root required)
|
||||
// - A built gokrazy natlabapp image (auto-built on first run via "make natlab" in gokrazy/)
|
||||
//
|
||||
// Run tests with:
|
||||
//
|
||||
// go test ./tstest/natlab/vmtest/ --run-vm-tests -v
|
||||
package vmtest
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/netip"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sync/errgroup"
|
||||
"tailscale.com/client/local"
|
||||
"tailscale.com/ipn"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/tstest/natlab/vnet"
|
||||
"tailscale.com/util/set"
|
||||
)
|
||||
|
||||
// runVMTests gates every VM test; it is set with --run-vm-tests.
var runVMTests = flag.Bool("run-vm-tests", false, "run tests that require VMs with KVM")

// verboseVMDebug turns on extra debug logging; it is set with --verbose-vm-debug.
var verboseVMDebug = flag.Bool("verbose-vm-debug", false, "enable verbose debug logging for VM tests")
|
||||
|
||||
// Env is a test environment that manages virtual networks and QEMU VMs.
|
||||
// Create one with New, add networks and nodes, then call Start.
|
||||
type Env struct {
|
||||
t testing.TB
|
||||
cfg vnet.Config
|
||||
server *vnet.Server
|
||||
nodes []*Node
|
||||
tempDir string
|
||||
|
||||
sockAddr string // shared Unix socket path for all QEMU netdevs
|
||||
binDir string // directory for compiled binaries
|
||||
|
||||
// gokrazy-specific paths
|
||||
gokrazyBase string // path to gokrazy base qcow2 image
|
||||
gokrazyKernel string // path to gokrazy kernel
|
||||
|
||||
qemuProcs []*exec.Cmd // launched QEMU processes
|
||||
}
|
||||
|
||||
// logVerbosef logs a message only when --verbose-vm-debug is set.
|
||||
func (e *Env) logVerbosef(format string, args ...any) {
|
||||
if *verboseVMDebug {
|
||||
e.t.Helper()
|
||||
e.t.Logf(format, args...)
|
||||
}
|
||||
}
|
||||
|
||||
// New creates a new test environment. It skips the test if --run-vm-tests is not set.
|
||||
func New(t testing.TB) *Env {
|
||||
if !*runVMTests {
|
||||
t.Skip("skipping VM test; set --run-vm-tests to run")
|
||||
}
|
||||
|
||||
tempDir := t.TempDir()
|
||||
return &Env{
|
||||
t: t,
|
||||
tempDir: tempDir,
|
||||
binDir: filepath.Join(tempDir, "bin"),
|
||||
}
|
||||
}
|
||||
|
||||
// AddNetwork creates a new virtual network. Arguments follow the same pattern as
|
||||
// vnet.Config.AddNetwork (string IPs, NAT types, NetworkService values).
|
||||
func (e *Env) AddNetwork(opts ...any) *vnet.Network {
|
||||
return e.cfg.AddNetwork(opts...)
|
||||
}
|
||||
|
||||
// Node represents a virtual machine in the test environment.
|
||||
type Node struct {
|
||||
name string
|
||||
num int // assigned during AddNode
|
||||
|
||||
os OSImage
|
||||
nets []*vnet.Network
|
||||
vnetNode *vnet.Node // primary vnet node (set during Start)
|
||||
agent *vnet.NodeAgentClient
|
||||
joinTailnet bool
|
||||
advertiseRoutes string
|
||||
webServerPort int
|
||||
sshPort int // host port for SSH debug access (cloud VMs only)
|
||||
}
|
||||
|
||||
// AddNode creates a new VM node. The name is used for identification and as the
|
||||
// webserver greeting. Options can be *vnet.Network (for network attachment),
|
||||
// NodeOption values, or vnet node options (like vnet.TailscaledEnv).
|
||||
func (e *Env) AddNode(name string, opts ...any) *Node {
|
||||
n := &Node{
|
||||
name: name,
|
||||
os: Gokrazy, // default
|
||||
joinTailnet: true,
|
||||
}
|
||||
e.nodes = append(e.nodes, n)
|
||||
|
||||
// Separate network options from other options.
|
||||
var vnetOpts []any
|
||||
for _, o := range opts {
|
||||
switch o := o.(type) {
|
||||
case *vnet.Network:
|
||||
n.nets = append(n.nets, o)
|
||||
vnetOpts = append(vnetOpts, o)
|
||||
case nodeOptOS:
|
||||
n.os = OSImage(o)
|
||||
case nodeOptNoTailscale:
|
||||
n.joinTailnet = false
|
||||
vnetOpts = append(vnetOpts, vnet.DontJoinTailnet)
|
||||
case nodeOptAdvertiseRoutes:
|
||||
n.advertiseRoutes = string(o)
|
||||
case nodeOptWebServer:
|
||||
n.webServerPort = int(o)
|
||||
default:
|
||||
// Pass through to vnet (TailscaledEnv, NodeOption, MAC, etc.)
|
||||
vnetOpts = append(vnetOpts, o)
|
||||
}
|
||||
}
|
||||
|
||||
n.vnetNode = e.cfg.AddNode(vnetOpts...)
|
||||
n.num = n.vnetNode.Num()
|
||||
return n
|
||||
}
|
||||
|
||||
// LanIP returns the LAN IPv4 address of this node on the given network.
|
||||
// This is only valid after Env.Start() has been called.
|
||||
func (n *Node) LanIP(net *vnet.Network) netip.Addr {
|
||||
return n.vnetNode.LanIP(net)
|
||||
}
|
||||
|
||||
// NodeOption types for configuring nodes.
|
||||
|
||||
type nodeOptOS OSImage
|
||||
type nodeOptNoTailscale struct{}
|
||||
type nodeOptAdvertiseRoutes string
|
||||
type nodeOptWebServer int
|
||||
|
||||
// OS returns a NodeOption that sets the node's operating system image.
|
||||
func OS(img OSImage) nodeOptOS { return nodeOptOS(img) }
|
||||
|
||||
// DontJoinTailnet returns a NodeOption that prevents the node from running tailscale up.
|
||||
func DontJoinTailnet() nodeOptNoTailscale { return nodeOptNoTailscale{} }
|
||||
|
||||
// AdvertiseRoutes returns a NodeOption that configures the node to advertise
|
||||
// the given routes (comma-separated CIDRs) when joining the tailnet.
|
||||
func AdvertiseRoutes(routes string) nodeOptAdvertiseRoutes {
|
||||
return nodeOptAdvertiseRoutes(routes)
|
||||
}
|
||||
|
||||
// WebServer returns a NodeOption that starts a webserver on the given port.
|
||||
// The webserver responds with "Hello world I am <nodename>" on all requests.
|
||||
func WebServer(port int) nodeOptWebServer { return nodeOptWebServer(port) }
|
||||
|
||||
// Start initializes the virtual network, builds/downloads images, compiles
|
||||
// binaries, launches QEMU processes, and waits for all TTA agents to connect.
|
||||
// It should be called after all AddNetwork/AddNode calls.
|
||||
func (e *Env) Start() {
|
||||
t := e.t
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
|
||||
t.Cleanup(cancel)
|
||||
|
||||
if err := os.MkdirAll(e.binDir, 0755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Determine if we have any non-gokrazy "cloud" images (e.g. Ubuntu, Debian)
|
||||
// that require compiled binaries pushed into their image later. (Gokrazy
|
||||
// has them built-in, so doesn't need the compileBinaries step.)
|
||||
needBuildBinaries := slices.ContainsFunc(e.nodes, func(n *Node) bool { return !n.os.IsGokrazy })
|
||||
|
||||
// Compile binaries and download/build images in parallel.
|
||||
// Any failure cancels the others via the errgroup context.
|
||||
eg, egCtx := errgroup.WithContext(ctx)
|
||||
if needBuildBinaries {
|
||||
eg.Go(func() error {
|
||||
return e.compileBinaries(egCtx)
|
||||
})
|
||||
}
|
||||
didOS := set.Set[string]{} // dedup by image name
|
||||
for _, n := range e.nodes {
|
||||
if didOS.Contains(n.os.Name) {
|
||||
continue
|
||||
}
|
||||
didOS.Add(n.os.Name)
|
||||
if n.os.IsGokrazy {
|
||||
eg.Go(func() error {
|
||||
return e.ensureGokrazy(egCtx)
|
||||
})
|
||||
} else {
|
||||
eg.Go(func() error {
|
||||
return ensureImage(egCtx, n.os)
|
||||
})
|
||||
}
|
||||
}
|
||||
if err := eg.Wait(); err != nil {
|
||||
t.Fatalf("setup: %v", err)
|
||||
}
|
||||
|
||||
// Create the vnet server.
|
||||
var err error
|
||||
e.server, err = vnet.New(&e.cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("vnet.New: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { e.server.Close() })
|
||||
|
||||
// Register compiled binaries with the file server VIP.
|
||||
if needBuildBinaries {
|
||||
for _, name := range []string{"tta", "tailscale", "tailscaled"} {
|
||||
data, err := os.ReadFile(filepath.Join(e.binDir, name))
|
||||
if err != nil {
|
||||
t.Fatalf("reading compiled %s: %v", name, err)
|
||||
}
|
||||
e.server.RegisterFile(name, data)
|
||||
}
|
||||
}
|
||||
|
||||
// Cloud-init config is delivered via local seed ISOs (created in startCloudQEMU),
|
||||
// not via the cloud-init HTTP VIP, because network-config must be available
|
||||
// during init-local before systemd-networkd-wait-online blocks.
|
||||
|
||||
// Start Unix socket listener.
|
||||
e.sockAddr = filepath.Join(e.tempDir, "vnet.sock")
|
||||
srv, err := net.Listen("unix", e.sockAddr)
|
||||
if err != nil {
|
||||
t.Fatalf("listen unix: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { srv.Close() })
|
||||
|
||||
go func() {
|
||||
for {
|
||||
c, err := srv.Accept()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
go e.server.ServeUnixConn(c.(*net.UnixConn), vnet.ProtocolQEMU)
|
||||
}
|
||||
}()
|
||||
|
||||
// Launch QEMU processes.
|
||||
for _, n := range e.nodes {
|
||||
if err := e.startQEMU(n); err != nil {
|
||||
t.Fatalf("startQEMU(%s): %v", n.name, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Set up agent clients and wait for all agents to connect.
|
||||
for _, n := range e.nodes {
|
||||
n.agent = e.server.NodeAgentClient(n.vnetNode)
|
||||
n.vnetNode.SetClient(n.agent)
|
||||
}
|
||||
|
||||
// Wait for agents, then bring up tailscale.
|
||||
var agentEg errgroup.Group
|
||||
for _, n := range e.nodes {
|
||||
agentEg.Go(func() error {
|
||||
t.Logf("[%s] waiting for agent...", n.name)
|
||||
st, err := n.agent.Status(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("[%s] agent status: %w", n.name, err)
|
||||
}
|
||||
t.Logf("[%s] agent connected, backend state: %s", n.name, st.BackendState)
|
||||
|
||||
if n.vnetNode.HostFirewall() {
|
||||
if err := n.agent.EnableHostFirewall(ctx); err != nil {
|
||||
return fmt.Errorf("[%s] enable firewall: %w", n.name, err)
|
||||
}
|
||||
}
|
||||
|
||||
if n.joinTailnet {
|
||||
if err := e.tailscaleUp(ctx, n); err != nil {
|
||||
return fmt.Errorf("[%s] tailscale up: %w", n.name, err)
|
||||
}
|
||||
st, err = n.agent.Status(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("[%s] status after up: %w", n.name, err)
|
||||
}
|
||||
if st.BackendState != "Running" {
|
||||
return fmt.Errorf("[%s] state = %q, want Running", n.name, st.BackendState)
|
||||
}
|
||||
t.Logf("[%s] up with %v", n.name, st.Self.TailscaleIPs)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
}
|
||||
if err := agentEg.Wait(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Start webservers.
|
||||
for _, n := range e.nodes {
|
||||
if n.webServerPort > 0 {
|
||||
if err := e.startWebServer(ctx, n); err != nil {
|
||||
t.Fatalf("startWebServer(%s): %v", n.name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// tailscaleUp runs "tailscale up" on the node via TTA.
|
||||
func (e *Env) tailscaleUp(ctx context.Context, n *Node) error {
|
||||
url := "http://unused/up?accept-routes=true"
|
||||
if n.advertiseRoutes != "" {
|
||||
url += "&advertise-routes=" + n.advertiseRoutes
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
res, err := n.agent.HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
body, _ := io.ReadAll(res.Body)
|
||||
if res.StatusCode != 200 {
|
||||
return fmt.Errorf("tailscale up: %s: %s", res.Status, body)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// startWebServer tells TTA on the node to start a webserver.
|
||||
func (e *Env) startWebServer(ctx context.Context, n *Node) error {
|
||||
url := fmt.Sprintf("http://unused/start-webserver?port=%d&name=%s", n.webServerPort, n.name)
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
res, err := n.agent.HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != 200 {
|
||||
body, _ := io.ReadAll(res.Body)
|
||||
return fmt.Errorf("start-webserver: %s: %s", res.Status, body)
|
||||
}
|
||||
e.t.Logf("[%s] webserver started on port %d", n.name, n.webServerPort)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ApproveRoutes tells the test control server to approve subnet routes
|
||||
// for the given node. The routes should be CIDR strings.
|
||||
func (e *Env) ApproveRoutes(n *Node, routes ...string) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// Get the node's public key from its status.
|
||||
st, err := n.agent.Status(ctx)
|
||||
if err != nil {
|
||||
e.t.Fatalf("ApproveRoutes: status for %s: %v", n.name, err)
|
||||
}
|
||||
nodeKey := st.Self.PublicKey
|
||||
|
||||
var prefixes []netip.Prefix
|
||||
for _, r := range routes {
|
||||
p, err := netip.ParsePrefix(r)
|
||||
if err != nil {
|
||||
e.t.Fatalf("ApproveRoutes: bad route %q: %v", r, err)
|
||||
}
|
||||
prefixes = append(prefixes, p)
|
||||
}
|
||||
|
||||
// Enable --accept-routes on all other tailscale nodes BEFORE setting the
|
||||
// routes on the control server. This way, when the map update arrives with
|
||||
// the new peer routes, peers will immediately install them.
|
||||
for _, other := range e.nodes {
|
||||
if other == n || !other.joinTailnet {
|
||||
continue
|
||||
}
|
||||
if _, err := other.agent.EditPrefs(ctx, &ipn.MaskedPrefs{
|
||||
Prefs: ipn.Prefs{RouteAll: true},
|
||||
RouteAllSet: true,
|
||||
}); err != nil {
|
||||
e.t.Fatalf("ApproveRoutes: set accept-routes on %s: %v", other.name, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Approve the routes on the control server. SetSubnetRoutes notifies all
|
||||
// peers via updatePeerChanged, so they'll re-fetch their MapResponse.
|
||||
e.server.ControlServer().SetSubnetRoutes(nodeKey, prefixes)
|
||||
|
||||
// Wait for each peer to see the routes.
|
||||
for _, r := range routes {
|
||||
for _, other := range e.nodes {
|
||||
if other == n || !other.joinTailnet {
|
||||
continue
|
||||
}
|
||||
if !e.waitForPeerRoute(other, r, 15*time.Second) {
|
||||
e.DumpStatus(other)
|
||||
e.t.Fatalf("ApproveRoutes: %s never saw route %s", other.name, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
e.t.Logf("approved routes %v on %s", routes, n.name)
|
||||
|
||||
// Ping the advertiser from each peer to establish WireGuard tunnels.
|
||||
for _, other := range e.nodes {
|
||||
if other == n || !other.joinTailnet {
|
||||
continue
|
||||
}
|
||||
e.ping(other, n)
|
||||
}
|
||||
}
|
||||
|
||||
// ping pings from one node to another's Tailscale IP, retrying until it succeeds
|
||||
// or the timeout expires. This establishes the WireGuard tunnel between the nodes.
|
||||
func (e *Env) ping(from, to *Node) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
toSt, err := to.agent.Status(ctx)
|
||||
if err != nil {
|
||||
e.t.Fatalf("ping: can't get %s status: %v", to.name, err)
|
||||
}
|
||||
if len(toSt.Self.TailscaleIPs) == 0 {
|
||||
e.t.Fatalf("ping: %s has no Tailscale IPs", to.name)
|
||||
}
|
||||
targetIP := toSt.Self.TailscaleIPs[0]
|
||||
|
||||
for {
|
||||
pingCtx, pingCancel := context.WithTimeout(ctx, 3*time.Second)
|
||||
pr, err := from.agent.PingWithOpts(pingCtx, targetIP, tailcfg.PingDisco, local.PingOpts{})
|
||||
pingCancel()
|
||||
if err == nil && pr.Err == "" {
|
||||
e.logVerbosef("ping: %s -> %s OK", from.name, targetIP)
|
||||
return
|
||||
}
|
||||
if ctx.Err() != nil {
|
||||
e.t.Fatalf("ping: %s -> %s timed out", from.name, targetIP)
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
// SSHExec runs a command on a cloud VM via its debug SSH NIC.
|
||||
// Only works for cloud VMs that have the debug NIC and SSH key configured.
|
||||
// Returns stdout and any error.
|
||||
func (e *Env) SSHExec(n *Node, cmd string) (string, error) {
|
||||
if n.sshPort == 0 {
|
||||
return "", fmt.Errorf("node %s has no SSH debug port", n.name)
|
||||
}
|
||||
sshCmd := exec.Command("ssh",
|
||||
"-o", "StrictHostKeyChecking=no",
|
||||
"-o", "UserKnownHostsFile=/dev/null",
|
||||
"-o", "ConnectTimeout=5",
|
||||
"-i", "/tmp/vmtest_key",
|
||||
"-p", fmt.Sprintf("%d", n.sshPort),
|
||||
"root@127.0.0.1",
|
||||
cmd)
|
||||
out, err := sshCmd.CombinedOutput()
|
||||
return string(out), err
|
||||
}
|
||||
|
||||
// DumpStatus logs the tailscale status of a node, including its peers and their
|
||||
// AllowedIPs. Useful for debugging routing issues.
|
||||
func (e *Env) DumpStatus(n *Node) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
st, err := n.agent.Status(ctx)
|
||||
if err != nil {
|
||||
e.t.Logf("[%s] DumpStatus error: %v", n.name, err)
|
||||
return
|
||||
}
|
||||
var selfAllowed []string
|
||||
if st.Self.AllowedIPs != nil {
|
||||
for i := range st.Self.AllowedIPs.Len() {
|
||||
selfAllowed = append(selfAllowed, st.Self.AllowedIPs.At(i).String())
|
||||
}
|
||||
}
|
||||
var selfPrimary []string
|
||||
if st.Self.PrimaryRoutes != nil {
|
||||
for i := range st.Self.PrimaryRoutes.Len() {
|
||||
selfPrimary = append(selfPrimary, st.Self.PrimaryRoutes.At(i).String())
|
||||
}
|
||||
}
|
||||
e.t.Logf("[%s] self: %v, backend=%s, AllowedIPs=%v, PrimaryRoutes=%v", n.name, st.Self.TailscaleIPs, st.BackendState, selfAllowed, selfPrimary)
|
||||
for _, peer := range st.Peer {
|
||||
var aips []string
|
||||
if peer.AllowedIPs != nil {
|
||||
for i := range peer.AllowedIPs.Len() {
|
||||
aips = append(aips, peer.AllowedIPs.At(i).String())
|
||||
}
|
||||
}
|
||||
e.t.Logf("[%s] peer %s (%s): AllowedIPs=%v, Online=%v, Relay=%q, CurAddr=%q",
|
||||
n.name, peer.HostName, peer.TailscaleIPs,
|
||||
aips, peer.Online, peer.Relay, peer.CurAddr)
|
||||
}
|
||||
}
|
||||
|
||||
// waitForPeerRoute polls the node's status until it sees the given route prefix
|
||||
// in a peer's AllowedIPs, or until timeout. Returns true if found.
|
||||
func (e *Env) waitForPeerRoute(n *Node, prefix string, timeout time.Duration) bool {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
|
||||
for {
|
||||
st, err := n.agent.Status(ctx)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
for _, peer := range st.Peer {
|
||||
if peer.AllowedIPs != nil {
|
||||
for i := range peer.AllowedIPs.Len() {
|
||||
if peer.AllowedIPs.At(i).String() == prefix {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if ctx.Err() != nil {
|
||||
return false
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
// HTTPGet makes an HTTP GET request from the given node to the specified URL.
|
||||
// The request is proxied through TTA's /http-get handler.
|
||||
func (e *Env) HTTPGet(from *Node, targetURL string) string {
|
||||
for attempt := range 3 {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
|
||||
reqURL := "http://unused/http-get?url=" + targetURL
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
|
||||
if err != nil {
|
||||
cancel()
|
||||
e.t.Fatalf("HTTPGet: %v", err)
|
||||
}
|
||||
res, err := from.agent.HTTPClient.Do(req)
|
||||
cancel()
|
||||
if err != nil {
|
||||
e.logVerbosef("HTTPGet attempt %d from %s: %v", attempt+1, from.name, err)
|
||||
continue
|
||||
}
|
||||
body, _ := io.ReadAll(res.Body)
|
||||
res.Body.Close()
|
||||
if res.StatusCode == http.StatusBadGateway || res.StatusCode == http.StatusServiceUnavailable {
|
||||
e.t.Logf("HTTPGet attempt %d from %s: status %d, body: %s", attempt+1, from.name, res.StatusCode, string(body))
|
||||
time.Sleep(2 * time.Second)
|
||||
continue
|
||||
}
|
||||
return string(body)
|
||||
}
|
||||
e.t.Fatalf("HTTPGet from %s to %s: all attempts failed", from.name, targetURL)
|
||||
return ""
|
||||
}
|
||||
|
||||
// ensureGokrazy finds or builds the gokrazy base image and kernel.
|
||||
func (e *Env) ensureGokrazy(ctx context.Context) error {
|
||||
if e.gokrazyBase != "" {
|
||||
return nil // already found
|
||||
}
|
||||
|
||||
modRoot, err := findModRoot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
e.gokrazyBase = filepath.Join(modRoot, "gokrazy/natlabapp.qcow2")
|
||||
if _, err := os.Stat(e.gokrazyBase); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
e.t.Logf("building gokrazy natlab image...")
|
||||
cmd := exec.CommandContext(ctx, "make", "natlab")
|
||||
cmd.Dir = filepath.Join(modRoot, "gokrazy")
|
||||
cmd.Stderr = os.Stderr
|
||||
cmd.Stdout = os.Stdout
|
||||
if err := cmd.Run(); err != nil {
|
||||
return fmt.Errorf("make natlab: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
kernel, err := findKernelPath(filepath.Join(modRoot, "go.mod"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("finding kernel: %w", err)
|
||||
}
|
||||
e.gokrazyKernel = kernel
|
||||
return nil
|
||||
}
|
||||
|
||||
// compileBinaries cross-compiles tta, tailscale, and tailscaled for linux/amd64
|
||||
// and places them in e.binDir.
|
||||
func (e *Env) compileBinaries(ctx context.Context) error {
|
||||
modRoot, err := findModRoot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
binaries := []struct{ name, pkg string }{
|
||||
{"tta", "./cmd/tta"},
|
||||
{"tailscale", "./cmd/tailscale"},
|
||||
{"tailscaled", "./cmd/tailscaled"},
|
||||
}
|
||||
|
||||
var eg errgroup.Group
|
||||
for _, bin := range binaries {
|
||||
eg.Go(func() error {
|
||||
outPath := filepath.Join(e.binDir, bin.name)
|
||||
e.t.Logf("compiling %s...", bin.name)
|
||||
cmd := exec.CommandContext(ctx, "go", "build", "-o", outPath, bin.pkg)
|
||||
cmd.Dir = modRoot
|
||||
cmd.Env = append(os.Environ(), "GOOS=linux", "GOARCH=amd64", "CGO_ENABLED=0")
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("building %s: %v\n%s", bin.name, err, out)
|
||||
}
|
||||
e.t.Logf("compiled %s", bin.name)
|
||||
return nil
|
||||
})
|
||||
}
|
||||
return eg.Wait()
|
||||
}
|
||||
|
||||
// findModRoot returns the root directory of the current Go module, i.e. the
// directory that contains go.mod. It asks the go command ("go env GOMOD")
// and fails if the command fails or if the working directory is not inside
// a module.
func findModRoot() (string, error) {
	// Read stdout only. The go command may print notices on stderr (for
	// example while switching toolchains); mixing those into the output
	// would corrupt the path.
	out, err := exec.Command("go", "env", "GOMOD").Output()
	if err != nil {
		return "", fmt.Errorf("go env GOMOD: %w", err)
	}
	gomod := strings.TrimSpace(string(out))
	// Outside a module GOMOD is empty or the null device.
	if gomod == "" || gomod == os.DevNull {
		return "", fmt.Errorf("not in a Go module")
	}
	return filepath.Dir(gomod), nil
}
|
||||
|
||||
// findKernelPath returns the path of the gokrazy kernel image (vmlinuz)
// inside the Go module cache. goMod is the path of the go.mod file whose
// github.com/tailscale/gokrazy-kernel requirement selects the version.
//
// The result is <GOMODCACHE>/<module>@<version>/vmlinuz; whether that file
// exists is not checked. An error is returned if go.mod cannot be read, the
// module cache cannot be determined, or go.mod does not mention the kernel
// module.
func findKernelPath(goMod string) (string, error) {
	b, err := os.ReadFile(goMod)
	if err != nil {
		return "", err
	}

	// Read stdout only. The go command may print notices on stderr (for
	// example while switching toolchains); mixing those into the output
	// would corrupt the path.
	out, err := exec.Command("go", "env", "GOMODCACHE").Output()
	if err != nil {
		return "", fmt.Errorf("go env GOMODCACHE: %w", err)
	}
	goModCache := strings.TrimSpace(string(out))
	if goModCache == "" {
		return "", fmt.Errorf("go env GOMODCACHE returned an empty path")
	}

	// Scan go.mod for the line "<module> <version> ..." naming the kernel
	// module; the first such line wins.
	const kernelMod = "github.com/tailscale/gokrazy-kernel"
	for _, line := range strings.Split(string(b), "\n") {
		parts := strings.Fields(line)
		if len(parts) < 2 || !strings.HasPrefix(parts[0], kernelMod) {
			continue
		}
		return filepath.Join(goModCache, parts[0]+"@"+parts[1], "vmlinuz"), nil
	}
	return "", fmt.Errorf("gokrazy-kernel not found in %s", goMod)
}
|
||||
@@ -0,0 +1,38 @@
|
||||
// Copyright (c) Tailscale Inc & contributors
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package vmtest_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"tailscale.com/tstest/natlab/vmtest"
|
||||
"tailscale.com/tstest/natlab/vnet"
|
||||
)
|
||||
|
||||
func TestSubnetRouter(t *testing.T) {
|
||||
env := vmtest.New(t)
|
||||
|
||||
clientNet := env.AddNetwork("2.1.1.1", "192.168.1.1/24", "2000:1::1/64", vnet.EasyNAT)
|
||||
internalNet := env.AddNetwork("10.0.0.1/24", "2000:2::1/64")
|
||||
|
||||
client := env.AddNode("client", clientNet,
|
||||
vmtest.OS(vmtest.Gokrazy))
|
||||
sr := env.AddNode("subnet-router", clientNet, internalNet,
|
||||
vmtest.OS(vmtest.Ubuntu2404),
|
||||
vmtest.AdvertiseRoutes("10.0.0.0/24"))
|
||||
backend := env.AddNode("backend", internalNet,
|
||||
vmtest.OS(vmtest.Gokrazy),
|
||||
vmtest.DontJoinTailnet(),
|
||||
vmtest.WebServer(8080))
|
||||
|
||||
env.Start()
|
||||
env.ApproveRoutes(sr, "10.0.0.0/24")
|
||||
|
||||
body := env.HTTPGet(client, fmt.Sprintf("http://%s:8080/", backend.LanIP(internalNet)))
|
||||
if !strings.Contains(body, "Hello world I am backend") {
|
||||
t.Fatalf("got %q", body)
|
||||
}
|
||||
}
|
||||
@@ -294,6 +294,24 @@ func stringifyTEI(tei stack.TransportEndpointID) string {
|
||||
return fmt.Sprintf("%s -> %s", remoteHostPort, localHostPort)
|
||||
}
|
||||
|
||||
// vipNameOf returns the VIP name for the given IP, or "" if it's not a VIP.
|
||||
func vipNameOf(ip netip.Addr) string {
|
||||
for _, v := range vips {
|
||||
if v.Match(ip) {
|
||||
return v.name
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// nodeNameOf returns the node's name for the given IP on this network, or "" if unknown.
|
||||
func (n *network) nodeNameOf(ip netip.Addr) string {
|
||||
if node, ok := n.nodeByIP(ip); ok {
|
||||
return node.String()
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (n *network) acceptTCP(r *tcp.ForwarderRequest) {
|
||||
reqDetails := r.ID()
|
||||
|
||||
@@ -305,7 +323,17 @@ func (n *network) acceptTCP(r *tcp.ForwarderRequest) {
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("vnet-AcceptTCP: %v", stringifyTEI(reqDetails))
|
||||
// Annotate the log with node/VIP names for readability.
|
||||
srcHP := net.JoinHostPort(clientRemoteIP.String(), strconv.Itoa(int(reqDetails.RemotePort)))
|
||||
srcStr := srcHP
|
||||
if name := n.nodeNameOf(clientRemoteIP); name != "" {
|
||||
srcStr = fmt.Sprintf("%s (%s)", srcHP, name)
|
||||
}
|
||||
dstStr := net.JoinHostPort(destIP.String(), strconv.Itoa(int(destPort)))
|
||||
if name := vipNameOf(destIP); name != "" {
|
||||
dstStr = fmt.Sprintf("%s (%s)", dstStr, name)
|
||||
}
|
||||
log.Printf("vnet-AcceptTCP: %s -> %s", srcStr, dstStr)
|
||||
|
||||
var wq waiter.Queue
|
||||
ep, err := r.CreateEndpoint(&wq)
|
||||
@@ -1466,6 +1494,12 @@ func (n *network) HandleEthernetPacketForRouter(ep EthernetPacket) {
|
||||
return
|
||||
}
|
||||
|
||||
if toForward {
|
||||
// Traffic to destinations we don't handle (e.g. VMs trying to reach
|
||||
// the real internet for NTP, package updates, etc). Expected; drop silently.
|
||||
return
|
||||
}
|
||||
|
||||
n.logf("router got unknown packet: %v", packet)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user