Add tstest/natlab/vmtest, a high-level framework for running multi-VM
integration tests with mixed OS types (gokrazy + Ubuntu/Debian cloud
images) connected via natlab's vnet virtual network.
The vmtest package provides:
- Env type that orchestrates vnet, QEMU processes, and agent connections
- OS image support (Gokrazy, Ubuntu2404, Debian12) with download/cache
- QEMU launch per OS type (microvm for gokrazy, q35+KVM for cloud)
- Cloud-init seed ISO generation with network-config for multi-NIC
- Cross-compilation of test binaries for cloud VMs
- Debug SSH NIC on cloud VMs for interactive debugging
- Test helpers: ApproveRoutes, HTTPGet, TailscalePing, DumpStatus,
WaitForPeerRoute, SSHExec
TTA enhancements (cmd/tta):
- Parameterize /up (accept-routes, advertise-routes, snat-subnet-routes)
- Add /set, /start-webserver, /http-get endpoints
- /http-get uses local.Client.UserDial for Tailscale-routed requests
- Fix /ping for non-gokrazy systems
TestSubnetRouter exercises a 3-VM subnet router scenario:
client (gokrazy) → subnet-router (Ubuntu, dual-NIC) → backend (gokrazy)
Verifies HTTP access to the backend webserver through the Tailscale
subnet route. Passes in ~30 seconds.
Updates tailscale/tailscale#13038
Change-Id: I165b64af241d37f5f5870e796a52502fc56146fa
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
main
parent
d948b78b23
commit
ec0b23a21f
@ -1 +1 @@ |
|||||||
sha256-PLt+IPqemF3agESg6jV8AzbiOpgL45mJ/AymcNUo7VU= |
sha256-GB5riRI9hkutLc2wBzv2jil+Tf6fogLxUw54HRSPNUk= |
||||||
|
|||||||
@ -0,0 +1,117 @@ |
|||||||
|
// Copyright (c) Tailscale Inc & contributors
|
||||||
|
// SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
|
||||||
|
package vmtest |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"os" |
||||||
|
"path/filepath" |
||||||
|
"strings" |
||||||
|
|
||||||
|
"github.com/kdomanski/iso9660" |
||||||
|
) |
||||||
|
|
||||||
|
// createCloudInitISO creates a cidata seed ISO for the given cloud VM node.
// The ISO contains meta-data, user-data, and network-config files.
// Cloud-init reads these during init-local (pre-network), which is critical
// for network-config to take effect before systemd-networkd-wait-online runs.
//
// It returns the path to the generated ISO in e.tempDir.
func (e *Env) createCloudInitISO(n *Node) (string, error) {
	metaData := fmt.Sprintf("instance-id: %s\nlocal-hostname: %s\n", n.name, n.name)
	userData := e.generateUserData(n)

	// Network config: DHCP all ethernet interfaces.
	// The "optional: true" prevents systemd-networkd-wait-online from blocking.
	// The first vnet NIC gets the default route (metric 100).
	// Other interfaces get higher metrics to avoid routing conflicts.
	// NOTE(review): interior YAML indentation reconstructed from a mangled
	// source — verify against a booted VM's /var/lib/cloud seed.
	networkConfig := `version: 2
ethernets:
  primary:
    match:
      macaddress: "` + n.vnetNode.NICMac(0).String() + `"
    dhcp4: true
    dhcp4-overrides:
      route-metric: 100
    optional: true
  secondary:
    match:
      name: "en*"
    dhcp4: true
    dhcp4-overrides:
      route-metric: 200
    optional: true
`

	iw, err := iso9660.NewWriter()
	if err != nil {
		return "", fmt.Errorf("creating ISO writer: %w", err)
	}
	defer iw.Cleanup()

	// The three well-known NoCloud seed filenames cloud-init looks for.
	for name, content := range map[string]string{
		"meta-data":      metaData,
		"user-data":      userData,
		"network-config": networkConfig,
	} {
		if err := iw.AddFile(strings.NewReader(content), name); err != nil {
			return "", fmt.Errorf("adding %s to ISO: %w", name, err)
		}
	}

	isoPath := filepath.Join(e.tempDir, n.name+"-seed.iso")
	f, err := os.Create(isoPath)
	if err != nil {
		return "", err
	}
	defer f.Close()
	// "cidata" is the volume label cloud-init's NoCloud datasource searches for.
	if err := iw.WriteTo(f, "cidata"); err != nil {
		return "", fmt.Errorf("writing seed ISO: %w", err)
	}
	return isoPath, nil
}
||||||
|
|
||||||
|
// generateUserData creates the cloud-init user-data (#cloud-config) for a node.
//
// The config enables root login for debugging, downloads the tailscale
// binaries from the vnet file server, and launches tailscaled plus the
// test agent (tta) via runcmd.
//
// NOTE(review): the YAML indentation inside these string literals was
// reconstructed from a mangled source — confirm against cloud-init's
// users/runcmd schema.
func (e *Env) generateUserData(n *Node) string {
	var ud strings.Builder
	ud.WriteString("#cloud-config\n")

	// Enable root SSH login for debugging via the debug NIC.
	ud.WriteString("ssh_pwauth: true\n")
	ud.WriteString("disable_root: false\n")
	ud.WriteString("users:\n")
	ud.WriteString("  - name: root\n")
	ud.WriteString("    lock_passwd: false\n")
	ud.WriteString("    plain_text_passwd: root\n")
	// Also inject the host's SSH key if available; absence is non-fatal.
	if pubkey, err := os.ReadFile("/tmp/vmtest_key.pub"); err == nil {
		ud.WriteString(fmt.Sprintf("    ssh_authorized_keys:\n      - %s\n", strings.TrimSpace(string(pubkey))))
	}

	ud.WriteString("runcmd:\n")

	// Remove the default route from the debug NIC (enp0s4) so traffic goes through vnet.
	// The debug NIC is only for SSH access from the host.
	ud.WriteString("  - [\"/bin/sh\", \"-c\", \"ip route del default via 10.0.2.2 dev enp0s4 2>/dev/null || true\"]\n")

	// Download binaries from the files.tailscale VIP (52.52.0.6).
	// Use the IP directly to avoid DNS resolution issues during early boot.
	for _, bin := range []string{"tailscaled", "tailscale", "tta"} {
		fmt.Fprintf(&ud, "  - [\"/bin/sh\", \"-c\", \"curl -v --retry 10 --retry-delay 2 --retry-all-errors -o /usr/local/bin/%s http://52.52.0.6/%s 2>&1\"]\n", bin, bin)
	}
	ud.WriteString("  - [\"chmod\", \"+x\", \"/usr/local/bin/tailscaled\", \"/usr/local/bin/tailscale\", \"/usr/local/bin/tta\"]\n")

	// Enable IP forwarding for subnet routers.
	if n.advertiseRoutes != "" {
		ud.WriteString("  - [\"sysctl\", \"-w\", \"net.ipv4.ip_forward=1\"]\n")
		ud.WriteString("  - [\"sysctl\", \"-w\", \"net.ipv6.conf.all.forwarding=1\"]\n")
	}

	// Start tailscaled in the background; give it a moment before tta runs.
	ud.WriteString("  - [\"/bin/sh\", \"-c\", \"/usr/local/bin/tailscaled --state=mem: &\"]\n")
	ud.WriteString("  - [\"sleep\", \"2\"]\n")

	// Start tta (Tailscale Test Agent).
	ud.WriteString("  - [\"/bin/sh\", \"-c\", \"/usr/local/bin/tta &\"]\n")

	return ud.String()
}
||||||
@ -0,0 +1,170 @@ |
|||||||
|
// Copyright (c) Tailscale Inc & contributors
|
||||||
|
// SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
|
||||||
|
package vmtest |
||||||
|
|
||||||
|
import ( |
||||||
|
"context" |
||||||
|
"crypto/sha256" |
||||||
|
"encoding/hex" |
||||||
|
"fmt" |
||||||
|
"io" |
||||||
|
"log" |
||||||
|
"net/http" |
||||||
|
"os" |
||||||
|
"os/exec" |
||||||
|
"path/filepath" |
||||||
|
) |
||||||
|
|
||||||
|
// OSImage describes a VM operating system image.
type OSImage struct {
	Name      string // identifier, also used as the cache filename stem
	URL       string // download URL for the cloud image
	SHA256    string // expected SHA256 hash of the image; empty skips verification
	MemoryMB  int    // RAM for the VM
	IsGokrazy bool   // true for gokrazy images (different QEMU setup)
}
||||||
|
|
||||||
|
var (
	// Gokrazy is a minimal Tailscale appliance image built from the gokrazy/natlabapp directory.
	// It has no URL/SHA256 because it is built locally, not downloaded.
	Gokrazy = OSImage{
		Name:      "gokrazy",
		IsGokrazy: true,
		MemoryMB:  384,
	}

	// Ubuntu2404 is Ubuntu 24.04 LTS (Noble Numbat) cloud image.
	// No SHA256 is pinned because "current" is a moving target.
	Ubuntu2404 = OSImage{
		Name:     "ubuntu-24.04",
		URL:      "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img",
		MemoryMB: 1024,
	}

	// Debian12 is Debian 12 (Bookworm) generic cloud image.
	Debian12 = OSImage{
		Name:     "debian-12",
		URL:      "https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-generic-amd64.qcow2",
		MemoryMB: 1024,
	}
)
||||||
|
|
||||||
|
// imageCacheDir returns the directory for cached VM images.
// The VMTEST_CACHE_DIR environment variable, when set, overrides the
// default location under the user's home cache directory.
func imageCacheDir() string {
	if override := os.Getenv("VMTEST_CACHE_DIR"); override != "" {
		return override
	}
	// Error deliberately ignored: an empty home still yields a usable
	// relative path, matching the original behavior.
	home, _ := os.UserHomeDir()
	return filepath.Join(home, ".cache", "tailscale", "vmtest", "images")
}
||||||
|
|
||||||
|
// ensureImage downloads and caches the OS image if not already present.
|
||||||
|
func ensureImage(ctx context.Context, img OSImage) error { |
||||||
|
if img.IsGokrazy { |
||||||
|
return nil // gokrazy images are handled separately
|
||||||
|
} |
||||||
|
|
||||||
|
cacheDir := imageCacheDir() |
||||||
|
if err := os.MkdirAll(cacheDir, 0755); err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
// Use a filename based on the image name.
|
||||||
|
cachedPath := filepath.Join(cacheDir, img.Name+".qcow2") |
||||||
|
if _, err := os.Stat(cachedPath); err == nil { |
||||||
|
// If we have a SHA256 to verify, check it.
|
||||||
|
if img.SHA256 != "" { |
||||||
|
if err := verifySHA256(cachedPath, img.SHA256); err != nil { |
||||||
|
log.Printf("cached image %s failed SHA256 check, re-downloading: %v", img.Name, err) |
||||||
|
os.Remove(cachedPath) |
||||||
|
} else { |
||||||
|
return nil |
||||||
|
} |
||||||
|
} else { |
||||||
|
return nil // exists, no hash to verify
|
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
log.Printf("downloading %s from %s...", img.Name, img.URL) |
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, "GET", img.URL, nil) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("downloading %s: %w", img.Name, err) |
||||||
|
} |
||||||
|
resp, err := http.DefaultClient.Do(req) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("downloading %s: %w", img.Name, err) |
||||||
|
} |
||||||
|
defer resp.Body.Close() |
||||||
|
if resp.StatusCode != 200 { |
||||||
|
return fmt.Errorf("downloading %s: HTTP %s", img.Name, resp.Status) |
||||||
|
} |
||||||
|
|
||||||
|
tmpFile := cachedPath + ".tmp" |
||||||
|
f, err := os.Create(tmpFile) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
defer func() { |
||||||
|
f.Close() |
||||||
|
os.Remove(tmpFile) |
||||||
|
}() |
||||||
|
|
||||||
|
h := sha256.New() |
||||||
|
w := io.MultiWriter(f, h) |
||||||
|
|
||||||
|
if _, err := io.Copy(w, resp.Body); err != nil { |
||||||
|
return fmt.Errorf("downloading %s: %w", img.Name, err) |
||||||
|
} |
||||||
|
if err := f.Close(); err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
if img.SHA256 != "" { |
||||||
|
got := hex.EncodeToString(h.Sum(nil)) |
||||||
|
if got != img.SHA256 { |
||||||
|
return fmt.Errorf("SHA256 mismatch for %s: got %s, want %s", img.Name, got, img.SHA256) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if err := os.Rename(tmpFile, cachedPath); err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
log.Printf("downloaded %s", img.Name) |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// verifySHA256 checks that the file at path has the expected SHA256 hash.
|
||||||
|
func verifySHA256(path, expected string) error { |
||||||
|
f, err := os.Open(path) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
defer f.Close() |
||||||
|
h := sha256.New() |
||||||
|
if _, err := io.Copy(h, f); err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
got := hex.EncodeToString(h.Sum(nil)) |
||||||
|
if got != expected { |
||||||
|
return fmt.Errorf("got %s, want %s", got, expected) |
||||||
|
} |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// cachedImagePath returns the filesystem path to the cached image for the given OS.
|
||||||
|
func cachedImagePath(img OSImage) string { |
||||||
|
return filepath.Join(imageCacheDir(), img.Name+".qcow2") |
||||||
|
} |
||||||
|
|
||||||
|
// createOverlay creates a qcow2 overlay image on top of the given base image,
// so each VM can write without mutating the shared base.
func createOverlay(base, overlay string) error {
	cmd := exec.Command("qemu-img", "create",
		"-f", "qcow2", // overlay format
		"-F", "qcow2", // backing-file format
		"-b", base,
		overlay)
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("qemu-img create overlay: %v: %s", err, out)
	}
	return nil
}
||||||
@ -0,0 +1,239 @@ |
|||||||
|
// Copyright (c) Tailscale Inc & contributors
|
||||||
|
// SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
|
||||||
|
package vmtest |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
"encoding/json" |
||||||
|
"fmt" |
||||||
|
"net" |
||||||
|
"os" |
||||||
|
"os/exec" |
||||||
|
"path/filepath" |
||||||
|
"regexp" |
||||||
|
"strconv" |
||||||
|
"time" |
||||||
|
|
||||||
|
"tailscale.com/tstest/natlab/vnet" |
||||||
|
) |
||||||
|
|
||||||
|
// startQEMU launches a QEMU process for the given node.
|
||||||
|
func (e *Env) startQEMU(n *Node) error { |
||||||
|
if n.os.IsGokrazy { |
||||||
|
return e.startGokrazyQEMU(n) |
||||||
|
} |
||||||
|
return e.startCloudQEMU(n) |
||||||
|
} |
||||||
|
|
||||||
|
// startGokrazyQEMU launches a QEMU process for a gokrazy node.
// This follows the same pattern as tstest/integration/nat/nat_test.go.
//
// The VM boots a microvm machine with the gokrazy kernel; all configuration
// (syslog target, tailscaled env) is passed on the kernel command line.
func (e *Env) startGokrazyQEMU(n *Node) error {
	disk := filepath.Join(e.tempDir, fmt.Sprintf("%s.qcow2", n.name))
	if err := createOverlay(e.gokrazyBase, disk); err != nil {
		return err
	}

	// tailscaled environment variables ride along on the kernel command
	// line as tailscaled.env=K=V entries.
	var envBuf bytes.Buffer
	for _, env := range n.vnetNode.Env() {
		fmt.Fprintf(&envBuf, " tailscaled.env=%s=%s", env.Key, env.Value)
	}
	// Syslog goes to a vnet virtual IP; pick the v6 address for v6-only nodes.
	sysLogAddr := net.JoinHostPort(vnet.FakeSyslogIPv4().String(), "995")
	if n.vnetNode.IsV6Only() {
		sysLogAddr = net.JoinHostPort(vnet.FakeSyslogIPv6().String(), "995")
	}

	logPath := filepath.Join(e.tempDir, n.name+".log")

	args := []string{
		"-M", "microvm,isa-serial=off",
		"-m", fmt.Sprintf("%dM", n.os.MemoryMB),
		"-nodefaults", "-no-user-config", "-nographic",
		"-kernel", e.gokrazyKernel,
		"-append", "console=hvc0 root=PARTUUID=60c24cc1-f3f9-427a-8199-76baa2d60001/PARTNROFF=1 ro init=/gokrazy/init panic=10 oops=panic pci=off nousb tsc=unstable clocksource=hpet gokrazy.remote_syslog.target=" + sysLogAddr + " tailscale-tta=1" + envBuf.String(),
		"-drive", "id=blk0,file=" + disk + ",format=qcow2",
		"-device", "virtio-blk-device,drive=blk0",
		"-device", "virtio-serial-device",
		"-device", "virtio-rng-device",
		// Console output lands in logPath via the virtconsole chardev.
		"-chardev", "file,id=virtiocon0,path=" + logPath,
		"-device", "virtconsole,chardev=virtiocon0",
	}

	// Add network devices — one per NIC. All NICs share the vnet Unix
	// socket; vnet tells them apart by MAC address.
	for i := range n.vnetNode.NumNICs() {
		mac := n.vnetNode.NICMac(i)
		netdevID := fmt.Sprintf("net%d", i)
		args = append(args,
			"-netdev", fmt.Sprintf("stream,id=%s,addr.type=unix,addr.path=%s", netdevID, e.sockAddr),
			"-device", fmt.Sprintf("virtio-net-device,netdev=%s,mac=%s", netdevID, mac),
		)
	}

	return e.launchQEMU(n.name, logPath, args)
}
||||||
|
|
||||||
|
// startCloudQEMU launches a QEMU process for a cloud image (Ubuntu, Debian, etc).
//
// The VM boots a q35/KVM machine from an overlay on the cached base image,
// with cloud-init configuration supplied via a locally generated seed ISO.
// After launch it records the dynamically assigned SSH debug port on n.
func (e *Env) startCloudQEMU(n *Node) error {
	basePath := cachedImagePath(n.os)
	disk := filepath.Join(e.tempDir, fmt.Sprintf("%s.qcow2", n.name))
	if err := createOverlay(basePath, disk); err != nil {
		return err
	}

	// Create a seed ISO with cloud-init config (meta-data, user-data, network-config).
	// This MUST be a local ISO (not HTTP) so cloud-init reads network-config during
	// init-local, before systemd-networkd-wait-online blocks boot.
	seedISO, err := e.createCloudInitISO(n)
	if err != nil {
		return fmt.Errorf("creating cloud-init ISO: %w", err)
	}

	logPath := filepath.Join(e.tempDir, n.name+".log")
	qmpSock := filepath.Join(e.tempDir, n.name+"-qmp.sock")

	args := []string{
		"-machine", "q35,accel=kvm",
		"-m", fmt.Sprintf("%dM", n.os.MemoryMB),
		"-cpu", "host",
		"-smp", "2",
		"-display", "none",
		"-drive", fmt.Sprintf("file=%s,if=virtio", disk),
		"-drive", fmt.Sprintf("file=%s,if=virtio,media=cdrom,readonly=on", seedISO),
		// SMBIOS serial "ds=nocloud" steers cloud-init to the NoCloud datasource.
		"-smbios", "type=1,serial=ds=nocloud",
		"-serial", "file:" + logPath,
		// QMP socket used below to discover the hostfwd SSH port.
		"-qmp", "unix:" + qmpSock + ",server,nowait",
	}

	// Add network devices — one per NIC.
	// romfile="" disables the iPXE option ROM entirely, saving ~5s per NIC at boot
	// and avoiding "duplicate fw_cfg file name" errors with multiple NICs.
	for i := range n.vnetNode.NumNICs() {
		mac := n.vnetNode.NICMac(i)
		netdevID := fmt.Sprintf("net%d", i)
		args = append(args,
			"-netdev", fmt.Sprintf("stream,id=%s,addr.type=unix,addr.path=%s", netdevID, e.sockAddr),
			"-device", fmt.Sprintf("virtio-net-pci,netdev=%s,mac=%s,romfile=", netdevID, mac),
		)
	}

	// Add a debug NIC with user-mode networking for SSH access from the host.
	// Use port 0 so the OS picks a free port; we query the actual port via QMP after launch.
	args = append(args,
		"-netdev", "user,id=debug0,hostfwd=tcp:127.0.0.1:0-:22",
		"-device", "virtio-net-pci,netdev=debug0,romfile=",
	)

	if err := e.launchQEMU(n.name, logPath, args); err != nil {
		return err
	}

	// Query QMP to find the actual SSH port that QEMU allocated.
	port, err := qmpQueryHostFwd(qmpSock)
	if err != nil {
		return fmt.Errorf("querying SSH port via QMP: %w", err)
	}
	n.sshPort = port
	e.t.Logf("[%s] SSH debug: ssh -p %d root@127.0.0.1 (password: root)", n.name, port)
	return nil
}
||||||
|
|
||||||
|
// launchQEMU starts a qemu-system-x86_64 process with the given args.
// VM console output goes to logPath (via QEMU's -serial or -chardev).
// QEMU's own stdout/stderr go to logPath.qemu for diagnostics.
//
// The process is killed and reaped via t.Cleanup; on test failure the tail
// of the VM console log is dumped to help debugging.
func (e *Env) launchQEMU(name, logPath string, args []string) error {
	cmd := exec.Command("qemu-system-x86_64", args...)
	// Send stdout/stderr to the log file for any QEMU diagnostic messages.
	// Stdin must be /dev/null to prevent QEMU from trying to read.
	devNull, err := os.Open(os.DevNull)
	if err != nil {
		return fmt.Errorf("open /dev/null: %w", err)
	}
	cmd.Stdin = devNull
	qemuLog, err := os.Create(logPath + ".qemu")
	if err != nil {
		devNull.Close()
		return err
	}
	cmd.Stdout = qemuLog
	cmd.Stderr = qemuLog
	if err := cmd.Start(); err != nil {
		// On start failure nothing holds these descriptors; close them here
		// since the cleanup below is only registered on success.
		devNull.Close()
		qemuLog.Close()
		return fmt.Errorf("qemu for %s: %w", name, err)
	}
	e.t.Logf("launched QEMU for %s (pid %d), log: %s", name, cmd.Process.Pid, logPath)
	e.qemuProcs = append(e.qemuProcs, cmd)
	e.t.Cleanup(func() {
		// Kill, reap, then release the descriptors the child was using.
		cmd.Process.Kill()
		cmd.Wait()
		devNull.Close()
		qemuLog.Close()
		// Dump tail of VM log on failure for debugging.
		if e.t.Failed() {
			if data, err := os.ReadFile(logPath); err == nil {
				lines := bytes.Split(data, []byte("\n"))
				start := 0
				if len(lines) > 50 {
					start = len(lines) - 50
				}
				e.t.Logf("=== last 50 lines of %s log ===", name)
				for _, line := range lines[start:] {
					e.t.Logf("[%s] %s", name, line)
				}
			}
		}
	})
	return nil
}
||||||
|
|
||||||
|
// qmpQueryHostFwd connects to a QEMU QMP socket and queries the host port
// assigned to the first TCP host forward rule (the SSH debug port).
//
// QMP requires the qmp_capabilities handshake before any command; the
// hostfwd port is then scraped from the HMP "info usernet" output because
// QMP has no structured query for user-mode hostfwd rules.
func qmpQueryHostFwd(sockPath string) (int, error) {
	// Wait for the QMP socket to appear (up to ~5s; QEMU creates it shortly
	// after launch).
	var conn net.Conn
	for range 50 {
		var err error
		conn, err = net.Dial("unix", sockPath)
		if err == nil {
			break
		}
		time.Sleep(100 * time.Millisecond)
	}
	if conn == nil {
		return 0, fmt.Errorf("QMP socket %s not available", sockPath)
	}
	defer conn.Close()
	conn.SetDeadline(time.Now().Add(5 * time.Second))

	// Read the QMP greeting.
	var greeting json.RawMessage
	dec := json.NewDecoder(conn)
	if err := dec.Decode(&greeting); err != nil {
		return 0, fmt.Errorf("reading QMP greeting: %w", err)
	}

	// Send qmp_capabilities to initialize.
	fmt.Fprintf(conn, `{"execute":"qmp_capabilities"}`+"\n")
	var capsResp json.RawMessage
	if err := dec.Decode(&capsResp); err != nil {
		return 0, fmt.Errorf("reading qmp_capabilities response: %w", err)
	}

	// Query "info usernet" via human-monitor-command.
	fmt.Fprintf(conn, `{"execute":"human-monitor-command","arguments":{"command-line":"info usernet"}}`+"\n")
	var hmpResp struct {
		Return string `json:"return"`
	}
	if err := dec.Decode(&hmpResp); err != nil {
		return 0, fmt.Errorf("reading info usernet response: %w", err)
	}

	// Parse the port from output like:
	//   TCP[HOST_FORWARD]  12 127.0.0.1 35323 10.0.2.15    22
	re := regexp.MustCompile(`TCP\[HOST_FORWARD\]\s+\d+\s+127\.0\.0\.1\s+(\d+)\s+`)
	m := re.FindStringSubmatch(hmpResp.Return)
	if m == nil {
		return 0, fmt.Errorf("no hostfwd port found in: %s", hmpResp.Return)
	}
	return strconv.Atoi(m[1])
}
||||||
@ -0,0 +1,676 @@ |
|||||||
|
// Copyright (c) Tailscale Inc & contributors
|
||||||
|
// SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
|
||||||
|
// Package vmtest provides a high-level framework for running integration tests
|
||||||
|
// across multiple QEMU virtual machines connected by natlab's vnet virtual
|
||||||
|
// network infrastructure. It supports mixed OS types (gokrazy, Ubuntu, Debian)
|
||||||
|
// and multi-NIC configurations for scenarios like subnet routing.
|
||||||
|
//
|
||||||
|
// Prerequisites:
|
||||||
|
// - qemu-system-x86_64 and KVM access (typically the "kvm" group; no root required)
|
||||||
|
// - A built gokrazy natlabapp image (auto-built on first run via "make natlab" in gokrazy/)
|
||||||
|
//
|
||||||
|
// Run tests with:
|
||||||
|
//
|
||||||
|
// go test ./tstest/natlab/vmtest/ --run-vm-tests -v
|
||||||
|
package vmtest |
||||||
|
|
||||||
|
import ( |
||||||
|
"context" |
||||||
|
"flag" |
||||||
|
"fmt" |
||||||
|
"io" |
||||||
|
"net" |
||||||
|
"net/http" |
||||||
|
"net/netip" |
||||||
|
"os" |
||||||
|
"os/exec" |
||||||
|
"path/filepath" |
||||||
|
"slices" |
||||||
|
"strings" |
||||||
|
"testing" |
||||||
|
"time" |
||||||
|
|
||||||
|
"golang.org/x/sync/errgroup" |
||||||
|
"tailscale.com/client/local" |
||||||
|
"tailscale.com/ipn" |
||||||
|
"tailscale.com/tailcfg" |
||||||
|
"tailscale.com/tstest/natlab/vnet" |
||||||
|
"tailscale.com/util/set" |
||||||
|
) |
||||||
|
|
||||||
|
var ( |
||||||
|
runVMTests = flag.Bool("run-vm-tests", false, "run tests that require VMs with KVM") |
||||||
|
verboseVMDebug = flag.Bool("verbose-vm-debug", false, "enable verbose debug logging for VM tests") |
||||||
|
) |
||||||
|
|
||||||
|
// Env is a test environment that manages virtual networks and QEMU VMs.
// Create one with New, add networks and nodes, then call Start.
type Env struct {
	t       testing.TB
	cfg     vnet.Config  // topology, built up by AddNetwork/AddNode before Start
	server  *vnet.Server // created during Start
	nodes   []*Node
	tempDir string

	sockAddr string // shared Unix socket path for all QEMU netdevs
	binDir   string // directory for compiled binaries

	// gokrazy-specific paths
	gokrazyBase   string // path to gokrazy base qcow2 image
	gokrazyKernel string // path to gokrazy kernel

	qemuProcs []*exec.Cmd // launched QEMU processes
}
||||||
|
|
||||||
|
// logVerbosef logs a message only when --verbose-vm-debug is set.
|
||||||
|
func (e *Env) logVerbosef(format string, args ...any) { |
||||||
|
if *verboseVMDebug { |
||||||
|
e.t.Helper() |
||||||
|
e.t.Logf(format, args...) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// New creates a new test environment. It skips the test if --run-vm-tests is not set.
|
||||||
|
func New(t testing.TB) *Env { |
||||||
|
if !*runVMTests { |
||||||
|
t.Skip("skipping VM test; set --run-vm-tests to run") |
||||||
|
} |
||||||
|
|
||||||
|
tempDir := t.TempDir() |
||||||
|
return &Env{ |
||||||
|
t: t, |
||||||
|
tempDir: tempDir, |
||||||
|
binDir: filepath.Join(tempDir, "bin"), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// AddNetwork creates a new virtual network. Arguments follow the same pattern as
// vnet.Config.AddNetwork (string IPs, NAT types, NetworkService values).
// Call before Start; the configuration is consumed when the vnet server is built.
func (e *Env) AddNetwork(opts ...any) *vnet.Network {
	return e.cfg.AddNetwork(opts...)
}
||||||
|
|
||||||
|
// Node represents a virtual machine in the test environment.
type Node struct {
	name string
	num  int // assigned during AddNode

	os       OSImage
	nets     []*vnet.Network
	vnetNode *vnet.Node // primary vnet node (set during Start)
	agent    *vnet.NodeAgentClient
	// joinTailnet controls whether Start runs "tailscale up" for this node.
	joinTailnet     bool
	advertiseRoutes string // comma-separated CIDRs; empty means none
	webServerPort   int    // 0 means no webserver
	sshPort         int    // host port for SSH debug access (cloud VMs only)
}
||||||
|
|
||||||
|
// AddNode creates a new VM node. The name is used for identification and as the
// webserver greeting. Options can be *vnet.Network (for network attachment),
// NodeOption values, or vnet node options (like vnet.TailscaledEnv).
//
// Options this package understands are consumed here; everything else is
// forwarded to vnet.Config.AddNode unchanged.
func (e *Env) AddNode(name string, opts ...any) *Node {
	n := &Node{
		name:        name,
		os:          Gokrazy, // default
		joinTailnet: true,
	}
	e.nodes = append(e.nodes, n)

	// Separate network options from other options.
	var vnetOpts []any
	for _, o := range opts {
		switch o := o.(type) {
		case *vnet.Network:
			// Networks are both recorded locally and passed to vnet.
			n.nets = append(n.nets, o)
			vnetOpts = append(vnetOpts, o)
		case nodeOptOS:
			n.os = OSImage(o)
		case nodeOptNoTailscale:
			n.joinTailnet = false
			vnetOpts = append(vnetOpts, vnet.DontJoinTailnet)
		case nodeOptAdvertiseRoutes:
			n.advertiseRoutes = string(o)
		case nodeOptWebServer:
			n.webServerPort = int(o)
		default:
			// Pass through to vnet (TailscaledEnv, NodeOption, MAC, etc.)
			vnetOpts = append(vnetOpts, o)
		}
	}

	n.vnetNode = e.cfg.AddNode(vnetOpts...)
	n.num = n.vnetNode.Num()
	return n
}
||||||
|
|
||||||
|
// LanIP returns the LAN IPv4 address of this node on the given network.
// This is only valid after Env.Start() has been called.
func (n *Node) LanIP(net *vnet.Network) netip.Addr {
	return n.vnetNode.LanIP(net)
}
||||||
|
|
||||||
|
// NodeOption types for configuring nodes. Each has a constructor below;
// AddNode recognizes these in its options switch.

type nodeOptOS OSImage            // selects the node's OS image
type nodeOptNoTailscale struct{}  // suppresses the tailscale up step
type nodeOptAdvertiseRoutes string // comma-separated CIDRs to advertise
type nodeOptWebServer int         // port for the node's test webserver
||||||
|
|
||||||
|
// OS returns a NodeOption that sets the node's operating system image.
func OS(img OSImage) nodeOptOS { return nodeOptOS(img) }
||||||
|
|
||||||
|
// DontJoinTailnet returns a NodeOption that prevents the node from running tailscale up.
func DontJoinTailnet() nodeOptNoTailscale { return nodeOptNoTailscale{} }
||||||
|
|
||||||
|
// AdvertiseRoutes returns a NodeOption that configures the node to advertise
// the given routes (comma-separated CIDRs) when joining the tailnet.
// It also causes IP forwarding to be enabled in the node's cloud-init config.
func AdvertiseRoutes(routes string) nodeOptAdvertiseRoutes {
	return nodeOptAdvertiseRoutes(routes)
}
||||||
|
|
||||||
|
// WebServer returns a NodeOption that starts a webserver on the given port.
// The webserver responds with "Hello world I am <nodename>" on all requests.
func WebServer(port int) nodeOptWebServer { return nodeOptWebServer(port) }
||||||
|
|
||||||
|
// Start initializes the virtual network, builds/downloads images, compiles
|
||||||
|
// binaries, launches QEMU processes, and waits for all TTA agents to connect.
|
||||||
|
// It should be called after all AddNetwork/AddNode calls.
|
||||||
|
func (e *Env) Start() { |
||||||
|
t := e.t |
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) |
||||||
|
t.Cleanup(cancel) |
||||||
|
|
||||||
|
if err := os.MkdirAll(e.binDir, 0755); err != nil { |
||||||
|
t.Fatal(err) |
||||||
|
} |
||||||
|
|
||||||
|
// Determine if we have any non-gokrazy "cloud" images (e.g. Ubuntu, Debian)
|
||||||
|
// that require compiled binaries pushed into their image later. (Gokrazy
|
||||||
|
// has them built-in, so doesn't need the compileBinaries step.)
|
||||||
|
needBuildBinaries := slices.ContainsFunc(e.nodes, func(n *Node) bool { return !n.os.IsGokrazy }) |
||||||
|
|
||||||
|
// Compile binaries and download/build images in parallel.
|
||||||
|
// Any failure cancels the others via the errgroup context.
|
||||||
|
eg, egCtx := errgroup.WithContext(ctx) |
||||||
|
if needBuildBinaries { |
||||||
|
eg.Go(func() error { |
||||||
|
return e.compileBinaries(egCtx) |
||||||
|
}) |
||||||
|
} |
||||||
|
didOS := set.Set[string]{} // dedup by image name
|
||||||
|
for _, n := range e.nodes { |
||||||
|
if didOS.Contains(n.os.Name) { |
||||||
|
continue |
||||||
|
} |
||||||
|
didOS.Add(n.os.Name) |
||||||
|
if n.os.IsGokrazy { |
||||||
|
eg.Go(func() error { |
||||||
|
return e.ensureGokrazy(egCtx) |
||||||
|
}) |
||||||
|
} else { |
||||||
|
eg.Go(func() error { |
||||||
|
return ensureImage(egCtx, n.os) |
||||||
|
}) |
||||||
|
} |
||||||
|
} |
||||||
|
if err := eg.Wait(); err != nil { |
||||||
|
t.Fatalf("setup: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Create the vnet server.
|
||||||
|
var err error |
||||||
|
e.server, err = vnet.New(&e.cfg) |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("vnet.New: %v", err) |
||||||
|
} |
||||||
|
t.Cleanup(func() { e.server.Close() }) |
||||||
|
|
||||||
|
// Register compiled binaries with the file server VIP.
|
||||||
|
if needBuildBinaries { |
||||||
|
for _, name := range []string{"tta", "tailscale", "tailscaled"} { |
||||||
|
data, err := os.ReadFile(filepath.Join(e.binDir, name)) |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("reading compiled %s: %v", name, err) |
||||||
|
} |
||||||
|
e.server.RegisterFile(name, data) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Cloud-init config is delivered via local seed ISOs (created in startCloudQEMU),
|
||||||
|
// not via the cloud-init HTTP VIP, because network-config must be available
|
||||||
|
// during init-local before systemd-networkd-wait-online blocks.
|
||||||
|
|
||||||
|
// Start Unix socket listener.
|
||||||
|
e.sockAddr = filepath.Join(e.tempDir, "vnet.sock") |
||||||
|
srv, err := net.Listen("unix", e.sockAddr) |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("listen unix: %v", err) |
||||||
|
} |
||||||
|
t.Cleanup(func() { srv.Close() }) |
||||||
|
|
||||||
|
go func() { |
||||||
|
for { |
||||||
|
c, err := srv.Accept() |
||||||
|
if err != nil { |
||||||
|
return |
||||||
|
} |
||||||
|
go e.server.ServeUnixConn(c.(*net.UnixConn), vnet.ProtocolQEMU) |
||||||
|
} |
||||||
|
}() |
||||||
|
|
||||||
|
// Launch QEMU processes.
|
||||||
|
for _, n := range e.nodes { |
||||||
|
if err := e.startQEMU(n); err != nil { |
||||||
|
t.Fatalf("startQEMU(%s): %v", n.name, err) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Set up agent clients and wait for all agents to connect.
|
||||||
|
for _, n := range e.nodes { |
||||||
|
n.agent = e.server.NodeAgentClient(n.vnetNode) |
||||||
|
n.vnetNode.SetClient(n.agent) |
||||||
|
} |
||||||
|
|
||||||
|
// Wait for agents, then bring up tailscale.
|
||||||
|
var agentEg errgroup.Group |
||||||
|
for _, n := range e.nodes { |
||||||
|
agentEg.Go(func() error { |
||||||
|
t.Logf("[%s] waiting for agent...", n.name) |
||||||
|
st, err := n.agent.Status(ctx) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("[%s] agent status: %w", n.name, err) |
||||||
|
} |
||||||
|
t.Logf("[%s] agent connected, backend state: %s", n.name, st.BackendState) |
||||||
|
|
||||||
|
if n.vnetNode.HostFirewall() { |
||||||
|
if err := n.agent.EnableHostFirewall(ctx); err != nil { |
||||||
|
return fmt.Errorf("[%s] enable firewall: %w", n.name, err) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if n.joinTailnet { |
||||||
|
if err := e.tailscaleUp(ctx, n); err != nil { |
||||||
|
return fmt.Errorf("[%s] tailscale up: %w", n.name, err) |
||||||
|
} |
||||||
|
st, err = n.agent.Status(ctx) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("[%s] status after up: %w", n.name, err) |
||||||
|
} |
||||||
|
if st.BackendState != "Running" { |
||||||
|
return fmt.Errorf("[%s] state = %q, want Running", n.name, st.BackendState) |
||||||
|
} |
||||||
|
t.Logf("[%s] up with %v", n.name, st.Self.TailscaleIPs) |
||||||
|
} |
||||||
|
|
||||||
|
return nil |
||||||
|
}) |
||||||
|
} |
||||||
|
if err := agentEg.Wait(); err != nil { |
||||||
|
t.Fatal(err) |
||||||
|
} |
||||||
|
|
||||||
|
// Start webservers.
|
||||||
|
for _, n := range e.nodes { |
||||||
|
if n.webServerPort > 0 { |
||||||
|
if err := e.startWebServer(ctx, n); err != nil { |
||||||
|
t.Fatalf("startWebServer(%s): %v", n.name, err) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// tailscaleUp runs "tailscale up" on the node via TTA.
|
||||||
|
func (e *Env) tailscaleUp(ctx context.Context, n *Node) error { |
||||||
|
url := "http://unused/up?accept-routes=true" |
||||||
|
if n.advertiseRoutes != "" { |
||||||
|
url += "&advertise-routes=" + n.advertiseRoutes |
||||||
|
} |
||||||
|
req, err := http.NewRequestWithContext(ctx, "GET", url, nil) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
res, err := n.agent.HTTPClient.Do(req) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
defer res.Body.Close() |
||||||
|
body, _ := io.ReadAll(res.Body) |
||||||
|
if res.StatusCode != 200 { |
||||||
|
return fmt.Errorf("tailscale up: %s: %s", res.Status, body) |
||||||
|
} |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// startWebServer tells TTA on the node to start a webserver.
|
||||||
|
func (e *Env) startWebServer(ctx context.Context, n *Node) error { |
||||||
|
url := fmt.Sprintf("http://unused/start-webserver?port=%d&name=%s", n.webServerPort, n.name) |
||||||
|
req, err := http.NewRequestWithContext(ctx, "GET", url, nil) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
res, err := n.agent.HTTPClient.Do(req) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
defer res.Body.Close() |
||||||
|
if res.StatusCode != 200 { |
||||||
|
body, _ := io.ReadAll(res.Body) |
||||||
|
return fmt.Errorf("start-webserver: %s: %s", res.Status, body) |
||||||
|
} |
||||||
|
e.t.Logf("[%s] webserver started on port %d", n.name, n.webServerPort) |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// ApproveRoutes tells the test control server to approve subnet routes
// for the given node. The routes should be CIDR strings.
//
// On any failure (bad route string, agent RPC error, or a peer never
// observing the route) this fails the test via t.Fatalf rather than
// returning an error.
//
// The steps below are order-sensitive: peers must have --accept-routes
// enabled before the control server announces the routes, and we ping the
// advertiser afterward so WireGuard tunnels exist before the test sends
// traffic through the subnet route.
func (e *Env) ApproveRoutes(n *Node, routes ...string) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Get the node's public key from its status.
	st, err := n.agent.Status(ctx)
	if err != nil {
		e.t.Fatalf("ApproveRoutes: status for %s: %v", n.name, err)
	}
	nodeKey := st.Self.PublicKey

	// Validate and parse the route strings up front so a typo fails fast.
	var prefixes []netip.Prefix
	for _, r := range routes {
		p, err := netip.ParsePrefix(r)
		if err != nil {
			e.t.Fatalf("ApproveRoutes: bad route %q: %v", r, err)
		}
		prefixes = append(prefixes, p)
	}

	// Enable --accept-routes on all other tailscale nodes BEFORE setting the
	// routes on the control server. This way, when the map update arrives with
	// the new peer routes, peers will immediately install them.
	for _, other := range e.nodes {
		if other == n || !other.joinTailnet {
			continue
		}
		if _, err := other.agent.EditPrefs(ctx, &ipn.MaskedPrefs{
			Prefs:       ipn.Prefs{RouteAll: true},
			RouteAllSet: true,
		}); err != nil {
			e.t.Fatalf("ApproveRoutes: set accept-routes on %s: %v", other.name, err)
		}
	}

	// Approve the routes on the control server. SetSubnetRoutes notifies all
	// peers via updatePeerChanged, so they'll re-fetch their MapResponse.
	e.server.ControlServer().SetSubnetRoutes(nodeKey, prefixes)

	// Wait for each peer to see the routes. Dump the peer's status before
	// failing so the log shows what it did (or didn't) receive.
	for _, r := range routes {
		for _, other := range e.nodes {
			if other == n || !other.joinTailnet {
				continue
			}
			if !e.waitForPeerRoute(other, r, 15*time.Second) {
				e.DumpStatus(other)
				e.t.Fatalf("ApproveRoutes: %s never saw route %s", other.name, r)
			}
		}
	}
	e.t.Logf("approved routes %v on %s", routes, n.name)

	// Ping the advertiser from each peer to establish WireGuard tunnels.
	for _, other := range e.nodes {
		if other == n || !other.joinTailnet {
			continue
		}
		e.ping(other, n)
	}
}
||||||
|
|
||||||
|
// ping pings from one node to another's Tailscale IP, retrying until it succeeds
|
||||||
|
// or the timeout expires. This establishes the WireGuard tunnel between the nodes.
|
||||||
|
func (e *Env) ping(from, to *Node) { |
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) |
||||||
|
defer cancel() |
||||||
|
|
||||||
|
toSt, err := to.agent.Status(ctx) |
||||||
|
if err != nil { |
||||||
|
e.t.Fatalf("ping: can't get %s status: %v", to.name, err) |
||||||
|
} |
||||||
|
if len(toSt.Self.TailscaleIPs) == 0 { |
||||||
|
e.t.Fatalf("ping: %s has no Tailscale IPs", to.name) |
||||||
|
} |
||||||
|
targetIP := toSt.Self.TailscaleIPs[0] |
||||||
|
|
||||||
|
for { |
||||||
|
pingCtx, pingCancel := context.WithTimeout(ctx, 3*time.Second) |
||||||
|
pr, err := from.agent.PingWithOpts(pingCtx, targetIP, tailcfg.PingDisco, local.PingOpts{}) |
||||||
|
pingCancel() |
||||||
|
if err == nil && pr.Err == "" { |
||||||
|
e.logVerbosef("ping: %s -> %s OK", from.name, targetIP) |
||||||
|
return |
||||||
|
} |
||||||
|
if ctx.Err() != nil { |
||||||
|
e.t.Fatalf("ping: %s -> %s timed out", from.name, targetIP) |
||||||
|
} |
||||||
|
time.Sleep(time.Second) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// SSHExec runs a command on a cloud VM via its debug SSH NIC.
|
||||||
|
// Only works for cloud VMs that have the debug NIC and SSH key configured.
|
||||||
|
// Returns stdout and any error.
|
||||||
|
func (e *Env) SSHExec(n *Node, cmd string) (string, error) { |
||||||
|
if n.sshPort == 0 { |
||||||
|
return "", fmt.Errorf("node %s has no SSH debug port", n.name) |
||||||
|
} |
||||||
|
sshCmd := exec.Command("ssh", |
||||||
|
"-o", "StrictHostKeyChecking=no", |
||||||
|
"-o", "UserKnownHostsFile=/dev/null", |
||||||
|
"-o", "ConnectTimeout=5", |
||||||
|
"-i", "/tmp/vmtest_key", |
||||||
|
"-p", fmt.Sprintf("%d", n.sshPort), |
||||||
|
"root@127.0.0.1", |
||||||
|
cmd) |
||||||
|
out, err := sshCmd.CombinedOutput() |
||||||
|
return string(out), err |
||||||
|
} |
||||||
|
|
||||||
|
// DumpStatus logs the tailscale status of a node, including its peers and their
|
||||||
|
// AllowedIPs. Useful for debugging routing issues.
|
||||||
|
func (e *Env) DumpStatus(n *Node) { |
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) |
||||||
|
defer cancel() |
||||||
|
|
||||||
|
st, err := n.agent.Status(ctx) |
||||||
|
if err != nil { |
||||||
|
e.t.Logf("[%s] DumpStatus error: %v", n.name, err) |
||||||
|
return |
||||||
|
} |
||||||
|
var selfAllowed []string |
||||||
|
if st.Self.AllowedIPs != nil { |
||||||
|
for i := range st.Self.AllowedIPs.Len() { |
||||||
|
selfAllowed = append(selfAllowed, st.Self.AllowedIPs.At(i).String()) |
||||||
|
} |
||||||
|
} |
||||||
|
var selfPrimary []string |
||||||
|
if st.Self.PrimaryRoutes != nil { |
||||||
|
for i := range st.Self.PrimaryRoutes.Len() { |
||||||
|
selfPrimary = append(selfPrimary, st.Self.PrimaryRoutes.At(i).String()) |
||||||
|
} |
||||||
|
} |
||||||
|
e.t.Logf("[%s] self: %v, backend=%s, AllowedIPs=%v, PrimaryRoutes=%v", n.name, st.Self.TailscaleIPs, st.BackendState, selfAllowed, selfPrimary) |
||||||
|
for _, peer := range st.Peer { |
||||||
|
var aips []string |
||||||
|
if peer.AllowedIPs != nil { |
||||||
|
for i := range peer.AllowedIPs.Len() { |
||||||
|
aips = append(aips, peer.AllowedIPs.At(i).String()) |
||||||
|
} |
||||||
|
} |
||||||
|
e.t.Logf("[%s] peer %s (%s): AllowedIPs=%v, Online=%v, Relay=%q, CurAddr=%q", |
||||||
|
n.name, peer.HostName, peer.TailscaleIPs, |
||||||
|
aips, peer.Online, peer.Relay, peer.CurAddr) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// waitForPeerRoute polls the node's status until it sees the given route prefix
|
||||||
|
// in a peer's AllowedIPs, or until timeout. Returns true if found.
|
||||||
|
func (e *Env) waitForPeerRoute(n *Node, prefix string, timeout time.Duration) bool { |
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), timeout) |
||||||
|
defer cancel() |
||||||
|
|
||||||
|
for { |
||||||
|
st, err := n.agent.Status(ctx) |
||||||
|
if err != nil { |
||||||
|
return false |
||||||
|
} |
||||||
|
for _, peer := range st.Peer { |
||||||
|
if peer.AllowedIPs != nil { |
||||||
|
for i := range peer.AllowedIPs.Len() { |
||||||
|
if peer.AllowedIPs.At(i).String() == prefix { |
||||||
|
return true |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
if ctx.Err() != nil { |
||||||
|
return false |
||||||
|
} |
||||||
|
time.Sleep(time.Second) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// HTTPGet makes an HTTP GET request from the given node to the specified URL.
|
||||||
|
// The request is proxied through TTA's /http-get handler.
|
||||||
|
func (e *Env) HTTPGet(from *Node, targetURL string) string { |
||||||
|
for attempt := range 3 { |
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second) |
||||||
|
reqURL := "http://unused/http-get?url=" + targetURL |
||||||
|
req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil) |
||||||
|
if err != nil { |
||||||
|
cancel() |
||||||
|
e.t.Fatalf("HTTPGet: %v", err) |
||||||
|
} |
||||||
|
res, err := from.agent.HTTPClient.Do(req) |
||||||
|
cancel() |
||||||
|
if err != nil { |
||||||
|
e.logVerbosef("HTTPGet attempt %d from %s: %v", attempt+1, from.name, err) |
||||||
|
continue |
||||||
|
} |
||||||
|
body, _ := io.ReadAll(res.Body) |
||||||
|
res.Body.Close() |
||||||
|
if res.StatusCode == http.StatusBadGateway || res.StatusCode == http.StatusServiceUnavailable { |
||||||
|
e.t.Logf("HTTPGet attempt %d from %s: status %d, body: %s", attempt+1, from.name, res.StatusCode, string(body)) |
||||||
|
time.Sleep(2 * time.Second) |
||||||
|
continue |
||||||
|
} |
||||||
|
return string(body) |
||||||
|
} |
||||||
|
e.t.Fatalf("HTTPGet from %s to %s: all attempts failed", from.name, targetURL) |
||||||
|
return "" |
||||||
|
} |
||||||
|
|
||||||
|
// ensureGokrazy finds or builds the gokrazy base image and kernel.
|
||||||
|
func (e *Env) ensureGokrazy(ctx context.Context) error { |
||||||
|
if e.gokrazyBase != "" { |
||||||
|
return nil // already found
|
||||||
|
} |
||||||
|
|
||||||
|
modRoot, err := findModRoot() |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
e.gokrazyBase = filepath.Join(modRoot, "gokrazy/natlabapp.qcow2") |
||||||
|
if _, err := os.Stat(e.gokrazyBase); err != nil { |
||||||
|
if !os.IsNotExist(err) { |
||||||
|
return err |
||||||
|
} |
||||||
|
e.t.Logf("building gokrazy natlab image...") |
||||||
|
cmd := exec.CommandContext(ctx, "make", "natlab") |
||||||
|
cmd.Dir = filepath.Join(modRoot, "gokrazy") |
||||||
|
cmd.Stderr = os.Stderr |
||||||
|
cmd.Stdout = os.Stdout |
||||||
|
if err := cmd.Run(); err != nil { |
||||||
|
return fmt.Errorf("make natlab: %w", err) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
kernel, err := findKernelPath(filepath.Join(modRoot, "go.mod")) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("finding kernel: %w", err) |
||||||
|
} |
||||||
|
e.gokrazyKernel = kernel |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// compileBinaries cross-compiles tta, tailscale, and tailscaled for linux/amd64
|
||||||
|
// and places them in e.binDir.
|
||||||
|
func (e *Env) compileBinaries(ctx context.Context) error { |
||||||
|
modRoot, err := findModRoot() |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
binaries := []struct{ name, pkg string }{ |
||||||
|
{"tta", "./cmd/tta"}, |
||||||
|
{"tailscale", "./cmd/tailscale"}, |
||||||
|
{"tailscaled", "./cmd/tailscaled"}, |
||||||
|
} |
||||||
|
|
||||||
|
var eg errgroup.Group |
||||||
|
for _, bin := range binaries { |
||||||
|
eg.Go(func() error { |
||||||
|
outPath := filepath.Join(e.binDir, bin.name) |
||||||
|
e.t.Logf("compiling %s...", bin.name) |
||||||
|
cmd := exec.CommandContext(ctx, "go", "build", "-o", outPath, bin.pkg) |
||||||
|
cmd.Dir = modRoot |
||||||
|
cmd.Env = append(os.Environ(), "GOOS=linux", "GOARCH=amd64", "CGO_ENABLED=0") |
||||||
|
if out, err := cmd.CombinedOutput(); err != nil { |
||||||
|
return fmt.Errorf("building %s: %v\n%s", bin.name, err, out) |
||||||
|
} |
||||||
|
e.t.Logf("compiled %s", bin.name) |
||||||
|
return nil |
||||||
|
}) |
||||||
|
} |
||||||
|
return eg.Wait() |
||||||
|
} |
||||||
|
|
||||||
|
// findModRoot returns the root of the Go module (where go.mod is).
//
// It asks the go tool for GOMOD; an empty result or os.DevNull means we're
// running outside any module, which is reported as an error.
func findModRoot() (string, error) {
	raw, err := exec.Command("go", "env", "GOMOD").CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("go env GOMOD: %w", err)
	}
	modFile := strings.TrimSpace(string(raw))
	switch modFile {
	case "", os.DevNull:
		return "", fmt.Errorf("not in a Go module")
	}
	return filepath.Dir(modFile), nil
}
||||||
|
|
||||||
|
// findKernelPath finds the gokrazy kernel vmlinuz path from go.mod.
//
// It looks up the version of the github.com/tailscale/gokrazy-kernel
// dependency in the given go.mod file and returns the path to that
// version's vmlinuz inside the Go module cache. The returned path is not
// verified to exist.
func findKernelPath(goMod string) (string, error) {
	const kernelMod = "github.com/tailscale/gokrazy-kernel"

	b, err := os.ReadFile(goMod)
	if err != nil {
		return "", err
	}

	goModCacheB, err := exec.Command("go", "env", "GOMODCACHE").CombinedOutput()
	if err != nil {
		return "", err
	}
	goModCache := strings.TrimSpace(string(goModCacheB))

	// Parse go.mod to find the gokrazy-kernel version. Matching the first
	// field exactly (rather than by line prefix) avoids false positives on
	// similarly named modules (e.g. "...gokrazy-kernel-foo") and tolerates
	// trailing "// indirect" comments.
	for _, line := range strings.Split(string(b), "\n") {
		parts := strings.Fields(line)
		if len(parts) >= 2 && parts[0] == kernelMod {
			return filepath.Join(goModCache, parts[0]+"@"+parts[1], "vmlinuz"), nil
		}
	}
	return "", fmt.Errorf("gokrazy-kernel not found in %s", goMod)
}
||||||
@ -0,0 +1,38 @@ |
|||||||
|
// Copyright (c) Tailscale Inc & contributors
|
||||||
|
// SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
|
||||||
|
package vmtest_test |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"strings" |
||||||
|
"testing" |
||||||
|
|
||||||
|
"tailscale.com/tstest/natlab/vmtest" |
||||||
|
"tailscale.com/tstest/natlab/vnet" |
||||||
|
) |
||||||
|
|
||||||
|
// TestSubnetRouter exercises a three-VM subnet router scenario:
//
//	client (gokrazy) -> subnet-router (Ubuntu, dual-NIC) -> backend (gokrazy)
//
// The client reaches the backend's webserver over HTTP via the Tailscale
// subnet route advertised by the subnet router; the backend itself never
// joins the tailnet.
func TestSubnetRouter(t *testing.T) {
	env := vmtest.New(t)

	// clientNet is a NATed LAN the client and the subnet router's first NIC
	// share; internalNet is the un-NATed internal LAN behind the router.
	clientNet := env.AddNetwork("2.1.1.1", "192.168.1.1/24", "2000:1::1/64", vnet.EasyNAT)
	internalNet := env.AddNetwork("10.0.0.1/24", "2000:2::1/64")

	client := env.AddNode("client", clientNet,
		vmtest.OS(vmtest.Gokrazy))
	// The subnet router is dual-homed (both networks) and advertises the
	// internal LAN as a Tailscale subnet route.
	sr := env.AddNode("subnet-router", clientNet, internalNet,
		vmtest.OS(vmtest.Ubuntu2404),
		vmtest.AdvertiseRoutes("10.0.0.0/24"))
	// The backend stays off the tailnet and only serves HTTP on its LAN,
	// so it's reachable solely through the subnet route.
	backend := env.AddNode("backend", internalNet,
		vmtest.OS(vmtest.Gokrazy),
		vmtest.DontJoinTailnet(),
		vmtest.WebServer(8080))

	env.Start()
	env.ApproveRoutes(sr, "10.0.0.0/24")

	// Fetch the backend's page from the client; this only works if traffic
	// is routed through the subnet router over Tailscale.
	body := env.HTTPGet(client, fmt.Sprintf("http://%s:8080/", backend.LanIP(internalNet)))
	if !strings.Contains(body, "Hello world I am backend") {
		t.Fatalf("got %q", body)
	}
}
||||||
Loading…
Reference in new issue