tstest/natlab/vmtest: add macOS VM snapshot caching for fast test starts
Cache a pre-booted macOS VM snapshot on disk so subsequent test runs restore from the snapshot instead of cold-booting. The snapshot is keyed by the Tart base image digest and a code version constant (macOSSnapshotCodeVersion); bumping either invalidates the cache.

Snapshot preparation (one-time):
- Boot the Tart base image with a NAT NIC (--nat-nic flag)
- Wait for SSH, compile and install cmd/tta as a LaunchDaemon
- TTA polls the host via AF_VSOCK for an IP assignment; during prep the host replies "wait"
- Disconnect NIC, save VM state via SIGINT

Test fast path (cached, ~7s to agent connected):
- APFS clone the snapshot, write test-specific config.json
- Launch Host.app with --disconnected-nic --attach-network --assign-ip
- VZ restores from SaveFile.vzvmsave (~5s with 4GB RAM)
- TTA's vsock poll gets the IP config, sets static IP via ifconfig (bypasses DHCP entirely), switches driver addr to the IP directly (bypasses DNS), and resets the dial context so the reverse-dial reconnects immediately
- TTA agent connects to test driver within ~2s of IP assignment

Key optimizations:
- 4GB RAM instead of 8GB: halves SaveFile.vzvmsave (1.4GB vs 2.4GB), halves restore time (5.5s vs 11s)
- AF_VSOCK IP assignment: bypasses macOS DHCP (~5-7s saved)
- Direct IP dial: bypasses DNS resolution for test-driver.tailscale
- Dial context reset: cancels stale in-flight dials from snapshot
- Kill instead of SIGINT for test VM cleanup (no state save needed)
- Parallel VM launches

Also:
- Add TestDriverIPv4/TestDriverPort constants to vnet
- Add --nat-nic and --assign-ip flags to Host.app
- Fix SIGINT handler: retain DispatchSource globally, use dispatchMain()
- Add vsock listener (port 51011) to Host.app for IP config protocol
- Add disconnectNetwork() to VMController for clean snapshot state
- Fix Makefile: set -o pipefail so xcodebuild failures aren't swallowed

Updates #13038

Change-Id: Icbab73b57af7df3ae96136fb49cda2536310f31b
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
committed by
Brad Fitzpatrick
parent
7b53550fe6
commit
02ffe5baa8
@@ -20,12 +20,69 @@ import (
|
||||
"tailscale.com/tstest/natlab/vnet"
|
||||
)
|
||||
|
||||
// startQEMU launches a QEMU process for the given node.
|
||||
func (e *Env) startQEMU(n *Node) error {
|
||||
if n.os.IsGokrazy {
|
||||
return e.startGokrazyQEMU(n)
|
||||
// gokrazyPlatform boots gokrazy (Linux) VMs via QEMU.
|
||||
type gokrazyPlatform struct{}
|
||||
|
||||
func (gokrazyPlatform) planSteps(e *Env, n *Node) {
|
||||
e.Step("Build gokrazy image")
|
||||
e.Step("Launch QEMU: " + n.name)
|
||||
}
|
||||
|
||||
func (gokrazyPlatform) boot(ctx context.Context, e *Env, n *Node) error {
|
||||
e.gokrazyOnce.Do(func() {
|
||||
step := e.Step("Build gokrazy image")
|
||||
step.Begin()
|
||||
if err := e.ensureGokrazy(ctx); err != nil {
|
||||
step.End(err)
|
||||
e.t.Fatalf("ensureGokrazy: %v", err)
|
||||
}
|
||||
step.End(nil)
|
||||
})
|
||||
|
||||
e.ensureQEMUSocket()
|
||||
|
||||
vmStep := e.Step("Launch QEMU: " + n.name)
|
||||
vmStep.Begin()
|
||||
if err := e.startGokrazyQEMU(n); err != nil {
|
||||
vmStep.End(err)
|
||||
return err
|
||||
}
|
||||
return e.startCloudQEMU(n)
|
||||
vmStep.End(nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// qemuCloudPlatform boots cloud images (Ubuntu, Debian, FreeBSD) via QEMU.
|
||||
type qemuCloudPlatform struct{}
|
||||
|
||||
func (qemuCloudPlatform) planSteps(e *Env, n *Node) {
|
||||
e.Step(fmt.Sprintf("Compile %s_%s binaries", n.os.GOOS(), n.os.GOARCH()))
|
||||
e.Step(fmt.Sprintf("Prepare %s image", n.os.Name))
|
||||
e.Step("Launch QEMU: " + n.name)
|
||||
}
|
||||
|
||||
func (qemuCloudPlatform) boot(ctx context.Context, e *Env, n *Node) error {
|
||||
goos, goarch := n.os.GOOS(), n.os.GOARCH()
|
||||
|
||||
e.ensureCompiled(ctx, goos, goarch)
|
||||
|
||||
imgStep := e.Step(fmt.Sprintf("Prepare %s image", n.os.Name))
|
||||
imgStep.Begin()
|
||||
if err := ensureImage(ctx, n.os); err != nil {
|
||||
imgStep.End(err)
|
||||
return err
|
||||
}
|
||||
imgStep.End(nil)
|
||||
|
||||
e.ensureQEMUSocket()
|
||||
|
||||
vmStep := e.Step("Launch QEMU: " + n.name)
|
||||
vmStep.Begin()
|
||||
if err := e.startCloudQEMU(n); err != nil {
|
||||
vmStep.End(err)
|
||||
return err
|
||||
}
|
||||
vmStep.End(nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// startGokrazyQEMU launches a QEMU process for a gokrazy node.
|
||||
|
||||
+456
-33
@@ -5,31 +5,76 @@ package vmtest
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/netip"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
// macPlatform boots macOS VMs via Tart base images and tailmac Host.app.
|
||||
type macPlatform struct{}
|
||||
|
||||
func (macPlatform) planSteps(e *Env, n *Node) {
|
||||
e.Step("Prepare macOS Tart image")
|
||||
e.Step("Launch macOS VM: " + n.name)
|
||||
}
|
||||
|
||||
func (macPlatform) boot(ctx context.Context, e *Env, n *Node) error {
|
||||
imgStep := e.Step("Prepare macOS Tart image")
|
||||
e.macosSnapshotOnce.Do(func() {
|
||||
imgStep.Begin()
|
||||
e.macosSnapshot = ensureSnapshot(e.t)
|
||||
imgStep.End(nil)
|
||||
})
|
||||
|
||||
e.ensureDgramSocket()
|
||||
|
||||
vmStep := e.Step("Launch macOS VM: " + n.name)
|
||||
vmStep.Begin()
|
||||
if err := e.startTailMacVM(n); err != nil {
|
||||
vmStep.End(err)
|
||||
return err
|
||||
}
|
||||
vmStep.End(nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// tartImage is the OCI reference of the macOS base image. The OCI cache
// path built in ensureTartImage spells out the same registry, repository
// and tag components, so the two must be kept in sync.
const tartImage = "ghcr.io/cirruslabs/macos-tahoe-base:latest"

// macOSSnapshotCodeVersion is bumped when the snapshot preparation logic
// changes in a way that invalidates old snapshots. Old snapshots with a
// different version are cleaned up automatically.
//
// The value is embedded in the snapshot directory name by snapshotCacheKey.
const macOSSnapshotCodeVersion = 5

// tartConfig is the subset of Tart's config.json we need.
//
// Both fields hold base64 text; cloneTartToTailmac decodes them with
// base64.StdEncoding.
type tartConfig struct {
	HardwareModel string `json:"hardwareModel"` // base64
	ECID          string `json:"ecid"`          // base64
}

// tartManifest is the subset of Tart's OCI manifest.json we need.
//
// Only the config digest is read; snapshotCacheKey uses it as part of the
// snapshot cache key.
type tartManifest struct {
	Config struct {
		Digest string `json:"digest"` // e.g. "sha256:3a6cb4eb6201..."
	} `json:"config"`
}
|
||||
|
||||
// ensureTartImage checks that the Tart base image is available, pulling it
|
||||
// if necessary. Returns the path to a directory containing disk.img,
|
||||
// nvram.bin, and config.json.
|
||||
// if necessary. Returns the path to the OCI cache directory containing
|
||||
// disk.img, nvram.bin, config.json, and manifest.json.
|
||||
func ensureTartImage(t testing.TB) string {
|
||||
if _, err := exec.LookPath("tart"); err != nil {
|
||||
t.Skip("tart not installed; skipping macOS VM test")
|
||||
@@ -40,7 +85,6 @@ func ensureTartImage(t testing.TB) string {
|
||||
t.Fatalf("UserHomeDir: %v", err)
|
||||
}
|
||||
|
||||
// Check OCI cache first (from a previous "tart pull").
|
||||
ociDir := filepath.Join(home, ".tart", "cache", "OCIs",
|
||||
"ghcr.io", "cirruslabs", "macos-tahoe-base", "latest")
|
||||
if _, err := os.Stat(filepath.Join(ociDir, "disk.img")); err == nil {
|
||||
@@ -55,7 +99,6 @@ func ensureTartImage(t testing.TB) string {
|
||||
t.Fatalf("tart pull: %v", err)
|
||||
}
|
||||
|
||||
// After pull, the OCI cache should have it.
|
||||
if _, err := os.Stat(filepath.Join(ociDir, "disk.img")); err == nil {
|
||||
return ociDir
|
||||
}
|
||||
@@ -63,6 +106,368 @@ func ensureTartImage(t testing.TB) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// snapshotCacheKey computes a cache key for the macOS VM snapshot.
|
||||
// The key combines the image name, the first 12 hex chars of the Tart
|
||||
// config digest (changes when the upstream image is updated), and the
|
||||
// snapshot code version (changes when our prep logic changes).
|
||||
func snapshotCacheKey(tartDir string) (string, error) {
|
||||
manifestPath := filepath.Join(tartDir, "manifest.json")
|
||||
data, err := os.ReadFile(manifestPath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("reading manifest: %w", err)
|
||||
}
|
||||
var m tartManifest
|
||||
if err := json.Unmarshal(data, &m); err != nil {
|
||||
return "", fmt.Errorf("parsing manifest: %w", err)
|
||||
}
|
||||
digest := m.Config.Digest
|
||||
// Strip "sha256:" prefix and take first 12 hex chars.
|
||||
digest = strings.TrimPrefix(digest, "sha256:")
|
||||
if len(digest) > 12 {
|
||||
digest = digest[:12]
|
||||
}
|
||||
return fmt.Sprintf("snap-tahoe-%s-v%d", digest, macOSSnapshotCodeVersion), nil
|
||||
}
|
||||
|
||||
// macosVMBaseDir reports the directory under the current user's home,
// <home>/.cache/tailscale/vmtest/macos, in which Host.app expects to find
// one sub-directory per VM ID. The directory is not created here. An error
// is returned only when the home directory cannot be determined.
func macosVMBaseDir() (string, error) {
	homeDir, err := os.UserHomeDir()
	if err == nil {
		return filepath.Join(homeDir, ".cache", "tailscale", "vmtest", "macos"), nil
	}
	return "", err
}
|
||||
|
||||
// cleanOldSnapshots removes any snapshot directories for the given image
|
||||
// prefix (e.g. "snap-tahoe") that don't match the current cache key.
|
||||
func cleanOldSnapshots(t testing.TB, imagePrefix, currentKey string) {
|
||||
base, err := macosVMBaseDir()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
matches, _ := filepath.Glob(filepath.Join(base, imagePrefix+"-*"))
|
||||
currentPath := filepath.Join(base, currentKey)
|
||||
for _, m := range matches {
|
||||
if m != currentPath {
|
||||
t.Logf("removing stale snapshot: %s", filepath.Base(m))
|
||||
os.RemoveAll(m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ensureSnapshot returns the path to a cached macOS VM snapshot, creating
|
||||
// one if necessary. The snapshot contains a fully booted VM with
|
||||
// SaveFile.vzvmsave ready for fast restore.
|
||||
func ensureSnapshot(t testing.TB) string {
|
||||
tartDir := ensureTartImage(t)
|
||||
|
||||
key, err := snapshotCacheKey(tartDir)
|
||||
if err != nil {
|
||||
t.Fatalf("snapshot cache key: %v", err)
|
||||
}
|
||||
|
||||
base, err := macosVMBaseDir()
|
||||
if err != nil {
|
||||
t.Fatalf("macOS VM base dir: %v", err)
|
||||
}
|
||||
os.MkdirAll(base, 0755)
|
||||
|
||||
snapDir := filepath.Join(base, key)
|
||||
saveFile := filepath.Join(snapDir, "SaveFile.vzvmsave")
|
||||
if _, err := os.Stat(saveFile); err == nil {
|
||||
t.Logf("using cached macOS snapshot: %s", key)
|
||||
return snapDir
|
||||
}
|
||||
|
||||
// Clean up old snapshots for this image.
|
||||
cleanOldSnapshots(t, "snap-tahoe", key)
|
||||
|
||||
t.Logf("preparing macOS snapshot: %s (this takes ~30s on first run)", key)
|
||||
if err := prepareSnapshot(t, tartDir, snapDir); err != nil {
|
||||
os.RemoveAll(snapDir)
|
||||
t.Fatalf("preparing snapshot: %v", err)
|
||||
}
|
||||
return snapDir
|
||||
}
|
||||
|
||||
// prepareSnapshot creates a new macOS VM snapshot by booting the Tart base
|
||||
// image with a NAT NIC, waiting for SSH, and saving VM state.
|
||||
func prepareSnapshot(t testing.TB, tartDir, snapDir string) error {
|
||||
// The vmID must match the directory name under macosVMBaseDir
|
||||
// because Host.app looks up VM files at <base>/<vmID>/.
|
||||
snapID := filepath.Base(snapDir)
|
||||
|
||||
if err := cloneTartToTailmac(tartDir, snapDir, snapID, "52:cc:cc:cc:ce:01", "/dev/null"); err != nil {
|
||||
return fmt.Errorf("cloning tart: %w", err)
|
||||
}
|
||||
|
||||
modRoot, err := findModRoot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tailmacDir := filepath.Join(modRoot, "tstest", "tailmac", "bin")
|
||||
hostBin := filepath.Join(tailmacDir, "Host.app", "Contents", "MacOS", "Host")
|
||||
if _, err := os.Stat(hostBin); err != nil {
|
||||
return fmt.Errorf("Host.app not found at %s; run 'make all' in tstest/tailmac/", hostBin)
|
||||
}
|
||||
|
||||
// Host.app reads VM files from ~/.cache/tailscale/vmtest/macos/<id>/.
|
||||
// Our snapDir is already under that tree, and the config.json vmID matches.
|
||||
cmd := exec.Command(hostBin, "run", "--id", snapID, "--headless", "--nat-nic")
|
||||
cmd.Env = append(os.Environ(), "NSUnbufferedIO=YES")
|
||||
|
||||
logPath := snapDir + ".prep.log"
|
||||
logFile, err := os.Create(logPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer logFile.Close()
|
||||
cmd.Stdout = logFile
|
||||
cmd.Stderr = logFile
|
||||
devNull, _ := os.Open(os.DevNull)
|
||||
cmd.Stdin = devNull
|
||||
defer devNull.Close()
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return fmt.Errorf("starting Host.app: %w", err)
|
||||
}
|
||||
t.Logf("snapshot prep: launched Host.app (pid %d)", cmd.Process.Pid)
|
||||
|
||||
// Wait for SSH to become available via the NAT NIC.
|
||||
// The VM gets an IP from macOS's vmnet DHCP (typically 192.168.64.x).
|
||||
ip, err := waitForVMIP(t, "52:cc:cc:cc:ce:01", 60*time.Second)
|
||||
if err != nil {
|
||||
cmd.Process.Kill()
|
||||
cmd.Wait()
|
||||
return fmt.Errorf("waiting for VM IP: %w", err)
|
||||
}
|
||||
t.Logf("snapshot prep: VM IP is %s, waiting for SSH...", ip)
|
||||
|
||||
sc, err := waitForSSH(ip, 60*time.Second)
|
||||
if err != nil {
|
||||
cmd.Process.Kill()
|
||||
cmd.Wait()
|
||||
return fmt.Errorf("waiting for SSH: %w", err)
|
||||
}
|
||||
t.Logf("snapshot prep: SSH connected")
|
||||
|
||||
// Compile and install TTA in the macOS VM.
|
||||
t.Logf("snapshot prep: installing TTA...")
|
||||
if err := installTTA(t, sc); err != nil {
|
||||
sc.Close()
|
||||
cmd.Process.Kill()
|
||||
cmd.Wait()
|
||||
return fmt.Errorf("installing TTA: %w", err)
|
||||
}
|
||||
sc.Close()
|
||||
|
||||
// Save VM state by sending SIGINT.
|
||||
t.Logf("snapshot prep: saving VM state...")
|
||||
cmd.Process.Signal(os.Interrupt)
|
||||
done := make(chan error, 1)
|
||||
go func() { done <- cmd.Wait() }()
|
||||
select {
|
||||
case err := <-done:
|
||||
if err != nil {
|
||||
// Host.app exits 0 after saving state, non-zero is unexpected.
|
||||
t.Logf("snapshot prep: Host.app exited with: %v", err)
|
||||
}
|
||||
case <-time.After(60 * time.Second):
|
||||
cmd.Process.Kill()
|
||||
<-done
|
||||
return fmt.Errorf("Host.app did not exit after SIGINT")
|
||||
}
|
||||
|
||||
// Verify the save file was created.
|
||||
saveFile := filepath.Join(snapDir, "SaveFile.vzvmsave")
|
||||
if _, err := os.Stat(saveFile); err != nil {
|
||||
return fmt.Errorf("SaveFile.vzvmsave not found after prep")
|
||||
}
|
||||
t.Logf("snapshot prep: done, saved to %s", filepath.Base(snapDir))
|
||||
os.Remove(logPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// installTTA compiles TTA for darwin/arm64 and installs it in the macOS VM
// as a LaunchDaemon via SSH/SCP.
//
// The steps, all performed over the already-connected SSH client sc, are:
//  1. build ./cmd/tta from the module root into a test temp dir;
//  2. upload it to /tmp/tta and move it to /usr/local/bin/tta with sudo;
//  3. upload the com.tailscale.tta LaunchDaemon plist and install it under
//     /Library/LaunchDaemons, owned by root:wheel;
//  4. load the daemon with launchctl and poll until a "tta" process exists.
//
// It returns an error describing the first step that failed.
func installTTA(t testing.TB, sc *ssh.Client) error {
	modRoot, err := findModRoot()
	if err != nil {
		return err
	}

	// Compile TTA for the macOS VM.
	tmpDir := t.TempDir()
	ttaBin := filepath.Join(tmpDir, "tta")
	t.Logf("snapshot prep: compiling TTA for darwin/arm64...")
	buildCmd := exec.Command("go", "build", "-o", ttaBin, "./cmd/tta")
	buildCmd.Dir = modRoot
	// Cross-compile explicitly so the result does not depend on the host's
	// own GOOS/GOARCH/cgo settings.
	buildCmd.Env = append(os.Environ(), "GOOS=darwin", "GOARCH=arm64", "CGO_ENABLED=0")
	if out, err := buildCmd.CombinedOutput(); err != nil {
		return fmt.Errorf("compiling TTA: %v\n%s", err, out)
	}

	// Read the binary.
	ttaData, err := os.ReadFile(ttaBin)
	if err != nil {
		return fmt.Errorf("reading TTA binary: %w", err)
	}
	t.Logf("snapshot prep: TTA binary is %d bytes", len(ttaData))

	// SCP the TTA binary to the VM via a temp file (admin user can't write /usr/local/bin directly).
	if err := scpFile(sc, ttaData, "/tmp/tta", 0755); err != nil {
		return fmt.Errorf("uploading TTA: %w", err)
	}
	// "echo admin | sudo -S" feeds the password to sudo on stdin.
	if err := runSSHCmd(sc, "echo admin | sudo -S mv /tmp/tta /usr/local/bin/tta"); err != nil {
		return fmt.Errorf("moving TTA to /usr/local/bin: %w", err)
	}

	// Install the LaunchDaemon plist.
	// RunAtLoad and KeepAlive are both set; stdout and stderr both go to
	// /tmp/tta.log inside the guest.
	plist := `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>com.tailscale.tta</string>
<key>ProgramArguments</key>
<array>
<string>/usr/local/bin/tta</string>
</array>
<key>RunAtLoad</key>
<true/>
<key>KeepAlive</key>
<true/>
<key>StandardOutPath</key>
<string>/tmp/tta.log</string>
<key>StandardErrorPath</key>
<string>/tmp/tta.log</string>
</dict>
</plist>
`
	// Same two-stage approach as the binary: upload to /tmp, then move into
	// place and fix ownership with sudo.
	if err := scpFile(sc, []byte(plist), "/tmp/com.tailscale.tta.plist", 0644); err != nil {
		return fmt.Errorf("uploading plist: %w", err)
	}
	if err := runSSHCmd(sc, "echo admin | sudo -S mv /tmp/com.tailscale.tta.plist /Library/LaunchDaemons/ && echo admin | sudo -S chown root:wheel /Library/LaunchDaemons/com.tailscale.tta.plist"); err != nil {
		return fmt.Errorf("installing plist: %w", err)
	}

	// Load the LaunchDaemon.
	if err := runSSHCmd(sc, "echo admin | sudo -S launchctl load /Library/LaunchDaemons/com.tailscale.tta.plist"); err != nil {
		return fmt.Errorf("loading LaunchDaemon: %w", err)
	}

	// Wait for TTA to start: up to 20 polls, 250ms apart.
	for range 20 {
		if err := runSSHCmd(sc, "pgrep -x tta"); err == nil {
			break
		}
		time.Sleep(250 * time.Millisecond)
	}
	// One final check decides the outcome, whether the loop broke early or
	// ran out of attempts.
	if err := runSSHCmd(sc, "pgrep -x tta"); err != nil {
		return fmt.Errorf("TTA not running after install: %w", err)
	}
	t.Logf("snapshot prep: TTA installed and running")
	return nil
}
|
||||
|
||||
// scpFile uploads data to a remote path via SSH/SCP.
|
||||
func scpFile(sc *ssh.Client, data []byte, remotePath string, mode os.FileMode) error {
|
||||
sess, err := sc.NewSession()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer sess.Close()
|
||||
|
||||
// Use a simple shell command to write the file.
|
||||
cmd := fmt.Sprintf("cat > %s && chmod %o %s", remotePath, mode, remotePath)
|
||||
sess.Stdin = bytes.NewReader(data)
|
||||
out, err := sess.CombinedOutput(cmd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s: %v: %s", cmd, err, out)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// runSSHCmd runs a command on the SSH client and returns an error if it fails.
|
||||
func runSSHCmd(sc *ssh.Client, cmd string) error {
|
||||
sess, err := sc.NewSession()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer sess.Close()
|
||||
out, err := sess.CombinedOutput(cmd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s: %v: %s", cmd, err, out)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// waitForVMIP polls /var/db/dhcpd_leases once per second for a DHCP lease
// matching the given MAC address (from macOS's vmnet NAT) and returns the
// leased IP. If no matching lease shows up before timeout elapses it
// returns an error naming the (lowercased) MAC and the timeout.
//
// t is accepted for symmetry with the other helpers and is not used.
func waitForVMIP(t testing.TB, mac string, timeout time.Duration) (string, error) {
	mac = strings.ToLower(mac)
	leaseMAC := leaseHWAddress(mac)

	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		// The file may not exist yet or may be mid-rewrite; just retry.
		if data, err := os.ReadFile("/var/db/dhcpd_leases"); err == nil {
			if ip := findLeaseIP(string(data), leaseMAC); ip != "" {
				return ip, nil
			}
		}
		time.Sleep(time.Second)
	}
	return "", fmt.Errorf("no DHCP lease for MAC %s after %v", mac, timeout)
}

// leaseHWAddress converts a colon-separated MAC into the form used in the
// lease file's hw_address field: "1,xx:xx:xx:xx:xx:xx" with leading zeros
// stripped from each octet (e.g. "1,52:cc:cc:cc:ce:1" instead of
// "1,52:cc:cc:cc:ce:01"). An all-zero octet becomes "0".
func leaseHWAddress(mac string) string {
	parts := strings.Split(mac, ":")
	for i, p := range parts {
		parts[i] = strings.TrimLeft(p, "0")
		if parts[i] == "" {
			parts[i] = "0"
		}
	}
	return "1," + strings.Join(parts, ":")
}

// findLeaseIP scans the text of a lease file for a record whose hw_address
// equals leaseMAC (case-insensitively) and returns that record's
// ip_address, or "" if there is none. Records are delimited by lines
// consisting of "{" and "}"; within a record the two fields may appear in
// either order.
func findLeaseIP(leases, leaseMAC string) string {
	var (
		ip      string
		matched bool
	)
	for _, line := range strings.Split(leases, "\n") {
		line = strings.TrimSpace(line)
		if line == "{" || line == "}" {
			// Record boundary: never carry fields across records.
			ip, matched = "", false
			continue
		}
		if v, ok := strings.CutPrefix(line, "ip_address="); ok {
			ip = v
		} else if v, ok := strings.CutPrefix(line, "hw_address="); ok {
			if strings.EqualFold(v, leaseMAC) {
				matched = true
			}
		}
		if matched && ip != "" {
			return ip
		}
	}
	return ""
}
|
||||
|
||||
// waitForSSH retries SSH connection to the given IP until it succeeds or
|
||||
// the timeout expires.
|
||||
func waitForSSH(ip string, timeout time.Duration) (*ssh.Client, error) {
|
||||
deadline := time.Now().Add(timeout)
|
||||
addr := net.JoinHostPort(ip, "22")
|
||||
cfg := &ssh.ClientConfig{
|
||||
User: "admin",
|
||||
Auth: []ssh.AuthMethod{ssh.Password("admin")},
|
||||
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
|
||||
Timeout: 2 * time.Second,
|
||||
}
|
||||
for time.Now().Before(deadline) {
|
||||
sc, err := ssh.Dial("tcp", addr, cfg)
|
||||
if err == nil {
|
||||
return sc, nil
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
return nil, fmt.Errorf("SSH to %s timed out after %v", addr, timeout)
|
||||
}
|
||||
|
||||
// ensureTailMac locates the pre-built tailmac Host.app binary.
|
||||
func (e *Env) ensureTailMac() error {
|
||||
modRoot, err := findModRoot()
|
||||
@@ -85,7 +490,6 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
|
||||
return err
|
||||
}
|
||||
|
||||
// Read Tart's config.json for hardware identity.
|
||||
cfgData, err := os.ReadFile(filepath.Join(tartDir, "config.json"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading tart config: %w", err)
|
||||
@@ -95,7 +499,6 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
|
||||
return fmt.Errorf("parsing tart config: %w", err)
|
||||
}
|
||||
|
||||
// Decode and write HardwareModel.
|
||||
hwModel, err := base64.StdEncoding.DecodeString(tc.HardwareModel)
|
||||
if err != nil {
|
||||
return fmt.Errorf("decoding hardwareModel: %w", err)
|
||||
@@ -104,7 +507,6 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
|
||||
return err
|
||||
}
|
||||
|
||||
// Decode and write MachineIdentifier (ECID).
|
||||
ecid, err := base64.StdEncoding.DecodeString(tc.ECID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("decoding ecid: %w", err)
|
||||
@@ -113,22 +515,18 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
|
||||
return err
|
||||
}
|
||||
|
||||
// APFS clone the disk image (nearly instant, copy-on-write).
|
||||
if out, err := exec.Command("cp", "-c", filepath.Join(tartDir, "disk.img"), filepath.Join(cloneDir, "Disk.img")).CombinedOutput(); err != nil {
|
||||
// Fallback to regular copy.
|
||||
if out2, err2 := exec.Command("cp", filepath.Join(tartDir, "disk.img"), filepath.Join(cloneDir, "Disk.img")).CombinedOutput(); err2 != nil {
|
||||
return fmt.Errorf("copying disk: %v: %s (APFS clone: %v: %s)", err2, out2, err, out)
|
||||
}
|
||||
}
|
||||
|
||||
// APFS clone the NVRAM.
|
||||
if out, err := exec.Command("cp", "-c", filepath.Join(tartDir, "nvram.bin"), filepath.Join(cloneDir, "AuxiliaryStorage")).CombinedOutput(); err != nil {
|
||||
if out2, err2 := exec.Command("cp", filepath.Join(tartDir, "nvram.bin"), filepath.Join(cloneDir, "AuxiliaryStorage")).CombinedOutput(); err2 != nil {
|
||||
return fmt.Errorf("copying nvram: %v: %s (APFS clone: %v: %s)", err2, out2, err, out)
|
||||
}
|
||||
}
|
||||
|
||||
// Write tailmac config.json.
|
||||
tmCfg := struct {
|
||||
VMid string `json:"vmID"`
|
||||
ServerSocket string `json:"serverSocket"`
|
||||
@@ -137,17 +535,17 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
|
||||
}{
|
||||
VMid: testID,
|
||||
ServerSocket: dgramSock,
|
||||
MemorySize: 8 * 1024 * 1024 * 1024,
|
||||
MemorySize: 4 * 1024 * 1024 * 1024,
|
||||
Mac: mac,
|
||||
}
|
||||
tmData, _ := json.MarshalIndent(tmCfg, "", " ")
|
||||
return os.WriteFile(filepath.Join(cloneDir, "config.json"), tmData, 0644)
|
||||
}
|
||||
|
||||
// startTailMacVM clones a Tart base image and launches it via tailmac
|
||||
// Host.app in headless mode, connected to vnet's dgram socket.
|
||||
// startTailMacVM restores a macOS VM from a cached snapshot and launches it
|
||||
// via tailmac Host.app in headless mode, connected to vnet's dgram socket.
|
||||
func (e *Env) startTailMacVM(n *Node) error {
|
||||
tartDir := ensureTartImage(e.t)
|
||||
snapDir := e.macosSnapshot
|
||||
|
||||
if err := e.ensureTailMac(); err != nil {
|
||||
return err
|
||||
@@ -156,7 +554,6 @@ func (e *Env) startTailMacVM(n *Node) error {
|
||||
testID := fmt.Sprintf("vmtest-%s-%d", n.name, os.Getpid())
|
||||
|
||||
// Host.app expects VM files under ~/.cache/tailscale/vmtest/macos/<id>/
|
||||
// (hardcoded in Config.swift's vmBundleURL).
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return fmt.Errorf("UserHomeDir: %w", err)
|
||||
@@ -165,16 +562,51 @@ func (e *Env) startTailMacVM(n *Node) error {
|
||||
os.MkdirAll(vmBase, 0755)
|
||||
cloneDir := filepath.Join(vmBase, testID)
|
||||
|
||||
mac := n.vnetNode.NICMac(0)
|
||||
e.t.Logf("[%s] cloning Tart image -> %s (mac=%s)", n.name, testID, mac)
|
||||
if err := cloneTartToTailmac(tartDir, cloneDir, testID, mac.String(), e.dgramSockAddr); err != nil {
|
||||
return fmt.Errorf("cloning tart VM: %w", err)
|
||||
// APFS clone the entire snapshot directory (includes SaveFile.vzvmsave).
|
||||
e.t.Logf("[%s] cloning snapshot -> %s", n.name, testID)
|
||||
if out, err := exec.Command("cp", "-c", "-r", snapDir, cloneDir).CombinedOutput(); err != nil {
|
||||
if out2, err2 := exec.Command("cp", "-r", snapDir, cloneDir).CombinedOutput(); err2 != nil {
|
||||
return fmt.Errorf("cloning snapshot: %v: %s (APFS clone: %v: %s)", err2, out2, err, out)
|
||||
}
|
||||
}
|
||||
e.t.Cleanup(func() { os.RemoveAll(cloneDir) })
|
||||
|
||||
// Write test-specific config.json with the vnet MAC and dgram socket.
|
||||
mac := n.vnetNode.NICMac(0)
|
||||
cfg := struct {
|
||||
VMid string `json:"vmID"`
|
||||
ServerSocket string `json:"serverSocket"`
|
||||
MemorySize uint64 `json:"memorySize"`
|
||||
Mac string `json:"mac"`
|
||||
}{
|
||||
VMid: testID,
|
||||
ServerSocket: e.dgramSockAddr,
|
||||
MemorySize: 8 * 1024 * 1024 * 1024,
|
||||
Mac: mac.String(),
|
||||
}
|
||||
cfgData, _ := json.MarshalIndent(cfg, "", " ")
|
||||
if err := os.WriteFile(filepath.Join(cloneDir, "config.json"), cfgData, 0644); err != nil {
|
||||
return fmt.Errorf("writing config.json: %w", err)
|
||||
}
|
||||
|
||||
// Launch Host.app with disconnected NIC + hot-swap to vnet.
|
||||
// Host.app will restore from SaveFile.vzvmsave (fast), then
|
||||
// hot-swap the NIC to the vnet dgram socket.
|
||||
hostBin := filepath.Join(e.tailmacDir, "Host.app", "Contents", "MacOS", "Host")
|
||||
|
||||
// Compute the node's IP and gateway for static assignment via vsock.
|
||||
nodeIP := n.vnetNode.LanIP(n.nets[0])
|
||||
// The gateway is the network's base address (e.g. 192.168.1.1 for /24).
|
||||
// We derive it from the node IP: same /24 prefix, host part = 1.
|
||||
gwIP := nodeIP.As4()
|
||||
gwIP[3] = 1
|
||||
gateway := netip.AddrFrom4(gwIP)
|
||||
|
||||
args := []string{
|
||||
"run", "--id", testID, "--headless",
|
||||
"--disconnected-nic",
|
||||
"--attach-network", e.dgramSockAddr,
|
||||
"--assign-ip", fmt.Sprintf("%s/255.255.255.0/%s", nodeIP, gateway),
|
||||
}
|
||||
|
||||
wantScreenshots := *vmtestWeb != ""
|
||||
@@ -191,8 +623,6 @@ func (e *Env) startTailMacVM(n *Node) error {
|
||||
cmd := exec.Command(hostBin, args...)
|
||||
cmd.Env = append(os.Environ(), "NSUnbufferedIO=YES")
|
||||
|
||||
// If screenshots are enabled, we need to parse stdout for the
|
||||
// SCREENSHOT_PORT=<port> line, while also logging everything to file.
|
||||
var stdoutPipe io.ReadCloser
|
||||
if wantScreenshots {
|
||||
stdoutPipe, err = cmd.StdoutPipe()
|
||||
@@ -219,14 +649,13 @@ func (e *Env) startTailMacVM(n *Node) error {
|
||||
}
|
||||
e.t.Logf("[%s] launched tailmac (pid %d), log: %s", n.name, cmd.Process.Pid, logPath)
|
||||
|
||||
// Parse screenshot port from stdout and start polling goroutine.
|
||||
if wantScreenshots {
|
||||
screenshotPortCh := make(chan int, 1)
|
||||
go func() {
|
||||
scanner := bufio.NewScanner(stdoutPipe)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
fmt.Fprintln(logFile, line) // tee to log file
|
||||
fmt.Fprintln(logFile, line)
|
||||
if port := 0; strings.HasPrefix(line, "SCREENSHOT_PORT=") {
|
||||
fmt.Sscanf(line, "SCREENSHOT_PORT=%d", &port)
|
||||
if port > 0 {
|
||||
@@ -252,15 +681,9 @@ func (e *Env) startTailMacVM(n *Node) error {
|
||||
clientSock := fmt.Sprintf("/tmp/qemu-dgram-%s.sock", testID)
|
||||
|
||||
e.t.Cleanup(func() {
|
||||
cmd.Process.Signal(os.Interrupt)
|
||||
done := make(chan error, 1)
|
||||
go func() { done <- cmd.Wait() }()
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(15 * time.Second):
|
||||
cmd.Process.Kill()
|
||||
<-done
|
||||
}
|
||||
// Kill immediately — no need to save state for ephemeral test clones.
|
||||
cmd.Process.Kill()
|
||||
cmd.Wait()
|
||||
devNull.Close()
|
||||
logFile.Close()
|
||||
os.Remove(clientSock)
|
||||
|
||||
+233
-258
@@ -78,16 +78,28 @@ type Env struct {
|
||||
gokrazyKernel string // path to gokrazy kernel
|
||||
|
||||
// tailmac-specific paths (macOS VMs)
|
||||
tailmacDir string // path to tailmac bin/ directory containing Host.app
|
||||
tailmacDir string // path to tailmac bin/ directory containing Host.app
|
||||
macosSnapshot string // path to cached macOS VM snapshot directory
|
||||
macosSnapshotOnce sync.Once
|
||||
|
||||
qemuProcs []*exec.Cmd // launched QEMU processes
|
||||
|
||||
sameTailnetUser bool // all nodes register as the same Tailnet user
|
||||
|
||||
// Shared resource initialization (sync.Once for things multiple nodes share).
|
||||
vnetOnce sync.Once
|
||||
gokrazyOnce sync.Once
|
||||
qemuSockOnce sync.Once
|
||||
dgramSockOnce sync.Once
|
||||
compileMu sync.Mutex
|
||||
compiled set.Set[string]
|
||||
|
||||
// Web UI support.
|
||||
ctx context.Context // cancelled when test ends
|
||||
eventBus *EventBus
|
||||
testStatus *TestStatus
|
||||
stepsMu sync.Mutex
|
||||
stepsByKey map[string]*Step
|
||||
steps []*Step
|
||||
|
||||
nodeStatusMu sync.Mutex
|
||||
@@ -102,6 +114,28 @@ func (e *Env) logVerbosef(format string, args ...any) {
|
||||
}
|
||||
}
|
||||
|
||||
// vmPlatform defines how a VM type boots. Each OS image type (gokrazy,
// cloud, macOS) implements this interface.
//
// Node.platform selects the implementation for a given node.
type vmPlatform interface {
	// planSteps registers steps with the web UI in a dry-run pass.
	// Implementations call e.Step with the same keys that boot later uses,
	// so both passes refer to the same Step objects.
	planSteps(e *Env, n *Node)

	// boot does everything needed to get this node running: ensure images,
	// compile binaries, set up sockets, launch VM. Called concurrently.
	// It returns a non-nil error if the node could not be started.
	boot(ctx context.Context, e *Env, n *Node) error
}
|
||||
|
||||
// platform returns the vmPlatform for this node's OS type.
|
||||
func (n *Node) platform() vmPlatform {
|
||||
if n.os.IsMacOS {
|
||||
return macPlatform{}
|
||||
}
|
||||
if n.os.IsGokrazy {
|
||||
return gokrazyPlatform{}
|
||||
}
|
||||
return qemuCloudPlatform{}
|
||||
}
|
||||
|
||||
// AddStep declares an expected stage of the test. The web UI shows all steps
|
||||
// from the start, tracking their progress. Call before or during the test.
|
||||
// Returns a *Step whose Begin/End methods drive the progress display.
|
||||
@@ -115,6 +149,28 @@ func (e *Env) AddStep(name string) *Step {
|
||||
return s
|
||||
}
|
||||
|
||||
// Step returns a step by key, creating it if it doesn't exist.
|
||||
// Safe for concurrent use. Both planSteps (dry-run) and boot (real-run)
|
||||
// call this to get the same Step object.
|
||||
func (e *Env) Step(key string) *Step {
|
||||
e.stepsMu.Lock()
|
||||
defer e.stepsMu.Unlock()
|
||||
if s, ok := e.stepsByKey[key]; ok {
|
||||
return s
|
||||
}
|
||||
s := &Step{
|
||||
name: key,
|
||||
index: len(e.steps),
|
||||
env: e,
|
||||
}
|
||||
e.steps = append(e.steps, s)
|
||||
if e.stepsByKey == nil {
|
||||
e.stepsByKey = make(map[string]*Step)
|
||||
}
|
||||
e.stepsByKey[key] = s
|
||||
return s
|
||||
}
|
||||
|
||||
// Steps returns all declared steps in order.
|
||||
func (e *Env) Steps() []*Step {
|
||||
return e.steps
|
||||
@@ -397,16 +453,14 @@ func SNATSubnetRoutes(v bool) nodeOptSNATSubnetRoutes { return nodeOptSNATSubnet
|
||||
// The webserver responds with "Hello world I am <nodename> from <sourceIP>" on all requests.
|
||||
func WebServer(port int) nodeOptWebServer { return nodeOptWebServer(port) }
|
||||
|
||||
// Start initializes the virtual network, builds/downloads images, compiles
|
||||
// binaries, launches QEMU processes, and waits for all TTA agents to connect.
|
||||
// It should be called after all AddNetwork/AddNode calls.
|
||||
// Start initializes the virtual network, boots all VMs in parallel, and waits
|
||||
// for all TTA agents to connect. It should be called after all AddNetwork/AddNode calls.
|
||||
func (e *Env) Start() {
|
||||
t := e.t
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
|
||||
t.Cleanup(cancel)
|
||||
e.ctx = ctx
|
||||
|
||||
// Initialize node status and start web UI as early as possible.
|
||||
e.initNodeStatus()
|
||||
e.maybeStartWebServer()
|
||||
|
||||
@@ -414,8 +468,6 @@ func (e *Env) Start() {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Resolve --test-version up front (e.g. "unstable" -> "1.97.255") so all
|
||||
// platforms see the same concrete version.
|
||||
if *testVersion != "" {
|
||||
v, err := resolveTestVersion(ctx, *testVersion)
|
||||
if err != nil {
|
||||
@@ -425,267 +477,42 @@ func (e *Env) Start() {
|
||||
t.Logf("using Tailscale release version %s (from --test-version=%q)", v, *testVersion)
|
||||
}
|
||||
|
||||
// Check if any macOS nodes are present; if so, verify prerequisites.
|
||||
hasMacOS := false
|
||||
for _, n := range e.nodes {
|
||||
if n.os.IsMacOS {
|
||||
hasMacOS = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if hasMacOS {
|
||||
if runtime.GOOS != "darwin" || runtime.GOARCH != "arm64" {
|
||||
if n.os.IsMacOS && (runtime.GOOS != "darwin" || runtime.GOARCH != "arm64") {
|
||||
t.Skip("macOS VM tests require macOS arm64 host")
|
||||
}
|
||||
}
|
||||
|
||||
// Determine which GOOS/GOARCH pairs need compiled binaries (non-gokrazy,
|
||||
// non-macOS images). Gokrazy has binaries built-in. macOS VMs don't use
|
||||
// compiled binaries (no TTA agent).
|
||||
type platform struct{ goos, goarch string }
|
||||
needPlatform := set.Set[platform]{}
|
||||
for _, n := range e.nodes {
|
||||
if !n.os.IsGokrazy && !n.os.IsMacOS {
|
||||
needPlatform.Add(platform{n.os.GOOS(), n.os.GOARCH()})
|
||||
}
|
||||
}
|
||||
|
||||
// Declare framework steps for the web UI.
|
||||
// User-declared steps (from AddStep before Start) get moved to the end
|
||||
// so framework steps (compile, image, QEMU, etc.) come first.
|
||||
// Dry-run: let each platform register its steps with the web UI.
|
||||
userSteps := e.steps
|
||||
e.steps = nil
|
||||
|
||||
compileSteps := map[platform]*Step{}
|
||||
for _, p := range needPlatform.Slice() {
|
||||
compileSteps[p] = e.AddStep(fmt.Sprintf("Compile %s_%s binaries", p.goos, p.goarch))
|
||||
}
|
||||
imageSteps := map[string]*Step{} // keyed by OS name
|
||||
didOS := set.Set[string]{} // dedup by image name
|
||||
for _, n := range e.nodes {
|
||||
if didOS.Contains(n.os.Name) {
|
||||
continue
|
||||
}
|
||||
didOS.Add(n.os.Name)
|
||||
if n.os.IsMacOS {
|
||||
imageSteps[n.os.Name] = e.AddStep("Prepare macOS Tart image")
|
||||
} else if n.os.IsGokrazy {
|
||||
imageSteps["gokrazy"] = e.AddStep("Build gokrazy image")
|
||||
} else {
|
||||
imageSteps[n.os.Name] = e.AddStep(fmt.Sprintf("Prepare %s image", n.os.Name))
|
||||
}
|
||||
n.platform().planSteps(e, n)
|
||||
}
|
||||
vnetStep := e.AddStep("Create virtual network")
|
||||
|
||||
vmSteps := map[string]*Step{}
|
||||
agentSteps := map[string]*Step{}
|
||||
tsUpSteps := map[string]*Step{}
|
||||
for _, n := range e.nodes {
|
||||
if n.os.IsMacOS {
|
||||
vmSteps[n.name] = e.AddStep(fmt.Sprintf("Launch macOS VM: %s", n.name))
|
||||
} else {
|
||||
vmSteps[n.name] = e.AddStep(fmt.Sprintf("Launch QEMU: %s", n.name))
|
||||
}
|
||||
if !n.noAgent {
|
||||
agentSteps[n.name] = e.AddStep(fmt.Sprintf("Wait for agent: %s", n.name))
|
||||
e.Step("Wait for agent: " + n.name)
|
||||
}
|
||||
if n.joinTailnet {
|
||||
tsUpSteps[n.name] = e.AddStep(fmt.Sprintf("Tailscale up: %s", n.name))
|
||||
e.Step("Tailscale up: " + n.name)
|
||||
}
|
||||
}
|
||||
|
||||
// Re-append user-declared steps after all framework steps.
|
||||
for _, s := range userSteps {
|
||||
s.index = len(e.steps)
|
||||
e.steps = append(e.steps, s)
|
||||
}
|
||||
|
||||
// Compile binaries and download/build images in parallel.
|
||||
// Any failure cancels the others via the errgroup context.
|
||||
eg, egCtx := errgroup.WithContext(ctx)
|
||||
for _, p := range needPlatform.Slice() {
|
||||
step := compileSteps[p]
|
||||
eg.Go(func() error {
|
||||
step.Begin()
|
||||
err := e.compileBinariesForOS(egCtx, p.goos, p.goarch)
|
||||
if err != nil {
|
||||
step.End(err)
|
||||
return err
|
||||
}
|
||||
step.End(nil)
|
||||
return nil
|
||||
// Boot all nodes in parallel. Each platform handles its own
|
||||
// dependencies (image prep, binary compilation, socket setup)
|
||||
// via sync.Once, so independent work overlaps naturally.
|
||||
var bootEg errgroup.Group
|
||||
for _, n := range e.nodes {
|
||||
bootEg.Go(func() error {
|
||||
return n.platform().boot(ctx, e, n)
|
||||
})
|
||||
}
|
||||
didOS = set.Set[string]{} // reset for second pass
|
||||
for _, n := range e.nodes {
|
||||
if didOS.Contains(n.os.Name) {
|
||||
continue
|
||||
}
|
||||
didOS.Add(n.os.Name)
|
||||
if n.os.IsMacOS {
|
||||
step := imageSteps[n.os.Name]
|
||||
eg.Go(func() error {
|
||||
step.Begin()
|
||||
ensureTartImage(t)
|
||||
step.End(nil)
|
||||
return nil
|
||||
})
|
||||
} else if n.os.IsGokrazy {
|
||||
step := imageSteps["gokrazy"]
|
||||
eg.Go(func() error {
|
||||
step.Begin()
|
||||
err := e.ensureGokrazy(egCtx)
|
||||
if err != nil {
|
||||
step.End(err)
|
||||
return err
|
||||
}
|
||||
step.End(nil)
|
||||
return nil
|
||||
})
|
||||
} else {
|
||||
step := imageSteps[n.os.Name]
|
||||
osImg := n.os
|
||||
eg.Go(func() error {
|
||||
step.Begin()
|
||||
err := ensureImage(egCtx, osImg)
|
||||
if err != nil {
|
||||
step.End(err)
|
||||
return err
|
||||
}
|
||||
step.End(nil)
|
||||
return nil
|
||||
})
|
||||
}
|
||||
}
|
||||
if err := eg.Wait(); err != nil {
|
||||
t.Fatalf("setup: %v", err)
|
||||
}
|
||||
|
||||
// Create the vnet server.
|
||||
vnetStep.Begin()
|
||||
var err error
|
||||
e.server, err = vnet.New(&e.cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("vnet.New: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { e.server.Close() })
|
||||
|
||||
// Register DHCP event callback for the web UI.
|
||||
e.server.SetDHCPCallback(func(mac vnet.MAC, nodeNum int, msgType layers.DHCPMsgType, ip netip.Addr) {
|
||||
name := e.nodeNameByNum(nodeNum)
|
||||
nicIdx := e.nicIndexForMAC(name, mac)
|
||||
ipStr := ip.String()
|
||||
switch msgType {
|
||||
case layers.DHCPMsgTypeDiscover:
|
||||
e.setNodeDHCP(name, nicIdx, "Discover sent")
|
||||
e.eventBus.Publish(VMEvent{
|
||||
NodeName: name,
|
||||
Type: EventDHCPDiscover,
|
||||
Message: "DHCP Discover sent",
|
||||
NIC: nicIdx,
|
||||
})
|
||||
case layers.DHCPMsgTypeOffer:
|
||||
e.setNodeDHCP(name, nicIdx, "Offered "+ipStr)
|
||||
e.eventBus.Publish(VMEvent{
|
||||
NodeName: name,
|
||||
Type: EventDHCPOffer,
|
||||
Message: "DHCP Offer received",
|
||||
Detail: ipStr,
|
||||
NIC: nicIdx,
|
||||
})
|
||||
case layers.DHCPMsgTypeRequest:
|
||||
e.setNodeDHCP(name, nicIdx, "Requesting "+ipStr)
|
||||
e.eventBus.Publish(VMEvent{
|
||||
NodeName: name,
|
||||
Type: EventDHCPRequest,
|
||||
Message: "DHCP Request sent",
|
||||
Detail: ipStr,
|
||||
NIC: nicIdx,
|
||||
})
|
||||
case layers.DHCPMsgTypeAck:
|
||||
e.setNodeDHCP(name, nicIdx, "Got "+ipStr)
|
||||
e.eventBus.Publish(VMEvent{
|
||||
NodeName: name,
|
||||
Type: EventDHCPAck,
|
||||
Message: "DHCP Ack: got " + ipStr,
|
||||
Detail: ipStr,
|
||||
NIC: nicIdx,
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
if e.sameTailnetUser {
|
||||
e.server.ControlServer().AllNodesSameUser = true
|
||||
}
|
||||
|
||||
// Register compiled binaries with the file server VIP.
|
||||
// Binaries are registered at <goos>_<goarch>/<name> (e.g. "linux_amd64/tta").
|
||||
for _, p := range needPlatform.Slice() {
|
||||
dir := p.goos + "_" + p.goarch
|
||||
for _, name := range []string{"tta", "tailscale", "tailscaled"} {
|
||||
data, err := os.ReadFile(filepath.Join(e.binDir, dir, name))
|
||||
if err != nil {
|
||||
t.Fatalf("reading compiled %s/%s: %v", dir, name, err)
|
||||
}
|
||||
e.server.RegisterFile(dir+"/"+name, data)
|
||||
}
|
||||
}
|
||||
vnetStep.End(nil)
|
||||
|
||||
// Cloud-init config is delivered via local seed ISOs (created in startCloudQEMU),
|
||||
// not via the cloud-init HTTP VIP, because network-config must be available
|
||||
// during init-local before systemd-networkd-wait-online blocks.
|
||||
|
||||
// Start Unix stream socket listener (for QEMU VMs).
|
||||
e.sockAddr = filepath.Join(e.tempDir, "vnet.sock")
|
||||
srv, err := net.Listen("unix", e.sockAddr)
|
||||
if err != nil {
|
||||
t.Fatalf("listen unix: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { srv.Close() })
|
||||
|
||||
go func() {
|
||||
for {
|
||||
c, err := srv.Accept()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
go e.server.ServeUnixConn(c.(*net.UnixConn), vnet.ProtocolQEMU)
|
||||
}
|
||||
}()
|
||||
|
||||
// Start Unix dgram socket listener (for macOS VMs via tailmac).
|
||||
// Use /tmp/ instead of the test temp dir because Unix socket paths
|
||||
// are limited to 104 bytes on macOS, and test temp dir paths are long.
|
||||
if hasMacOS {
|
||||
e.dgramSockAddr = fmt.Sprintf("/tmp/vmtest-dgram-%d.sock", os.Getpid())
|
||||
t.Cleanup(func() { os.Remove(e.dgramSockAddr) })
|
||||
dgramAddr, err := net.ResolveUnixAddr("unixgram", e.dgramSockAddr)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve dgram addr: %v", err)
|
||||
}
|
||||
uc, err := net.ListenUnixgram("unixgram", dgramAddr)
|
||||
if err != nil {
|
||||
t.Fatalf("listen unixgram: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { uc.Close() })
|
||||
go e.server.ServeUnixConn(uc, vnet.ProtocolUnixDGRAM)
|
||||
}
|
||||
|
||||
// Launch VM processes.
|
||||
for _, n := range e.nodes {
|
||||
step := vmSteps[n.name]
|
||||
step.Begin()
|
||||
if n.os.IsMacOS {
|
||||
if err := e.startTailMacVM(n); err != nil {
|
||||
t.Fatalf("startTailMacVM(%s): %v", n.name, err)
|
||||
}
|
||||
} else {
|
||||
if err := e.startQEMU(n); err != nil {
|
||||
t.Fatalf("startQEMU(%s): %v", n.name, err)
|
||||
}
|
||||
}
|
||||
step.End(nil)
|
||||
if err := bootEg.Wait(); err != nil {
|
||||
t.Fatalf("boot: %v", err)
|
||||
}
|
||||
|
||||
// Set up agent clients and wait for all agents to connect.
|
||||
@@ -693,25 +520,32 @@ func (e *Env) Start() {
|
||||
if n.noAgent {
|
||||
continue
|
||||
}
|
||||
e.initVnet() // ensure vnet is ready for agent clients
|
||||
n.agent = e.server.NodeAgentClient(n.vnetNode)
|
||||
n.vnetNode.SetClient(n.agent)
|
||||
}
|
||||
|
||||
// Wait for agents, then bring up tailscale.
|
||||
var agentEg errgroup.Group
|
||||
for _, n := range e.nodes {
|
||||
if n.noAgent {
|
||||
continue
|
||||
}
|
||||
agentEg.Go(func() error {
|
||||
aStep := agentSteps[n.name]
|
||||
aStep := e.Step("Wait for agent: " + n.name)
|
||||
aStep.Begin()
|
||||
t.Logf("[%s] waiting for agent...", n.name)
|
||||
st, err := n.agent.Status(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("[%s] agent status: %w", n.name, err)
|
||||
if n.joinTailnet {
|
||||
st, err := n.agent.Status(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("[%s] agent status: %w", n.name, err)
|
||||
}
|
||||
t.Logf("[%s] agent connected, backend state: %s", n.name, st.BackendState)
|
||||
} else {
|
||||
if err := e.waitForAgentConn(ctx, n); err != nil {
|
||||
return fmt.Errorf("[%s] agent connect: %w", n.name, err)
|
||||
}
|
||||
t.Logf("[%s] agent connected (no tailscale)", n.name)
|
||||
}
|
||||
t.Logf("[%s] agent connected, backend state: %s", n.name, st.BackendState)
|
||||
aStep.End(nil)
|
||||
|
||||
if n.vnetNode.HostFirewall() {
|
||||
@@ -721,21 +555,21 @@ func (e *Env) Start() {
|
||||
}
|
||||
|
||||
if n.joinTailnet {
|
||||
tsStep := tsUpSteps[n.name]
|
||||
tsStep := e.Step("Tailscale up: " + n.name)
|
||||
tsStep.Begin()
|
||||
if err := e.tailscaleUp(ctx, n); err != nil {
|
||||
return fmt.Errorf("[%s] tailscale up: %w", n.name, err)
|
||||
}
|
||||
st, err = n.agent.Status(ctx)
|
||||
st2, err := n.agent.Status(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("[%s] status after up: %w", n.name, err)
|
||||
}
|
||||
if st.BackendState != "Running" {
|
||||
return fmt.Errorf("[%s] state = %q, want Running", n.name, st.BackendState)
|
||||
if st2.BackendState != "Running" {
|
||||
return fmt.Errorf("[%s] state = %q, want Running", n.name, st2.BackendState)
|
||||
}
|
||||
ips := fmt.Sprintf("%v", st.Self.TailscaleIPs)
|
||||
ips := fmt.Sprintf("%v", st2.Self.TailscaleIPs)
|
||||
e.setNodeTailscale(n.name, "Running "+ips)
|
||||
t.Logf("[%s] up with %v", n.name, st.Self.TailscaleIPs)
|
||||
t.Logf("[%s] up with %v", n.name, st2.Self.TailscaleIPs)
|
||||
tsStep.End(nil)
|
||||
}
|
||||
|
||||
@@ -1226,6 +1060,147 @@ func (e *Env) nodeScreenshotPort(name string) int {
|
||||
return 0
|
||||
}
|
||||
|
||||
// initVnet creates the vnet server. Called once via sync.Once.
|
||||
func (e *Env) initVnet() {
|
||||
e.vnetOnce.Do(func() {
|
||||
var err error
|
||||
e.server, err = vnet.New(&e.cfg)
|
||||
if err != nil {
|
||||
e.t.Fatalf("vnet.New: %v", err)
|
||||
}
|
||||
e.t.Cleanup(func() { e.server.Close() })
|
||||
|
||||
e.server.SetDHCPCallback(func(mac vnet.MAC, nodeNum int, msgType layers.DHCPMsgType, ip netip.Addr) {
|
||||
name := e.nodeNameByNum(nodeNum)
|
||||
nicIdx := e.nicIndexForMAC(name, mac)
|
||||
ipStr := ip.String()
|
||||
switch msgType {
|
||||
case layers.DHCPMsgTypeDiscover:
|
||||
e.setNodeDHCP(name, nicIdx, "Discover sent")
|
||||
e.eventBus.Publish(VMEvent{NodeName: name, Type: EventDHCPDiscover, Message: "DHCP Discover sent", NIC: nicIdx})
|
||||
case layers.DHCPMsgTypeOffer:
|
||||
e.setNodeDHCP(name, nicIdx, "Offered "+ipStr)
|
||||
e.eventBus.Publish(VMEvent{NodeName: name, Type: EventDHCPOffer, Message: "DHCP Offer received", Detail: ipStr, NIC: nicIdx})
|
||||
case layers.DHCPMsgTypeRequest:
|
||||
e.setNodeDHCP(name, nicIdx, "Requesting "+ipStr)
|
||||
e.eventBus.Publish(VMEvent{NodeName: name, Type: EventDHCPRequest, Message: "DHCP Request sent", Detail: ipStr, NIC: nicIdx})
|
||||
case layers.DHCPMsgTypeAck:
|
||||
e.setNodeDHCP(name, nicIdx, "Got "+ipStr)
|
||||
e.eventBus.Publish(VMEvent{NodeName: name, Type: EventDHCPAck, Message: "DHCP Ack: got " + ipStr, Detail: ipStr, NIC: nicIdx})
|
||||
}
|
||||
})
|
||||
|
||||
if e.sameTailnetUser {
|
||||
e.server.ControlServer().AllNodesSameUser = true
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// ensureQEMUSocket creates the Unix stream socket for QEMU VMs. Called once.
|
||||
func (e *Env) ensureQEMUSocket() {
|
||||
e.qemuSockOnce.Do(func() {
|
||||
e.initVnet()
|
||||
e.sockAddr = filepath.Join(e.tempDir, "vnet.sock")
|
||||
srv, err := net.Listen("unix", e.sockAddr)
|
||||
if err != nil {
|
||||
e.t.Fatalf("listen unix: %v", err)
|
||||
}
|
||||
e.t.Cleanup(func() { srv.Close() })
|
||||
go func() {
|
||||
for {
|
||||
c, err := srv.Accept()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
go e.server.ServeUnixConn(c.(*net.UnixConn), vnet.ProtocolQEMU)
|
||||
}
|
||||
}()
|
||||
})
|
||||
}
|
||||
|
||||
// ensureDgramSocket creates the Unix dgram socket for macOS VMs. Called once.
|
||||
func (e *Env) ensureDgramSocket() {
|
||||
e.dgramSockOnce.Do(func() {
|
||||
e.initVnet()
|
||||
e.dgramSockAddr = fmt.Sprintf("/tmp/vmtest-dgram-%d.sock", os.Getpid())
|
||||
e.t.Cleanup(func() { os.Remove(e.dgramSockAddr) })
|
||||
dgramAddr, err := net.ResolveUnixAddr("unixgram", e.dgramSockAddr)
|
||||
if err != nil {
|
||||
e.t.Fatalf("resolve dgram addr: %v", err)
|
||||
}
|
||||
uc, err := net.ListenUnixgram("unixgram", dgramAddr)
|
||||
if err != nil {
|
||||
e.t.Fatalf("listen unixgram: %v", err)
|
||||
}
|
||||
e.t.Cleanup(func() { uc.Close() })
|
||||
go e.server.ServeUnixConn(uc, vnet.ProtocolUnixDGRAM)
|
||||
})
|
||||
}
|
||||
|
||||
// ensureCompiled compiles binaries for the given platform and registers them
|
||||
// with the vnet file server. Safe for concurrent use; only compiles once per platform.
|
||||
func (e *Env) ensureCompiled(ctx context.Context, goos, goarch string) {
|
||||
key := goos + "_" + goarch
|
||||
|
||||
e.compileMu.Lock()
|
||||
if e.compiled.Contains(key) {
|
||||
e.compileMu.Unlock()
|
||||
return
|
||||
}
|
||||
e.compileMu.Unlock()
|
||||
|
||||
step := e.Step(fmt.Sprintf("Compile %s_%s binaries", goos, goarch))
|
||||
step.Begin()
|
||||
if err := e.compileBinariesForOS(ctx, goos, goarch); err != nil {
|
||||
step.End(err)
|
||||
e.t.Fatalf("compileBinariesForOS(%s, %s): %v", goos, goarch, err)
|
||||
}
|
||||
step.End(nil)
|
||||
e.registerBinaries(goos, goarch)
|
||||
|
||||
e.compileMu.Lock()
|
||||
e.compiled.Make()
|
||||
e.compiled.Add(key)
|
||||
e.compileMu.Unlock()
|
||||
}
|
||||
|
||||
// registerBinaries registers compiled binaries with the vnet file server.
|
||||
// Safe for concurrent use.
|
||||
func (e *Env) registerBinaries(goos, goarch string) {
|
||||
e.initVnet()
|
||||
dir := goos + "_" + goarch
|
||||
for _, name := range []string{"tta", "tailscale", "tailscaled"} {
|
||||
data, err := os.ReadFile(filepath.Join(e.binDir, dir, name))
|
||||
if err != nil {
|
||||
e.t.Fatalf("reading compiled %s/%s: %v", dir, name, err)
|
||||
}
|
||||
e.server.RegisterFile(dir+"/"+name, data)
|
||||
}
|
||||
}
|
||||
|
||||
// waitForAgentConn waits for a TTA agent to connect by issuing a simple
|
||||
// HTTP GET to the root endpoint, without requiring tailscaled.
|
||||
func (e *Env) waitForAgentConn(ctx context.Context, n *Node) error {
|
||||
for {
|
||||
reqCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
|
||||
req, err := http.NewRequestWithContext(reqCtx, "GET", "http://unused/", nil)
|
||||
if err != nil {
|
||||
cancel()
|
||||
return err
|
||||
}
|
||||
res, err := n.agent.HTTPClient.Do(req)
|
||||
cancel()
|
||||
if err == nil {
|
||||
res.Body.Close()
|
||||
return nil
|
||||
}
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
// Agent returns the node's TTA agent client, or nil if NoAgent is set.
|
||||
func (n *Node) Agent() *vnet.NodeAgentClient {
|
||||
return n.agent
|
||||
|
||||
@@ -26,16 +26,32 @@ func TestMacOSAndLinuxCanPing(t *testing.T) {
|
||||
vmtest.DontJoinTailnet())
|
||||
macos := env.AddNode("macos", lan,
|
||||
vmtest.OS(vmtest.MacOS),
|
||||
vmtest.DontJoinTailnet(),
|
||||
vmtest.NoAgent())
|
||||
vmtest.DontJoinTailnet())
|
||||
|
||||
env.Start()
|
||||
|
||||
// Ping from Linux (which has TTA) to macOS (which just responds to ICMP).
|
||||
// LANPing retries until the macOS VM has booted and acquired a DHCP lease.
|
||||
env.LANPing(linux, macos.LanIP(lan))
|
||||
}
|
||||
|
||||
func TestTwoMacOSVMsCanPing(t *testing.T) {
|
||||
env := vmtest.New(t)
|
||||
|
||||
lan := env.AddNetwork("192.168.1.1/24")
|
||||
|
||||
mac1 := env.AddNode("mac1", lan,
|
||||
vmtest.OS(vmtest.MacOS),
|
||||
vmtest.DontJoinTailnet())
|
||||
mac2 := env.AddNode("mac2", lan,
|
||||
vmtest.OS(vmtest.MacOS),
|
||||
vmtest.DontJoinTailnet())
|
||||
|
||||
env.Start()
|
||||
|
||||
// Both macOS VMs have TTA. Ping from mac1 to mac2 and vice versa.
|
||||
env.LANPing(mac1, mac2.LanIP(lan))
|
||||
env.LANPing(mac2, mac1.LanIP(lan))
|
||||
}
|
||||
|
||||
// TestSubnetRouter runs the shared subnet-router test body
// (testSubnetRouterForOS) with the vmtest.Ubuntu2404 OS image.
func TestSubnetRouter(t *testing.T) {
	testSubnetRouterForOS(t, vmtest.Ubuntu2404)
}
|
||||
|
||||
Reference in New Issue
Block a user