tka: keep the CompactionDefaults alongside the other limits #6

Merged
codinget merged 216 commits from upstream/2026-05-18 into main 2026-05-18 21:22:49 +02:00
11 changed files with 1063 additions and 318 deletions
Showing only changes of commit 02ffe5baa8 - Show all commits
+135
View File
@@ -0,0 +1,135 @@
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
//go:build darwin
package main
import (
"encoding/json"
"fmt"
"log"
"net"
"os/exec"
"strconv"
"time"
"unsafe"
"golang.org/x/sys/unix"
"tailscale.com/tstest/natlab/vnet"
)
const (
afVSOCK = 40 // AF_VSOCK on macOS
vmaddrCIDHost = 2 // VMADDR_CID_HOST
vsockPort = 51011 // port for IP assignment protocol
)
// sockaddrVM is the Go equivalent of struct sockaddr_vm from <sys/vsock.h>.
type sockaddrVM struct {
Len uint8
Family uint8
Reserved1 uint16
Port uint32
CID uint32
}
type netConfig struct {
IP string `json:"ip"`
Mask string `json:"mask"`
GW string `json:"gw"`
}
// startIPAssignLoop starts a background goroutine that polls the host
// via the virtio socket for an IP assignment. When the host responds
// with a JSON config (rather than "wait"), TTA sets the IP statically
// using ifconfig and stops polling.
func startIPAssignLoop() {
go ipAssignLoop()
}
func ipAssignLoop() {
log.Printf("ipassign: starting vsock poll loop")
var lastErr string
for attempt := 0; ; attempt++ {
resp, err := askHostForIP()
if err != nil {
if e := err.Error(); e != lastErr {
log.Printf("ipassign: attempt %d: %v", attempt, err)
lastErr = e
}
time.Sleep(500 * time.Millisecond)
continue
}
if resp == "wait" {
time.Sleep(500 * time.Millisecond)
continue
}
var nc netConfig
if err := json.Unmarshal([]byte(resp), &nc); err != nil {
log.Printf("ipassign: bad config: %v", err)
time.Sleep(500 * time.Millisecond)
continue
}
if err := setStaticIP(nc); err != nil {
log.Printf("ipassign: %v", err)
time.Sleep(500 * time.Millisecond)
continue
}
log.Printf("ipassign: configured en0 with %s/%s gw %s", nc.IP, nc.Mask, nc.GW)
// Switch the driver address from the DNS name to the IP directly
// (avoids DNS resolution delay) and kick the dial-out loop so it
// retries immediately with the new address.
ipAddr := net.JoinHostPort(vnet.TestDriverIPv4().String(), strconv.Itoa(vnet.TestDriverPort))
*driverAddr = ipAddr
log.Printf("ipassign: switched driver addr to %s", ipAddr)
resetDialCancels()
return
}
}
// askHostForIP connects to the host via AF_VSOCK and reads the response.
func askHostForIP() (string, error) {
fd, err := unix.Socket(afVSOCK, unix.SOCK_STREAM, 0)
if err != nil {
return "", fmt.Errorf("socket: %w", err)
}
defer unix.Close(fd)
// Set a short connect+read timeout via SO_RCVTIMEO.
tv := unix.Timeval{Sec: 1}
unix.SetsockoptTimeval(fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &tv)
addr := sockaddrVM{
Len: uint8(unsafe.Sizeof(sockaddrVM{})),
Family: afVSOCK,
Port: vsockPort,
CID: vmaddrCIDHost,
}
_, _, errno := unix.RawSyscall(unix.SYS_CONNECT, uintptr(fd),
uintptr(unsafe.Pointer(&addr)), unsafe.Sizeof(addr))
if errno != 0 {
return "", fmt.Errorf("connect: %w", errno)
}
var buf [1024]byte
n, err := unix.Read(fd, buf[:])
if err != nil {
return "", fmt.Errorf("read: %w", err)
}
return string(buf[:n]), nil
}
// setStaticIP configures en0 with a static IP address and default route.
func setStaticIP(nc netConfig) error {
out, err := exec.Command("ifconfig", "en0", nc.IP, "netmask", nc.Mask, "up").CombinedOutput()
if err != nil {
return fmt.Errorf("ifconfig: %v: %s", err, out)
}
out, err = exec.Command("route", "add", "default", nc.GW).CombinedOutput()
if err != nil {
return fmt.Errorf("route add: %v: %s", err, out)
}
return nil
}
+14
View File
@@ -0,0 +1,14 @@
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
//go:build !darwin
package main
// startIPAssignLoop is a no-op on non-macOS platforms.
// macOS VMs use vsock-based IP assignment to bypass slow DHCP.
func startIPAssignLoop() {}
// Reference resetDialCancels to prevent unused-function lint errors.
// It's called from ipassign_darwin.go on macOS builds.
var _ = resetDialCancels
+46 -2
View File
@@ -105,6 +105,10 @@ func main() {
}
flag.Parse()
// On macOS VMs, start polling the host via vsock for an IP assignment.
// This bypasses DHCP for near-instant network configuration.
startIPAssignLoop()
debug := false
if distro.Get() == distro.Gokrazy {
cmdLine, _ := os.ReadFile("/proc/cmdline")
@@ -408,12 +412,48 @@ func main() {
revSt.runDialOutLoop(conns)
}
// dialCancels tracks cancel funcs for in-flight connect() and sleep contexts.
// resetDialCancels cancels them all so the dial loop retries immediately.
var (
dialCancelMu sync.Mutex
dialCancels set.HandleSet[context.CancelFunc]
)
// registerDialCancel adds a cancel func and returns a handle for removal.
func registerDialCancel(cancel context.CancelFunc) set.Handle {
dialCancelMu.Lock()
defer dialCancelMu.Unlock()
return dialCancels.Add(cancel)
}
// unregisterDialCancel removes a previously registered cancel func.
func unregisterDialCancel(h set.Handle) {
dialCancelMu.Lock()
defer dialCancelMu.Unlock()
delete(dialCancels, h)
}
// resetDialCancels cancels all in-flight connect and sleep contexts,
// causing the dial loop to retry immediately with the updated driver address.
func resetDialCancels() {
dialCancelMu.Lock()
defer dialCancelMu.Unlock()
for h, cancel := range dialCancels {
cancel()
delete(dialCancels, h)
}
}
func connect() (net.Conn, error) {
d := net.Dialer{
Control: bypassControlFunc,
}
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()
h := registerDialCancel(cancel)
defer func() {
cancel()
unregisterDialCancel(h)
}()
c, err := d.DialContext(ctx, "tcp", *driverAddr)
if err != nil {
return nil, err
@@ -510,7 +550,11 @@ func (s *revDialState) runDialOutLoop(conns chan<- net.Conn) {
log.Printf("[dial-driver] connect failure: %v", s)
}
lastErr = s
time.Sleep(time.Second)
sleepCtx, sleepCancel := context.WithTimeout(context.Background(), time.Second)
h := registerDialCancel(sleepCancel)
<-sleepCtx.Done()
sleepCancel()
unregisterDialCancel(h)
continue
}
if !connected {
+62 -5
View File
@@ -20,12 +20,69 @@ import (
"tailscale.com/tstest/natlab/vnet"
)
// startQEMU launches a QEMU process for the given node.
func (e *Env) startQEMU(n *Node) error {
if n.os.IsGokrazy {
return e.startGokrazyQEMU(n)
// gokrazyPlatform boots gokrazy (Linux) VMs via QEMU.
type gokrazyPlatform struct{}
func (gokrazyPlatform) planSteps(e *Env, n *Node) {
e.Step("Build gokrazy image")
e.Step("Launch QEMU: " + n.name)
}
func (gokrazyPlatform) boot(ctx context.Context, e *Env, n *Node) error {
e.gokrazyOnce.Do(func() {
step := e.Step("Build gokrazy image")
step.Begin()
if err := e.ensureGokrazy(ctx); err != nil {
step.End(err)
e.t.Fatalf("ensureGokrazy: %v", err)
}
step.End(nil)
})
e.ensureQEMUSocket()
vmStep := e.Step("Launch QEMU: " + n.name)
vmStep.Begin()
if err := e.startGokrazyQEMU(n); err != nil {
vmStep.End(err)
return err
}
return e.startCloudQEMU(n)
vmStep.End(nil)
return nil
}
// qemuCloudPlatform boots cloud images (Ubuntu, Debian, FreeBSD) via QEMU.
type qemuCloudPlatform struct{}
func (qemuCloudPlatform) planSteps(e *Env, n *Node) {
e.Step(fmt.Sprintf("Compile %s_%s binaries", n.os.GOOS(), n.os.GOARCH()))
e.Step(fmt.Sprintf("Prepare %s image", n.os.Name))
e.Step("Launch QEMU: " + n.name)
}
func (qemuCloudPlatform) boot(ctx context.Context, e *Env, n *Node) error {
goos, goarch := n.os.GOOS(), n.os.GOARCH()
e.ensureCompiled(ctx, goos, goarch)
imgStep := e.Step(fmt.Sprintf("Prepare %s image", n.os.Name))
imgStep.Begin()
if err := ensureImage(ctx, n.os); err != nil {
imgStep.End(err)
return err
}
imgStep.End(nil)
e.ensureQEMUSocket()
vmStep := e.Step("Launch QEMU: " + n.name)
vmStep.Begin()
if err := e.startCloudQEMU(n); err != nil {
vmStep.End(err)
return err
}
vmStep.End(nil)
return nil
}
// startGokrazyQEMU launches a QEMU process for a gokrazy node.
+456 -33
View File
@@ -5,31 +5,76 @@ package vmtest
import (
"bufio"
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"net/netip"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"time"
"golang.org/x/crypto/ssh"
)
// macPlatform boots macOS VMs via Tart base images and tailmac Host.app.
type macPlatform struct{}
func (macPlatform) planSteps(e *Env, n *Node) {
e.Step("Prepare macOS Tart image")
e.Step("Launch macOS VM: " + n.name)
}
func (macPlatform) boot(ctx context.Context, e *Env, n *Node) error {
imgStep := e.Step("Prepare macOS Tart image")
e.macosSnapshotOnce.Do(func() {
imgStep.Begin()
e.macosSnapshot = ensureSnapshot(e.t)
imgStep.End(nil)
})
e.ensureDgramSocket()
vmStep := e.Step("Launch macOS VM: " + n.name)
vmStep.Begin()
if err := e.startTailMacVM(n); err != nil {
vmStep.End(err)
return err
}
vmStep.End(nil)
return nil
}
const tartImage = "ghcr.io/cirruslabs/macos-tahoe-base:latest"
// macOSSnapshotCodeVersion is bumped when the snapshot preparation logic
// changes in a way that invalidates old snapshots. Old snapshots with a
// different version are cleaned up automatically.
const macOSSnapshotCodeVersion = 5
// tartConfig is the subset of Tart's config.json we need.
type tartConfig struct {
HardwareModel string `json:"hardwareModel"` // base64
ECID string `json:"ecid"` // base64
}
// tartManifest is the subset of Tart's OCI manifest.json we need.
type tartManifest struct {
Config struct {
Digest string `json:"digest"` // e.g. "sha256:3a6cb4eb6201..."
} `json:"config"`
}
// ensureTartImage checks that the Tart base image is available, pulling it
// if necessary. Returns the path to a directory containing disk.img,
// nvram.bin, and config.json.
// if necessary. Returns the path to the OCI cache directory containing
// disk.img, nvram.bin, config.json, and manifest.json.
func ensureTartImage(t testing.TB) string {
if _, err := exec.LookPath("tart"); err != nil {
t.Skip("tart not installed; skipping macOS VM test")
@@ -40,7 +85,6 @@ func ensureTartImage(t testing.TB) string {
t.Fatalf("UserHomeDir: %v", err)
}
// Check OCI cache first (from a previous "tart pull").
ociDir := filepath.Join(home, ".tart", "cache", "OCIs",
"ghcr.io", "cirruslabs", "macos-tahoe-base", "latest")
if _, err := os.Stat(filepath.Join(ociDir, "disk.img")); err == nil {
@@ -55,7 +99,6 @@ func ensureTartImage(t testing.TB) string {
t.Fatalf("tart pull: %v", err)
}
// After pull, the OCI cache should have it.
if _, err := os.Stat(filepath.Join(ociDir, "disk.img")); err == nil {
return ociDir
}
@@ -63,6 +106,368 @@ func ensureTartImage(t testing.TB) string {
return ""
}
// snapshotCacheKey computes a cache key for the macOS VM snapshot.
// The key combines the image name, the first 12 hex chars of the Tart
// config digest (changes when the upstream image is updated), and the
// snapshot code version (changes when our prep logic changes).
func snapshotCacheKey(tartDir string) (string, error) {
manifestPath := filepath.Join(tartDir, "manifest.json")
data, err := os.ReadFile(manifestPath)
if err != nil {
return "", fmt.Errorf("reading manifest: %w", err)
}
var m tartManifest
if err := json.Unmarshal(data, &m); err != nil {
return "", fmt.Errorf("parsing manifest: %w", err)
}
digest := m.Config.Digest
// Strip "sha256:" prefix and take first 12 hex chars.
digest = strings.TrimPrefix(digest, "sha256:")
if len(digest) > 12 {
digest = digest[:12]
}
return fmt.Sprintf("snap-tahoe-%s-v%d", digest, macOSSnapshotCodeVersion), nil
}
// macosVMBaseDir returns ~/.cache/tailscale/vmtest/macos/, the directory
// where Host.app expects to find VM directories by ID.
func macosVMBaseDir() (string, error) {
home, err := os.UserHomeDir()
if err != nil {
return "", err
}
return filepath.Join(home, ".cache", "tailscale", "vmtest", "macos"), nil
}
// cleanOldSnapshots removes any snapshot directories for the given image
// prefix (e.g. "snap-tahoe") that don't match the current cache key.
func cleanOldSnapshots(t testing.TB, imagePrefix, currentKey string) {
base, err := macosVMBaseDir()
if err != nil {
return
}
matches, _ := filepath.Glob(filepath.Join(base, imagePrefix+"-*"))
currentPath := filepath.Join(base, currentKey)
for _, m := range matches {
if m != currentPath {
t.Logf("removing stale snapshot: %s", filepath.Base(m))
os.RemoveAll(m)
}
}
}
// ensureSnapshot returns the path to a cached macOS VM snapshot, creating
// one if necessary. The snapshot contains a fully booted VM with
// SaveFile.vzvmsave ready for fast restore.
func ensureSnapshot(t testing.TB) string {
tartDir := ensureTartImage(t)
key, err := snapshotCacheKey(tartDir)
if err != nil {
t.Fatalf("snapshot cache key: %v", err)
}
base, err := macosVMBaseDir()
if err != nil {
t.Fatalf("macOS VM base dir: %v", err)
}
os.MkdirAll(base, 0755)
snapDir := filepath.Join(base, key)
saveFile := filepath.Join(snapDir, "SaveFile.vzvmsave")
if _, err := os.Stat(saveFile); err == nil {
t.Logf("using cached macOS snapshot: %s", key)
return snapDir
}
// Clean up old snapshots for this image.
cleanOldSnapshots(t, "snap-tahoe", key)
t.Logf("preparing macOS snapshot: %s (this takes ~30s on first run)", key)
if err := prepareSnapshot(t, tartDir, snapDir); err != nil {
os.RemoveAll(snapDir)
t.Fatalf("preparing snapshot: %v", err)
}
return snapDir
}
// prepareSnapshot creates a new macOS VM snapshot by booting the Tart base
// image with a NAT NIC, waiting for SSH, and saving VM state.
func prepareSnapshot(t testing.TB, tartDir, snapDir string) error {
// The vmID must match the directory name under macosVMBaseDir
// because Host.app looks up VM files at <base>/<vmID>/.
snapID := filepath.Base(snapDir)
if err := cloneTartToTailmac(tartDir, snapDir, snapID, "52:cc:cc:cc:ce:01", "/dev/null"); err != nil {
return fmt.Errorf("cloning tart: %w", err)
}
modRoot, err := findModRoot()
if err != nil {
return err
}
tailmacDir := filepath.Join(modRoot, "tstest", "tailmac", "bin")
hostBin := filepath.Join(tailmacDir, "Host.app", "Contents", "MacOS", "Host")
if _, err := os.Stat(hostBin); err != nil {
return fmt.Errorf("Host.app not found at %s; run 'make all' in tstest/tailmac/", hostBin)
}
// Host.app reads VM files from ~/.cache/tailscale/vmtest/macos/<id>/.
// Our snapDir is already under that tree, and the config.json vmID matches.
cmd := exec.Command(hostBin, "run", "--id", snapID, "--headless", "--nat-nic")
cmd.Env = append(os.Environ(), "NSUnbufferedIO=YES")
logPath := snapDir + ".prep.log"
logFile, err := os.Create(logPath)
if err != nil {
return err
}
defer logFile.Close()
cmd.Stdout = logFile
cmd.Stderr = logFile
devNull, _ := os.Open(os.DevNull)
cmd.Stdin = devNull
defer devNull.Close()
if err := cmd.Start(); err != nil {
return fmt.Errorf("starting Host.app: %w", err)
}
t.Logf("snapshot prep: launched Host.app (pid %d)", cmd.Process.Pid)
// Wait for SSH to become available via the NAT NIC.
// The VM gets an IP from macOS's vmnet DHCP (typically 192.168.64.x).
ip, err := waitForVMIP(t, "52:cc:cc:cc:ce:01", 60*time.Second)
if err != nil {
cmd.Process.Kill()
cmd.Wait()
return fmt.Errorf("waiting for VM IP: %w", err)
}
t.Logf("snapshot prep: VM IP is %s, waiting for SSH...", ip)
sc, err := waitForSSH(ip, 60*time.Second)
if err != nil {
cmd.Process.Kill()
cmd.Wait()
return fmt.Errorf("waiting for SSH: %w", err)
}
t.Logf("snapshot prep: SSH connected")
// Compile and install TTA in the macOS VM.
t.Logf("snapshot prep: installing TTA...")
if err := installTTA(t, sc); err != nil {
sc.Close()
cmd.Process.Kill()
cmd.Wait()
return fmt.Errorf("installing TTA: %w", err)
}
sc.Close()
// Save VM state by sending SIGINT.
t.Logf("snapshot prep: saving VM state...")
cmd.Process.Signal(os.Interrupt)
done := make(chan error, 1)
go func() { done <- cmd.Wait() }()
select {
case err := <-done:
if err != nil {
// Host.app exits 0 after saving state, non-zero is unexpected.
t.Logf("snapshot prep: Host.app exited with: %v", err)
}
case <-time.After(60 * time.Second):
cmd.Process.Kill()
<-done
return fmt.Errorf("Host.app did not exit after SIGINT")
}
// Verify the save file was created.
saveFile := filepath.Join(snapDir, "SaveFile.vzvmsave")
if _, err := os.Stat(saveFile); err != nil {
return fmt.Errorf("SaveFile.vzvmsave not found after prep")
}
t.Logf("snapshot prep: done, saved to %s", filepath.Base(snapDir))
os.Remove(logPath)
return nil
}
// installTTA compiles TTA for darwin/arm64 and installs it in the macOS VM
// as a LaunchDaemon via SSH/SCP.
func installTTA(t testing.TB, sc *ssh.Client) error {
modRoot, err := findModRoot()
if err != nil {
return err
}
// Compile TTA for the macOS VM.
tmpDir := t.TempDir()
ttaBin := filepath.Join(tmpDir, "tta")
t.Logf("snapshot prep: compiling TTA for darwin/arm64...")
buildCmd := exec.Command("go", "build", "-o", ttaBin, "./cmd/tta")
buildCmd.Dir = modRoot
buildCmd.Env = append(os.Environ(), "GOOS=darwin", "GOARCH=arm64", "CGO_ENABLED=0")
if out, err := buildCmd.CombinedOutput(); err != nil {
return fmt.Errorf("compiling TTA: %v\n%s", err, out)
}
// Read the binary.
ttaData, err := os.ReadFile(ttaBin)
if err != nil {
return fmt.Errorf("reading TTA binary: %w", err)
}
t.Logf("snapshot prep: TTA binary is %d bytes", len(ttaData))
// SCP the TTA binary to the VM via a temp file (admin user can't write /usr/local/bin directly).
if err := scpFile(sc, ttaData, "/tmp/tta", 0755); err != nil {
return fmt.Errorf("uploading TTA: %w", err)
}
if err := runSSHCmd(sc, "echo admin | sudo -S mv /tmp/tta /usr/local/bin/tta"); err != nil {
return fmt.Errorf("moving TTA to /usr/local/bin: %w", err)
}
// Install the LaunchDaemon plist.
plist := `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>com.tailscale.tta</string>
<key>ProgramArguments</key>
<array>
<string>/usr/local/bin/tta</string>
</array>
<key>RunAtLoad</key>
<true/>
<key>KeepAlive</key>
<true/>
<key>StandardOutPath</key>
<string>/tmp/tta.log</string>
<key>StandardErrorPath</key>
<string>/tmp/tta.log</string>
</dict>
</plist>
`
if err := scpFile(sc, []byte(plist), "/tmp/com.tailscale.tta.plist", 0644); err != nil {
return fmt.Errorf("uploading plist: %w", err)
}
if err := runSSHCmd(sc, "echo admin | sudo -S mv /tmp/com.tailscale.tta.plist /Library/LaunchDaemons/ && echo admin | sudo -S chown root:wheel /Library/LaunchDaemons/com.tailscale.tta.plist"); err != nil {
return fmt.Errorf("installing plist: %w", err)
}
// Load the LaunchDaemon.
if err := runSSHCmd(sc, "echo admin | sudo -S launchctl load /Library/LaunchDaemons/com.tailscale.tta.plist"); err != nil {
return fmt.Errorf("loading LaunchDaemon: %w", err)
}
// Wait for TTA to start.
for range 20 {
if err := runSSHCmd(sc, "pgrep -x tta"); err == nil {
break
}
time.Sleep(250 * time.Millisecond)
}
if err := runSSHCmd(sc, "pgrep -x tta"); err != nil {
return fmt.Errorf("TTA not running after install: %w", err)
}
t.Logf("snapshot prep: TTA installed and running")
return nil
}
// scpFile uploads data to a remote path via SSH/SCP.
func scpFile(sc *ssh.Client, data []byte, remotePath string, mode os.FileMode) error {
sess, err := sc.NewSession()
if err != nil {
return err
}
defer sess.Close()
// Use a simple shell command to write the file.
cmd := fmt.Sprintf("cat > %s && chmod %o %s", remotePath, mode, remotePath)
sess.Stdin = bytes.NewReader(data)
out, err := sess.CombinedOutput(cmd)
if err != nil {
return fmt.Errorf("%s: %v: %s", cmd, err, out)
}
return nil
}
// runSSHCmd runs a command on the SSH client and returns an error if it fails.
func runSSHCmd(sc *ssh.Client, cmd string) error {
sess, err := sc.NewSession()
if err != nil {
return err
}
defer sess.Close()
out, err := sess.CombinedOutput(cmd)
if err != nil {
return fmt.Errorf("%s: %v: %s", cmd, err, out)
}
return nil
}
// waitForVMIP polls /var/db/dhcpd_leases for a DHCP lease matching the
// given MAC address (from macOS's vmnet NAT). Returns the IP.
func waitForVMIP(t testing.TB, mac string, timeout time.Duration) (string, error) {
// Normalize MAC format: vmnet leases use "1,xx:xx:xx:xx:xx:xx" format
// with leading zeros stripped from each octet (e.g. "1,52:cc:cc:cc:ce:1"
// instead of "1,52:cc:cc:cc:ce:01").
mac = strings.ToLower(mac)
parts := strings.Split(mac, ":")
for i, p := range parts {
parts[i] = strings.TrimLeft(p, "0")
if parts[i] == "" {
parts[i] = "0"
}
}
leaseMAC := "1," + strings.Join(parts, ":")
deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
data, err := os.ReadFile("/var/db/dhcpd_leases")
if err == nil {
// Parse the plist-like lease file.
lines := strings.Split(string(data), "\n")
var currentIP string
for _, line := range lines {
line = strings.TrimSpace(line)
if strings.HasPrefix(line, "ip_address=") {
currentIP = strings.TrimPrefix(line, "ip_address=")
}
if strings.HasPrefix(line, "hw_address=") {
hw := strings.TrimPrefix(line, "hw_address=")
if strings.ToLower(hw) == leaseMAC && currentIP != "" {
return currentIP, nil
}
}
if line == "}" {
currentIP = ""
}
}
}
time.Sleep(time.Second)
}
return "", fmt.Errorf("no DHCP lease for MAC %s after %v", mac, timeout)
}
// waitForSSH retries SSH connection to the given IP until it succeeds or
// the timeout expires.
func waitForSSH(ip string, timeout time.Duration) (*ssh.Client, error) {
deadline := time.Now().Add(timeout)
addr := net.JoinHostPort(ip, "22")
cfg := &ssh.ClientConfig{
User: "admin",
Auth: []ssh.AuthMethod{ssh.Password("admin")},
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
Timeout: 2 * time.Second,
}
for time.Now().Before(deadline) {
sc, err := ssh.Dial("tcp", addr, cfg)
if err == nil {
return sc, nil
}
time.Sleep(time.Second)
}
return nil, fmt.Errorf("SSH to %s timed out after %v", addr, timeout)
}
// ensureTailMac locates the pre-built tailmac Host.app binary.
func (e *Env) ensureTailMac() error {
modRoot, err := findModRoot()
@@ -85,7 +490,6 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
return err
}
// Read Tart's config.json for hardware identity.
cfgData, err := os.ReadFile(filepath.Join(tartDir, "config.json"))
if err != nil {
return fmt.Errorf("reading tart config: %w", err)
@@ -95,7 +499,6 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
return fmt.Errorf("parsing tart config: %w", err)
}
// Decode and write HardwareModel.
hwModel, err := base64.StdEncoding.DecodeString(tc.HardwareModel)
if err != nil {
return fmt.Errorf("decoding hardwareModel: %w", err)
@@ -104,7 +507,6 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
return err
}
// Decode and write MachineIdentifier (ECID).
ecid, err := base64.StdEncoding.DecodeString(tc.ECID)
if err != nil {
return fmt.Errorf("decoding ecid: %w", err)
@@ -113,22 +515,18 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
return err
}
// APFS clone the disk image (nearly instant, copy-on-write).
if out, err := exec.Command("cp", "-c", filepath.Join(tartDir, "disk.img"), filepath.Join(cloneDir, "Disk.img")).CombinedOutput(); err != nil {
// Fallback to regular copy.
if out2, err2 := exec.Command("cp", filepath.Join(tartDir, "disk.img"), filepath.Join(cloneDir, "Disk.img")).CombinedOutput(); err2 != nil {
return fmt.Errorf("copying disk: %v: %s (APFS clone: %v: %s)", err2, out2, err, out)
}
}
// APFS clone the NVRAM.
if out, err := exec.Command("cp", "-c", filepath.Join(tartDir, "nvram.bin"), filepath.Join(cloneDir, "AuxiliaryStorage")).CombinedOutput(); err != nil {
if out2, err2 := exec.Command("cp", filepath.Join(tartDir, "nvram.bin"), filepath.Join(cloneDir, "AuxiliaryStorage")).CombinedOutput(); err2 != nil {
return fmt.Errorf("copying nvram: %v: %s (APFS clone: %v: %s)", err2, out2, err, out)
}
}
// Write tailmac config.json.
tmCfg := struct {
VMid string `json:"vmID"`
ServerSocket string `json:"serverSocket"`
@@ -137,17 +535,17 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
}{
VMid: testID,
ServerSocket: dgramSock,
MemorySize: 8 * 1024 * 1024 * 1024,
MemorySize: 4 * 1024 * 1024 * 1024,
Mac: mac,
}
tmData, _ := json.MarshalIndent(tmCfg, "", " ")
return os.WriteFile(filepath.Join(cloneDir, "config.json"), tmData, 0644)
}
// startTailMacVM clones a Tart base image and launches it via tailmac
// Host.app in headless mode, connected to vnet's dgram socket.
// startTailMacVM restores a macOS VM from a cached snapshot and launches it
// via tailmac Host.app in headless mode, connected to vnet's dgram socket.
func (e *Env) startTailMacVM(n *Node) error {
tartDir := ensureTartImage(e.t)
snapDir := e.macosSnapshot
if err := e.ensureTailMac(); err != nil {
return err
@@ -156,7 +554,6 @@ func (e *Env) startTailMacVM(n *Node) error {
testID := fmt.Sprintf("vmtest-%s-%d", n.name, os.Getpid())
// Host.app expects VM files under ~/.cache/tailscale/vmtest/macos/<id>/
// (hardcoded in Config.swift's vmBundleURL).
home, err := os.UserHomeDir()
if err != nil {
return fmt.Errorf("UserHomeDir: %w", err)
@@ -165,16 +562,51 @@ func (e *Env) startTailMacVM(n *Node) error {
os.MkdirAll(vmBase, 0755)
cloneDir := filepath.Join(vmBase, testID)
mac := n.vnetNode.NICMac(0)
e.t.Logf("[%s] cloning Tart image -> %s (mac=%s)", n.name, testID, mac)
if err := cloneTartToTailmac(tartDir, cloneDir, testID, mac.String(), e.dgramSockAddr); err != nil {
return fmt.Errorf("cloning tart VM: %w", err)
// APFS clone the entire snapshot directory (includes SaveFile.vzvmsave).
e.t.Logf("[%s] cloning snapshot -> %s", n.name, testID)
if out, err := exec.Command("cp", "-c", "-r", snapDir, cloneDir).CombinedOutput(); err != nil {
if out2, err2 := exec.Command("cp", "-r", snapDir, cloneDir).CombinedOutput(); err2 != nil {
return fmt.Errorf("cloning snapshot: %v: %s (APFS clone: %v: %s)", err2, out2, err, out)
}
}
e.t.Cleanup(func() { os.RemoveAll(cloneDir) })
// Write test-specific config.json with the vnet MAC and dgram socket.
mac := n.vnetNode.NICMac(0)
cfg := struct {
VMid string `json:"vmID"`
ServerSocket string `json:"serverSocket"`
MemorySize uint64 `json:"memorySize"`
Mac string `json:"mac"`
}{
VMid: testID,
ServerSocket: e.dgramSockAddr,
MemorySize: 8 * 1024 * 1024 * 1024,
Mac: mac.String(),
}
cfgData, _ := json.MarshalIndent(cfg, "", " ")
if err := os.WriteFile(filepath.Join(cloneDir, "config.json"), cfgData, 0644); err != nil {
return fmt.Errorf("writing config.json: %w", err)
}
// Launch Host.app with disconnected NIC + hot-swap to vnet.
// Host.app will restore from SaveFile.vzvmsave (fast), then
// hot-swap the NIC to the vnet dgram socket.
hostBin := filepath.Join(e.tailmacDir, "Host.app", "Contents", "MacOS", "Host")
// Compute the node's IP and gateway for static assignment via vsock.
nodeIP := n.vnetNode.LanIP(n.nets[0])
// The gateway is the network's base address (e.g. 192.168.1.1 for /24).
// We derive it from the node IP: same /24 prefix, host part = 1.
gwIP := nodeIP.As4()
gwIP[3] = 1
gateway := netip.AddrFrom4(gwIP)
args := []string{
"run", "--id", testID, "--headless",
"--disconnected-nic",
"--attach-network", e.dgramSockAddr,
"--assign-ip", fmt.Sprintf("%s/255.255.255.0/%s", nodeIP, gateway),
}
wantScreenshots := *vmtestWeb != ""
@@ -191,8 +623,6 @@ func (e *Env) startTailMacVM(n *Node) error {
cmd := exec.Command(hostBin, args...)
cmd.Env = append(os.Environ(), "NSUnbufferedIO=YES")
// If screenshots are enabled, we need to parse stdout for the
// SCREENSHOT_PORT=<port> line, while also logging everything to file.
var stdoutPipe io.ReadCloser
if wantScreenshots {
stdoutPipe, err = cmd.StdoutPipe()
@@ -219,14 +649,13 @@ func (e *Env) startTailMacVM(n *Node) error {
}
e.t.Logf("[%s] launched tailmac (pid %d), log: %s", n.name, cmd.Process.Pid, logPath)
// Parse screenshot port from stdout and start polling goroutine.
if wantScreenshots {
screenshotPortCh := make(chan int, 1)
go func() {
scanner := bufio.NewScanner(stdoutPipe)
for scanner.Scan() {
line := scanner.Text()
fmt.Fprintln(logFile, line) // tee to log file
fmt.Fprintln(logFile, line)
if port := 0; strings.HasPrefix(line, "SCREENSHOT_PORT=") {
fmt.Sscanf(line, "SCREENSHOT_PORT=%d", &port)
if port > 0 {
@@ -252,15 +681,9 @@ func (e *Env) startTailMacVM(n *Node) error {
clientSock := fmt.Sprintf("/tmp/qemu-dgram-%s.sock", testID)
e.t.Cleanup(func() {
cmd.Process.Signal(os.Interrupt)
done := make(chan error, 1)
go func() { done <- cmd.Wait() }()
select {
case <-done:
case <-time.After(15 * time.Second):
cmd.Process.Kill()
<-done
}
// Kill immediately — no need to save state for ephemeral test clones.
cmd.Process.Kill()
cmd.Wait()
devNull.Close()
logFile.Close()
os.Remove(clientSock)
+233 -258
View File
@@ -78,16 +78,28 @@ type Env struct {
gokrazyKernel string // path to gokrazy kernel
// tailmac-specific paths (macOS VMs)
tailmacDir string // path to tailmac bin/ directory containing Host.app
tailmacDir string // path to tailmac bin/ directory containing Host.app
macosSnapshot string // path to cached macOS VM snapshot directory
macosSnapshotOnce sync.Once
qemuProcs []*exec.Cmd // launched QEMU processes
sameTailnetUser bool // all nodes register as the same Tailnet user
// Shared resource initialization (sync.Once for things multiple nodes share).
vnetOnce sync.Once
gokrazyOnce sync.Once
qemuSockOnce sync.Once
dgramSockOnce sync.Once
compileMu sync.Mutex
compiled set.Set[string]
// Web UI support.
ctx context.Context // cancelled when test ends
eventBus *EventBus
testStatus *TestStatus
stepsMu sync.Mutex
stepsByKey map[string]*Step
steps []*Step
nodeStatusMu sync.Mutex
@@ -102,6 +114,28 @@ func (e *Env) logVerbosef(format string, args ...any) {
}
}
// vmPlatform defines how a VM type boots. Each OS image type (gokrazy,
// cloud, macOS) implements this interface.
type vmPlatform interface {
// planSteps registers steps with the web UI in a dry-run pass.
planSteps(e *Env, n *Node)
// boot does everything needed to get this node running: ensure images,
// compile binaries, set up sockets, launch VM. Called concurrently.
boot(ctx context.Context, e *Env, n *Node) error
}
// platform returns the vmPlatform for this node's OS type.
func (n *Node) platform() vmPlatform {
if n.os.IsMacOS {
return macPlatform{}
}
if n.os.IsGokrazy {
return gokrazyPlatform{}
}
return qemuCloudPlatform{}
}
// AddStep declares an expected stage of the test. The web UI shows all steps
// from the start, tracking their progress. Call before or during the test.
// Returns a *Step whose Begin/End methods drive the progress display.
@@ -115,6 +149,28 @@ func (e *Env) AddStep(name string) *Step {
return s
}
// Step returns a step by key, creating it if it doesn't exist.
// Safe for concurrent use. Both planSteps (dry-run) and boot (real-run)
// call this to get the same Step object.
func (e *Env) Step(key string) *Step {
e.stepsMu.Lock()
defer e.stepsMu.Unlock()
if s, ok := e.stepsByKey[key]; ok {
return s
}
s := &Step{
name: key,
index: len(e.steps),
env: e,
}
e.steps = append(e.steps, s)
if e.stepsByKey == nil {
e.stepsByKey = make(map[string]*Step)
}
e.stepsByKey[key] = s
return s
}
// Steps returns all declared steps in order.
func (e *Env) Steps() []*Step {
return e.steps
@@ -397,16 +453,14 @@ func SNATSubnetRoutes(v bool) nodeOptSNATSubnetRoutes { return nodeOptSNATSubnet
// The webserver responds with "Hello world I am <nodename> from <sourceIP>" on all requests.
func WebServer(port int) nodeOptWebServer { return nodeOptWebServer(port) }
// Start initializes the virtual network, builds/downloads images, compiles
// binaries, launches QEMU processes, and waits for all TTA agents to connect.
// It should be called after all AddNetwork/AddNode calls.
// Start initializes the virtual network, boots all VMs in parallel, and waits
// for all TTA agents to connect. It should be called after all AddNetwork/AddNode calls.
func (e *Env) Start() {
t := e.t
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
t.Cleanup(cancel)
e.ctx = ctx
// Initialize node status and start web UI as early as possible.
e.initNodeStatus()
e.maybeStartWebServer()
@@ -414,8 +468,6 @@ func (e *Env) Start() {
t.Fatal(err)
}
// Resolve --test-version up front (e.g. "unstable" -> "1.97.255") so all
// platforms see the same concrete version.
if *testVersion != "" {
v, err := resolveTestVersion(ctx, *testVersion)
if err != nil {
@@ -425,267 +477,42 @@ func (e *Env) Start() {
t.Logf("using Tailscale release version %s (from --test-version=%q)", v, *testVersion)
}
// Check if any macOS nodes are present; if so, verify prerequisites.
hasMacOS := false
for _, n := range e.nodes {
if n.os.IsMacOS {
hasMacOS = true
break
}
}
if hasMacOS {
if runtime.GOOS != "darwin" || runtime.GOARCH != "arm64" {
if n.os.IsMacOS && (runtime.GOOS != "darwin" || runtime.GOARCH != "arm64") {
t.Skip("macOS VM tests require macOS arm64 host")
}
}
// Determine which GOOS/GOARCH pairs need compiled binaries (non-gokrazy,
// non-macOS images). Gokrazy has binaries built-in. macOS VMs don't use
// compiled binaries (no TTA agent).
type platform struct{ goos, goarch string }
needPlatform := set.Set[platform]{}
for _, n := range e.nodes {
if !n.os.IsGokrazy && !n.os.IsMacOS {
needPlatform.Add(platform{n.os.GOOS(), n.os.GOARCH()})
}
}
// Declare framework steps for the web UI.
// User-declared steps (from AddStep before Start) get moved to the end
// so framework steps (compile, image, QEMU, etc.) come first.
// Dry-run: let each platform register its steps with the web UI.
userSteps := e.steps
e.steps = nil
compileSteps := map[platform]*Step{}
for _, p := range needPlatform.Slice() {
compileSteps[p] = e.AddStep(fmt.Sprintf("Compile %s_%s binaries", p.goos, p.goarch))
}
imageSteps := map[string]*Step{} // keyed by OS name
didOS := set.Set[string]{} // dedup by image name
for _, n := range e.nodes {
if didOS.Contains(n.os.Name) {
continue
}
didOS.Add(n.os.Name)
if n.os.IsMacOS {
imageSteps[n.os.Name] = e.AddStep("Prepare macOS Tart image")
} else if n.os.IsGokrazy {
imageSteps["gokrazy"] = e.AddStep("Build gokrazy image")
} else {
imageSteps[n.os.Name] = e.AddStep(fmt.Sprintf("Prepare %s image", n.os.Name))
}
n.platform().planSteps(e, n)
}
vnetStep := e.AddStep("Create virtual network")
vmSteps := map[string]*Step{}
agentSteps := map[string]*Step{}
tsUpSteps := map[string]*Step{}
for _, n := range e.nodes {
if n.os.IsMacOS {
vmSteps[n.name] = e.AddStep(fmt.Sprintf("Launch macOS VM: %s", n.name))
} else {
vmSteps[n.name] = e.AddStep(fmt.Sprintf("Launch QEMU: %s", n.name))
}
if !n.noAgent {
agentSteps[n.name] = e.AddStep(fmt.Sprintf("Wait for agent: %s", n.name))
e.Step("Wait for agent: " + n.name)
}
if n.joinTailnet {
tsUpSteps[n.name] = e.AddStep(fmt.Sprintf("Tailscale up: %s", n.name))
e.Step("Tailscale up: " + n.name)
}
}
// Re-append user-declared steps after all framework steps.
for _, s := range userSteps {
s.index = len(e.steps)
e.steps = append(e.steps, s)
}
// Compile binaries and download/build images in parallel.
// Any failure cancels the others via the errgroup context.
eg, egCtx := errgroup.WithContext(ctx)
for _, p := range needPlatform.Slice() {
step := compileSteps[p]
eg.Go(func() error {
step.Begin()
err := e.compileBinariesForOS(egCtx, p.goos, p.goarch)
if err != nil {
step.End(err)
return err
}
step.End(nil)
return nil
// Boot all nodes in parallel. Each platform handles its own
// dependencies (image prep, binary compilation, socket setup)
// via sync.Once, so independent work overlaps naturally.
var bootEg errgroup.Group
for _, n := range e.nodes {
bootEg.Go(func() error {
return n.platform().boot(ctx, e, n)
})
}
didOS = set.Set[string]{} // reset for second pass
for _, n := range e.nodes {
if didOS.Contains(n.os.Name) {
continue
}
didOS.Add(n.os.Name)
if n.os.IsMacOS {
step := imageSteps[n.os.Name]
eg.Go(func() error {
step.Begin()
ensureTartImage(t)
step.End(nil)
return nil
})
} else if n.os.IsGokrazy {
step := imageSteps["gokrazy"]
eg.Go(func() error {
step.Begin()
err := e.ensureGokrazy(egCtx)
if err != nil {
step.End(err)
return err
}
step.End(nil)
return nil
})
} else {
step := imageSteps[n.os.Name]
osImg := n.os
eg.Go(func() error {
step.Begin()
err := ensureImage(egCtx, osImg)
if err != nil {
step.End(err)
return err
}
step.End(nil)
return nil
})
}
}
if err := eg.Wait(); err != nil {
t.Fatalf("setup: %v", err)
}
// Create the vnet server.
vnetStep.Begin()
var err error
e.server, err = vnet.New(&e.cfg)
if err != nil {
t.Fatalf("vnet.New: %v", err)
}
t.Cleanup(func() { e.server.Close() })
// Register DHCP event callback for the web UI.
e.server.SetDHCPCallback(func(mac vnet.MAC, nodeNum int, msgType layers.DHCPMsgType, ip netip.Addr) {
name := e.nodeNameByNum(nodeNum)
nicIdx := e.nicIndexForMAC(name, mac)
ipStr := ip.String()
switch msgType {
case layers.DHCPMsgTypeDiscover:
e.setNodeDHCP(name, nicIdx, "Discover sent")
e.eventBus.Publish(VMEvent{
NodeName: name,
Type: EventDHCPDiscover,
Message: "DHCP Discover sent",
NIC: nicIdx,
})
case layers.DHCPMsgTypeOffer:
e.setNodeDHCP(name, nicIdx, "Offered "+ipStr)
e.eventBus.Publish(VMEvent{
NodeName: name,
Type: EventDHCPOffer,
Message: "DHCP Offer received",
Detail: ipStr,
NIC: nicIdx,
})
case layers.DHCPMsgTypeRequest:
e.setNodeDHCP(name, nicIdx, "Requesting "+ipStr)
e.eventBus.Publish(VMEvent{
NodeName: name,
Type: EventDHCPRequest,
Message: "DHCP Request sent",
Detail: ipStr,
NIC: nicIdx,
})
case layers.DHCPMsgTypeAck:
e.setNodeDHCP(name, nicIdx, "Got "+ipStr)
e.eventBus.Publish(VMEvent{
NodeName: name,
Type: EventDHCPAck,
Message: "DHCP Ack: got " + ipStr,
Detail: ipStr,
NIC: nicIdx,
})
}
})
if e.sameTailnetUser {
e.server.ControlServer().AllNodesSameUser = true
}
// Register compiled binaries with the file server VIP.
// Binaries are registered at <goos>_<goarch>/<name> (e.g. "linux_amd64/tta").
for _, p := range needPlatform.Slice() {
dir := p.goos + "_" + p.goarch
for _, name := range []string{"tta", "tailscale", "tailscaled"} {
data, err := os.ReadFile(filepath.Join(e.binDir, dir, name))
if err != nil {
t.Fatalf("reading compiled %s/%s: %v", dir, name, err)
}
e.server.RegisterFile(dir+"/"+name, data)
}
}
vnetStep.End(nil)
// Cloud-init config is delivered via local seed ISOs (created in startCloudQEMU),
// not via the cloud-init HTTP VIP, because network-config must be available
// during init-local before systemd-networkd-wait-online blocks.
// Start Unix stream socket listener (for QEMU VMs).
e.sockAddr = filepath.Join(e.tempDir, "vnet.sock")
srv, err := net.Listen("unix", e.sockAddr)
if err != nil {
t.Fatalf("listen unix: %v", err)
}
t.Cleanup(func() { srv.Close() })
go func() {
for {
c, err := srv.Accept()
if err != nil {
return
}
go e.server.ServeUnixConn(c.(*net.UnixConn), vnet.ProtocolQEMU)
}
}()
// Start Unix dgram socket listener (for macOS VMs via tailmac).
// Use /tmp/ instead of the test temp dir because Unix socket paths
// are limited to 104 bytes on macOS, and test temp dir paths are long.
if hasMacOS {
e.dgramSockAddr = fmt.Sprintf("/tmp/vmtest-dgram-%d.sock", os.Getpid())
t.Cleanup(func() { os.Remove(e.dgramSockAddr) })
dgramAddr, err := net.ResolveUnixAddr("unixgram", e.dgramSockAddr)
if err != nil {
t.Fatalf("resolve dgram addr: %v", err)
}
uc, err := net.ListenUnixgram("unixgram", dgramAddr)
if err != nil {
t.Fatalf("listen unixgram: %v", err)
}
t.Cleanup(func() { uc.Close() })
go e.server.ServeUnixConn(uc, vnet.ProtocolUnixDGRAM)
}
// Launch VM processes.
for _, n := range e.nodes {
step := vmSteps[n.name]
step.Begin()
if n.os.IsMacOS {
if err := e.startTailMacVM(n); err != nil {
t.Fatalf("startTailMacVM(%s): %v", n.name, err)
}
} else {
if err := e.startQEMU(n); err != nil {
t.Fatalf("startQEMU(%s): %v", n.name, err)
}
}
step.End(nil)
if err := bootEg.Wait(); err != nil {
t.Fatalf("boot: %v", err)
}
// Set up agent clients and wait for all agents to connect.
@@ -693,25 +520,32 @@ func (e *Env) Start() {
if n.noAgent {
continue
}
e.initVnet() // ensure vnet is ready for agent clients
n.agent = e.server.NodeAgentClient(n.vnetNode)
n.vnetNode.SetClient(n.agent)
}
// Wait for agents, then bring up tailscale.
var agentEg errgroup.Group
for _, n := range e.nodes {
if n.noAgent {
continue
}
agentEg.Go(func() error {
aStep := agentSteps[n.name]
aStep := e.Step("Wait for agent: " + n.name)
aStep.Begin()
t.Logf("[%s] waiting for agent...", n.name)
st, err := n.agent.Status(ctx)
if err != nil {
return fmt.Errorf("[%s] agent status: %w", n.name, err)
if n.joinTailnet {
st, err := n.agent.Status(ctx)
if err != nil {
return fmt.Errorf("[%s] agent status: %w", n.name, err)
}
t.Logf("[%s] agent connected, backend state: %s", n.name, st.BackendState)
} else {
if err := e.waitForAgentConn(ctx, n); err != nil {
return fmt.Errorf("[%s] agent connect: %w", n.name, err)
}
t.Logf("[%s] agent connected (no tailscale)", n.name)
}
t.Logf("[%s] agent connected, backend state: %s", n.name, st.BackendState)
aStep.End(nil)
if n.vnetNode.HostFirewall() {
@@ -721,21 +555,21 @@ func (e *Env) Start() {
}
if n.joinTailnet {
tsStep := tsUpSteps[n.name]
tsStep := e.Step("Tailscale up: " + n.name)
tsStep.Begin()
if err := e.tailscaleUp(ctx, n); err != nil {
return fmt.Errorf("[%s] tailscale up: %w", n.name, err)
}
st, err = n.agent.Status(ctx)
st2, err := n.agent.Status(ctx)
if err != nil {
return fmt.Errorf("[%s] status after up: %w", n.name, err)
}
if st.BackendState != "Running" {
return fmt.Errorf("[%s] state = %q, want Running", n.name, st.BackendState)
if st2.BackendState != "Running" {
return fmt.Errorf("[%s] state = %q, want Running", n.name, st2.BackendState)
}
ips := fmt.Sprintf("%v", st.Self.TailscaleIPs)
ips := fmt.Sprintf("%v", st2.Self.TailscaleIPs)
e.setNodeTailscale(n.name, "Running "+ips)
t.Logf("[%s] up with %v", n.name, st.Self.TailscaleIPs)
t.Logf("[%s] up with %v", n.name, st2.Self.TailscaleIPs)
tsStep.End(nil)
}
@@ -1226,6 +1060,147 @@ func (e *Env) nodeScreenshotPort(name string) int {
return 0
}
// initVnet creates the vnet server. Called once via sync.Once.
func (e *Env) initVnet() {
e.vnetOnce.Do(func() {
var err error
e.server, err = vnet.New(&e.cfg)
if err != nil {
e.t.Fatalf("vnet.New: %v", err)
}
e.t.Cleanup(func() { e.server.Close() })
e.server.SetDHCPCallback(func(mac vnet.MAC, nodeNum int, msgType layers.DHCPMsgType, ip netip.Addr) {
name := e.nodeNameByNum(nodeNum)
nicIdx := e.nicIndexForMAC(name, mac)
ipStr := ip.String()
switch msgType {
case layers.DHCPMsgTypeDiscover:
e.setNodeDHCP(name, nicIdx, "Discover sent")
e.eventBus.Publish(VMEvent{NodeName: name, Type: EventDHCPDiscover, Message: "DHCP Discover sent", NIC: nicIdx})
case layers.DHCPMsgTypeOffer:
e.setNodeDHCP(name, nicIdx, "Offered "+ipStr)
e.eventBus.Publish(VMEvent{NodeName: name, Type: EventDHCPOffer, Message: "DHCP Offer received", Detail: ipStr, NIC: nicIdx})
case layers.DHCPMsgTypeRequest:
e.setNodeDHCP(name, nicIdx, "Requesting "+ipStr)
e.eventBus.Publish(VMEvent{NodeName: name, Type: EventDHCPRequest, Message: "DHCP Request sent", Detail: ipStr, NIC: nicIdx})
case layers.DHCPMsgTypeAck:
e.setNodeDHCP(name, nicIdx, "Got "+ipStr)
e.eventBus.Publish(VMEvent{NodeName: name, Type: EventDHCPAck, Message: "DHCP Ack: got " + ipStr, Detail: ipStr, NIC: nicIdx})
}
})
if e.sameTailnetUser {
e.server.ControlServer().AllNodesSameUser = true
}
})
}
// ensureQEMUSocket creates the Unix stream socket for QEMU VMs. Called once.
func (e *Env) ensureQEMUSocket() {
e.qemuSockOnce.Do(func() {
e.initVnet()
e.sockAddr = filepath.Join(e.tempDir, "vnet.sock")
srv, err := net.Listen("unix", e.sockAddr)
if err != nil {
e.t.Fatalf("listen unix: %v", err)
}
e.t.Cleanup(func() { srv.Close() })
go func() {
for {
c, err := srv.Accept()
if err != nil {
return
}
go e.server.ServeUnixConn(c.(*net.UnixConn), vnet.ProtocolQEMU)
}
}()
})
}
// ensureDgramSocket creates the Unix dgram socket for macOS VMs. Called once.
func (e *Env) ensureDgramSocket() {
e.dgramSockOnce.Do(func() {
e.initVnet()
e.dgramSockAddr = fmt.Sprintf("/tmp/vmtest-dgram-%d.sock", os.Getpid())
e.t.Cleanup(func() { os.Remove(e.dgramSockAddr) })
dgramAddr, err := net.ResolveUnixAddr("unixgram", e.dgramSockAddr)
if err != nil {
e.t.Fatalf("resolve dgram addr: %v", err)
}
uc, err := net.ListenUnixgram("unixgram", dgramAddr)
if err != nil {
e.t.Fatalf("listen unixgram: %v", err)
}
e.t.Cleanup(func() { uc.Close() })
go e.server.ServeUnixConn(uc, vnet.ProtocolUnixDGRAM)
})
}
// ensureCompiled compiles binaries for the given platform and registers them
// with the vnet file server. Safe for concurrent use; only compiles once per platform.
func (e *Env) ensureCompiled(ctx context.Context, goos, goarch string) {
key := goos + "_" + goarch
e.compileMu.Lock()
if e.compiled.Contains(key) {
e.compileMu.Unlock()
return
}
e.compileMu.Unlock()
step := e.Step(fmt.Sprintf("Compile %s_%s binaries", goos, goarch))
step.Begin()
if err := e.compileBinariesForOS(ctx, goos, goarch); err != nil {
step.End(err)
e.t.Fatalf("compileBinariesForOS(%s, %s): %v", goos, goarch, err)
}
step.End(nil)
e.registerBinaries(goos, goarch)
e.compileMu.Lock()
e.compiled.Make()
e.compiled.Add(key)
e.compileMu.Unlock()
}
// registerBinaries registers compiled binaries with the vnet file server.
// Safe for concurrent use.
func (e *Env) registerBinaries(goos, goarch string) {
e.initVnet()
dir := goos + "_" + goarch
for _, name := range []string{"tta", "tailscale", "tailscaled"} {
data, err := os.ReadFile(filepath.Join(e.binDir, dir, name))
if err != nil {
e.t.Fatalf("reading compiled %s/%s: %v", dir, name, err)
}
e.server.RegisterFile(dir+"/"+name, data)
}
}
// waitForAgentConn waits for a TTA agent to connect by issuing a simple
// HTTP GET to the root endpoint, without requiring tailscaled.
func (e *Env) waitForAgentConn(ctx context.Context, n *Node) error {
for {
reqCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
req, err := http.NewRequestWithContext(reqCtx, "GET", "http://unused/", nil)
if err != nil {
cancel()
return err
}
res, err := n.agent.HTTPClient.Do(req)
cancel()
if err == nil {
res.Body.Close()
return nil
}
if ctx.Err() != nil {
return ctx.Err()
}
time.Sleep(500 * time.Millisecond)
}
}
// Agent returns the node's TTA agent client, or nil if NoAgent is set.
func (n *Node) Agent() *vnet.NodeAgentClient {
return n.agent
+20 -4
View File
@@ -26,16 +26,32 @@ func TestMacOSAndLinuxCanPing(t *testing.T) {
vmtest.DontJoinTailnet())
macos := env.AddNode("macos", lan,
vmtest.OS(vmtest.MacOS),
vmtest.DontJoinTailnet(),
vmtest.NoAgent())
vmtest.DontJoinTailnet())
env.Start()
// Ping from Linux (which has TTA) to macOS (which just responds to ICMP).
// LANPing retries until the macOS VM has booted and acquired a DHCP lease.
env.LANPing(linux, macos.LanIP(lan))
}
func TestTwoMacOSVMsCanPing(t *testing.T) {
env := vmtest.New(t)
lan := env.AddNetwork("192.168.1.1/24")
mac1 := env.AddNode("mac1", lan,
vmtest.OS(vmtest.MacOS),
vmtest.DontJoinTailnet())
mac2 := env.AddNode("mac2", lan,
vmtest.OS(vmtest.MacOS),
vmtest.DontJoinTailnet())
env.Start()
// Both macOS VMs have TTA. Ping from mac1 to mac2 and vice versa.
env.LANPing(mac1, mac2.LanIP(lan))
env.LANPing(mac2, mac1.LanIP(lan))
}
func TestSubnetRouter(t *testing.T) {
testSubnetRouterForOS(t, vmtest.Ubuntu2404)
}
+7
View File
@@ -33,6 +33,13 @@ func (v virtualIP) Match(a netip.Addr) bool {
return v.v4 == a.Unmap() || v.v6 == a
}
// TestDriverIPv4 returns the IPv4 address of the test driver VIP (52.52.0.2).
// TTA agents dial this IP on port TestDriverPort to connect to the test harness.
func TestDriverIPv4() netip.Addr { return fakeTestAgent.v4 }
// TestDriverPort is the port the test driver listens on.
const TestDriverPort = 8008
// FakeDNSIPv4 returns the fake DNS IPv4 address.
func FakeDNSIPv4() netip.Addr { return fakeDNS.v4 }
+2 -2
View File
@@ -352,7 +352,7 @@ func (n *network) acceptTCP(r *tcp.ForwarderRequest) {
return
}
if destPort == 8008 && fakeTestAgent.Match(destIP) {
if destPort == TestDriverPort && fakeTestAgent.Match(destIP) {
node, ok := n.nodeByIP(clientRemoteIP)
if !ok {
n.logf("unknown client IP %v trying to connect to test driver", clientRemoteIP)
@@ -2106,7 +2106,7 @@ func (s *Server) shouldInterceptTCP(pkt gopacket.Packet) bool {
return true
}
}
if tcp.DstPort == 8008 && fakeTestAgent.Match(flow.dst) {
if tcp.DstPort == TestDriverPort && fakeTestAgent.Match(flow.dst) {
// Connection from cmd/tta.
return true
}
+32 -12
View File
@@ -22,8 +22,10 @@ extension HostCli {
@Option var share: String?
@Flag(help: "Run without GUI (for automated testing)") var headless: Bool = false
@Flag(help: "Create NIC with no attachment (for later hot-swap)") var disconnectedNic: Bool = false
@Flag(help: "Use NAT NIC instead of socket NIC (for snapshot prep)") var natNic: Bool = false
@Option(help: "Hot-swap NIC to this dgram socket path after boot/restore") var attachNetwork: String?
@Option(help: "Serve screenshots on this localhost port (0 = auto)") var screenshotPort: Int?
@Option(help: "Assign IP/mask/gw to guest via vsock (e.g. 192.168.1.2/255.255.255.0/192.168.1.1)") var assignIp: String?
mutating func run() {
config = Config(id)
@@ -32,19 +34,38 @@ extension HostCli {
if headless {
let attachSocket = attachNetwork
let disconnected = disconnectedNic || attachSocket != nil
let useNatNIC = natNic
let disconnected = !useNatNIC && (disconnectedNic || attachSocket != nil)
let wantScreenshots = screenshotPort != nil
let requestedPort = UInt16(screenshotPort ?? 0)
let ipConfig = assignIp
// Set up SIGINT handler before entering the event loop.
// The dispatch source must be stored in a global to prevent ARC deallocation.
signal(SIGINT, SIG_IGN)
let sigintSource = DispatchSource.makeSignalSource(signal: SIGINT, queue: .main)
retainedSigintSource = sigintSource
DispatchQueue.main.async {
let controller = VMController()
controller.createVirtualMachine(headless: true, disconnectedNIC: disconnected)
controller.createVirtualMachine(headless: true, disconnectedNIC: disconnected, natNIC: useNatNIC)
// Start vsock listener for IP assignment.
// If --assign-ip is set, the listener replies with the IP config JSON.
// If not set (snapshot prep), it replies "wait" so TTA keeps polling.
if let ipCfg = ipConfig {
let parts = ipCfg.split(separator: "/")
if parts.count == 3 {
let response = "{\"ip\":\"\(parts[0])\",\"mask\":\"\(parts[1])\",\"gw\":\"\(parts[2])\"}"
controller.startIPConfigListener(response: response)
}
} else {
controller.startIPConfigListener(response: "wait")
}
// Handle SIGINT (from test cleanup) by saving VM state before exit.
let sigintSource = DispatchSource.makeSignalSource(signal: SIGINT, queue: .main)
signal(SIGINT, SIG_IGN) // Let DispatchSource handle it
sigintSource.setEventHandler {
print("SIGINT received, saving VM state...")
print("SIGINT received, disconnecting NIC and saving VM state...")
controller.disconnectNetwork()
controller.pauseAndSaveVirtualMachine {
print("VM state saved, exiting.")
Foundation.exit(0)
@@ -79,11 +100,7 @@ extension HostCli {
let doAttach = {
if let sock = attachSocket {
// Give macOS a moment to settle after boot/restore,
// then hot-swap the NIC attachment.
DispatchQueue.main.asyncAfter(deadline: .now() + 1.0) {
controller.attachNetwork(serverSocket: sock, clientID: config.vmID)
}
controller.attachNetwork(serverSocket: sock, clientID: config.vmID)
}
}
@@ -107,7 +124,9 @@ extension HostCli {
fflush(stdout)
app.run()
} else {
RunLoop.main.run()
// Use dispatchMain() instead of RunLoop.main.run() so that
// GCD dispatch sources (like the SIGINT handler) are processed.
dispatchMain()
}
} else {
_ = NSApplicationMain(CommandLine.argc, CommandLine.unsafeArgv)
@@ -119,6 +138,7 @@ extension HostCli {
// startScreenshotServer starts a localhost HTTP server that serves VM display
// screenshots on GET /screenshot as JPEG. The port is printed to stdout as
// "SCREENSHOT_PORT=<port>" so the Go test harness can discover it.
var retainedSigintSource: DispatchSourceSignal? // prevent ARC deallocation
var screenshotServer: ScreenshotHTTPServer? // prevent GC
func startScreenshotServer(view: NSView, port: UInt16) {
+56 -2
View File
@@ -81,7 +81,7 @@ class VMController: NSObject, VZVirtualMachineDelegate {
return macPlatform
}
func createVirtualMachine(headless: Bool = false, disconnectedNIC: Bool = false) {
func createVirtualMachine(headless: Bool = false, disconnectedNIC: Bool = false, natNIC: Bool = false) {
let virtualMachineConfiguration = VZVirtualMachineConfiguration()
virtualMachineConfiguration.platform = createMacPlaform()
@@ -91,7 +91,10 @@ class VMController: NSObject, VZVirtualMachineDelegate {
virtualMachineConfiguration.graphicsDevices = [helper.createGraphicsDeviceConfiguration()]
virtualMachineConfiguration.storageDevices = [helper.createBlockDeviceConfiguration()]
if headless {
if disconnectedNIC {
if natNIC {
// NAT NIC for SSH access during snapshot preparation.
virtualMachineConfiguration.networkDevices = [helper.createNetworkDeviceConfiguration()]
} else if disconnectedNIC {
// Create a NIC with no attachment. The NIC exists in the hardware
// config (so saved state is compatible) but appears disconnected.
// Call attachNetwork() after restore to hot-swap the attachment.
@@ -120,6 +123,17 @@ class VMController: NSObject, VZVirtualMachineDelegate {
virtualMachine.delegate = self
}
/// Disconnect the NIC by setting its attachment to nil.
/// Call before saving state so the snapshot has no active link.
func disconnectNetwork() {
guard let nic = virtualMachine.networkDevices.first else {
print("disconnectNetwork: no network devices")
return
}
nic.attachment = nil
print("disconnectNetwork: NIC attachment set to nil")
}
/// Hot-swap the NIC attachment on a running VM. The VM must have been
/// created with disconnectedNIC=true. After calling this, the guest
/// sees the link come up and does DHCP.
@@ -157,6 +171,21 @@ class VMController: NSObject, VZVirtualMachineDelegate {
}
}
/// Start a vsock listener that tells the guest TTA agent what IP to configure.
/// If response is nil, the listener replies "wait" (snapshot prep mode).
func startIPConfigListener(response: String) {
guard let device = virtualMachine.socketDevices.first as? VZVirtioSocketDevice else {
print("startIPConfigListener: no socket device")
return
}
let listener = IPConfigListener(response: response)
retainedIPConfigListener = listener
let vsockListener = VZVirtioSocketListener()
vsockListener.delegate = listener
device.setSocketListener(vsockListener, forPort: 51011)
print("startIPConfigListener: listening on vsock port 51011")
}
func resumeVirtualMachine() {
virtualMachine.resume(completionHandler: { (result) in
if case let .failure(error) = result {
@@ -211,3 +240,28 @@ class VMController: NSObject, VZVirtualMachineDelegate {
exit(0)
}
}
// Global to prevent ARC deallocation of the vsock listener.
var retainedIPConfigListener: IPConfigListener?
/// Listens on vsock port 51011 for TTA connections and replies with
/// an IP configuration JSON string (or "wait" during snapshot prep).
class IPConfigListener: NSObject, VZVirtioSocketListenerDelegate {
let response: String
init(response: String) {
self.response = response
}
func listener(_ listener: VZVirtioSocketListener,
shouldAcceptNewConnection connection: VZVirtioSocketConnection,
from socketDevice: VZVirtioSocketDevice) -> Bool {
let fd = connection.fileDescriptor
let data = Array((response + "\n").utf8)
data.withUnsafeBufferPointer { buf in
_ = write(fd, buf.baseAddress!, buf.count)
}
connection.close()
return true
}
}