tka: keep the CompactionDefaults alongside the other limits #6
@@ -0,0 +1,135 @@
|
||||
// Copyright (c) Tailscale Inc & contributors
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
//go:build darwin
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"net"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
"tailscale.com/tstest/natlab/vnet"
|
||||
)
|
||||
|
||||
const (
|
||||
afVSOCK = 40 // AF_VSOCK on macOS
|
||||
vmaddrCIDHost = 2 // VMADDR_CID_HOST
|
||||
vsockPort = 51011 // port for IP assignment protocol
|
||||
)
|
||||
|
||||
// sockaddrVM is the Go equivalent of struct sockaddr_vm from <sys/vsock.h>.
|
||||
type sockaddrVM struct {
|
||||
Len uint8
|
||||
Family uint8
|
||||
Reserved1 uint16
|
||||
Port uint32
|
||||
CID uint32
|
||||
}
|
||||
|
||||
type netConfig struct {
|
||||
IP string `json:"ip"`
|
||||
Mask string `json:"mask"`
|
||||
GW string `json:"gw"`
|
||||
}
|
||||
|
||||
// startIPAssignLoop starts a background goroutine that polls the host
|
||||
// via the virtio socket for an IP assignment. When the host responds
|
||||
// with a JSON config (rather than "wait"), TTA sets the IP statically
|
||||
// using ifconfig and stops polling.
|
||||
func startIPAssignLoop() {
|
||||
go ipAssignLoop()
|
||||
}
|
||||
|
||||
func ipAssignLoop() {
|
||||
log.Printf("ipassign: starting vsock poll loop")
|
||||
var lastErr string
|
||||
for attempt := 0; ; attempt++ {
|
||||
resp, err := askHostForIP()
|
||||
if err != nil {
|
||||
if e := err.Error(); e != lastErr {
|
||||
log.Printf("ipassign: attempt %d: %v", attempt, err)
|
||||
lastErr = e
|
||||
}
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
if resp == "wait" {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
var nc netConfig
|
||||
if err := json.Unmarshal([]byte(resp), &nc); err != nil {
|
||||
log.Printf("ipassign: bad config: %v", err)
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
if err := setStaticIP(nc); err != nil {
|
||||
log.Printf("ipassign: %v", err)
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
log.Printf("ipassign: configured en0 with %s/%s gw %s", nc.IP, nc.Mask, nc.GW)
|
||||
|
||||
// Switch the driver address from the DNS name to the IP directly
|
||||
// (avoids DNS resolution delay) and kick the dial-out loop so it
|
||||
// retries immediately with the new address.
|
||||
ipAddr := net.JoinHostPort(vnet.TestDriverIPv4().String(), strconv.Itoa(vnet.TestDriverPort))
|
||||
*driverAddr = ipAddr
|
||||
log.Printf("ipassign: switched driver addr to %s", ipAddr)
|
||||
resetDialCancels()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// askHostForIP connects to the host via AF_VSOCK and reads the response.
|
||||
func askHostForIP() (string, error) {
|
||||
fd, err := unix.Socket(afVSOCK, unix.SOCK_STREAM, 0)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("socket: %w", err)
|
||||
}
|
||||
defer unix.Close(fd)
|
||||
|
||||
// Set a short connect+read timeout via SO_RCVTIMEO.
|
||||
tv := unix.Timeval{Sec: 1}
|
||||
unix.SetsockoptTimeval(fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &tv)
|
||||
|
||||
addr := sockaddrVM{
|
||||
Len: uint8(unsafe.Sizeof(sockaddrVM{})),
|
||||
Family: afVSOCK,
|
||||
Port: vsockPort,
|
||||
CID: vmaddrCIDHost,
|
||||
}
|
||||
_, _, errno := unix.RawSyscall(unix.SYS_CONNECT, uintptr(fd),
|
||||
uintptr(unsafe.Pointer(&addr)), unsafe.Sizeof(addr))
|
||||
if errno != 0 {
|
||||
return "", fmt.Errorf("connect: %w", errno)
|
||||
}
|
||||
|
||||
var buf [1024]byte
|
||||
n, err := unix.Read(fd, buf[:])
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("read: %w", err)
|
||||
}
|
||||
return string(buf[:n]), nil
|
||||
}
|
||||
|
||||
// setStaticIP configures en0 with a static IP address and default route.
|
||||
func setStaticIP(nc netConfig) error {
|
||||
out, err := exec.Command("ifconfig", "en0", nc.IP, "netmask", nc.Mask, "up").CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("ifconfig: %v: %s", err, out)
|
||||
}
|
||||
out, err = exec.Command("route", "add", "default", nc.GW).CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("route add: %v: %s", err, out)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
// Copyright (c) Tailscale Inc & contributors
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
//go:build !darwin
|
||||
|
||||
package main
|
||||
|
||||
// startIPAssignLoop is a no-op on non-macOS platforms.
|
||||
// macOS VMs use vsock-based IP assignment to bypass slow DHCP.
|
||||
func startIPAssignLoop() {}
|
||||
|
||||
// Reference resetDialCancels to prevent unused-function lint errors.
|
||||
// It's called from ipassign_darwin.go on macOS builds.
|
||||
var _ = resetDialCancels
|
||||
+46
-2
@@ -105,6 +105,10 @@ func main() {
|
||||
}
|
||||
flag.Parse()
|
||||
|
||||
// On macOS VMs, start polling the host via vsock for an IP assignment.
|
||||
// This bypasses DHCP for near-instant network configuration.
|
||||
startIPAssignLoop()
|
||||
|
||||
debug := false
|
||||
if distro.Get() == distro.Gokrazy {
|
||||
cmdLine, _ := os.ReadFile("/proc/cmdline")
|
||||
@@ -408,12 +412,48 @@ func main() {
|
||||
revSt.runDialOutLoop(conns)
|
||||
}
|
||||
|
||||
// dialCancels tracks cancel funcs for in-flight connect() and sleep contexts.
|
||||
// resetDialCancels cancels them all so the dial loop retries immediately.
|
||||
var (
|
||||
dialCancelMu sync.Mutex
|
||||
dialCancels set.HandleSet[context.CancelFunc]
|
||||
)
|
||||
|
||||
// registerDialCancel adds a cancel func and returns a handle for removal.
|
||||
func registerDialCancel(cancel context.CancelFunc) set.Handle {
|
||||
dialCancelMu.Lock()
|
||||
defer dialCancelMu.Unlock()
|
||||
return dialCancels.Add(cancel)
|
||||
}
|
||||
|
||||
// unregisterDialCancel removes a previously registered cancel func.
|
||||
func unregisterDialCancel(h set.Handle) {
|
||||
dialCancelMu.Lock()
|
||||
defer dialCancelMu.Unlock()
|
||||
delete(dialCancels, h)
|
||||
}
|
||||
|
||||
// resetDialCancels cancels all in-flight connect and sleep contexts,
|
||||
// causing the dial loop to retry immediately with the updated driver address.
|
||||
func resetDialCancels() {
|
||||
dialCancelMu.Lock()
|
||||
defer dialCancelMu.Unlock()
|
||||
for h, cancel := range dialCancels {
|
||||
cancel()
|
||||
delete(dialCancels, h)
|
||||
}
|
||||
}
|
||||
|
||||
func connect() (net.Conn, error) {
|
||||
d := net.Dialer{
|
||||
Control: bypassControlFunc,
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
h := registerDialCancel(cancel)
|
||||
defer func() {
|
||||
cancel()
|
||||
unregisterDialCancel(h)
|
||||
}()
|
||||
c, err := d.DialContext(ctx, "tcp", *driverAddr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -510,7 +550,11 @@ func (s *revDialState) runDialOutLoop(conns chan<- net.Conn) {
|
||||
log.Printf("[dial-driver] connect failure: %v", s)
|
||||
}
|
||||
lastErr = s
|
||||
time.Sleep(time.Second)
|
||||
sleepCtx, sleepCancel := context.WithTimeout(context.Background(), time.Second)
|
||||
h := registerDialCancel(sleepCancel)
|
||||
<-sleepCtx.Done()
|
||||
sleepCancel()
|
||||
unregisterDialCancel(h)
|
||||
continue
|
||||
}
|
||||
if !connected {
|
||||
|
||||
@@ -20,12 +20,69 @@ import (
|
||||
"tailscale.com/tstest/natlab/vnet"
|
||||
)
|
||||
|
||||
// startQEMU launches a QEMU process for the given node.
|
||||
func (e *Env) startQEMU(n *Node) error {
|
||||
if n.os.IsGokrazy {
|
||||
return e.startGokrazyQEMU(n)
|
||||
// gokrazyPlatform boots gokrazy (Linux) VMs via QEMU.
|
||||
type gokrazyPlatform struct{}
|
||||
|
||||
func (gokrazyPlatform) planSteps(e *Env, n *Node) {
|
||||
e.Step("Build gokrazy image")
|
||||
e.Step("Launch QEMU: " + n.name)
|
||||
}
|
||||
|
||||
func (gokrazyPlatform) boot(ctx context.Context, e *Env, n *Node) error {
|
||||
e.gokrazyOnce.Do(func() {
|
||||
step := e.Step("Build gokrazy image")
|
||||
step.Begin()
|
||||
if err := e.ensureGokrazy(ctx); err != nil {
|
||||
step.End(err)
|
||||
e.t.Fatalf("ensureGokrazy: %v", err)
|
||||
}
|
||||
step.End(nil)
|
||||
})
|
||||
|
||||
e.ensureQEMUSocket()
|
||||
|
||||
vmStep := e.Step("Launch QEMU: " + n.name)
|
||||
vmStep.Begin()
|
||||
if err := e.startGokrazyQEMU(n); err != nil {
|
||||
vmStep.End(err)
|
||||
return err
|
||||
}
|
||||
return e.startCloudQEMU(n)
|
||||
vmStep.End(nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// qemuCloudPlatform boots cloud images (Ubuntu, Debian, FreeBSD) via QEMU.
|
||||
type qemuCloudPlatform struct{}
|
||||
|
||||
func (qemuCloudPlatform) planSteps(e *Env, n *Node) {
|
||||
e.Step(fmt.Sprintf("Compile %s_%s binaries", n.os.GOOS(), n.os.GOARCH()))
|
||||
e.Step(fmt.Sprintf("Prepare %s image", n.os.Name))
|
||||
e.Step("Launch QEMU: " + n.name)
|
||||
}
|
||||
|
||||
func (qemuCloudPlatform) boot(ctx context.Context, e *Env, n *Node) error {
|
||||
goos, goarch := n.os.GOOS(), n.os.GOARCH()
|
||||
|
||||
e.ensureCompiled(ctx, goos, goarch)
|
||||
|
||||
imgStep := e.Step(fmt.Sprintf("Prepare %s image", n.os.Name))
|
||||
imgStep.Begin()
|
||||
if err := ensureImage(ctx, n.os); err != nil {
|
||||
imgStep.End(err)
|
||||
return err
|
||||
}
|
||||
imgStep.End(nil)
|
||||
|
||||
e.ensureQEMUSocket()
|
||||
|
||||
vmStep := e.Step("Launch QEMU: " + n.name)
|
||||
vmStep.Begin()
|
||||
if err := e.startCloudQEMU(n); err != nil {
|
||||
vmStep.End(err)
|
||||
return err
|
||||
}
|
||||
vmStep.End(nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// startGokrazyQEMU launches a QEMU process for a gokrazy node.
|
||||
|
||||
+456
-33
@@ -5,31 +5,76 @@ package vmtest
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/netip"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
// macPlatform boots macOS VMs via Tart base images and tailmac Host.app.
|
||||
type macPlatform struct{}
|
||||
|
||||
func (macPlatform) planSteps(e *Env, n *Node) {
|
||||
e.Step("Prepare macOS Tart image")
|
||||
e.Step("Launch macOS VM: " + n.name)
|
||||
}
|
||||
|
||||
func (macPlatform) boot(ctx context.Context, e *Env, n *Node) error {
|
||||
imgStep := e.Step("Prepare macOS Tart image")
|
||||
e.macosSnapshotOnce.Do(func() {
|
||||
imgStep.Begin()
|
||||
e.macosSnapshot = ensureSnapshot(e.t)
|
||||
imgStep.End(nil)
|
||||
})
|
||||
|
||||
e.ensureDgramSocket()
|
||||
|
||||
vmStep := e.Step("Launch macOS VM: " + n.name)
|
||||
vmStep.Begin()
|
||||
if err := e.startTailMacVM(n); err != nil {
|
||||
vmStep.End(err)
|
||||
return err
|
||||
}
|
||||
vmStep.End(nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
const tartImage = "ghcr.io/cirruslabs/macos-tahoe-base:latest"
|
||||
|
||||
// macOSSnapshotCodeVersion is bumped when the snapshot preparation logic
|
||||
// changes in a way that invalidates old snapshots. Old snapshots with a
|
||||
// different version are cleaned up automatically.
|
||||
const macOSSnapshotCodeVersion = 5
|
||||
|
||||
// tartConfig is the subset of Tart's config.json we need.
|
||||
type tartConfig struct {
|
||||
HardwareModel string `json:"hardwareModel"` // base64
|
||||
ECID string `json:"ecid"` // base64
|
||||
}
|
||||
|
||||
// tartManifest is the subset of Tart's OCI manifest.json we need.
|
||||
type tartManifest struct {
|
||||
Config struct {
|
||||
Digest string `json:"digest"` // e.g. "sha256:3a6cb4eb6201..."
|
||||
} `json:"config"`
|
||||
}
|
||||
|
||||
// ensureTartImage checks that the Tart base image is available, pulling it
|
||||
// if necessary. Returns the path to a directory containing disk.img,
|
||||
// nvram.bin, and config.json.
|
||||
// if necessary. Returns the path to the OCI cache directory containing
|
||||
// disk.img, nvram.bin, config.json, and manifest.json.
|
||||
func ensureTartImage(t testing.TB) string {
|
||||
if _, err := exec.LookPath("tart"); err != nil {
|
||||
t.Skip("tart not installed; skipping macOS VM test")
|
||||
@@ -40,7 +85,6 @@ func ensureTartImage(t testing.TB) string {
|
||||
t.Fatalf("UserHomeDir: %v", err)
|
||||
}
|
||||
|
||||
// Check OCI cache first (from a previous "tart pull").
|
||||
ociDir := filepath.Join(home, ".tart", "cache", "OCIs",
|
||||
"ghcr.io", "cirruslabs", "macos-tahoe-base", "latest")
|
||||
if _, err := os.Stat(filepath.Join(ociDir, "disk.img")); err == nil {
|
||||
@@ -55,7 +99,6 @@ func ensureTartImage(t testing.TB) string {
|
||||
t.Fatalf("tart pull: %v", err)
|
||||
}
|
||||
|
||||
// After pull, the OCI cache should have it.
|
||||
if _, err := os.Stat(filepath.Join(ociDir, "disk.img")); err == nil {
|
||||
return ociDir
|
||||
}
|
||||
@@ -63,6 +106,368 @@ func ensureTartImage(t testing.TB) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// snapshotCacheKey computes a cache key for the macOS VM snapshot.
|
||||
// The key combines the image name, the first 12 hex chars of the Tart
|
||||
// config digest (changes when the upstream image is updated), and the
|
||||
// snapshot code version (changes when our prep logic changes).
|
||||
func snapshotCacheKey(tartDir string) (string, error) {
|
||||
manifestPath := filepath.Join(tartDir, "manifest.json")
|
||||
data, err := os.ReadFile(manifestPath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("reading manifest: %w", err)
|
||||
}
|
||||
var m tartManifest
|
||||
if err := json.Unmarshal(data, &m); err != nil {
|
||||
return "", fmt.Errorf("parsing manifest: %w", err)
|
||||
}
|
||||
digest := m.Config.Digest
|
||||
// Strip "sha256:" prefix and take first 12 hex chars.
|
||||
digest = strings.TrimPrefix(digest, "sha256:")
|
||||
if len(digest) > 12 {
|
||||
digest = digest[:12]
|
||||
}
|
||||
return fmt.Sprintf("snap-tahoe-%s-v%d", digest, macOSSnapshotCodeVersion), nil
|
||||
}
|
||||
|
||||
// macosVMBaseDir returns ~/.cache/tailscale/vmtest/macos/, the directory
|
||||
// where Host.app expects to find VM directories by ID.
|
||||
func macosVMBaseDir() (string, error) {
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return filepath.Join(home, ".cache", "tailscale", "vmtest", "macos"), nil
|
||||
}
|
||||
|
||||
// cleanOldSnapshots removes any snapshot directories for the given image
|
||||
// prefix (e.g. "snap-tahoe") that don't match the current cache key.
|
||||
func cleanOldSnapshots(t testing.TB, imagePrefix, currentKey string) {
|
||||
base, err := macosVMBaseDir()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
matches, _ := filepath.Glob(filepath.Join(base, imagePrefix+"-*"))
|
||||
currentPath := filepath.Join(base, currentKey)
|
||||
for _, m := range matches {
|
||||
if m != currentPath {
|
||||
t.Logf("removing stale snapshot: %s", filepath.Base(m))
|
||||
os.RemoveAll(m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ensureSnapshot returns the path to a cached macOS VM snapshot, creating
|
||||
// one if necessary. The snapshot contains a fully booted VM with
|
||||
// SaveFile.vzvmsave ready for fast restore.
|
||||
func ensureSnapshot(t testing.TB) string {
|
||||
tartDir := ensureTartImage(t)
|
||||
|
||||
key, err := snapshotCacheKey(tartDir)
|
||||
if err != nil {
|
||||
t.Fatalf("snapshot cache key: %v", err)
|
||||
}
|
||||
|
||||
base, err := macosVMBaseDir()
|
||||
if err != nil {
|
||||
t.Fatalf("macOS VM base dir: %v", err)
|
||||
}
|
||||
os.MkdirAll(base, 0755)
|
||||
|
||||
snapDir := filepath.Join(base, key)
|
||||
saveFile := filepath.Join(snapDir, "SaveFile.vzvmsave")
|
||||
if _, err := os.Stat(saveFile); err == nil {
|
||||
t.Logf("using cached macOS snapshot: %s", key)
|
||||
return snapDir
|
||||
}
|
||||
|
||||
// Clean up old snapshots for this image.
|
||||
cleanOldSnapshots(t, "snap-tahoe", key)
|
||||
|
||||
t.Logf("preparing macOS snapshot: %s (this takes ~30s on first run)", key)
|
||||
if err := prepareSnapshot(t, tartDir, snapDir); err != nil {
|
||||
os.RemoveAll(snapDir)
|
||||
t.Fatalf("preparing snapshot: %v", err)
|
||||
}
|
||||
return snapDir
|
||||
}
|
||||
|
||||
// prepareSnapshot creates a new macOS VM snapshot by booting the Tart base
|
||||
// image with a NAT NIC, waiting for SSH, and saving VM state.
|
||||
func prepareSnapshot(t testing.TB, tartDir, snapDir string) error {
|
||||
// The vmID must match the directory name under macosVMBaseDir
|
||||
// because Host.app looks up VM files at <base>/<vmID>/.
|
||||
snapID := filepath.Base(snapDir)
|
||||
|
||||
if err := cloneTartToTailmac(tartDir, snapDir, snapID, "52:cc:cc:cc:ce:01", "/dev/null"); err != nil {
|
||||
return fmt.Errorf("cloning tart: %w", err)
|
||||
}
|
||||
|
||||
modRoot, err := findModRoot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tailmacDir := filepath.Join(modRoot, "tstest", "tailmac", "bin")
|
||||
hostBin := filepath.Join(tailmacDir, "Host.app", "Contents", "MacOS", "Host")
|
||||
if _, err := os.Stat(hostBin); err != nil {
|
||||
return fmt.Errorf("Host.app not found at %s; run 'make all' in tstest/tailmac/", hostBin)
|
||||
}
|
||||
|
||||
// Host.app reads VM files from ~/.cache/tailscale/vmtest/macos/<id>/.
|
||||
// Our snapDir is already under that tree, and the config.json vmID matches.
|
||||
cmd := exec.Command(hostBin, "run", "--id", snapID, "--headless", "--nat-nic")
|
||||
cmd.Env = append(os.Environ(), "NSUnbufferedIO=YES")
|
||||
|
||||
logPath := snapDir + ".prep.log"
|
||||
logFile, err := os.Create(logPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer logFile.Close()
|
||||
cmd.Stdout = logFile
|
||||
cmd.Stderr = logFile
|
||||
devNull, _ := os.Open(os.DevNull)
|
||||
cmd.Stdin = devNull
|
||||
defer devNull.Close()
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return fmt.Errorf("starting Host.app: %w", err)
|
||||
}
|
||||
t.Logf("snapshot prep: launched Host.app (pid %d)", cmd.Process.Pid)
|
||||
|
||||
// Wait for SSH to become available via the NAT NIC.
|
||||
// The VM gets an IP from macOS's vmnet DHCP (typically 192.168.64.x).
|
||||
ip, err := waitForVMIP(t, "52:cc:cc:cc:ce:01", 60*time.Second)
|
||||
if err != nil {
|
||||
cmd.Process.Kill()
|
||||
cmd.Wait()
|
||||
return fmt.Errorf("waiting for VM IP: %w", err)
|
||||
}
|
||||
t.Logf("snapshot prep: VM IP is %s, waiting for SSH...", ip)
|
||||
|
||||
sc, err := waitForSSH(ip, 60*time.Second)
|
||||
if err != nil {
|
||||
cmd.Process.Kill()
|
||||
cmd.Wait()
|
||||
return fmt.Errorf("waiting for SSH: %w", err)
|
||||
}
|
||||
t.Logf("snapshot prep: SSH connected")
|
||||
|
||||
// Compile and install TTA in the macOS VM.
|
||||
t.Logf("snapshot prep: installing TTA...")
|
||||
if err := installTTA(t, sc); err != nil {
|
||||
sc.Close()
|
||||
cmd.Process.Kill()
|
||||
cmd.Wait()
|
||||
return fmt.Errorf("installing TTA: %w", err)
|
||||
}
|
||||
sc.Close()
|
||||
|
||||
// Save VM state by sending SIGINT.
|
||||
t.Logf("snapshot prep: saving VM state...")
|
||||
cmd.Process.Signal(os.Interrupt)
|
||||
done := make(chan error, 1)
|
||||
go func() { done <- cmd.Wait() }()
|
||||
select {
|
||||
case err := <-done:
|
||||
if err != nil {
|
||||
// Host.app exits 0 after saving state, non-zero is unexpected.
|
||||
t.Logf("snapshot prep: Host.app exited with: %v", err)
|
||||
}
|
||||
case <-time.After(60 * time.Second):
|
||||
cmd.Process.Kill()
|
||||
<-done
|
||||
return fmt.Errorf("Host.app did not exit after SIGINT")
|
||||
}
|
||||
|
||||
// Verify the save file was created.
|
||||
saveFile := filepath.Join(snapDir, "SaveFile.vzvmsave")
|
||||
if _, err := os.Stat(saveFile); err != nil {
|
||||
return fmt.Errorf("SaveFile.vzvmsave not found after prep")
|
||||
}
|
||||
t.Logf("snapshot prep: done, saved to %s", filepath.Base(snapDir))
|
||||
os.Remove(logPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// installTTA compiles TTA for darwin/arm64 and installs it in the macOS VM
|
||||
// as a LaunchDaemon via SSH/SCP.
|
||||
func installTTA(t testing.TB, sc *ssh.Client) error {
|
||||
modRoot, err := findModRoot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Compile TTA for the macOS VM.
|
||||
tmpDir := t.TempDir()
|
||||
ttaBin := filepath.Join(tmpDir, "tta")
|
||||
t.Logf("snapshot prep: compiling TTA for darwin/arm64...")
|
||||
buildCmd := exec.Command("go", "build", "-o", ttaBin, "./cmd/tta")
|
||||
buildCmd.Dir = modRoot
|
||||
buildCmd.Env = append(os.Environ(), "GOOS=darwin", "GOARCH=arm64", "CGO_ENABLED=0")
|
||||
if out, err := buildCmd.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("compiling TTA: %v\n%s", err, out)
|
||||
}
|
||||
|
||||
// Read the binary.
|
||||
ttaData, err := os.ReadFile(ttaBin)
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading TTA binary: %w", err)
|
||||
}
|
||||
t.Logf("snapshot prep: TTA binary is %d bytes", len(ttaData))
|
||||
|
||||
// SCP the TTA binary to the VM via a temp file (admin user can't write /usr/local/bin directly).
|
||||
if err := scpFile(sc, ttaData, "/tmp/tta", 0755); err != nil {
|
||||
return fmt.Errorf("uploading TTA: %w", err)
|
||||
}
|
||||
if err := runSSHCmd(sc, "echo admin | sudo -S mv /tmp/tta /usr/local/bin/tta"); err != nil {
|
||||
return fmt.Errorf("moving TTA to /usr/local/bin: %w", err)
|
||||
}
|
||||
|
||||
// Install the LaunchDaemon plist.
|
||||
plist := `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>Label</key>
|
||||
<string>com.tailscale.tta</string>
|
||||
<key>ProgramArguments</key>
|
||||
<array>
|
||||
<string>/usr/local/bin/tta</string>
|
||||
</array>
|
||||
<key>RunAtLoad</key>
|
||||
<true/>
|
||||
<key>KeepAlive</key>
|
||||
<true/>
|
||||
<key>StandardOutPath</key>
|
||||
<string>/tmp/tta.log</string>
|
||||
<key>StandardErrorPath</key>
|
||||
<string>/tmp/tta.log</string>
|
||||
</dict>
|
||||
</plist>
|
||||
`
|
||||
if err := scpFile(sc, []byte(plist), "/tmp/com.tailscale.tta.plist", 0644); err != nil {
|
||||
return fmt.Errorf("uploading plist: %w", err)
|
||||
}
|
||||
if err := runSSHCmd(sc, "echo admin | sudo -S mv /tmp/com.tailscale.tta.plist /Library/LaunchDaemons/ && echo admin | sudo -S chown root:wheel /Library/LaunchDaemons/com.tailscale.tta.plist"); err != nil {
|
||||
return fmt.Errorf("installing plist: %w", err)
|
||||
}
|
||||
|
||||
// Load the LaunchDaemon.
|
||||
if err := runSSHCmd(sc, "echo admin | sudo -S launchctl load /Library/LaunchDaemons/com.tailscale.tta.plist"); err != nil {
|
||||
return fmt.Errorf("loading LaunchDaemon: %w", err)
|
||||
}
|
||||
|
||||
// Wait for TTA to start.
|
||||
for range 20 {
|
||||
if err := runSSHCmd(sc, "pgrep -x tta"); err == nil {
|
||||
break
|
||||
}
|
||||
time.Sleep(250 * time.Millisecond)
|
||||
}
|
||||
if err := runSSHCmd(sc, "pgrep -x tta"); err != nil {
|
||||
return fmt.Errorf("TTA not running after install: %w", err)
|
||||
}
|
||||
t.Logf("snapshot prep: TTA installed and running")
|
||||
return nil
|
||||
}
|
||||
|
||||
// scpFile uploads data to a remote path via SSH/SCP.
|
||||
func scpFile(sc *ssh.Client, data []byte, remotePath string, mode os.FileMode) error {
|
||||
sess, err := sc.NewSession()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer sess.Close()
|
||||
|
||||
// Use a simple shell command to write the file.
|
||||
cmd := fmt.Sprintf("cat > %s && chmod %o %s", remotePath, mode, remotePath)
|
||||
sess.Stdin = bytes.NewReader(data)
|
||||
out, err := sess.CombinedOutput(cmd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s: %v: %s", cmd, err, out)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// runSSHCmd runs a command on the SSH client and returns an error if it fails.
|
||||
func runSSHCmd(sc *ssh.Client, cmd string) error {
|
||||
sess, err := sc.NewSession()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer sess.Close()
|
||||
out, err := sess.CombinedOutput(cmd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s: %v: %s", cmd, err, out)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// waitForVMIP polls /var/db/dhcpd_leases for a DHCP lease matching the
|
||||
// given MAC address (from macOS's vmnet NAT). Returns the IP.
|
||||
func waitForVMIP(t testing.TB, mac string, timeout time.Duration) (string, error) {
|
||||
// Normalize MAC format: vmnet leases use "1,xx:xx:xx:xx:xx:xx" format
|
||||
// with leading zeros stripped from each octet (e.g. "1,52:cc:cc:cc:ce:1"
|
||||
// instead of "1,52:cc:cc:cc:ce:01").
|
||||
mac = strings.ToLower(mac)
|
||||
parts := strings.Split(mac, ":")
|
||||
for i, p := range parts {
|
||||
parts[i] = strings.TrimLeft(p, "0")
|
||||
if parts[i] == "" {
|
||||
parts[i] = "0"
|
||||
}
|
||||
}
|
||||
leaseMAC := "1," + strings.Join(parts, ":")
|
||||
|
||||
deadline := time.Now().Add(timeout)
|
||||
for time.Now().Before(deadline) {
|
||||
data, err := os.ReadFile("/var/db/dhcpd_leases")
|
||||
if err == nil {
|
||||
// Parse the plist-like lease file.
|
||||
lines := strings.Split(string(data), "\n")
|
||||
var currentIP string
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if strings.HasPrefix(line, "ip_address=") {
|
||||
currentIP = strings.TrimPrefix(line, "ip_address=")
|
||||
}
|
||||
if strings.HasPrefix(line, "hw_address=") {
|
||||
hw := strings.TrimPrefix(line, "hw_address=")
|
||||
if strings.ToLower(hw) == leaseMAC && currentIP != "" {
|
||||
return currentIP, nil
|
||||
}
|
||||
}
|
||||
if line == "}" {
|
||||
currentIP = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
return "", fmt.Errorf("no DHCP lease for MAC %s after %v", mac, timeout)
|
||||
}
|
||||
|
||||
// waitForSSH retries SSH connection to the given IP until it succeeds or
|
||||
// the timeout expires.
|
||||
func waitForSSH(ip string, timeout time.Duration) (*ssh.Client, error) {
|
||||
deadline := time.Now().Add(timeout)
|
||||
addr := net.JoinHostPort(ip, "22")
|
||||
cfg := &ssh.ClientConfig{
|
||||
User: "admin",
|
||||
Auth: []ssh.AuthMethod{ssh.Password("admin")},
|
||||
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
|
||||
Timeout: 2 * time.Second,
|
||||
}
|
||||
for time.Now().Before(deadline) {
|
||||
sc, err := ssh.Dial("tcp", addr, cfg)
|
||||
if err == nil {
|
||||
return sc, nil
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
return nil, fmt.Errorf("SSH to %s timed out after %v", addr, timeout)
|
||||
}
|
||||
|
||||
// ensureTailMac locates the pre-built tailmac Host.app binary.
|
||||
func (e *Env) ensureTailMac() error {
|
||||
modRoot, err := findModRoot()
|
||||
@@ -85,7 +490,6 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
|
||||
return err
|
||||
}
|
||||
|
||||
// Read Tart's config.json for hardware identity.
|
||||
cfgData, err := os.ReadFile(filepath.Join(tartDir, "config.json"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading tart config: %w", err)
|
||||
@@ -95,7 +499,6 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
|
||||
return fmt.Errorf("parsing tart config: %w", err)
|
||||
}
|
||||
|
||||
// Decode and write HardwareModel.
|
||||
hwModel, err := base64.StdEncoding.DecodeString(tc.HardwareModel)
|
||||
if err != nil {
|
||||
return fmt.Errorf("decoding hardwareModel: %w", err)
|
||||
@@ -104,7 +507,6 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
|
||||
return err
|
||||
}
|
||||
|
||||
// Decode and write MachineIdentifier (ECID).
|
||||
ecid, err := base64.StdEncoding.DecodeString(tc.ECID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("decoding ecid: %w", err)
|
||||
@@ -113,22 +515,18 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
|
||||
return err
|
||||
}
|
||||
|
||||
// APFS clone the disk image (nearly instant, copy-on-write).
|
||||
if out, err := exec.Command("cp", "-c", filepath.Join(tartDir, "disk.img"), filepath.Join(cloneDir, "Disk.img")).CombinedOutput(); err != nil {
|
||||
// Fallback to regular copy.
|
||||
if out2, err2 := exec.Command("cp", filepath.Join(tartDir, "disk.img"), filepath.Join(cloneDir, "Disk.img")).CombinedOutput(); err2 != nil {
|
||||
return fmt.Errorf("copying disk: %v: %s (APFS clone: %v: %s)", err2, out2, err, out)
|
||||
}
|
||||
}
|
||||
|
||||
// APFS clone the NVRAM.
|
||||
if out, err := exec.Command("cp", "-c", filepath.Join(tartDir, "nvram.bin"), filepath.Join(cloneDir, "AuxiliaryStorage")).CombinedOutput(); err != nil {
|
||||
if out2, err2 := exec.Command("cp", filepath.Join(tartDir, "nvram.bin"), filepath.Join(cloneDir, "AuxiliaryStorage")).CombinedOutput(); err2 != nil {
|
||||
return fmt.Errorf("copying nvram: %v: %s (APFS clone: %v: %s)", err2, out2, err, out)
|
||||
}
|
||||
}
|
||||
|
||||
// Write tailmac config.json.
|
||||
tmCfg := struct {
|
||||
VMid string `json:"vmID"`
|
||||
ServerSocket string `json:"serverSocket"`
|
||||
@@ -137,17 +535,17 @@ func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error
|
||||
}{
|
||||
VMid: testID,
|
||||
ServerSocket: dgramSock,
|
||||
MemorySize: 8 * 1024 * 1024 * 1024,
|
||||
MemorySize: 4 * 1024 * 1024 * 1024,
|
||||
Mac: mac,
|
||||
}
|
||||
tmData, _ := json.MarshalIndent(tmCfg, "", " ")
|
||||
return os.WriteFile(filepath.Join(cloneDir, "config.json"), tmData, 0644)
|
||||
}
|
||||
|
||||
// startTailMacVM clones a Tart base image and launches it via tailmac
|
||||
// Host.app in headless mode, connected to vnet's dgram socket.
|
||||
// startTailMacVM restores a macOS VM from a cached snapshot and launches it
|
||||
// via tailmac Host.app in headless mode, connected to vnet's dgram socket.
|
||||
func (e *Env) startTailMacVM(n *Node) error {
|
||||
tartDir := ensureTartImage(e.t)
|
||||
snapDir := e.macosSnapshot
|
||||
|
||||
if err := e.ensureTailMac(); err != nil {
|
||||
return err
|
||||
@@ -156,7 +554,6 @@ func (e *Env) startTailMacVM(n *Node) error {
|
||||
testID := fmt.Sprintf("vmtest-%s-%d", n.name, os.Getpid())
|
||||
|
||||
// Host.app expects VM files under ~/.cache/tailscale/vmtest/macos/<id>/
|
||||
// (hardcoded in Config.swift's vmBundleURL).
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return fmt.Errorf("UserHomeDir: %w", err)
|
||||
@@ -165,16 +562,51 @@ func (e *Env) startTailMacVM(n *Node) error {
|
||||
os.MkdirAll(vmBase, 0755)
|
||||
cloneDir := filepath.Join(vmBase, testID)
|
||||
|
||||
mac := n.vnetNode.NICMac(0)
|
||||
e.t.Logf("[%s] cloning Tart image -> %s (mac=%s)", n.name, testID, mac)
|
||||
if err := cloneTartToTailmac(tartDir, cloneDir, testID, mac.String(), e.dgramSockAddr); err != nil {
|
||||
return fmt.Errorf("cloning tart VM: %w", err)
|
||||
// APFS clone the entire snapshot directory (includes SaveFile.vzvmsave).
|
||||
e.t.Logf("[%s] cloning snapshot -> %s", n.name, testID)
|
||||
if out, err := exec.Command("cp", "-c", "-r", snapDir, cloneDir).CombinedOutput(); err != nil {
|
||||
if out2, err2 := exec.Command("cp", "-r", snapDir, cloneDir).CombinedOutput(); err2 != nil {
|
||||
return fmt.Errorf("cloning snapshot: %v: %s (APFS clone: %v: %s)", err2, out2, err, out)
|
||||
}
|
||||
}
|
||||
e.t.Cleanup(func() { os.RemoveAll(cloneDir) })
|
||||
|
||||
// Write test-specific config.json with the vnet MAC and dgram socket.
|
||||
mac := n.vnetNode.NICMac(0)
|
||||
cfg := struct {
|
||||
VMid string `json:"vmID"`
|
||||
ServerSocket string `json:"serverSocket"`
|
||||
MemorySize uint64 `json:"memorySize"`
|
||||
Mac string `json:"mac"`
|
||||
}{
|
||||
VMid: testID,
|
||||
ServerSocket: e.dgramSockAddr,
|
||||
MemorySize: 8 * 1024 * 1024 * 1024,
|
||||
Mac: mac.String(),
|
||||
}
|
||||
cfgData, _ := json.MarshalIndent(cfg, "", " ")
|
||||
if err := os.WriteFile(filepath.Join(cloneDir, "config.json"), cfgData, 0644); err != nil {
|
||||
return fmt.Errorf("writing config.json: %w", err)
|
||||
}
|
||||
|
||||
// Launch Host.app with disconnected NIC + hot-swap to vnet.
|
||||
// Host.app will restore from SaveFile.vzvmsave (fast), then
|
||||
// hot-swap the NIC to the vnet dgram socket.
|
||||
hostBin := filepath.Join(e.tailmacDir, "Host.app", "Contents", "MacOS", "Host")
|
||||
|
||||
// Compute the node's IP and gateway for static assignment via vsock.
|
||||
nodeIP := n.vnetNode.LanIP(n.nets[0])
|
||||
// The gateway is the network's base address (e.g. 192.168.1.1 for /24).
|
||||
// We derive it from the node IP: same /24 prefix, host part = 1.
|
||||
gwIP := nodeIP.As4()
|
||||
gwIP[3] = 1
|
||||
gateway := netip.AddrFrom4(gwIP)
|
||||
|
||||
args := []string{
|
||||
"run", "--id", testID, "--headless",
|
||||
"--disconnected-nic",
|
||||
"--attach-network", e.dgramSockAddr,
|
||||
"--assign-ip", fmt.Sprintf("%s/255.255.255.0/%s", nodeIP, gateway),
|
||||
}
|
||||
|
||||
wantScreenshots := *vmtestWeb != ""
|
||||
@@ -191,8 +623,6 @@ func (e *Env) startTailMacVM(n *Node) error {
|
||||
cmd := exec.Command(hostBin, args...)
|
||||
cmd.Env = append(os.Environ(), "NSUnbufferedIO=YES")
|
||||
|
||||
// If screenshots are enabled, we need to parse stdout for the
|
||||
// SCREENSHOT_PORT=<port> line, while also logging everything to file.
|
||||
var stdoutPipe io.ReadCloser
|
||||
if wantScreenshots {
|
||||
stdoutPipe, err = cmd.StdoutPipe()
|
||||
@@ -219,14 +649,13 @@ func (e *Env) startTailMacVM(n *Node) error {
|
||||
}
|
||||
e.t.Logf("[%s] launched tailmac (pid %d), log: %s", n.name, cmd.Process.Pid, logPath)
|
||||
|
||||
// Parse screenshot port from stdout and start polling goroutine.
|
||||
if wantScreenshots {
|
||||
screenshotPortCh := make(chan int, 1)
|
||||
go func() {
|
||||
scanner := bufio.NewScanner(stdoutPipe)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
fmt.Fprintln(logFile, line) // tee to log file
|
||||
fmt.Fprintln(logFile, line)
|
||||
if port := 0; strings.HasPrefix(line, "SCREENSHOT_PORT=") {
|
||||
fmt.Sscanf(line, "SCREENSHOT_PORT=%d", &port)
|
||||
if port > 0 {
|
||||
@@ -252,15 +681,9 @@ func (e *Env) startTailMacVM(n *Node) error {
|
||||
clientSock := fmt.Sprintf("/tmp/qemu-dgram-%s.sock", testID)
|
||||
|
||||
e.t.Cleanup(func() {
|
||||
cmd.Process.Signal(os.Interrupt)
|
||||
done := make(chan error, 1)
|
||||
go func() { done <- cmd.Wait() }()
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(15 * time.Second):
|
||||
cmd.Process.Kill()
|
||||
<-done
|
||||
}
|
||||
// Kill immediately — no need to save state for ephemeral test clones.
|
||||
cmd.Process.Kill()
|
||||
cmd.Wait()
|
||||
devNull.Close()
|
||||
logFile.Close()
|
||||
os.Remove(clientSock)
|
||||
|
||||
+233
-258
@@ -78,16 +78,28 @@ type Env struct {
|
||||
gokrazyKernel string // path to gokrazy kernel
|
||||
|
||||
// tailmac-specific paths (macOS VMs)
|
||||
tailmacDir string // path to tailmac bin/ directory containing Host.app
|
||||
tailmacDir string // path to tailmac bin/ directory containing Host.app
|
||||
macosSnapshot string // path to cached macOS VM snapshot directory
|
||||
macosSnapshotOnce sync.Once
|
||||
|
||||
qemuProcs []*exec.Cmd // launched QEMU processes
|
||||
|
||||
sameTailnetUser bool // all nodes register as the same Tailnet user
|
||||
|
||||
// Shared resource initialization (sync.Once for things multiple nodes share).
|
||||
vnetOnce sync.Once
|
||||
gokrazyOnce sync.Once
|
||||
qemuSockOnce sync.Once
|
||||
dgramSockOnce sync.Once
|
||||
compileMu sync.Mutex
|
||||
compiled set.Set[string]
|
||||
|
||||
// Web UI support.
|
||||
ctx context.Context // cancelled when test ends
|
||||
eventBus *EventBus
|
||||
testStatus *TestStatus
|
||||
stepsMu sync.Mutex
|
||||
stepsByKey map[string]*Step
|
||||
steps []*Step
|
||||
|
||||
nodeStatusMu sync.Mutex
|
||||
@@ -102,6 +114,28 @@ func (e *Env) logVerbosef(format string, args ...any) {
|
||||
}
|
||||
}
|
||||
|
||||
// vmPlatform defines how a VM type boots. Each OS image type (gokrazy,
|
||||
// cloud, macOS) implements this interface.
|
||||
type vmPlatform interface {
|
||||
// planSteps registers steps with the web UI in a dry-run pass.
|
||||
planSteps(e *Env, n *Node)
|
||||
|
||||
// boot does everything needed to get this node running: ensure images,
|
||||
// compile binaries, set up sockets, launch VM. Called concurrently.
|
||||
boot(ctx context.Context, e *Env, n *Node) error
|
||||
}
|
||||
|
||||
// platform returns the vmPlatform for this node's OS type.
|
||||
func (n *Node) platform() vmPlatform {
|
||||
if n.os.IsMacOS {
|
||||
return macPlatform{}
|
||||
}
|
||||
if n.os.IsGokrazy {
|
||||
return gokrazyPlatform{}
|
||||
}
|
||||
return qemuCloudPlatform{}
|
||||
}
|
||||
|
||||
// AddStep declares an expected stage of the test. The web UI shows all steps
|
||||
// from the start, tracking their progress. Call before or during the test.
|
||||
// Returns a *Step whose Begin/End methods drive the progress display.
|
||||
@@ -115,6 +149,28 @@ func (e *Env) AddStep(name string) *Step {
|
||||
return s
|
||||
}
|
||||
|
||||
// Step returns a step by key, creating it if it doesn't exist.
|
||||
// Safe for concurrent use. Both planSteps (dry-run) and boot (real-run)
|
||||
// call this to get the same Step object.
|
||||
func (e *Env) Step(key string) *Step {
|
||||
e.stepsMu.Lock()
|
||||
defer e.stepsMu.Unlock()
|
||||
if s, ok := e.stepsByKey[key]; ok {
|
||||
return s
|
||||
}
|
||||
s := &Step{
|
||||
name: key,
|
||||
index: len(e.steps),
|
||||
env: e,
|
||||
}
|
||||
e.steps = append(e.steps, s)
|
||||
if e.stepsByKey == nil {
|
||||
e.stepsByKey = make(map[string]*Step)
|
||||
}
|
||||
e.stepsByKey[key] = s
|
||||
return s
|
||||
}
|
||||
|
||||
// Steps returns all declared steps in order.
|
||||
func (e *Env) Steps() []*Step {
|
||||
return e.steps
|
||||
@@ -397,16 +453,14 @@ func SNATSubnetRoutes(v bool) nodeOptSNATSubnetRoutes { return nodeOptSNATSubnet
|
||||
// The webserver responds with "Hello world I am <nodename> from <sourceIP>" on all requests.
|
||||
func WebServer(port int) nodeOptWebServer { return nodeOptWebServer(port) }
|
||||
|
||||
// Start initializes the virtual network, builds/downloads images, compiles
|
||||
// binaries, launches QEMU processes, and waits for all TTA agents to connect.
|
||||
// It should be called after all AddNetwork/AddNode calls.
|
||||
// Start initializes the virtual network, boots all VMs in parallel, and waits
|
||||
// for all TTA agents to connect. It should be called after all AddNetwork/AddNode calls.
|
||||
func (e *Env) Start() {
|
||||
t := e.t
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
|
||||
t.Cleanup(cancel)
|
||||
e.ctx = ctx
|
||||
|
||||
// Initialize node status and start web UI as early as possible.
|
||||
e.initNodeStatus()
|
||||
e.maybeStartWebServer()
|
||||
|
||||
@@ -414,8 +468,6 @@ func (e *Env) Start() {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Resolve --test-version up front (e.g. "unstable" -> "1.97.255") so all
|
||||
// platforms see the same concrete version.
|
||||
if *testVersion != "" {
|
||||
v, err := resolveTestVersion(ctx, *testVersion)
|
||||
if err != nil {
|
||||
@@ -425,267 +477,42 @@ func (e *Env) Start() {
|
||||
t.Logf("using Tailscale release version %s (from --test-version=%q)", v, *testVersion)
|
||||
}
|
||||
|
||||
// Check if any macOS nodes are present; if so, verify prerequisites.
|
||||
hasMacOS := false
|
||||
for _, n := range e.nodes {
|
||||
if n.os.IsMacOS {
|
||||
hasMacOS = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if hasMacOS {
|
||||
if runtime.GOOS != "darwin" || runtime.GOARCH != "arm64" {
|
||||
if n.os.IsMacOS && (runtime.GOOS != "darwin" || runtime.GOARCH != "arm64") {
|
||||
t.Skip("macOS VM tests require macOS arm64 host")
|
||||
}
|
||||
}
|
||||
|
||||
// Determine which GOOS/GOARCH pairs need compiled binaries (non-gokrazy,
|
||||
// non-macOS images). Gokrazy has binaries built-in. macOS VMs don't use
|
||||
// compiled binaries (no TTA agent).
|
||||
type platform struct{ goos, goarch string }
|
||||
needPlatform := set.Set[platform]{}
|
||||
for _, n := range e.nodes {
|
||||
if !n.os.IsGokrazy && !n.os.IsMacOS {
|
||||
needPlatform.Add(platform{n.os.GOOS(), n.os.GOARCH()})
|
||||
}
|
||||
}
|
||||
|
||||
// Declare framework steps for the web UI.
|
||||
// User-declared steps (from AddStep before Start) get moved to the end
|
||||
// so framework steps (compile, image, QEMU, etc.) come first.
|
||||
// Dry-run: let each platform register its steps with the web UI.
|
||||
userSteps := e.steps
|
||||
e.steps = nil
|
||||
|
||||
compileSteps := map[platform]*Step{}
|
||||
for _, p := range needPlatform.Slice() {
|
||||
compileSteps[p] = e.AddStep(fmt.Sprintf("Compile %s_%s binaries", p.goos, p.goarch))
|
||||
}
|
||||
imageSteps := map[string]*Step{} // keyed by OS name
|
||||
didOS := set.Set[string]{} // dedup by image name
|
||||
for _, n := range e.nodes {
|
||||
if didOS.Contains(n.os.Name) {
|
||||
continue
|
||||
}
|
||||
didOS.Add(n.os.Name)
|
||||
if n.os.IsMacOS {
|
||||
imageSteps[n.os.Name] = e.AddStep("Prepare macOS Tart image")
|
||||
} else if n.os.IsGokrazy {
|
||||
imageSteps["gokrazy"] = e.AddStep("Build gokrazy image")
|
||||
} else {
|
||||
imageSteps[n.os.Name] = e.AddStep(fmt.Sprintf("Prepare %s image", n.os.Name))
|
||||
}
|
||||
n.platform().planSteps(e, n)
|
||||
}
|
||||
vnetStep := e.AddStep("Create virtual network")
|
||||
|
||||
vmSteps := map[string]*Step{}
|
||||
agentSteps := map[string]*Step{}
|
||||
tsUpSteps := map[string]*Step{}
|
||||
for _, n := range e.nodes {
|
||||
if n.os.IsMacOS {
|
||||
vmSteps[n.name] = e.AddStep(fmt.Sprintf("Launch macOS VM: %s", n.name))
|
||||
} else {
|
||||
vmSteps[n.name] = e.AddStep(fmt.Sprintf("Launch QEMU: %s", n.name))
|
||||
}
|
||||
if !n.noAgent {
|
||||
agentSteps[n.name] = e.AddStep(fmt.Sprintf("Wait for agent: %s", n.name))
|
||||
e.Step("Wait for agent: " + n.name)
|
||||
}
|
||||
if n.joinTailnet {
|
||||
tsUpSteps[n.name] = e.AddStep(fmt.Sprintf("Tailscale up: %s", n.name))
|
||||
e.Step("Tailscale up: " + n.name)
|
||||
}
|
||||
}
|
||||
|
||||
// Re-append user-declared steps after all framework steps.
|
||||
for _, s := range userSteps {
|
||||
s.index = len(e.steps)
|
||||
e.steps = append(e.steps, s)
|
||||
}
|
||||
|
||||
// Compile binaries and download/build images in parallel.
|
||||
// Any failure cancels the others via the errgroup context.
|
||||
eg, egCtx := errgroup.WithContext(ctx)
|
||||
for _, p := range needPlatform.Slice() {
|
||||
step := compileSteps[p]
|
||||
eg.Go(func() error {
|
||||
step.Begin()
|
||||
err := e.compileBinariesForOS(egCtx, p.goos, p.goarch)
|
||||
if err != nil {
|
||||
step.End(err)
|
||||
return err
|
||||
}
|
||||
step.End(nil)
|
||||
return nil
|
||||
// Boot all nodes in parallel. Each platform handles its own
|
||||
// dependencies (image prep, binary compilation, socket setup)
|
||||
// via sync.Once, so independent work overlaps naturally.
|
||||
var bootEg errgroup.Group
|
||||
for _, n := range e.nodes {
|
||||
bootEg.Go(func() error {
|
||||
return n.platform().boot(ctx, e, n)
|
||||
})
|
||||
}
|
||||
didOS = set.Set[string]{} // reset for second pass
|
||||
for _, n := range e.nodes {
|
||||
if didOS.Contains(n.os.Name) {
|
||||
continue
|
||||
}
|
||||
didOS.Add(n.os.Name)
|
||||
if n.os.IsMacOS {
|
||||
step := imageSteps[n.os.Name]
|
||||
eg.Go(func() error {
|
||||
step.Begin()
|
||||
ensureTartImage(t)
|
||||
step.End(nil)
|
||||
return nil
|
||||
})
|
||||
} else if n.os.IsGokrazy {
|
||||
step := imageSteps["gokrazy"]
|
||||
eg.Go(func() error {
|
||||
step.Begin()
|
||||
err := e.ensureGokrazy(egCtx)
|
||||
if err != nil {
|
||||
step.End(err)
|
||||
return err
|
||||
}
|
||||
step.End(nil)
|
||||
return nil
|
||||
})
|
||||
} else {
|
||||
step := imageSteps[n.os.Name]
|
||||
osImg := n.os
|
||||
eg.Go(func() error {
|
||||
step.Begin()
|
||||
err := ensureImage(egCtx, osImg)
|
||||
if err != nil {
|
||||
step.End(err)
|
||||
return err
|
||||
}
|
||||
step.End(nil)
|
||||
return nil
|
||||
})
|
||||
}
|
||||
}
|
||||
if err := eg.Wait(); err != nil {
|
||||
t.Fatalf("setup: %v", err)
|
||||
}
|
||||
|
||||
// Create the vnet server.
|
||||
vnetStep.Begin()
|
||||
var err error
|
||||
e.server, err = vnet.New(&e.cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("vnet.New: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { e.server.Close() })
|
||||
|
||||
// Register DHCP event callback for the web UI.
|
||||
e.server.SetDHCPCallback(func(mac vnet.MAC, nodeNum int, msgType layers.DHCPMsgType, ip netip.Addr) {
|
||||
name := e.nodeNameByNum(nodeNum)
|
||||
nicIdx := e.nicIndexForMAC(name, mac)
|
||||
ipStr := ip.String()
|
||||
switch msgType {
|
||||
case layers.DHCPMsgTypeDiscover:
|
||||
e.setNodeDHCP(name, nicIdx, "Discover sent")
|
||||
e.eventBus.Publish(VMEvent{
|
||||
NodeName: name,
|
||||
Type: EventDHCPDiscover,
|
||||
Message: "DHCP Discover sent",
|
||||
NIC: nicIdx,
|
||||
})
|
||||
case layers.DHCPMsgTypeOffer:
|
||||
e.setNodeDHCP(name, nicIdx, "Offered "+ipStr)
|
||||
e.eventBus.Publish(VMEvent{
|
||||
NodeName: name,
|
||||
Type: EventDHCPOffer,
|
||||
Message: "DHCP Offer received",
|
||||
Detail: ipStr,
|
||||
NIC: nicIdx,
|
||||
})
|
||||
case layers.DHCPMsgTypeRequest:
|
||||
e.setNodeDHCP(name, nicIdx, "Requesting "+ipStr)
|
||||
e.eventBus.Publish(VMEvent{
|
||||
NodeName: name,
|
||||
Type: EventDHCPRequest,
|
||||
Message: "DHCP Request sent",
|
||||
Detail: ipStr,
|
||||
NIC: nicIdx,
|
||||
})
|
||||
case layers.DHCPMsgTypeAck:
|
||||
e.setNodeDHCP(name, nicIdx, "Got "+ipStr)
|
||||
e.eventBus.Publish(VMEvent{
|
||||
NodeName: name,
|
||||
Type: EventDHCPAck,
|
||||
Message: "DHCP Ack: got " + ipStr,
|
||||
Detail: ipStr,
|
||||
NIC: nicIdx,
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
if e.sameTailnetUser {
|
||||
e.server.ControlServer().AllNodesSameUser = true
|
||||
}
|
||||
|
||||
// Register compiled binaries with the file server VIP.
|
||||
// Binaries are registered at <goos>_<goarch>/<name> (e.g. "linux_amd64/tta").
|
||||
for _, p := range needPlatform.Slice() {
|
||||
dir := p.goos + "_" + p.goarch
|
||||
for _, name := range []string{"tta", "tailscale", "tailscaled"} {
|
||||
data, err := os.ReadFile(filepath.Join(e.binDir, dir, name))
|
||||
if err != nil {
|
||||
t.Fatalf("reading compiled %s/%s: %v", dir, name, err)
|
||||
}
|
||||
e.server.RegisterFile(dir+"/"+name, data)
|
||||
}
|
||||
}
|
||||
vnetStep.End(nil)
|
||||
|
||||
// Cloud-init config is delivered via local seed ISOs (created in startCloudQEMU),
|
||||
// not via the cloud-init HTTP VIP, because network-config must be available
|
||||
// during init-local before systemd-networkd-wait-online blocks.
|
||||
|
||||
// Start Unix stream socket listener (for QEMU VMs).
|
||||
e.sockAddr = filepath.Join(e.tempDir, "vnet.sock")
|
||||
srv, err := net.Listen("unix", e.sockAddr)
|
||||
if err != nil {
|
||||
t.Fatalf("listen unix: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { srv.Close() })
|
||||
|
||||
go func() {
|
||||
for {
|
||||
c, err := srv.Accept()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
go e.server.ServeUnixConn(c.(*net.UnixConn), vnet.ProtocolQEMU)
|
||||
}
|
||||
}()
|
||||
|
||||
// Start Unix dgram socket listener (for macOS VMs via tailmac).
|
||||
// Use /tmp/ instead of the test temp dir because Unix socket paths
|
||||
// are limited to 104 bytes on macOS, and test temp dir paths are long.
|
||||
if hasMacOS {
|
||||
e.dgramSockAddr = fmt.Sprintf("/tmp/vmtest-dgram-%d.sock", os.Getpid())
|
||||
t.Cleanup(func() { os.Remove(e.dgramSockAddr) })
|
||||
dgramAddr, err := net.ResolveUnixAddr("unixgram", e.dgramSockAddr)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve dgram addr: %v", err)
|
||||
}
|
||||
uc, err := net.ListenUnixgram("unixgram", dgramAddr)
|
||||
if err != nil {
|
||||
t.Fatalf("listen unixgram: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { uc.Close() })
|
||||
go e.server.ServeUnixConn(uc, vnet.ProtocolUnixDGRAM)
|
||||
}
|
||||
|
||||
// Launch VM processes.
|
||||
for _, n := range e.nodes {
|
||||
step := vmSteps[n.name]
|
||||
step.Begin()
|
||||
if n.os.IsMacOS {
|
||||
if err := e.startTailMacVM(n); err != nil {
|
||||
t.Fatalf("startTailMacVM(%s): %v", n.name, err)
|
||||
}
|
||||
} else {
|
||||
if err := e.startQEMU(n); err != nil {
|
||||
t.Fatalf("startQEMU(%s): %v", n.name, err)
|
||||
}
|
||||
}
|
||||
step.End(nil)
|
||||
if err := bootEg.Wait(); err != nil {
|
||||
t.Fatalf("boot: %v", err)
|
||||
}
|
||||
|
||||
// Set up agent clients and wait for all agents to connect.
|
||||
@@ -693,25 +520,32 @@ func (e *Env) Start() {
|
||||
if n.noAgent {
|
||||
continue
|
||||
}
|
||||
e.initVnet() // ensure vnet is ready for agent clients
|
||||
n.agent = e.server.NodeAgentClient(n.vnetNode)
|
||||
n.vnetNode.SetClient(n.agent)
|
||||
}
|
||||
|
||||
// Wait for agents, then bring up tailscale.
|
||||
var agentEg errgroup.Group
|
||||
for _, n := range e.nodes {
|
||||
if n.noAgent {
|
||||
continue
|
||||
}
|
||||
agentEg.Go(func() error {
|
||||
aStep := agentSteps[n.name]
|
||||
aStep := e.Step("Wait for agent: " + n.name)
|
||||
aStep.Begin()
|
||||
t.Logf("[%s] waiting for agent...", n.name)
|
||||
st, err := n.agent.Status(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("[%s] agent status: %w", n.name, err)
|
||||
if n.joinTailnet {
|
||||
st, err := n.agent.Status(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("[%s] agent status: %w", n.name, err)
|
||||
}
|
||||
t.Logf("[%s] agent connected, backend state: %s", n.name, st.BackendState)
|
||||
} else {
|
||||
if err := e.waitForAgentConn(ctx, n); err != nil {
|
||||
return fmt.Errorf("[%s] agent connect: %w", n.name, err)
|
||||
}
|
||||
t.Logf("[%s] agent connected (no tailscale)", n.name)
|
||||
}
|
||||
t.Logf("[%s] agent connected, backend state: %s", n.name, st.BackendState)
|
||||
aStep.End(nil)
|
||||
|
||||
if n.vnetNode.HostFirewall() {
|
||||
@@ -721,21 +555,21 @@ func (e *Env) Start() {
|
||||
}
|
||||
|
||||
if n.joinTailnet {
|
||||
tsStep := tsUpSteps[n.name]
|
||||
tsStep := e.Step("Tailscale up: " + n.name)
|
||||
tsStep.Begin()
|
||||
if err := e.tailscaleUp(ctx, n); err != nil {
|
||||
return fmt.Errorf("[%s] tailscale up: %w", n.name, err)
|
||||
}
|
||||
st, err = n.agent.Status(ctx)
|
||||
st2, err := n.agent.Status(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("[%s] status after up: %w", n.name, err)
|
||||
}
|
||||
if st.BackendState != "Running" {
|
||||
return fmt.Errorf("[%s] state = %q, want Running", n.name, st.BackendState)
|
||||
if st2.BackendState != "Running" {
|
||||
return fmt.Errorf("[%s] state = %q, want Running", n.name, st2.BackendState)
|
||||
}
|
||||
ips := fmt.Sprintf("%v", st.Self.TailscaleIPs)
|
||||
ips := fmt.Sprintf("%v", st2.Self.TailscaleIPs)
|
||||
e.setNodeTailscale(n.name, "Running "+ips)
|
||||
t.Logf("[%s] up with %v", n.name, st.Self.TailscaleIPs)
|
||||
t.Logf("[%s] up with %v", n.name, st2.Self.TailscaleIPs)
|
||||
tsStep.End(nil)
|
||||
}
|
||||
|
||||
@@ -1226,6 +1060,147 @@ func (e *Env) nodeScreenshotPort(name string) int {
|
||||
return 0
|
||||
}
|
||||
|
||||
// initVnet creates the vnet server. Called once via sync.Once.
|
||||
func (e *Env) initVnet() {
|
||||
e.vnetOnce.Do(func() {
|
||||
var err error
|
||||
e.server, err = vnet.New(&e.cfg)
|
||||
if err != nil {
|
||||
e.t.Fatalf("vnet.New: %v", err)
|
||||
}
|
||||
e.t.Cleanup(func() { e.server.Close() })
|
||||
|
||||
e.server.SetDHCPCallback(func(mac vnet.MAC, nodeNum int, msgType layers.DHCPMsgType, ip netip.Addr) {
|
||||
name := e.nodeNameByNum(nodeNum)
|
||||
nicIdx := e.nicIndexForMAC(name, mac)
|
||||
ipStr := ip.String()
|
||||
switch msgType {
|
||||
case layers.DHCPMsgTypeDiscover:
|
||||
e.setNodeDHCP(name, nicIdx, "Discover sent")
|
||||
e.eventBus.Publish(VMEvent{NodeName: name, Type: EventDHCPDiscover, Message: "DHCP Discover sent", NIC: nicIdx})
|
||||
case layers.DHCPMsgTypeOffer:
|
||||
e.setNodeDHCP(name, nicIdx, "Offered "+ipStr)
|
||||
e.eventBus.Publish(VMEvent{NodeName: name, Type: EventDHCPOffer, Message: "DHCP Offer received", Detail: ipStr, NIC: nicIdx})
|
||||
case layers.DHCPMsgTypeRequest:
|
||||
e.setNodeDHCP(name, nicIdx, "Requesting "+ipStr)
|
||||
e.eventBus.Publish(VMEvent{NodeName: name, Type: EventDHCPRequest, Message: "DHCP Request sent", Detail: ipStr, NIC: nicIdx})
|
||||
case layers.DHCPMsgTypeAck:
|
||||
e.setNodeDHCP(name, nicIdx, "Got "+ipStr)
|
||||
e.eventBus.Publish(VMEvent{NodeName: name, Type: EventDHCPAck, Message: "DHCP Ack: got " + ipStr, Detail: ipStr, NIC: nicIdx})
|
||||
}
|
||||
})
|
||||
|
||||
if e.sameTailnetUser {
|
||||
e.server.ControlServer().AllNodesSameUser = true
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// ensureQEMUSocket creates the Unix stream socket for QEMU VMs. Called once.
|
||||
func (e *Env) ensureQEMUSocket() {
|
||||
e.qemuSockOnce.Do(func() {
|
||||
e.initVnet()
|
||||
e.sockAddr = filepath.Join(e.tempDir, "vnet.sock")
|
||||
srv, err := net.Listen("unix", e.sockAddr)
|
||||
if err != nil {
|
||||
e.t.Fatalf("listen unix: %v", err)
|
||||
}
|
||||
e.t.Cleanup(func() { srv.Close() })
|
||||
go func() {
|
||||
for {
|
||||
c, err := srv.Accept()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
go e.server.ServeUnixConn(c.(*net.UnixConn), vnet.ProtocolQEMU)
|
||||
}
|
||||
}()
|
||||
})
|
||||
}
|
||||
|
||||
// ensureDgramSocket creates the Unix dgram socket for macOS VMs. Called once.
|
||||
func (e *Env) ensureDgramSocket() {
|
||||
e.dgramSockOnce.Do(func() {
|
||||
e.initVnet()
|
||||
e.dgramSockAddr = fmt.Sprintf("/tmp/vmtest-dgram-%d.sock", os.Getpid())
|
||||
e.t.Cleanup(func() { os.Remove(e.dgramSockAddr) })
|
||||
dgramAddr, err := net.ResolveUnixAddr("unixgram", e.dgramSockAddr)
|
||||
if err != nil {
|
||||
e.t.Fatalf("resolve dgram addr: %v", err)
|
||||
}
|
||||
uc, err := net.ListenUnixgram("unixgram", dgramAddr)
|
||||
if err != nil {
|
||||
e.t.Fatalf("listen unixgram: %v", err)
|
||||
}
|
||||
e.t.Cleanup(func() { uc.Close() })
|
||||
go e.server.ServeUnixConn(uc, vnet.ProtocolUnixDGRAM)
|
||||
})
|
||||
}
|
||||
|
||||
// ensureCompiled compiles binaries for the given platform and registers them
|
||||
// with the vnet file server. Safe for concurrent use; only compiles once per platform.
|
||||
func (e *Env) ensureCompiled(ctx context.Context, goos, goarch string) {
|
||||
key := goos + "_" + goarch
|
||||
|
||||
e.compileMu.Lock()
|
||||
if e.compiled.Contains(key) {
|
||||
e.compileMu.Unlock()
|
||||
return
|
||||
}
|
||||
e.compileMu.Unlock()
|
||||
|
||||
step := e.Step(fmt.Sprintf("Compile %s_%s binaries", goos, goarch))
|
||||
step.Begin()
|
||||
if err := e.compileBinariesForOS(ctx, goos, goarch); err != nil {
|
||||
step.End(err)
|
||||
e.t.Fatalf("compileBinariesForOS(%s, %s): %v", goos, goarch, err)
|
||||
}
|
||||
step.End(nil)
|
||||
e.registerBinaries(goos, goarch)
|
||||
|
||||
e.compileMu.Lock()
|
||||
e.compiled.Make()
|
||||
e.compiled.Add(key)
|
||||
e.compileMu.Unlock()
|
||||
}
|
||||
|
||||
// registerBinaries registers compiled binaries with the vnet file server.
|
||||
// Safe for concurrent use.
|
||||
func (e *Env) registerBinaries(goos, goarch string) {
|
||||
e.initVnet()
|
||||
dir := goos + "_" + goarch
|
||||
for _, name := range []string{"tta", "tailscale", "tailscaled"} {
|
||||
data, err := os.ReadFile(filepath.Join(e.binDir, dir, name))
|
||||
if err != nil {
|
||||
e.t.Fatalf("reading compiled %s/%s: %v", dir, name, err)
|
||||
}
|
||||
e.server.RegisterFile(dir+"/"+name, data)
|
||||
}
|
||||
}
|
||||
|
||||
// waitForAgentConn waits for a TTA agent to connect by issuing a simple
|
||||
// HTTP GET to the root endpoint, without requiring tailscaled.
|
||||
func (e *Env) waitForAgentConn(ctx context.Context, n *Node) error {
|
||||
for {
|
||||
reqCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
|
||||
req, err := http.NewRequestWithContext(reqCtx, "GET", "http://unused/", nil)
|
||||
if err != nil {
|
||||
cancel()
|
||||
return err
|
||||
}
|
||||
res, err := n.agent.HTTPClient.Do(req)
|
||||
cancel()
|
||||
if err == nil {
|
||||
res.Body.Close()
|
||||
return nil
|
||||
}
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
// Agent returns the node's TTA agent client, or nil if NoAgent is set.
|
||||
func (n *Node) Agent() *vnet.NodeAgentClient {
|
||||
return n.agent
|
||||
|
||||
@@ -26,16 +26,32 @@ func TestMacOSAndLinuxCanPing(t *testing.T) {
|
||||
vmtest.DontJoinTailnet())
|
||||
macos := env.AddNode("macos", lan,
|
||||
vmtest.OS(vmtest.MacOS),
|
||||
vmtest.DontJoinTailnet(),
|
||||
vmtest.NoAgent())
|
||||
vmtest.DontJoinTailnet())
|
||||
|
||||
env.Start()
|
||||
|
||||
// Ping from Linux (which has TTA) to macOS (which just responds to ICMP).
|
||||
// LANPing retries until the macOS VM has booted and acquired a DHCP lease.
|
||||
env.LANPing(linux, macos.LanIP(lan))
|
||||
}
|
||||
|
||||
func TestTwoMacOSVMsCanPing(t *testing.T) {
|
||||
env := vmtest.New(t)
|
||||
|
||||
lan := env.AddNetwork("192.168.1.1/24")
|
||||
|
||||
mac1 := env.AddNode("mac1", lan,
|
||||
vmtest.OS(vmtest.MacOS),
|
||||
vmtest.DontJoinTailnet())
|
||||
mac2 := env.AddNode("mac2", lan,
|
||||
vmtest.OS(vmtest.MacOS),
|
||||
vmtest.DontJoinTailnet())
|
||||
|
||||
env.Start()
|
||||
|
||||
// Both macOS VMs have TTA. Ping from mac1 to mac2 and vice versa.
|
||||
env.LANPing(mac1, mac2.LanIP(lan))
|
||||
env.LANPing(mac2, mac1.LanIP(lan))
|
||||
}
|
||||
|
||||
func TestSubnetRouter(t *testing.T) {
|
||||
testSubnetRouterForOS(t, vmtest.Ubuntu2404)
|
||||
}
|
||||
|
||||
@@ -33,6 +33,13 @@ func (v virtualIP) Match(a netip.Addr) bool {
|
||||
return v.v4 == a.Unmap() || v.v6 == a
|
||||
}
|
||||
|
||||
// TestDriverIPv4 returns the IPv4 address of the test driver VIP (52.52.0.2).
|
||||
// TTA agents dial this IP on port TestDriverPort to connect to the test harness.
|
||||
func TestDriverIPv4() netip.Addr { return fakeTestAgent.v4 }
|
||||
|
||||
// TestDriverPort is the port the test driver listens on.
|
||||
const TestDriverPort = 8008
|
||||
|
||||
// FakeDNSIPv4 returns the fake DNS IPv4 address.
|
||||
func FakeDNSIPv4() netip.Addr { return fakeDNS.v4 }
|
||||
|
||||
|
||||
@@ -352,7 +352,7 @@ func (n *network) acceptTCP(r *tcp.ForwarderRequest) {
|
||||
return
|
||||
}
|
||||
|
||||
if destPort == 8008 && fakeTestAgent.Match(destIP) {
|
||||
if destPort == TestDriverPort && fakeTestAgent.Match(destIP) {
|
||||
node, ok := n.nodeByIP(clientRemoteIP)
|
||||
if !ok {
|
||||
n.logf("unknown client IP %v trying to connect to test driver", clientRemoteIP)
|
||||
@@ -2106,7 +2106,7 @@ func (s *Server) shouldInterceptTCP(pkt gopacket.Packet) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
if tcp.DstPort == 8008 && fakeTestAgent.Match(flow.dst) {
|
||||
if tcp.DstPort == TestDriverPort && fakeTestAgent.Match(flow.dst) {
|
||||
// Connection from cmd/tta.
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -22,8 +22,10 @@ extension HostCli {
|
||||
@Option var share: String?
|
||||
@Flag(help: "Run without GUI (for automated testing)") var headless: Bool = false
|
||||
@Flag(help: "Create NIC with no attachment (for later hot-swap)") var disconnectedNic: Bool = false
|
||||
@Flag(help: "Use NAT NIC instead of socket NIC (for snapshot prep)") var natNic: Bool = false
|
||||
@Option(help: "Hot-swap NIC to this dgram socket path after boot/restore") var attachNetwork: String?
|
||||
@Option(help: "Serve screenshots on this localhost port (0 = auto)") var screenshotPort: Int?
|
||||
@Option(help: "Assign IP/mask/gw to guest via vsock (e.g. 192.168.1.2/255.255.255.0/192.168.1.1)") var assignIp: String?
|
||||
|
||||
mutating func run() {
|
||||
config = Config(id)
|
||||
@@ -32,19 +34,38 @@ extension HostCli {
|
||||
|
||||
if headless {
|
||||
let attachSocket = attachNetwork
|
||||
let disconnected = disconnectedNic || attachSocket != nil
|
||||
let useNatNIC = natNic
|
||||
let disconnected = !useNatNIC && (disconnectedNic || attachSocket != nil)
|
||||
let wantScreenshots = screenshotPort != nil
|
||||
let requestedPort = UInt16(screenshotPort ?? 0)
|
||||
let ipConfig = assignIp
|
||||
|
||||
// Set up SIGINT handler before entering the event loop.
|
||||
// The dispatch source must be stored in a global to prevent ARC deallocation.
|
||||
signal(SIGINT, SIG_IGN)
|
||||
let sigintSource = DispatchSource.makeSignalSource(signal: SIGINT, queue: .main)
|
||||
retainedSigintSource = sigintSource
|
||||
|
||||
DispatchQueue.main.async {
|
||||
let controller = VMController()
|
||||
controller.createVirtualMachine(headless: true, disconnectedNIC: disconnected)
|
||||
controller.createVirtualMachine(headless: true, disconnectedNIC: disconnected, natNIC: useNatNIC)
|
||||
|
||||
// Start vsock listener for IP assignment.
|
||||
// If --assign-ip is set, the listener replies with the IP config JSON.
|
||||
// If not set (snapshot prep), it replies "wait" so TTA keeps polling.
|
||||
if let ipCfg = ipConfig {
|
||||
let parts = ipCfg.split(separator: "/")
|
||||
if parts.count == 3 {
|
||||
let response = "{\"ip\":\"\(parts[0])\",\"mask\":\"\(parts[1])\",\"gw\":\"\(parts[2])\"}"
|
||||
controller.startIPConfigListener(response: response)
|
||||
}
|
||||
} else {
|
||||
controller.startIPConfigListener(response: "wait")
|
||||
}
|
||||
|
||||
// Handle SIGINT (from test cleanup) by saving VM state before exit.
|
||||
let sigintSource = DispatchSource.makeSignalSource(signal: SIGINT, queue: .main)
|
||||
signal(SIGINT, SIG_IGN) // Let DispatchSource handle it
|
||||
sigintSource.setEventHandler {
|
||||
print("SIGINT received, saving VM state...")
|
||||
print("SIGINT received, disconnecting NIC and saving VM state...")
|
||||
controller.disconnectNetwork()
|
||||
controller.pauseAndSaveVirtualMachine {
|
||||
print("VM state saved, exiting.")
|
||||
Foundation.exit(0)
|
||||
@@ -79,11 +100,7 @@ extension HostCli {
|
||||
|
||||
let doAttach = {
|
||||
if let sock = attachSocket {
|
||||
// Give macOS a moment to settle after boot/restore,
|
||||
// then hot-swap the NIC attachment.
|
||||
DispatchQueue.main.asyncAfter(deadline: .now() + 1.0) {
|
||||
controller.attachNetwork(serverSocket: sock, clientID: config.vmID)
|
||||
}
|
||||
controller.attachNetwork(serverSocket: sock, clientID: config.vmID)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -107,7 +124,9 @@ extension HostCli {
|
||||
fflush(stdout)
|
||||
app.run()
|
||||
} else {
|
||||
RunLoop.main.run()
|
||||
// Use dispatchMain() instead of RunLoop.main.run() so that
|
||||
// GCD dispatch sources (like the SIGINT handler) are processed.
|
||||
dispatchMain()
|
||||
}
|
||||
} else {
|
||||
_ = NSApplicationMain(CommandLine.argc, CommandLine.unsafeArgv)
|
||||
@@ -119,6 +138,7 @@ extension HostCli {
|
||||
// startScreenshotServer starts a localhost HTTP server that serves VM display
|
||||
// screenshots on GET /screenshot as JPEG. The port is printed to stdout as
|
||||
// "SCREENSHOT_PORT=<port>" so the Go test harness can discover it.
|
||||
var retainedSigintSource: DispatchSourceSignal? // prevent ARC deallocation
|
||||
var screenshotServer: ScreenshotHTTPServer? // prevent GC
|
||||
|
||||
func startScreenshotServer(view: NSView, port: UInt16) {
|
||||
|
||||
@@ -81,7 +81,7 @@ class VMController: NSObject, VZVirtualMachineDelegate {
|
||||
return macPlatform
|
||||
}
|
||||
|
||||
func createVirtualMachine(headless: Bool = false, disconnectedNIC: Bool = false) {
|
||||
func createVirtualMachine(headless: Bool = false, disconnectedNIC: Bool = false, natNIC: Bool = false) {
|
||||
let virtualMachineConfiguration = VZVirtualMachineConfiguration()
|
||||
|
||||
virtualMachineConfiguration.platform = createMacPlaform()
|
||||
@@ -91,7 +91,10 @@ class VMController: NSObject, VZVirtualMachineDelegate {
|
||||
virtualMachineConfiguration.graphicsDevices = [helper.createGraphicsDeviceConfiguration()]
|
||||
virtualMachineConfiguration.storageDevices = [helper.createBlockDeviceConfiguration()]
|
||||
if headless {
|
||||
if disconnectedNIC {
|
||||
if natNIC {
|
||||
// NAT NIC for SSH access during snapshot preparation.
|
||||
virtualMachineConfiguration.networkDevices = [helper.createNetworkDeviceConfiguration()]
|
||||
} else if disconnectedNIC {
|
||||
// Create a NIC with no attachment. The NIC exists in the hardware
|
||||
// config (so saved state is compatible) but appears disconnected.
|
||||
// Call attachNetwork() after restore to hot-swap the attachment.
|
||||
@@ -120,6 +123,17 @@ class VMController: NSObject, VZVirtualMachineDelegate {
|
||||
virtualMachine.delegate = self
|
||||
}
|
||||
|
||||
/// Disconnect the NIC by setting its attachment to nil.
|
||||
/// Call before saving state so the snapshot has no active link.
|
||||
func disconnectNetwork() {
|
||||
guard let nic = virtualMachine.networkDevices.first else {
|
||||
print("disconnectNetwork: no network devices")
|
||||
return
|
||||
}
|
||||
nic.attachment = nil
|
||||
print("disconnectNetwork: NIC attachment set to nil")
|
||||
}
|
||||
|
||||
/// Hot-swap the NIC attachment on a running VM. The VM must have been
|
||||
/// created with disconnectedNIC=true. After calling this, the guest
|
||||
/// sees the link come up and does DHCP.
|
||||
@@ -157,6 +171,21 @@ class VMController: NSObject, VZVirtualMachineDelegate {
|
||||
}
|
||||
}
|
||||
|
||||
/// Start a vsock listener that tells the guest TTA agent what IP to configure.
|
||||
/// If response is nil, the listener replies "wait" (snapshot prep mode).
|
||||
func startIPConfigListener(response: String) {
|
||||
guard let device = virtualMachine.socketDevices.first as? VZVirtioSocketDevice else {
|
||||
print("startIPConfigListener: no socket device")
|
||||
return
|
||||
}
|
||||
let listener = IPConfigListener(response: response)
|
||||
retainedIPConfigListener = listener
|
||||
let vsockListener = VZVirtioSocketListener()
|
||||
vsockListener.delegate = listener
|
||||
device.setSocketListener(vsockListener, forPort: 51011)
|
||||
print("startIPConfigListener: listening on vsock port 51011")
|
||||
}
|
||||
|
||||
func resumeVirtualMachine() {
|
||||
virtualMachine.resume(completionHandler: { (result) in
|
||||
if case let .failure(error) = result {
|
||||
@@ -211,3 +240,28 @@ class VMController: NSObject, VZVirtualMachineDelegate {
|
||||
exit(0)
|
||||
}
|
||||
}
|
||||
|
||||
// Global to prevent ARC deallocation of the vsock listener.
|
||||
var retainedIPConfigListener: IPConfigListener?
|
||||
|
||||
/// Listens on vsock port 51011 for TTA connections and replies with
|
||||
/// an IP configuration JSON string (or "wait" during snapshot prep).
|
||||
class IPConfigListener: NSObject, VZVirtioSocketListenerDelegate {
|
||||
let response: String
|
||||
|
||||
init(response: String) {
|
||||
self.response = response
|
||||
}
|
||||
|
||||
func listener(_ listener: VZVirtioSocketListener,
|
||||
shouldAcceptNewConnection connection: VZVirtioSocketConnection,
|
||||
from socketDevice: VZVirtioSocketDevice) -> Bool {
|
||||
let fd = connection.fileDescriptor
|
||||
let data = Array((response + "\n").utf8)
|
||||
data.withUnsafeBufferPointer { buf in
|
||||
_ = write(fd, buf.baseAddress!, buf.count)
|
||||
}
|
||||
connection.close()
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user