tstest/natlab/vmtest: add macOS VM support using Tart base images

Add macOS VM support to the vmtest framework using Tart's pre-built
macOS images (ghcr.io/cirruslabs/macos-tahoe-base) instead of building
from IPSW. The Tart image has SIP disabled and SSH enabled.

At test time, the Tart base image's disk, NVRAM, and hardware identity
are APFS-cloned into a tailmac-compatible directory layout, and the VM
is booted headlessly via tailmac's Host.app (Virtualization.framework)
with its NIC connected to vnet's dgram socket.

New features:
- tailmac.go: ensureTartImage (auto-pull), cloneTartToTailmac (format
  conversion), startTailMacVM (launch + cleanup)
- NoAgent() node option for VMs without TTA installed
- LANPing() for ICMP reachability testing via TTA's /ping endpoint
- IsMacOS field on OSImage, with GOOS/GOARCH support
- Dgram socket listener in Start() for macOS VMs
- Fix ReadFromUnix error spam on dgram socket close in vnet

TestMacOSAndLinuxCanPing verifies a macOS Tart VM and a gokrazy Linux
VM can ping each other on the same vnet LAN.

Updates #13038

Change-Id: I5e73a27878abf009f780fdf11a346fc857711cff
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick
2026-04-28 12:10:45 -07:00
committed by Brad Fitzpatrick
parent ec7b11d986
commit b2d4ba04b6
5 changed files with 408 additions and 17 deletions
+135 -15
View File
@@ -29,6 +29,7 @@ import (
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
@@ -63,8 +64,9 @@ type Env struct {
nodes []*Node
tempDir string
sockAddr string // shared Unix socket path for all QEMU netdevs
binDir string // directory for compiled binaries
sockAddr string // shared Unix socket path for all QEMU netdevs
dgramSockAddr string // Unix dgram socket path for macOS VMs (tailmac)
binDir string // directory for compiled binaries
// testVersion is the resolved Tailscale release version to use (empty if
// building from source). When non-empty, tailscale and tailscaled binaries
@@ -75,6 +77,9 @@ type Env struct {
gokrazyBase string // path to gokrazy base qcow2 image
gokrazyKernel string // path to gokrazy kernel
// tailmac-specific paths (macOS VMs)
tailmacDir string // path to tailmac bin/ directory containing Host.app
qemuProcs []*exec.Cmd // launched QEMU processes
sameTailnetUser bool // all nodes register as the same Tailnet user
@@ -300,6 +305,7 @@ type Node struct {
vnetNode *vnet.Node // primary vnet node (set during Start)
agent *vnet.NodeAgentClient
joinTailnet bool
noAgent bool // true to skip TTA agent setup (e.g. macOS VMs without TTA)
advertiseRoutes string
snatSubnetRoutes *bool // nil means default (true)
webServerPort int
@@ -329,6 +335,8 @@ func (e *Env) AddNode(name string, opts ...any) *Node {
case nodeOptNoTailscale:
n.joinTailnet = false
vnetOpts = append(vnetOpts, vnet.DontJoinTailnet)
case nodeOptNoAgent:
n.noAgent = true
case nodeOptAdvertiseRoutes:
n.advertiseRoutes = string(o)
case nodeOptSNATSubnetRoutes:
@@ -357,6 +365,7 @@ func (n *Node) LanIP(net *vnet.Network) netip.Addr {
// Option types accepted by AddNode. Each one is a distinct named type so that
// AddNode can tell the options apart in its type switch.
type (
	// nodeOptOS carries the OSImage a node should boot.
	nodeOptOS OSImage

	// nodeOptNoTailscale marks a node that should not join the tailnet.
	nodeOptNoTailscale struct{}

	// nodeOptNoAgent marks a node that runs without the TTA agent.
	nodeOptNoAgent struct{}

	// nodeOptAdvertiseRoutes holds the routes the node should advertise.
	nodeOptAdvertiseRoutes string

	// nodeOptSNATSubnetRoutes presumably toggles SNAT for advertised subnet
	// routes (its handling in AddNode is not visible here; confirm there).
	nodeOptSNATSubnetRoutes bool

	// nodeOptWebServer presumably holds a web server port for the node
	// (its handling in AddNode is not visible here; confirm there).
	nodeOptWebServer int
)
@@ -367,6 +376,11 @@ func OS(img OSImage) nodeOptOS { return nodeOptOS(img) }
// DontJoinTailnet returns a NodeOption that keeps the node off the tailnet:
// "tailscale up" is not run for a node created with this option.
func DontJoinTailnet() nodeOptNoTailscale {
	var opt nodeOptNoTailscale
	return opt
}
// NoAgent returns a NodeOption that disables TTA agent setup for the node.
// A node created with this option has no test agent, so anything that needs
// the agent (Status, ExecOnNode, etc.) will not work on it. It is meant for
// VMs that only have to boot and answer ICMP.
func NoAgent() nodeOptNoAgent {
	var opt nodeOptNoAgent
	return opt
}
// AdvertiseRoutes returns a NodeOption that configures the node to advertise
// the given routes (comma-separated CIDRs) when joining the tailnet.
func AdvertiseRoutes(routes string) nodeOptAdvertiseRoutes {
@@ -411,12 +425,27 @@ func (e *Env) Start() {
t.Logf("using Tailscale release version %s (from --test-version=%q)", v, *testVersion)
}
// Determine which GOOS/GOARCH pairs need compiled binaries (non-gokrazy
// images). Gokrazy has binaries built-in, so doesn't need compilation.
// Check if any macOS nodes are present; if so, verify prerequisites.
hasMacOS := false
for _, n := range e.nodes {
if n.os.IsMacOS {
hasMacOS = true
break
}
}
if hasMacOS {
if runtime.GOOS != "darwin" || runtime.GOARCH != "arm64" {
t.Skip("macOS VM tests require macOS arm64 host")
}
}
// Determine which GOOS/GOARCH pairs need compiled binaries (non-gokrazy,
// non-macOS images). Gokrazy has binaries built-in. macOS VMs don't use
// compiled binaries (no TTA agent).
type platform struct{ goos, goarch string }
needPlatform := set.Set[platform]{}
for _, n := range e.nodes {
if !n.os.IsGokrazy {
if !n.os.IsGokrazy && !n.os.IsMacOS {
needPlatform.Add(platform{n.os.GOOS(), n.os.GOARCH()})
}
}
@@ -438,7 +467,9 @@ func (e *Env) Start() {
continue
}
didOS.Add(n.os.Name)
if n.os.IsGokrazy {
if n.os.IsMacOS {
imageSteps[n.os.Name] = e.AddStep("Prepare macOS Tart image")
} else if n.os.IsGokrazy {
imageSteps["gokrazy"] = e.AddStep("Build gokrazy image")
} else {
imageSteps[n.os.Name] = e.AddStep(fmt.Sprintf("Prepare %s image", n.os.Name))
@@ -446,12 +477,18 @@ func (e *Env) Start() {
}
vnetStep := e.AddStep("Create virtual network")
qemuSteps := map[string]*Step{}
vmSteps := map[string]*Step{}
agentSteps := map[string]*Step{}
tsUpSteps := map[string]*Step{}
for _, n := range e.nodes {
qemuSteps[n.name] = e.AddStep(fmt.Sprintf("Launch QEMU: %s", n.name))
agentSteps[n.name] = e.AddStep(fmt.Sprintf("Wait for agent: %s", n.name))
if n.os.IsMacOS {
vmSteps[n.name] = e.AddStep(fmt.Sprintf("Launch macOS VM: %s", n.name))
} else {
vmSteps[n.name] = e.AddStep(fmt.Sprintf("Launch QEMU: %s", n.name))
}
if !n.noAgent {
agentSteps[n.name] = e.AddStep(fmt.Sprintf("Wait for agent: %s", n.name))
}
if n.joinTailnet {
tsUpSteps[n.name] = e.AddStep(fmt.Sprintf("Tailscale up: %s", n.name))
}
@@ -485,7 +522,15 @@ func (e *Env) Start() {
continue
}
didOS.Add(n.os.Name)
if n.os.IsGokrazy {
if n.os.IsMacOS {
step := imageSteps[n.os.Name]
eg.Go(func() error {
step.Begin()
ensureTartImage(t)
step.End(nil)
return nil
})
} else if n.os.IsGokrazy {
step := imageSteps["gokrazy"]
eg.Go(func() error {
step.Begin()
@@ -591,7 +636,7 @@ func (e *Env) Start() {
// not via the cloud-init HTTP VIP, because network-config must be available
// during init-local before systemd-networkd-wait-online blocks.
// Start Unix socket listener.
// Start Unix stream socket listener (for QEMU VMs).
e.sockAddr = filepath.Join(e.tempDir, "vnet.sock")
srv, err := net.Listen("unix", e.sockAddr)
if err != nil {
@@ -609,18 +654,45 @@ func (e *Env) Start() {
}
}()
// Launch QEMU processes.
// Start Unix dgram socket listener (for macOS VMs via tailmac).
// Use /tmp/ instead of the test temp dir because Unix socket paths
// are limited to 104 bytes on macOS, and test temp dir paths are long.
if hasMacOS {
e.dgramSockAddr = fmt.Sprintf("/tmp/vmtest-dgram-%d.sock", os.Getpid())
t.Cleanup(func() { os.Remove(e.dgramSockAddr) })
dgramAddr, err := net.ResolveUnixAddr("unixgram", e.dgramSockAddr)
if err != nil {
t.Fatalf("resolve dgram addr: %v", err)
}
uc, err := net.ListenUnixgram("unixgram", dgramAddr)
if err != nil {
t.Fatalf("listen unixgram: %v", err)
}
t.Cleanup(func() { uc.Close() })
go e.server.ServeUnixConn(uc, vnet.ProtocolUnixDGRAM)
}
// Launch VM processes.
for _, n := range e.nodes {
step := qemuSteps[n.name]
step := vmSteps[n.name]
step.Begin()
if err := e.startQEMU(n); err != nil {
t.Fatalf("startQEMU(%s): %v", n.name, err)
if n.os.IsMacOS {
if err := e.startTailMacVM(n); err != nil {
t.Fatalf("startTailMacVM(%s): %v", n.name, err)
}
} else {
if err := e.startQEMU(n); err != nil {
t.Fatalf("startQEMU(%s): %v", n.name, err)
}
}
step.End(nil)
}
// Set up agent clients and wait for all agents to connect.
for _, n := range e.nodes {
if n.noAgent {
continue
}
n.agent = e.server.NodeAgentClient(n.vnetNode)
n.vnetNode.SetClient(n.agent)
}
@@ -628,6 +700,9 @@ func (e *Env) Start() {
// Wait for agents, then bring up tailscale.
var agentEg errgroup.Group
for _, n := range e.nodes {
if n.noAgent {
continue
}
agentEg.Go(func() error {
aStep := agentSteps[n.name]
aStep.Begin()
@@ -1123,6 +1198,51 @@ func (e *Env) HTTPGet(from *Node, targetURL string) string {
return ""
}
// Agent exposes the TTA agent client attached to n. The result is nil when
// the node was added with NoAgent.
func (n *Node) Agent() *vnet.NodeAgentClient {
	client := n.agent
	return client
}
// LANPing pings a LAN IP from the given node using TTA's /ping endpoint.
// It retries for up to 2 minutes, which is enough for a macOS VM to boot
// and acquire a DHCP lease.
//
// from must have a TTA agent (i.e. it must not have been added with NoAgent);
// otherwise the test is failed immediately. Each attempt is bounded by a
// 5 second timeout and attempts are spaced 2 seconds apart. The function
// returns on the first HTTP 200 response and fatals the test if no attempt
// succeeds before the deadline.
func (e *Env) LANPing(from *Node, targetIP netip.Addr) {
	if from.agent == nil {
		e.t.Fatalf("LANPing: node %s has no agent (NoAgent set?)", from.name)
	}
	e.t.Logf("LANPing: %s -> %s", from.name, targetIP)
	deadline := time.Now().Add(2 * time.Minute)
	for attempt := 0; time.Now().Before(deadline); attempt++ {
		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		reqURL := fmt.Sprintf("http://unused/ping?host=%s", targetIP)
		req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
		if err != nil {
			cancel()
			e.t.Fatalf("LANPing: %v", err)
		}
		res, err := from.agent.HTTPClient.Do(req)
		if err != nil {
			cancel()
			// Only log every 10th failure to keep the test output readable
			// while the target is still booting.
			if attempt%10 == 0 {
				e.t.Logf("LANPing attempt %d: %v", attempt+1, err)
			}
			time.Sleep(2 * time.Second)
			continue
		}
		// The request context also governs reading the response body, so it
		// must stay alive until the body has been consumed; cancelling right
		// after Do can make the read below fail with "context canceled".
		// The read error is deliberately ignored: the body is used only for
		// the diagnostic log line, the status code decides success.
		body, _ := io.ReadAll(res.Body)
		res.Body.Close()
		cancel()
		if res.StatusCode == 200 {
			e.t.Logf("LANPing: %s -> %s succeeded on attempt %d", from.name, targetIP, attempt+1)
			return
		}
		if attempt%10 == 0 {
			e.t.Logf("LANPing attempt %d: status %d, body: %s", attempt+1, res.StatusCode, string(body))
		}
		time.Sleep(2 * time.Second)
	}
	e.t.Fatalf("LANPing: %s -> %s timed out after 2 minutes", from.name, targetIP)
}
// SendTaildropFile sends a file via Taildrop from one node to another.
// The to node must be on the tailnet. It fatals on error.
func (e *Env) SendTaildropFile(from, to *Node, name string, content []byte) {