tstest/natlab/vmtest: add macOS VM snapshot caching for fast test starts
Cache a pre-booted macOS VM snapshot on disk so subsequent test runs restore from the snapshot instead of cold-booting. The snapshot is keyed by the Tart base image digest and a code version constant (macOSSnapshotCodeVersion); bumping either invalidates the cache.

Snapshot preparation (one-time):
- Boot the Tart base image with a NAT NIC (--nat-nic flag)
- Wait for SSH, compile and install cmd/tta as a LaunchDaemon
- TTA polls the host via AF_VSOCK for an IP assignment; during prep the host replies "wait"
- Disconnect NIC, save VM state via SIGINT

Test fast path (cached, ~7s to agent connected):
- APFS clone the snapshot, write test-specific config.json
- Launch Host.app with --disconnected-nic --attach-network --assign-ip
- VZ restores from SaveFile.vzvmsave (~5s with 4GB RAM)
- TTA's vsock poll gets the IP config, sets static IP via ifconfig (bypasses DHCP entirely), switches driver addr to the IP directly (bypasses DNS), and resets the dial context so the reverse-dial reconnects immediately
- TTA agent connects to test driver within ~2s of IP assignment

Key optimizations:
- 4GB RAM instead of 8GB: halves SaveFile.vzvmsave (1.4GB vs 2.4GB), halves restore time (5.5s vs 11s)
- AF_VSOCK IP assignment: bypasses macOS DHCP (~5-7s saved)
- Direct IP dial: bypasses DNS resolution for test-driver.tailscale
- Dial context reset: cancels stale in-flight dials from snapshot
- Kill instead of SIGINT for test VM cleanup (no state save needed)
- Parallel VM launches

Also:
- Add TestDriverIPv4/TestDriverPort constants to vnet
- Add --nat-nic and --assign-ip flags to Host.app
- Fix SIGINT handler: retain DispatchSource globally, use dispatchMain()
- Add vsock listener (port 51011) to Host.app for IP config protocol
- Add disconnectNetwork() to VMController for clean snapshot state
- Fix Makefile: set -o pipefail so xcodebuild failures aren't swallowed

Updates #13038

Change-Id: Icbab73b57af7df3ae96136fb49cda2536310f31b
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
committed by
Brad Fitzpatrick
parent
7b53550fe6
commit
02ffe5baa8
@@ -22,8 +22,10 @@ extension HostCli {
|
||||
@Option var share: String?
|
||||
@Flag(help: "Run without GUI (for automated testing)") var headless: Bool = false
|
||||
@Flag(help: "Create NIC with no attachment (for later hot-swap)") var disconnectedNic: Bool = false
|
||||
@Flag(help: "Use NAT NIC instead of socket NIC (for snapshot prep)") var natNic: Bool = false
|
||||
@Option(help: "Hot-swap NIC to this dgram socket path after boot/restore") var attachNetwork: String?
|
||||
@Option(help: "Serve screenshots on this localhost port (0 = auto)") var screenshotPort: Int?
|
||||
@Option(help: "Assign IP/mask/gw to guest via vsock (e.g. 192.168.1.2/255.255.255.0/192.168.1.1)") var assignIp: String?
|
||||
|
||||
mutating func run() {
|
||||
config = Config(id)
|
||||
@@ -32,19 +34,38 @@ extension HostCli {
|
||||
|
||||
if headless {
|
||||
let attachSocket = attachNetwork
|
||||
let disconnected = disconnectedNic || attachSocket != nil
|
||||
let useNatNIC = natNic
|
||||
let disconnected = !useNatNIC && (disconnectedNic || attachSocket != nil)
|
||||
let wantScreenshots = screenshotPort != nil
|
||||
let requestedPort = UInt16(screenshotPort ?? 0)
|
||||
let ipConfig = assignIp
|
||||
|
||||
// Set up SIGINT handler before entering the event loop.
|
||||
// The dispatch source must be stored in a global to prevent ARC deallocation.
|
||||
signal(SIGINT, SIG_IGN)
|
||||
let sigintSource = DispatchSource.makeSignalSource(signal: SIGINT, queue: .main)
|
||||
retainedSigintSource = sigintSource
|
||||
|
||||
DispatchQueue.main.async {
|
||||
let controller = VMController()
|
||||
controller.createVirtualMachine(headless: true, disconnectedNIC: disconnected)
|
||||
controller.createVirtualMachine(headless: true, disconnectedNIC: disconnected, natNIC: useNatNIC)
|
||||
|
||||
// Start vsock listener for IP assignment.
|
||||
// If --assign-ip is set, the listener replies with the IP config JSON.
|
||||
// If not set (snapshot prep), it replies "wait" so TTA keeps polling.
|
||||
if let ipCfg = ipConfig {
|
||||
let parts = ipCfg.split(separator: "/")
|
||||
if parts.count == 3 {
|
||||
let response = "{\"ip\":\"\(parts[0])\",\"mask\":\"\(parts[1])\",\"gw\":\"\(parts[2])\"}"
|
||||
controller.startIPConfigListener(response: response)
|
||||
}
|
||||
} else {
|
||||
controller.startIPConfigListener(response: "wait")
|
||||
}
|
||||
|
||||
// Handle SIGINT (from test cleanup) by saving VM state before exit.
|
||||
let sigintSource = DispatchSource.makeSignalSource(signal: SIGINT, queue: .main)
|
||||
signal(SIGINT, SIG_IGN) // Let DispatchSource handle it
|
||||
sigintSource.setEventHandler {
|
||||
print("SIGINT received, saving VM state...")
|
||||
print("SIGINT received, disconnecting NIC and saving VM state...")
|
||||
controller.disconnectNetwork()
|
||||
controller.pauseAndSaveVirtualMachine {
|
||||
print("VM state saved, exiting.")
|
||||
Foundation.exit(0)
|
||||
@@ -79,11 +100,7 @@ extension HostCli {
|
||||
|
||||
let doAttach = {
|
||||
if let sock = attachSocket {
|
||||
// Give macOS a moment to settle after boot/restore,
|
||||
// then hot-swap the NIC attachment.
|
||||
DispatchQueue.main.asyncAfter(deadline: .now() + 1.0) {
|
||||
controller.attachNetwork(serverSocket: sock, clientID: config.vmID)
|
||||
}
|
||||
controller.attachNetwork(serverSocket: sock, clientID: config.vmID)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -107,7 +124,9 @@ extension HostCli {
|
||||
fflush(stdout)
|
||||
app.run()
|
||||
} else {
|
||||
RunLoop.main.run()
|
||||
// Use dispatchMain() instead of RunLoop.main.run() so that
|
||||
// GCD dispatch sources (like the SIGINT handler) are processed.
|
||||
dispatchMain()
|
||||
}
|
||||
} else {
|
||||
_ = NSApplicationMain(CommandLine.argc, CommandLine.unsafeArgv)
|
||||
@@ -119,6 +138,7 @@ extension HostCli {
|
||||
// startScreenshotServer starts a localhost HTTP server that serves VM display
|
||||
// screenshots on GET /screenshot as JPEG. The port is printed to stdout as
|
||||
// "SCREENSHOT_PORT=<port>" so the Go test harness can discover it.
|
||||
var retainedSigintSource: DispatchSourceSignal? // prevent ARC deallocation
|
||||
var screenshotServer: ScreenshotHTTPServer? // prevent GC
|
||||
|
||||
func startScreenshotServer(view: NSView, port: UInt16) {
|
||||
|
||||
@@ -81,7 +81,7 @@ class VMController: NSObject, VZVirtualMachineDelegate {
|
||||
return macPlatform
|
||||
}
|
||||
|
||||
func createVirtualMachine(headless: Bool = false, disconnectedNIC: Bool = false) {
|
||||
func createVirtualMachine(headless: Bool = false, disconnectedNIC: Bool = false, natNIC: Bool = false) {
|
||||
let virtualMachineConfiguration = VZVirtualMachineConfiguration()
|
||||
|
||||
virtualMachineConfiguration.platform = createMacPlaform()
|
||||
@@ -91,7 +91,10 @@ class VMController: NSObject, VZVirtualMachineDelegate {
|
||||
virtualMachineConfiguration.graphicsDevices = [helper.createGraphicsDeviceConfiguration()]
|
||||
virtualMachineConfiguration.storageDevices = [helper.createBlockDeviceConfiguration()]
|
||||
if headless {
|
||||
if disconnectedNIC {
|
||||
if natNIC {
|
||||
// NAT NIC for SSH access during snapshot preparation.
|
||||
virtualMachineConfiguration.networkDevices = [helper.createNetworkDeviceConfiguration()]
|
||||
} else if disconnectedNIC {
|
||||
// Create a NIC with no attachment. The NIC exists in the hardware
|
||||
// config (so saved state is compatible) but appears disconnected.
|
||||
// Call attachNetwork() after restore to hot-swap the attachment.
|
||||
@@ -120,6 +123,17 @@ class VMController: NSObject, VZVirtualMachineDelegate {
|
||||
virtualMachine.delegate = self
|
||||
}
|
||||
|
||||
/// Detach the VM's first network device by clearing its attachment.
///
/// Intended to run right before the VM state is saved, so that the
/// snapshot is captured without an active link. If the VM has no network
/// devices, a diagnostic is printed and nothing else happens.
func disconnectNetwork() {
    if let firstNIC = virtualMachine.networkDevices.first {
        firstNIC.attachment = nil
        print("disconnectNetwork: NIC attachment set to nil")
    } else {
        print("disconnectNetwork: no network devices")
    }
}
|
||||
|
||||
/// Hot-swap the NIC attachment on a running VM. The VM must have been
|
||||
/// created with disconnectedNIC=true. After calling this, the guest
|
||||
/// sees the link come up and does DHCP.
|
||||
@@ -157,6 +171,21 @@ class VMController: NSObject, VZVirtualMachineDelegate {
|
||||
}
|
||||
}
|
||||
|
||||
/// Start a vsock listener that tells the guest TTA agent what IP to configure.
///
/// - Parameters:
///   - response: Text handed to the `IPConfigListener`, which sends it to each
///     guest connection. Callers pass either the IP configuration JSON or
///     the literal string "wait" (snapshot prep mode, so TTA keeps polling).
///     The parameter is non-optional; this method never substitutes a
///     default reply on its own.
///   - port: vsock port to listen on. Defaults to 51011.
///
/// If the VM has no virtio socket device, a diagnostic is printed and no
/// listener is installed.
func startIPConfigListener(response: String, port: UInt32 = 51011) {
    guard let device = virtualMachine.socketDevices.first as? VZVirtioSocketDevice else {
        print("startIPConfigListener: no socket device")
        return
    }

    // Keep the delegate object in the file-level global so it is not
    // deallocated once this method returns.
    let listener = IPConfigListener(response: response)
    retainedIPConfigListener = listener

    let vsockListener = VZVirtioSocketListener()
    vsockListener.delegate = listener
    device.setSocketListener(vsockListener, forPort: port)
    print("startIPConfigListener: listening on vsock port \(port)")
}
|
||||
|
||||
func resumeVirtualMachine() {
|
||||
virtualMachine.resume(completionHandler: { (result) in
|
||||
if case let .failure(error) = result {
|
||||
@@ -211,3 +240,28 @@ class VMController: NSObject, VZVirtualMachineDelegate {
|
||||
exit(0)
|
||||
}
|
||||
}
|
||||
|
||||
// Global strong reference that keeps the IPConfigListener alive after
// startIPConfigListener returns (prevents ARC deallocation).
var retainedIPConfigListener: IPConfigListener?

/// Answers TTA connections on the IP-config vsock port (51011 in
/// startIPConfigListener) with an IP configuration JSON string, or with
/// "wait" during snapshot prep.
class IPConfigListener: NSObject, VZVirtioSocketListenerDelegate {
    /// Reply text; a trailing newline is appended when it is sent.
    let response: String

    init(response: String) {
        self.response = response
    }

    /// Sends `response` followed by "\n" on the new connection, closes the
    /// connection, and reports it as accepted.
    ///
    /// A failed or incomplete write is logged rather than silently dropped;
    /// the connection is still closed and accepted so the guest can retry.
    func listener(_ listener: VZVirtioSocketListener,
                  shouldAcceptNewConnection connection: VZVirtioSocketConnection,
                  from socketDevice: VZVirtioSocketDevice) -> Bool {
        let payload = Array((response + "\n").utf8)
        let code = writeAll(payload, to: connection.fileDescriptor)
        if code != 0 {
            print("IPConfigListener: write failed: \(String(cString: strerror(code)))")
        }
        connection.close()
        return true
    }

    /// Writes every byte of `bytes` to `fd`, continuing after short writes
    /// and retrying when interrupted by a signal (EINTR).
    ///
    /// - Returns: 0 on success, otherwise the errno value of the failing
    ///   write (EIO if write reported zero bytes written).
    private func writeAll(_ bytes: [UInt8], to fd: Int32) -> Int32 {
        var offset = 0
        while offset < bytes.count {
            let written = bytes[offset...].withUnsafeBytes { raw in
                write(fd, raw.baseAddress, raw.count)
            }
            if written > 0 {
                offset += written
            } else if written < 0 && errno == EINTR {
                continue
            } else {
                // Capture errno immediately, before any other call can clobber it.
                return written < 0 ? errno : EIO
            }
        }
        return 0
    }
}
|
||||
|
||||
Reference in New Issue
Block a user