tstest/natlab/vmtest: add macOS VM snapshot caching for fast test starts
Cache a pre-booted macOS VM snapshot on disk so subsequent test runs restore from the snapshot instead of cold-booting. The snapshot is keyed by the Tart base image digest and a code version constant (macOSSnapshotCodeVersion); bumping either invalidates the cache.

Snapshot preparation (one-time):
- Boot the Tart base image with a NAT NIC (--nat-nic flag)
- Wait for SSH, compile and install cmd/tta as a LaunchDaemon
- TTA polls the host via AF_VSOCK for an IP assignment; during prep the host replies "wait"
- Disconnect NIC, save VM state via SIGINT

Test fast path (cached, ~7s to agent connected):
- APFS clone the snapshot, write test-specific config.json
- Launch Host.app with --disconnected-nic --attach-network --assign-ip
- VZ restores from SaveFile.vzvmsave (~5s with 4GB RAM)
- TTA's vsock poll gets the IP config, sets static IP via ifconfig (bypasses DHCP entirely), switches driver addr to the IP directly (bypasses DNS), and resets the dial context so the reverse-dial reconnects immediately
- TTA agent connects to test driver within ~2s of IP assignment

Key optimizations:
- 4GB RAM instead of 8GB: halves SaveFile.vzvmsave (1.4GB vs 2.4GB), halves restore time (5.5s vs 11s)
- AF_VSOCK IP assignment: bypasses macOS DHCP (~5-7s saved)
- Direct IP dial: bypasses DNS resolution for test-driver.tailscale
- Dial context reset: cancels stale in-flight dials from snapshot
- Kill instead of SIGINT for test VM cleanup (no state save needed)
- Parallel VM launches

Also:
- Add TestDriverIPv4/TestDriverPort constants to vnet
- Add --nat-nic and --assign-ip flags to Host.app
- Fix SIGINT handler: retain DispatchSource globally, use dispatchMain()
- Add vsock listener (port 51011) to Host.app for IP config protocol
- Add disconnectNetwork() to VMController for clean snapshot state
- Fix Makefile: set -o pipefail so xcodebuild failures aren't swallowed

Updates #13038

Change-Id: Icbab73b57af7df3ae96136fb49cda2536310f31b
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
committed by
Brad Fitzpatrick
parent
7b53550fe6
commit
02ffe5baa8
+46
-2
@@ -105,6 +105,10 @@ func main() {
|
||||
}
|
||||
flag.Parse()
|
||||
|
||||
// On macOS VMs, start polling the host via vsock for an IP assignment.
|
||||
// This bypasses DHCP for near-instant network configuration.
|
||||
startIPAssignLoop()
|
||||
|
||||
debug := false
|
||||
if distro.Get() == distro.Gokrazy {
|
||||
cmdLine, _ := os.ReadFile("/proc/cmdline")
|
||||
@@ -408,12 +412,48 @@ func main() {
|
||||
revSt.runDialOutLoop(conns)
|
||||
}
|
||||
|
||||
// dialCancels tracks cancel funcs for in-flight connect() and sleep contexts.
|
||||
// resetDialCancels cancels them all so the dial loop retries immediately.
|
||||
var (
|
||||
// dialCancelMu guards dialCancels; the register, unregister and reset
// helpers each hold it while touching the set.
dialCancelMu sync.Mutex
|
||||
// dialCancels holds the currently registered cancel funcs, keyed by the
// handle returned when each one was added.
dialCancels set.HandleSet[context.CancelFunc]
|
||||
)
|
||||
|
||||
// registerDialCancel adds a cancel func and returns a handle for removal.
|
||||
func registerDialCancel(cancel context.CancelFunc) set.Handle {
|
||||
dialCancelMu.Lock()
|
||||
defer dialCancelMu.Unlock()
|
||||
return dialCancels.Add(cancel)
|
||||
}
|
||||
|
||||
// unregisterDialCancel removes a previously registered cancel func.
|
||||
func unregisterDialCancel(h set.Handle) {
|
||||
dialCancelMu.Lock()
|
||||
defer dialCancelMu.Unlock()
|
||||
delete(dialCancels, h)
|
||||
}
|
||||
|
||||
// resetDialCancels cancels all in-flight connect and sleep contexts,
|
||||
// causing the dial loop to retry immediately with the updated driver address.
|
||||
func resetDialCancels() {
|
||||
dialCancelMu.Lock()
|
||||
defer dialCancelMu.Unlock()
|
||||
for h, cancel := range dialCancels {
|
||||
cancel()
|
||||
delete(dialCancels, h)
|
||||
}
|
||||
}
|
||||
|
||||
func connect() (net.Conn, error) {
|
||||
d := net.Dialer{
|
||||
Control: bypassControlFunc,
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
h := registerDialCancel(cancel)
|
||||
defer func() {
|
||||
cancel()
|
||||
unregisterDialCancel(h)
|
||||
}()
|
||||
c, err := d.DialContext(ctx, "tcp", *driverAddr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -510,7 +550,11 @@ func (s *revDialState) runDialOutLoop(conns chan<- net.Conn) {
|
||||
log.Printf("[dial-driver] connect failure: %v", s)
|
||||
}
|
||||
lastErr = s
|
||||
time.Sleep(time.Second)
|
||||
sleepCtx, sleepCancel := context.WithTimeout(context.Background(), time.Second)
|
||||
h := registerDialCancel(sleepCancel)
|
||||
<-sleepCtx.Done()
|
||||
sleepCancel()
|
||||
unregisterDialCancel(h)
|
||||
continue
|
||||
}
|
||||
if !connected {
|
||||
|
||||
Reference in New Issue
Block a user