// Copyright (c) Tailscale Inc & contributors // SPDX-License-Identifier: BSD-3-Clause package vmtest import ( "bytes" "context" "encoding/json" "fmt" "net" "os" "os/exec" "path/filepath" "regexp" "runtime" "strconv" "strings" "testing" "time" "tailscale.com/tstest/natlab/vnet" ) // qemuAccelArgs returns QEMU command-line flags for hardware-accelerated // virtualisation when available, or nil to fall back to TCG (software // emulation). On Linux, KVM is used when /dev/kvm is accessible. On other // platforms (macOS, etc.) TCG is used, which allows the tests to run // without a same-architecture hypervisor at the cost of speed. func qemuAccelArgs() []string { if runtime.GOOS == "linux" { if f, err := os.OpenFile("/dev/kvm", os.O_RDWR, 0); err == nil { f.Close() return []string{"-enable-kvm", "-cpu", "host"} } } return nil } // gokrazyPlatform boots gokrazy (Linux) VMs via QEMU. type gokrazyPlatform struct{} func (gokrazyPlatform) planSteps(e *Env, n *Node) { e.Step("Build gokrazy image") e.Step("Launch QEMU: " + n.name) } func (gokrazyPlatform) boot(ctx context.Context, e *Env, n *Node) error { e.gokrazyOnce.Do(func() { step := e.Step("Build gokrazy image") step.Begin() if err := e.ensureGokrazy(ctx); err != nil { step.End(err) e.t.Fatalf("ensureGokrazy: %v", err) } step.End(nil) }) e.ensureQEMUSocket() vmStep := e.Step("Launch QEMU: " + n.name) vmStep.Begin() if err := e.startGokrazyQEMU(n); err != nil { vmStep.End(err) return err } vmStep.End(nil) return nil } // qemuCloudPlatform boots cloud images (Ubuntu, Debian, FreeBSD) via QEMU. type qemuCloudPlatform struct{} func (qemuCloudPlatform) planSteps(e *Env, n *Node) { e.Step(fmt.Sprintf("Compile %s_%s binaries", n.os.GOOS(), n.os.GOARCH())) e.Step(fmt.Sprintf("Prepare %s image", n.os.Name)) e.Step("Launch QEMU: " + n.name) } func (qemuCloudPlatform) boot(ctx context.Context, e *Env, n *Node) error { goos, goarch := n.os.GOOS(), n.os.GOARCH() e.ensureCompiled(ctx, goos, goarch) if err := e.ensureImage(ctx, n.os); err != nil { return err } e.ensureQEMUSocket() vmStep := e.Step("Launch QEMU: " + n.name) vmStep.Begin() if err := e.startCloudQEMU(n); err != nil { vmStep.End(err) return err } vmStep.End(nil) return nil } // startGokrazyQEMU launches a QEMU process for a gokrazy node. // This follows the same pattern as tstest/integration/nat/nat_test.go. func (e *Env) startGokrazyQEMU(n *Node) error { disk := filepath.Join(e.tempDir, fmt.Sprintf("%s.qcow2", n.name)) if err := createOverlay(e.gokrazyBase, disk); err != nil { return err } var envBuf bytes.Buffer for _, env := range n.vnetNode.Env() { fmt.Fprintf(&envBuf, " tailscaled.env=%s=%s", env.Key, env.Value) } sysLogAddr := net.JoinHostPort(vnet.FakeSyslogIPv4().String(), "995") if n.vnetNode.IsV6Only() { fmt.Fprintf(&envBuf, " tta.nameserver=%s", vnet.FakeDNSIPv6()) sysLogAddr = net.JoinHostPort(vnet.FakeSyslogIPv6().String(), "995") } logPath := filepath.Join(e.tempDir, n.name+".log") args := []string{ "-M", "microvm,isa-serial=off", "-m", fmt.Sprintf("%dM", n.os.MemoryMB), "-nodefaults", "-no-user-config", "-nographic", "-kernel", e.gokrazyKernel, "-append", "console=hvc0 root=PARTUUID=60c24cc1-f3f9-427a-8199-76baa2d60001/PARTNROFF=1 ro init=/gokrazy/init panic=10 oops=panic pci=off nousb tsc=unstable clocksource=hpet gokrazy.remote_syslog.target=" + sysLogAddr + " tailscale-tta=1" + envBuf.String(), "-drive", "id=blk0,file=" + disk + ",format=qcow2", "-device", "virtio-blk-device,drive=blk0", "-device", "virtio-serial-device", "-device", "virtio-rng-device", "-chardev", "file,id=virtiocon0,path=" + logPath, "-device", "virtconsole,chardev=virtiocon0", } // Add network devices — one per NIC. for i := range n.vnetNode.NumNICs() { mac := n.vnetNode.NICMac(i) netdevID := fmt.Sprintf("net%d", i) args = append(args, "-netdev", fmt.Sprintf("stream,id=%s,addr.type=unix,addr.path=%s", netdevID, e.sockAddr), "-device", fmt.Sprintf("virtio-net-device,netdev=%s,mac=%s", netdevID, mac), ) } args = append(args, qemuAccelArgs()...) return e.launchQEMU(n.name, logPath, args) } // startCloudQEMU launches a QEMU process for a cloud image (Ubuntu, Debian, FreeBSD, etc). func (e *Env) startCloudQEMU(n *Node) error { basePath := cachedImagePath(n.os) disk := filepath.Join(e.tempDir, fmt.Sprintf("%s.qcow2", n.name)) if err := createOverlay(basePath, disk); err != nil { return err } // Create a seed ISO with cloud-init config (meta-data, user-data, network-config). // This MUST be a local ISO (not HTTP) so cloud-init reads network-config during // init-local, before systemd-networkd-wait-online blocks boot. seedISO, err := e.createCloudInitISO(n) if err != nil { return fmt.Errorf("creating cloud-init ISO: %w", err) } logPath := filepath.Join(e.tempDir, n.name+".log") qmpSock := filepath.Join(e.sockDir, n.name+"-qmp.sock") args := []string{ "-machine", "q35", "-m", fmt.Sprintf("%dM", n.os.MemoryMB), "-smp", "2", "-display", "none", "-drive", fmt.Sprintf("file=%s,if=virtio", disk), "-drive", fmt.Sprintf("file=%s,if=virtio,media=cdrom,readonly=on", seedISO), "-smbios", "type=1,serial=ds=nocloud", "-serial", "file:" + logPath, "-qmp", "unix:" + qmpSock + ",server,nowait", } // Add network devices — one per NIC. // romfile="" disables the iPXE option ROM entirely, saving ~5s per NIC at boot // and avoiding "duplicate fw_cfg file name" errors with multiple NICs. for i := range n.vnetNode.NumNICs() { mac := n.vnetNode.NICMac(i) netdevID := fmt.Sprintf("net%d", i) args = append(args, "-netdev", fmt.Sprintf("stream,id=%s,addr.type=unix,addr.path=%s", netdevID, e.sockAddr), "-device", fmt.Sprintf("virtio-net-pci,netdev=%s,mac=%s,romfile=", netdevID, mac), ) } // Add a debug NIC with user-mode networking for SSH access from the host. // Use port 0 so the OS picks a free port; we query the actual port via QMP after launch. args = append(args, "-netdev", "user,id=debug0,hostfwd=tcp:127.0.0.1:0-:22", "-device", "virtio-net-pci,netdev=debug0,romfile=", ) args = append(args, qemuAccelArgs()...) if err := e.launchQEMU(n.name, logPath, args); err != nil { return err } // Query QMP to find the actual SSH port that QEMU allocated. port, err := qmpQueryHostFwd(qmpSock) if err != nil { return fmt.Errorf("querying SSH port via QMP: %w", err) } n.sshPort = port e.t.Logf("[%s] SSH debug: ssh -p %d root@127.0.0.1 (password: root)", n.name, port) return nil } // qemuRun is one running qemu-system-x86_64 process plus the file handles // the wrapping code holds open on its behalf. kill tears the whole thing // down (used both for normal cleanup and for the in-flight retry path). type qemuRun struct { cmd *exec.Cmd parentPipe *os.File devNull *os.File qemuLog *os.File } func (r *qemuRun) kill() { killProcessTree(r.cmd) r.cmd.Wait() r.parentPipe.Close() r.devNull.Close() r.qemuLog.Close() } // launchQEMU starts a qemu-system-x86_64 process with the given args and // watches for console activity. If the guest produces no output within // stuckTimeout (empty console *and* QEMU has not exited with an error), // the QEMU process is killed and re-launched. This works around CI // hypervisor flakes seen on shared GitHub Actions runners where a QEMU // process starts but its vCPU never makes any forward progress (the // failure presents as both the virtconsole log and the QEMU stderr log // being zero bytes after many minutes, with the vnet stream socket // connected but no packet ever sent). // // VM console output goes to logPath (via QEMU's -serial or -chardev). // QEMU's own stdout/stderr go to logPath.qemu for diagnostics. func (e *Env) launchQEMU(name, logPath string, args []string) error { // stuckTimeout is generous: a healthy VM prints SeaBIOS/kernel // output within ~1-2s on KVM, but slow shared CI hardware can lag. // Setting it too low risks killing a healthy-but-slow VM; setting it // too high masks the wedge case we want to recover from. const stuckTimeout = 45 * time.Second const maxAttempts = 3 var lastErr error for attempt := 1; attempt <= maxAttempts; attempt++ { if attempt > 1 { e.t.Logf("[%s] QEMU made no progress in %v; killing and retrying (attempt %d/%d)", name, stuckTimeout, attempt, maxAttempts) // QEMU's -chardev file backend opens append-mode, so stale // bytes from a previous attempt would falsely trip the // progress check on retry. Truncate it. os.Truncate(logPath, 0) } run, err := e.startQEMUOnce(name, logPath, args) if err != nil { lastErr = err continue } if waitForConsoleProgress(logPath, stuckTimeout) { e.qemuProcs = append(e.qemuProcs, run.cmd) if e.ctx != nil { go e.tailLogFile(e.ctx, name, logPath) } e.t.Cleanup(func() { run.kill() // Dump tail of VM log and QEMU's own stderr on failure. // The console log (logPath) is empty when the guest never // produced output (e.g. QEMU exited before the kernel ran); // in that case the .qemu file holds the only diagnostic — // KVM errors, "kvm not available", CPU model mismatch, etc. if e.t.Failed() { dumpLogTail(e.t, name, "console", logPath) dumpLogTail(e.t, name, "qemu stderr", logPath+".qemu") } }) return nil } lastErr = fmt.Errorf("QEMU for %s produced no console output in %v", name, stuckTimeout) run.kill() } return fmt.Errorf("QEMU for %s failed after %d attempts: %w", name, maxAttempts, lastErr) } // startQEMUOnce starts a single qemu-system-x86_64 process. On success the // returned qemuRun owns the process and all file handles; the caller must // invoke kill (either inline for a retry or via t.Cleanup for the // surviving attempt). func (e *Env) startQEMUOnce(name, logPath string, args []string) (*qemuRun, error) { cmd := exec.Command("qemu-system-x86_64", args...) devNull, err := os.Open(os.DevNull) if err != nil { return nil, fmt.Errorf("open /dev/null: %w", err) } cmd.Stdin = devNull qemuLog, err := os.Create(logPath + ".qemu") if err != nil { devNull.Close() return nil, err } cmd.Stdout = qemuLog cmd.Stderr = qemuLog parentPipe, err := killWithParent(cmd) if err != nil { devNull.Close() qemuLog.Close() return nil, fmt.Errorf("killWithParent: %w", err) } if err := cmd.Start(); err != nil { parentPipe.Close() devNull.Close() qemuLog.Close() return nil, fmt.Errorf("qemu for %s: %w", name, err) } e.t.Logf("launched QEMU for %s (pid %d), log: %s", name, cmd.Process.Pid, logPath) return &qemuRun{ cmd: cmd, parentPipe: parentPipe, devNull: devNull, qemuLog: qemuLog, }, nil } // waitForConsoleProgress polls logPath until its size is non-zero or // timeout elapses. It returns true on observed forward progress (any // bytes written), false on timeout. func waitForConsoleProgress(logPath string, timeout time.Duration) bool { deadline := time.Now().Add(timeout) for time.Now().Before(deadline) { if fi, err := os.Stat(logPath); err == nil && fi.Size() > 0 { return true } time.Sleep(200 * time.Millisecond) } return false } // dumpLogTail prints the last 50 lines of the file at path to the test log, // prefixed with the VM name and kind (e.g. "console", "qemu stderr"). It is // a no-op (with a short note) if the file can't be read or is empty, so // callers can use it unconditionally on test failure. func dumpLogTail(t testing.TB, name, kind, path string) { t.Helper() data, err := os.ReadFile(path) if err != nil { t.Logf("=== %s %s log unavailable: %v ===", name, kind, err) return } if len(data) == 0 { t.Logf("=== %s %s log is empty ===", name, kind) return } lines := bytes.Split(data, []byte("\n")) start := 0 if len(lines) > 50 { start = len(lines) - 50 } t.Logf("=== last 50 lines of %s %s log ===", name, kind) for _, line := range lines[start:] { t.Logf("[%s] %s", name, line) } } // hostFwdRe matches a single TCP[HOST_FORWARD] line from QEMU's // "info usernet" human-monitor command output, e.g.: // // TCP[HOST_FORWARD] 12 127.0.0.1 35323 10.0.2.15 22 var hostFwdRe = regexp.MustCompile(`TCP\[HOST_FORWARD\]\s+\d+\s+127\.0\.0\.1\s+(\d+)\s+`) // qmpQueryHostFwd connects to a QEMU QMP socket and queries the host port // assigned to the first TCP host forward rule (the SSH debug port). func qmpQueryHostFwd(sockPath string) (int, error) { // Wait for the QMP socket to appear. var conn net.Conn for range 50 { var err error conn, err = net.Dial("unix", sockPath) if err == nil { break } time.Sleep(100 * time.Millisecond) } if conn == nil { return 0, fmt.Errorf("QMP socket %s not available", sockPath) } defer conn.Close() conn.SetDeadline(time.Now().Add(20 * time.Second)) // Read the QMP greeting. var greeting json.RawMessage dec := json.NewDecoder(conn) if err := dec.Decode(&greeting); err != nil { return 0, fmt.Errorf("reading QMP greeting: %w", err) } // Send qmp_capabilities to initialize. fmt.Fprintf(conn, `{"execute":"qmp_capabilities"}`+"\n") var capsResp json.RawMessage if err := dec.Decode(&capsResp); err != nil { return 0, fmt.Errorf("reading qmp_capabilities response: %w", err) } // Poll "info usernet" until the SLIRP host-forward rule appears. // On slow runners (e.g. GitHub Actions) QEMU sometimes returns an // empty "info usernet" if we query it before user-mode networking // has finished wiring up the forward, so single-shot lookups fail. deadline := time.Now().Add(10 * time.Second) var lastReturn string for { fmt.Fprintf(conn, `{"execute":"human-monitor-command","arguments":{"command-line":"info usernet"}}`+"\n") var hmpResp struct { Return string `json:"return"` } if err := dec.Decode(&hmpResp); err != nil { return 0, fmt.Errorf("reading info usernet response: %w", err) } lastReturn = hmpResp.Return if m := hostFwdRe.FindStringSubmatch(hmpResp.Return); m != nil { return strconv.Atoi(m[1]) } if time.Now().After(deadline) { break } time.Sleep(100 * time.Millisecond) } return 0, fmt.Errorf("no hostfwd port found after waiting: %q", lastReturn) } // tailLogFile tails a VM's serial console log file and publishes each line // as an EventConsoleOutput to the event bus for the web UI. func (e *Env) tailLogFile(ctx context.Context, name, logPath string) { // Wait for the file to appear (QEMU may not have created it yet). var f *os.File for { var err error f, err = os.Open(logPath) if err == nil { break } select { case <-ctx.Done(): return case <-time.After(100 * time.Millisecond): } } defer f.Close() // Read the file in a loop, tracking our position manually. // We can't use bufio.Scanner because it caches EOF and won't // pick up new data appended by QEMU after the first EOF. var buf []byte var partial string // incomplete line (no trailing newline yet) readBuf := make([]byte, 4096) for { n, err := f.Read(readBuf) if n > 0 { buf = append(buf, readBuf[:n]...) // Split into complete lines. for { idx := bytes.IndexByte(buf, '\n') if idx < 0 { break } line := partial + string(buf[:idx]) partial = "" buf = buf[idx+1:] // Strip trailing \r from serial consoles. line = strings.TrimRight(line, "\r") if line == "" { continue } e.appendConsoleLine(name, line) e.eventBus.Publish(VMEvent{ NodeName: name, Type: EventConsoleOutput, Message: line, }) } if len(buf) > 0 { partial = string(buf) buf = buf[:0] } } if err != nil || n == 0 { // EOF or error — wait for more data. select { case <-ctx.Done(): return case <-time.After(100 * time.Millisecond): } } } }