WIP: rebase for 2026-05-18 #7
@@ -22,12 +22,23 @@ jobs:
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- name: Enable KVM
|
||||
run: |
|
||||
echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules
|
||||
sudo udevadm control --reload-rules
|
||||
sudo udevadm trigger --name-match=kvm
|
||||
- name: Install qemu
|
||||
run: |
|
||||
sudo rm -f /var/lib/man-db/auto-update
|
||||
sudo apt-get -y update
|
||||
sudo apt-get -y remove man-db
|
||||
sudo apt-get install -y qemu-system-x86 qemu-utils
|
||||
- name: Build VM image
|
||||
# The test will build this if missing, but we do it explicitly
|
||||
# to avoid cutting into the go test -timeout budget, and to
|
||||
# fail earlier with a clearer error if the image build breaks.
|
||||
run: |
|
||||
make -C gokrazy natlab
|
||||
- name: Run natlab integration tests
|
||||
run: |
|
||||
./tool/go test -v -run=^TestEasyEasy$ -timeout=3m -count=1 ./tstest/integration/nat --run-vm-tests
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"bytes"
|
||||
"cmp"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
@@ -17,6 +18,7 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -327,6 +329,15 @@ func (nt *natTest) setupTest(ctx context.Context, addNode ...addNodeFunc) (nodes
|
||||
}
|
||||
})
|
||||
|
||||
haveKVM := false
|
||||
if runtime.GOOS == "linux" {
|
||||
if f, err := os.OpenFile("/dev/kvm", os.O_RDWR, 0); err == nil {
|
||||
f.Close()
|
||||
haveKVM = true
|
||||
}
|
||||
}
|
||||
|
||||
qmpSocks := make([]string, len(nodes))
|
||||
for i, node := range nodes {
|
||||
disk := fmt.Sprintf("%s/node-%d.qcow2", nt.tempDir, i)
|
||||
out, err := exec.Command("qemu-img", "create",
|
||||
@@ -349,22 +360,28 @@ func (nt *natTest) setupTest(ctx context.Context, addNode ...addNodeFunc) (nodes
|
||||
}
|
||||
envStr := envBuf.String()
|
||||
|
||||
cmd := exec.Command("qemu-system-x86_64",
|
||||
qmpSocks[i] = fmt.Sprintf("%s/qmp-node-%d.sock", nt.tempDir, i)
|
||||
qemuArgs := []string{
|
||||
"-M", "microvm,isa-serial=off",
|
||||
"-m", "384M",
|
||||
"-nodefaults", "-no-user-config", "-nographic",
|
||||
"-kernel", nt.kernel,
|
||||
"-append", "console=hvc0 root=PARTUUID=60c24cc1-f3f9-427a-8199-76baa2d60001/PARTNROFF=1 ro init=/gokrazy/init panic=10 oops=panic pci=off nousb tsc=unstable clocksource=hpet gokrazy.remote_syslog.target="+sysLogAddr+" tailscale-tta=1"+envStr,
|
||||
"-drive", "id=blk0,file="+disk+",format=qcow2",
|
||||
"-append", "console=hvc0 root=PARTUUID=60c24cc1-f3f9-427a-8199-76baa2d60001/PARTNROFF=1 ro init=/gokrazy/init panic=10 oops=panic pci=off nousb gokrazy.remote_syslog.target=" + sysLogAddr + " tailscale-tta=1" + envStr,
|
||||
"-drive", "id=blk0,file=" + disk + ",format=qcow2",
|
||||
"-device", "virtio-blk-device,drive=blk0",
|
||||
"-netdev", "stream,id=net0,addr.type=unix,addr.path="+sockAddr,
|
||||
"-netdev", "stream,id=net0,addr.type=unix,addr.path=" + sockAddr,
|
||||
"-device", "virtio-serial-device",
|
||||
"-device", "virtio-rng-device",
|
||||
"-device", "virtio-net-device,netdev=net0,mac="+node.MAC().String(),
|
||||
"-device", "virtio-net-device,netdev=net0,mac=" + node.MAC().String(),
|
||||
"-chardev", "stdio,id=virtiocon0,mux=on",
|
||||
"-device", "virtconsole,chardev=virtiocon0",
|
||||
"-mon", "chardev=virtiocon0,mode=readline",
|
||||
)
|
||||
"-qmp", "unix:" + qmpSocks[i] + ",server=on,wait=off",
|
||||
}
|
||||
if haveKVM {
|
||||
qemuArgs = append(qemuArgs, "-enable-kvm", "-cpu", "host")
|
||||
}
|
||||
cmd := exec.Command("qemu-system-x86_64", qemuArgs...)
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
if err := cmd.Start(); err != nil {
|
||||
@@ -376,6 +393,15 @@ func (nt *natTest) setupTest(ctx context.Context, addNode ...addNodeFunc) (nodes
|
||||
})
|
||||
}
|
||||
|
||||
for i, node := range nodes {
|
||||
if err := nt.vnet.AwaitFirstPacket(ctx, node.MAC()); err != nil {
|
||||
t.Logf("node %v: no boot progress (no packets received): %v", node, err)
|
||||
t.Logf("node %v: QMP status: %s", node, qmpQueryStatus(qmpSocks[i]))
|
||||
t.FailNow()
|
||||
}
|
||||
t.Logf("node %v: boot detected (first packet received)", node)
|
||||
}
|
||||
|
||||
for _, n := range nodes {
|
||||
client := nt.vnet.NodeAgentClient(n)
|
||||
n.SetClient(client)
|
||||
@@ -430,8 +456,24 @@ func (nt *natTest) setupTest(ctx context.Context, addNode ...addNodeFunc) (nodes
|
||||
return nodes, clients, nt.vnet.Close
|
||||
}
|
||||
|
||||
type hasDeadline interface {
|
||||
Deadline() (deadline time.Time, ok bool)
|
||||
}
|
||||
|
||||
// testContext returns a context derived from the test's deadline (from -timeout),
|
||||
// leaving a small margin for cleanup. Falls back to 60s if no deadline is set.
|
||||
func testContext(tb testing.TB) (context.Context, context.CancelFunc) {
|
||||
if t, ok := tb.(hasDeadline); ok {
|
||||
if dl, ok := t.Deadline(); ok {
|
||||
const margin = 5 * time.Second
|
||||
return context.WithDeadline(context.Background(), dl.Add(-margin))
|
||||
}
|
||||
}
|
||||
return context.WithTimeout(context.Background(), 60*time.Second)
|
||||
}
|
||||
|
||||
func (nt *natTest) runHostConnectivityTest(addNode ...addNodeFunc) bool {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
||||
ctx, cancel := testContext(nt.tb)
|
||||
defer cancel()
|
||||
nodes, clients, cleanup := nt.setupTest(ctx, addNode...)
|
||||
defer cleanup()
|
||||
@@ -457,7 +499,7 @@ func (nt *natTest) runHostConnectivityTest(addNode ...addNodeFunc) bool {
|
||||
}
|
||||
|
||||
func (nt *natTest) runTailscaleConnectivityTest(addNode ...addNodeFunc) pingRoute {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
||||
ctx, cancel := testContext(nt.tb)
|
||||
defer cancel()
|
||||
|
||||
nodes, clients, cleanup := nt.setupTest(ctx, addNode...)
|
||||
@@ -580,6 +622,55 @@ func ping(ctx context.Context, t testing.TB, c *vnet.NodeAgentClient, target net
|
||||
return nil, fmt.Errorf("no ping response (ctx: %v)", ctx.Err())
|
||||
}
|
||||
|
||||
// qmpQueryStatus connects to a QEMU QMP socket and returns the VM status
|
||||
// (e.g. "running", "paused", "prelaunch") or an error string.
|
||||
func qmpQueryStatus(sockPath string) string {
|
||||
conn, err := net.DialTimeout("unix", sockPath, 2*time.Second)
|
||||
if err != nil {
|
||||
return fmt.Sprintf("dial error: %v", err)
|
||||
}
|
||||
defer conn.Close()
|
||||
conn.SetDeadline(time.Now().Add(5 * time.Second))
|
||||
dec := json.NewDecoder(conn)
|
||||
|
||||
// Read QMP greeting.
|
||||
var greeting json.RawMessage
|
||||
if err := dec.Decode(&greeting); err != nil {
|
||||
return fmt.Sprintf("greeting error: %v", err)
|
||||
}
|
||||
|
||||
// Enter command mode.
|
||||
if _, err := conn.Write([]byte(`{"execute":"qmp_capabilities"}` + "\n")); err != nil {
|
||||
return fmt.Sprintf("write caps: %v", err)
|
||||
}
|
||||
var capsResp json.RawMessage
|
||||
if err := dec.Decode(&capsResp); err != nil {
|
||||
return fmt.Sprintf("caps response: %v", err)
|
||||
}
|
||||
|
||||
// Query status.
|
||||
if _, err := conn.Write([]byte(`{"execute":"query-status"}` + "\n")); err != nil {
|
||||
return fmt.Sprintf("write query-status: %v", err)
|
||||
}
|
||||
var statusResp struct {
|
||||
Return struct {
|
||||
Running bool `json:"running"`
|
||||
Status string `json:"status"`
|
||||
} `json:"return"`
|
||||
Error *struct {
|
||||
Class string `json:"class"`
|
||||
Desc string `json:"desc"`
|
||||
} `json:"error"`
|
||||
}
|
||||
if err := dec.Decode(&statusResp); err != nil {
|
||||
return fmt.Sprintf("status response: %v", err)
|
||||
}
|
||||
if statusResp.Error != nil {
|
||||
return fmt.Sprintf("qmp error: %s: %s", statusResp.Error.Class, statusResp.Error.Desc)
|
||||
}
|
||||
return fmt.Sprintf("status=%s running=%v", statusResp.Return.Status, statusResp.Return.Running)
|
||||
}
|
||||
|
||||
func up(ctx context.Context, c *vnet.NodeAgentClient) error {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", "http://unused/up", nil)
|
||||
if err != nil {
|
||||
|
||||
@@ -747,6 +747,7 @@ type Server struct {
|
||||
agentConnWaiter map[*node]chan<- struct{} // signaled after added to set
|
||||
agentConns set.Set[*agentConn] // not keyed by node; should be small/cheap enough to scan all
|
||||
agentDialer map[*node]netx.DialFunc
|
||||
gotFirstPacket map[MAC]chan struct{} // closed on first packet from each MAC
|
||||
|
||||
cloudInitData map[int]*CloudInitData // node num → cloud-init config
|
||||
fileContents map[string][]byte // filename → file bytes
|
||||
@@ -825,6 +826,10 @@ func New(c *Config) (*Server, error) {
|
||||
if err := s.initFromConfig(c); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.gotFirstPacket = make(map[MAC]chan struct{})
|
||||
for mac := range s.nodeByMAC {
|
||||
s.gotFirstPacket[mac] = make(chan struct{})
|
||||
}
|
||||
for n := range s.networks {
|
||||
if err := n.initStack(); err != nil {
|
||||
return nil, fmt.Errorf("newServer: initStack: %v", err)
|
||||
@@ -932,6 +937,22 @@ func (s *Server) Close() {
|
||||
s.wg.Wait()
|
||||
}
|
||||
|
||||
// AwaitFirstPacket waits until the first ethernet frame is received from the
|
||||
// given MAC address, indicating the VM has booted far enough to send network
|
||||
// traffic. It returns an error if the context expires first.
|
||||
func (s *Server) AwaitFirstPacket(ctx context.Context, mac MAC) error {
|
||||
ch, ok := s.gotFirstPacket[mac]
|
||||
if !ok {
|
||||
return fmt.Errorf("unknown MAC %v", mac)
|
||||
}
|
||||
select {
|
||||
case <-ch:
|
||||
return nil
|
||||
case <-ctx.Done():
|
||||
return fmt.Errorf("no network packets received from %v: %w", mac, ctx.Err())
|
||||
}
|
||||
}
|
||||
|
||||
// MACs returns the MAC addresses of the configured nodes.
|
||||
func (s *Server) MACs() iter.Seq[MAC] {
|
||||
return maps.Keys(s.nodeByMAC)
|
||||
@@ -1088,6 +1109,13 @@ func (s *Server) ServeUnixConn(uc *net.UnixConn, proto Protocol) {
|
||||
}
|
||||
if !didReg[srcMAC] {
|
||||
didReg[srcMAC] = true
|
||||
if ch, ok := s.gotFirstPacket[srcMAC]; ok {
|
||||
select {
|
||||
case <-ch: // already closed
|
||||
default:
|
||||
close(ch)
|
||||
}
|
||||
}
|
||||
srcNet := srcNode.netForMAC(srcMAC)
|
||||
if srcNet == nil {
|
||||
s.logf("[conn %p] node %v has no network for MAC %v", c.uc, srcNode, srcMAC)
|
||||
|
||||
Reference in New Issue
Block a user