WIP: rebase for 2026-05-18 #7

Draft
codinget wants to merge 234 commits from rebase/2026-05-18 into webnet
3 changed files with 138 additions and 8 deletions
Showing only changes of commit 7dcb378875 - Show all commits
@@ -22,12 +22,23 @@ jobs:
steps:
- name: Check out code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Enable KVM
run: |
echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules
sudo udevadm control --reload-rules
sudo udevadm trigger --name-match=kvm
- name: Install qemu
run: |
sudo rm -f /var/lib/man-db/auto-update
sudo apt-get -y update
sudo apt-get -y remove man-db
sudo apt-get install -y qemu-system-x86 qemu-utils
- name: Build VM image
# The test will build this if missing, but we do it explicitly
# to avoid cutting into the go test -timeout budget, and to
# fail earlier with a clearer error if the image build breaks.
run: |
make -C gokrazy natlab
- name: Run natlab integration tests
run: |
./tool/go test -v -run=^TestEasyEasy$ -timeout=3m -count=1 ./tstest/integration/nat --run-vm-tests
+99 -8
View File
@@ -7,6 +7,7 @@ import (
"bytes"
"cmp"
"context"
"encoding/json"
"errors"
"flag"
"fmt"
@@ -17,6 +18,7 @@ import (
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
@@ -327,6 +329,15 @@ func (nt *natTest) setupTest(ctx context.Context, addNode ...addNodeFunc) (nodes
}
})
haveKVM := false
if runtime.GOOS == "linux" {
if f, err := os.OpenFile("/dev/kvm", os.O_RDWR, 0); err == nil {
f.Close()
haveKVM = true
}
}
qmpSocks := make([]string, len(nodes))
for i, node := range nodes {
disk := fmt.Sprintf("%s/node-%d.qcow2", nt.tempDir, i)
out, err := exec.Command("qemu-img", "create",
@@ -349,22 +360,28 @@ func (nt *natTest) setupTest(ctx context.Context, addNode ...addNodeFunc) (nodes
}
envStr := envBuf.String()
cmd := exec.Command("qemu-system-x86_64",
qmpSocks[i] = fmt.Sprintf("%s/qmp-node-%d.sock", nt.tempDir, i)
qemuArgs := []string{
"-M", "microvm,isa-serial=off",
"-m", "384M",
"-nodefaults", "-no-user-config", "-nographic",
"-kernel", nt.kernel,
"-append", "console=hvc0 root=PARTUUID=60c24cc1-f3f9-427a-8199-76baa2d60001/PARTNROFF=1 ro init=/gokrazy/init panic=10 oops=panic pci=off nousb tsc=unstable clocksource=hpet gokrazy.remote_syslog.target="+sysLogAddr+" tailscale-tta=1"+envStr,
"-drive", "id=blk0,file="+disk+",format=qcow2",
"-append", "console=hvc0 root=PARTUUID=60c24cc1-f3f9-427a-8199-76baa2d60001/PARTNROFF=1 ro init=/gokrazy/init panic=10 oops=panic pci=off nousb gokrazy.remote_syslog.target=" + sysLogAddr + " tailscale-tta=1" + envStr,
"-drive", "id=blk0,file=" + disk + ",format=qcow2",
"-device", "virtio-blk-device,drive=blk0",
"-netdev", "stream,id=net0,addr.type=unix,addr.path="+sockAddr,
"-netdev", "stream,id=net0,addr.type=unix,addr.path=" + sockAddr,
"-device", "virtio-serial-device",
"-device", "virtio-rng-device",
"-device", "virtio-net-device,netdev=net0,mac="+node.MAC().String(),
"-device", "virtio-net-device,netdev=net0,mac=" + node.MAC().String(),
"-chardev", "stdio,id=virtiocon0,mux=on",
"-device", "virtconsole,chardev=virtiocon0",
"-mon", "chardev=virtiocon0,mode=readline",
)
"-qmp", "unix:" + qmpSocks[i] + ",server=on,wait=off",
}
if haveKVM {
qemuArgs = append(qemuArgs, "-enable-kvm", "-cpu", "host")
}
cmd := exec.Command("qemu-system-x86_64", qemuArgs...)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Start(); err != nil {
@@ -376,6 +393,15 @@ func (nt *natTest) setupTest(ctx context.Context, addNode ...addNodeFunc) (nodes
})
}
for i, node := range nodes {
if err := nt.vnet.AwaitFirstPacket(ctx, node.MAC()); err != nil {
t.Logf("node %v: no boot progress (no packets received): %v", node, err)
t.Logf("node %v: QMP status: %s", node, qmpQueryStatus(qmpSocks[i]))
t.FailNow()
}
t.Logf("node %v: boot detected (first packet received)", node)
}
for _, n := range nodes {
client := nt.vnet.NodeAgentClient(n)
n.SetClient(client)
@@ -430,8 +456,24 @@ func (nt *natTest) setupTest(ctx context.Context, addNode ...addNodeFunc) (nodes
return nodes, clients, nt.vnet.Close
}
type hasDeadline interface {
Deadline() (deadline time.Time, ok bool)
}
// testContext returns a context derived from the test's deadline (from -timeout),
// leaving a small margin for cleanup. Falls back to 60s if no deadline is set.
func testContext(tb testing.TB) (context.Context, context.CancelFunc) {
if t, ok := tb.(hasDeadline); ok {
if dl, ok := t.Deadline(); ok {
const margin = 5 * time.Second
return context.WithDeadline(context.Background(), dl.Add(-margin))
}
}
return context.WithTimeout(context.Background(), 60*time.Second)
}
func (nt *natTest) runHostConnectivityTest(addNode ...addNodeFunc) bool {
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
ctx, cancel := testContext(nt.tb)
defer cancel()
nodes, clients, cleanup := nt.setupTest(ctx, addNode...)
defer cleanup()
@@ -457,7 +499,7 @@ func (nt *natTest) runHostConnectivityTest(addNode ...addNodeFunc) bool {
}
func (nt *natTest) runTailscaleConnectivityTest(addNode ...addNodeFunc) pingRoute {
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
ctx, cancel := testContext(nt.tb)
defer cancel()
nodes, clients, cleanup := nt.setupTest(ctx, addNode...)
@@ -580,6 +622,55 @@ func ping(ctx context.Context, t testing.TB, c *vnet.NodeAgentClient, target net
return nil, fmt.Errorf("no ping response (ctx: %v)", ctx.Err())
}
// qmpQueryStatus connects to a QEMU QMP socket and returns the VM status
// (e.g. "running", "paused", "prelaunch") or an error string.
func qmpQueryStatus(sockPath string) string {
conn, err := net.DialTimeout("unix", sockPath, 2*time.Second)
if err != nil {
return fmt.Sprintf("dial error: %v", err)
}
defer conn.Close()
conn.SetDeadline(time.Now().Add(5 * time.Second))
dec := json.NewDecoder(conn)
// Read QMP greeting.
var greeting json.RawMessage
if err := dec.Decode(&greeting); err != nil {
return fmt.Sprintf("greeting error: %v", err)
}
// Enter command mode.
if _, err := conn.Write([]byte(`{"execute":"qmp_capabilities"}` + "\n")); err != nil {
return fmt.Sprintf("write caps: %v", err)
}
var capsResp json.RawMessage
if err := dec.Decode(&capsResp); err != nil {
return fmt.Sprintf("caps response: %v", err)
}
// Query status.
if _, err := conn.Write([]byte(`{"execute":"query-status"}` + "\n")); err != nil {
return fmt.Sprintf("write query-status: %v", err)
}
var statusResp struct {
Return struct {
Running bool `json:"running"`
Status string `json:"status"`
} `json:"return"`
Error *struct {
Class string `json:"class"`
Desc string `json:"desc"`
} `json:"error"`
}
if err := dec.Decode(&statusResp); err != nil {
return fmt.Sprintf("status response: %v", err)
}
if statusResp.Error != nil {
return fmt.Sprintf("qmp error: %s: %s", statusResp.Error.Class, statusResp.Error.Desc)
}
return fmt.Sprintf("status=%s running=%v", statusResp.Return.Status, statusResp.Return.Running)
}
func up(ctx context.Context, c *vnet.NodeAgentClient) error {
req, err := http.NewRequestWithContext(ctx, "GET", "http://unused/up", nil)
if err != nil {
+28
View File
@@ -747,6 +747,7 @@ type Server struct {
agentConnWaiter map[*node]chan<- struct{} // signaled after added to set
agentConns set.Set[*agentConn] // not keyed by node; should be small/cheap enough to scan all
agentDialer map[*node]netx.DialFunc
gotFirstPacket map[MAC]chan struct{} // closed on first packet from each MAC
cloudInitData map[int]*CloudInitData // node num → cloud-init config
fileContents map[string][]byte // filename → file bytes
@@ -825,6 +826,10 @@ func New(c *Config) (*Server, error) {
if err := s.initFromConfig(c); err != nil {
return nil, err
}
s.gotFirstPacket = make(map[MAC]chan struct{})
for mac := range s.nodeByMAC {
s.gotFirstPacket[mac] = make(chan struct{})
}
for n := range s.networks {
if err := n.initStack(); err != nil {
return nil, fmt.Errorf("newServer: initStack: %v", err)
@@ -932,6 +937,22 @@ func (s *Server) Close() {
s.wg.Wait()
}
// AwaitFirstPacket waits until the first ethernet frame is received from the
// given MAC address, indicating the VM has booted far enough to send network
// traffic. It returns an error if the context expires first.
func (s *Server) AwaitFirstPacket(ctx context.Context, mac MAC) error {
ch, ok := s.gotFirstPacket[mac]
if !ok {
return fmt.Errorf("unknown MAC %v", mac)
}
select {
case <-ch:
return nil
case <-ctx.Done():
return fmt.Errorf("no network packets received from %v: %w", mac, ctx.Err())
}
}
// MACs returns the MAC addresses of the configured nodes.
func (s *Server) MACs() iter.Seq[MAC] {
return maps.Keys(s.nodeByMAC)
@@ -1088,6 +1109,13 @@ func (s *Server) ServeUnixConn(uc *net.UnixConn, proto Protocol) {
}
if !didReg[srcMAC] {
didReg[srcMAC] = true
if ch, ok := s.gotFirstPacket[srcMAC]; ok {
select {
case <-ch: // already closed
default:
close(ch)
}
}
srcNet := srcNode.netForMAC(srcMAC)
if srcNet == nil {
s.logf("[conn %p] node %v has no network for MAC %v", c.uc, srcNode, srcMAC)