From b0e63cbeb90d57f141baa43957c81e18af755af8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 16 Mar 2026 19:26:25 +0000 Subject: [PATCH] wgengine/netstack: add TS_NETSTACK_KEEPALIVE_{IDLE,INTERVAL} envknobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds envknobs to override the netstack default TCP keepalive idle time (~2h) and probe interval (75s) for forwarded connections. When a tailnet peer goes away without closing its connections (pod deleted, peer removed from the netmap, silent network partition), the forwardTCP io.Copy goroutines block until keepalive fires: the gvisor-side Read waits on a peer that will never send again, and the backend-side Read waits on a backend that is alive and idle. With the netstack default of 7200s idle + 9×75s probes, dead-peer detection takes a little over two hours. Under high-churn forwarding — many short-lived peers, or peers holding thousands of proxied connections that drop at once — stuck goroutines accumulate faster than they clear. The existing SetKeepAlive(true) at this site enables keepalive without setting the timers; the TODO above it noted "a shorter default might be better" and "might be a useful user-tunable". This makes both timers tunable without changing the defaults: unset preserves the ~2h behavior, which is the right trade-off for battery-powered peers. The two knobs are independent — setting one leaves the other at the netstack default. The options are set before SetKeepAlive(true) so the timer arms with the configured values rather than the defaults — matches the order in ipnlocal/local.go for SSH keepalive. 
Updates #4522 Signed-off-by: Josef Bacik --- wgengine/netstack/netstack.go | 44 ++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/wgengine/netstack/netstack.go b/wgengine/netstack/netstack.go index d77a6781a..c56829b76 100644 --- a/wgengine/netstack/netstack.go +++ b/wgengine/netstack/netstack.go @@ -119,6 +119,22 @@ func maxInFlightConnectionAttemptsPerClient() int { var debugNetstack = envknob.RegisterBool("TS_DEBUG_NETSTACK") +// netstackKeepaliveIdle overrides the netstack default (~2h) TCP keepalive +// idle time for forwarded connections. When a tailnet peer goes away without +// closing its connections (pod deleted, peer removed from netmap, silent +// network partition), the forwardTCP io.Copy goroutines block until keepalive +// fires. Under high-churn forwarding — many short-lived peers, or peers +// holding thousands of proxied connections that drop at once — the 2h default +// lets stuck goroutines accumulate faster than they clear. Value is a Go +// duration, e.g. "60s". See tailscale/tailscale#4522. +var netstackKeepaliveIdle = envknob.RegisterDuration("TS_NETSTACK_KEEPALIVE_IDLE") + +// netstackKeepaliveInterval overrides the netstack default (75s) TCP keepalive +// probe interval for forwarded connections. Independent of +// netstackKeepaliveIdle; setting one without the other leaves the unset knob +// at the netstack default. Value is a Go duration, e.g. "15s". +var netstackKeepaliveInterval = envknob.RegisterDuration("TS_NETSTACK_KEEPALIVE_INTERVAL") + var ( serviceIP = tsaddr.TailscaleServiceIP() serviceIPv6 = tsaddr.TailscaleServiceIPv6() @@ -1520,14 +1536,26 @@ func (ns *Impl) acceptTCP(r *tcp.ForwarderRequest) { // Applications might be setting this on a forwarded connection, but from // userspace we can not see those, so the best we can do is to always // perform them with conservative timing. 
- // TODO(tailscale/tailscale#4522): Netstack defaults match the Linux - // defaults, and results in a little over two hours before the socket would - // be closed due to keepalive. A shorter default might be better, or seeking - // a default from the host IP stack. This also might be a useful - // user-tunable, as in userspace mode this can have broad implications such - // as lingering connections to fork style daemons. On the other side of the - // fence, the long duration timers are low impact values for battery powered - // peers. + // Netstack defaults match the Linux defaults and result in a little over + // two hours before the socket is closed due to keepalive. In userspace + // mode that can leave connections to fork-style daemons lingering, so + // operators can override the timers with TS_NETSTACK_KEEPALIVE_IDLE and + // TS_NETSTACK_KEEPALIVE_INTERVAL (see netstackKeepaliveIdle and + // netstackKeepaliveInterval). The defaults themselves are unchanged + // because the long timers are low-impact for battery-powered peers. + // See tailscale/tailscale#4522. + if d := netstackKeepaliveIdle(); d > 0 { + idle := tcpip.KeepaliveIdleOption(d) + if err := ep.SetSockOpt(&idle); err != nil { + ns.logf("netstack: SetSockOpt(KeepaliveIdle=%v) failed: %v", d, err) + } + } + if d := netstackKeepaliveInterval(); d > 0 { + intvl := tcpip.KeepaliveIntervalOption(d) + if err := ep.SetSockOpt(&intvl); err != nil { + ns.logf("netstack: SetSockOpt(KeepaliveInterval=%v) failed: %v", d, err) + } + } ep.SocketOptions().SetKeepAlive(true) // This function is called when we're ready to use the