wgengine/netstack: absorb all quad-100 traffic locally, never leak to peers
Previously, handleLocalPackets intercepted traffic to the Tailscale
service IP (100.100.100.100 / fd7a:115c:a1e0::53) only for an allow-list
of ports: TCP 53/80/8080 and UDP 53. Any other port returned
filter.Accept, letting the packet fall through to the ACL filter and
wireguard-go, which would attempt a peer lookup. No peer owns the
quad-100 AllowedIP, so after ~5s pendopen.go would log:
open-conn-track: timeout opening ...; no associated peer node
This is the common "conntrack error no peer found for 100.100.100.100:853"
log spam seen in the wild (e.g. from systemd-resolved or another
resolver speculatively trying DoT on quad-100). It also leaks quad-100
packets onto the tailnet.
Remove the port allow-list so handleLocalPackets absorbs every quad-100
packet into netstack regardless of IP protocol or port. Traffic never
reaches the conntrack / peer-routing layers.
With the allow-list gone, acceptTCP needs a corresponding guard: on a
quad-100 TCP port we don't serve, execution used to fall through to the
isTailscaleIP case (quad-100 is in the tailscale IP range), which
rewrote the dial target to 127.0.0.1:<port> and forwardTCP'd the
connection to whatever happened to be listening on the host's loopback
at that port. Add a hittingServiceIP case that RSTs cleanly instead,
placed before the isTailscaleIP fallthrough.
TestQuad100UnservedTCPPortDoesNotForward is a new integration test that
injects a TCP SYN to 100.100.100.100:853 via handleLocalPackets, stubs
forwardDialFunc, and asserts the dialer is not invoked; it catches
regressions of the acceptTCP recursion/loopback-redirection case.
Fixes #15796
Fixes #19421
Updates #3261
Updates #11305
Signed-off-by: James Tucker <james@tailscale.com>
This commit is contained in:
committed by
James Tucker
parent
006d7e180e
commit
1b40911611
@@ -953,6 +953,7 @@ func TestHandleLocalPackets(t *testing.T) {
|
||||
impl.lb.SetIPServiceMappingsForTest(IPServiceMap)
|
||||
|
||||
t.Run("ShouldHandleServiceIP", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
pkt := &packet.Parsed{
|
||||
IPVersion: 4,
|
||||
IPProto: ipproto.TCP,
|
||||
@@ -965,7 +966,94 @@ func TestHandleLocalPackets(t *testing.T) {
|
||||
t.Errorf("got filter outcome %v, want filter.DropSilently", resp)
|
||||
}
|
||||
})
|
||||
// Any port on the quad-100 service IP must be absorbed locally by
|
||||
// netstack and never leak out to the WireGuard / peer-routing
|
||||
// layers. Historically we only intercepted specific ports (UDP 53
|
||||
// and TCP 53/80/8080), causing stray traffic to other ports such
|
||||
// as 100.100.100.100:853 (DoT) to time out in wireguard-go and
|
||||
// produce "open-conn-track: timeout opening ...; no associated
|
||||
// peer node" log spam. See the handleLocalPackets comment.
|
||||
quad100LeakCases := []struct {
|
||||
name string
|
||||
proto ipproto.Proto
|
||||
dst string
|
||||
}{
|
||||
{"TCP-853-DoT-v4", ipproto.TCP, "100.100.100.100:853"},
|
||||
{"TCP-443-DoH-v4", ipproto.TCP, "100.100.100.100:443"},
|
||||
{"TCP-9000-stray-v4", ipproto.TCP, "100.100.100.100:9000"},
|
||||
{"UDP-853-DoQ-v4", ipproto.UDP, "100.100.100.100:853"},
|
||||
{"UDP-443-v4", ipproto.UDP, "100.100.100.100:443"},
|
||||
{"TCP-853-DoT-v6", ipproto.TCP, "[fd7a:115c:a1e0::53]:853"},
|
||||
{"UDP-443-v6", ipproto.UDP, "[fd7a:115c:a1e0::53]:443"},
|
||||
}
|
||||
for _, tc := range quad100LeakCases {
|
||||
t.Run("ShouldNotLeakQuad100_"+tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
dst := netip.MustParseAddrPort(tc.dst)
|
||||
ipVersion := uint8(4)
|
||||
if dst.Addr().Is6() {
|
||||
ipVersion = 6
|
||||
}
|
||||
src := "127.0.0.1:9999"
|
||||
if ipVersion == 6 {
|
||||
src = "[::1]:9999"
|
||||
}
|
||||
pkt := &packet.Parsed{
|
||||
IPVersion: ipVersion,
|
||||
IPProto: tc.proto,
|
||||
Src: netip.MustParseAddrPort(src),
|
||||
Dst: dst,
|
||||
}
|
||||
if tc.proto == ipproto.TCP {
|
||||
pkt.TCPFlags = packet.TCPSyn
|
||||
}
|
||||
resp, _ := impl.handleLocalPackets(pkt, impl.tundev, nil)
|
||||
if resp != filter.DropSilently {
|
||||
t.Errorf("quad-100 %s packet leaked: got filter outcome %v, want filter.DropSilently", tc.name, resp)
|
||||
}
|
||||
})
|
||||
}
|
||||
// Exhaustive sweep of all ports for both transport protocols and
|
||||
// both IP versions, confirming no port leaks. The quad-100 branch
|
||||
// of handleLocalPackets is port-independent by construction; this
|
||||
// test serves as a regression guard against accidental port-based
|
||||
// exemptions slipping back in.
|
||||
t.Run("ShouldNotLeakQuad100_AllPorts", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
protos := []ipproto.Proto{ipproto.TCP, ipproto.UDP}
|
||||
dsts := []netip.Addr{
|
||||
netip.MustParseAddr("100.100.100.100"),
|
||||
netip.MustParseAddr("fd7a:115c:a1e0::53"),
|
||||
}
|
||||
for _, proto := range protos {
|
||||
for _, dstAddr := range dsts {
|
||||
ipVersion := uint8(4)
|
||||
srcStr := "127.0.0.1:9999"
|
||||
if dstAddr.Is6() {
|
||||
ipVersion = 6
|
||||
srcStr = "[::1]:9999"
|
||||
}
|
||||
src := netip.MustParseAddrPort(srcStr)
|
||||
for port := 1; port <= 65535; port++ {
|
||||
pkt := &packet.Parsed{
|
||||
IPVersion: ipVersion,
|
||||
IPProto: proto,
|
||||
Src: src,
|
||||
Dst: netip.AddrPortFrom(dstAddr, uint16(port)),
|
||||
}
|
||||
if proto == ipproto.TCP {
|
||||
pkt.TCPFlags = packet.TCPSyn
|
||||
}
|
||||
resp, _ := impl.handleLocalPackets(pkt, impl.tundev, nil)
|
||||
if resp != filter.DropSilently {
|
||||
t.Fatalf("port=%d proto=%v dst=%v: got %v, want filter.DropSilently", port, proto, dstAddr, resp)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
t.Run("ShouldHandle4via6", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
pkt := &packet.Parsed{
|
||||
IPVersion: 6,
|
||||
IPProto: ipproto.TCP,
|
||||
@@ -988,6 +1076,7 @@ func TestHandleLocalPackets(t *testing.T) {
|
||||
}
|
||||
})
|
||||
t.Run("ShouldHandleLocalTailscaleServices", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
pkt := &packet.Parsed{
|
||||
IPVersion: 4,
|
||||
IPProto: ipproto.TCP,
|
||||
@@ -1001,6 +1090,7 @@ func TestHandleLocalPackets(t *testing.T) {
|
||||
}
|
||||
})
|
||||
t.Run("OtherNonHandled", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
pkt := &packet.Parsed{
|
||||
IPVersion: 6,
|
||||
IPProto: ipproto.TCP,
|
||||
@@ -1023,6 +1113,100 @@ func TestHandleLocalPackets(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
// TestQuad100UnservedTCPPortDoesNotForward verifies that a TCP SYN to the
|
||||
// Tailscale service IP (100.100.100.100) on a port we don't serve is
|
||||
// absorbed by netstack and rejected cleanly, without triggering the
|
||||
// outbound forwardTCP dialer.
|
||||
//
|
||||
// handleLocalPackets now absorbs all quad-100 traffic regardless of
|
||||
// port to prevent it leaking to WireGuard peers (which produced noisy
|
||||
// "open-conn-track: timeout opening ...; no associated peer node" log
|
||||
// lines). That leaves acceptTCP responsible for rejecting connections
|
||||
// to ports we don't handle; without an explicit guard, execution would
|
||||
// fall through to the isTailscaleIP case (quad-100 is in the tailscale
|
||||
// range), rewriting the dial target to 127.0.0.1:<port> and forwarding
|
||||
// the connection to whatever random service happened to be listening
|
||||
// on the host's loopback at that port.
|
||||
//
|
||||
// This test asserts that the forward dialer is NOT invoked for quad-100
|
||||
// SYNs on unserved ports; the guard in acceptTCP must RST instead.
|
||||
func TestQuad100UnservedTCPPortDoesNotForward(t *testing.T) {
|
||||
impl := makeNetstack(t, func(impl *Impl) {
|
||||
impl.ProcessSubnets = false
|
||||
impl.ProcessLocalIPs = false
|
||||
impl.atomicIsLocalIPFunc.Store(looksLikeATailscaleSelfAddress)
|
||||
})
|
||||
|
||||
dialFn, gotConn := makeHangDialer(t)
|
||||
impl.forwardDialFunc = dialFn
|
||||
|
||||
// Use a client IP in the CGNAT range so shouldProcessInbound-adjacent
|
||||
// code paths treat this as plausibly-peer-sourced traffic, matching
|
||||
// what a real stray quad-100 probe from the host OS would look like.
|
||||
client := netip.MustParseAddr("100.101.102.103")
|
||||
quad100 := tsaddr.TailscaleServiceIP()
|
||||
|
||||
// 853 is DoT, the specific case called out in the original bug
|
||||
// report ("conntrack error no peer found for 100.100.100.100:853").
|
||||
// Before the fix, port 853 (and any non-{53,80,8080} port) leaked
|
||||
// out to WireGuard; after the fix it is absorbed here and must NOT
|
||||
// trigger forwardTCP.
|
||||
pkt := tcp4syn(t, client, quad100, 1234, 853)
|
||||
var parsed packet.Parsed
|
||||
parsed.Decode(pkt)
|
||||
|
||||
resp, _ := impl.handleLocalPackets(&parsed, impl.tundev, nil)
|
||||
if resp != filter.DropSilently {
|
||||
t.Fatalf("handleLocalPackets for quad-100:853: got %v, want filter.DropSilently", resp)
|
||||
}
|
||||
|
||||
// acceptTCP runs asynchronously in the gVisor TCP dispatcher after
|
||||
// handleLocalPackets injects the packet into netstack. Use the
|
||||
// in-flight connection counter as a deterministic synchronization
|
||||
// point: wrapTCPProtocolHandler increments connsInFlightByClient
|
||||
// when the dispatcher hands the connection off to acceptTCP, and
|
||||
// acceptTCP's deferred decrementInFlightTCPForward decrements it
|
||||
// on return.
|
||||
//
|
||||
// On the green path (RST guard fires), acceptTCP returns promptly
|
||||
// and the counter reaches 0. On the red path (fall-through to
|
||||
// forwardTCP), acceptTCP blocks inside the forwardDialFunc call —
|
||||
// makeHangDialer signals gotConn on entry (buffered, non-blocking)
|
||||
// and then blocks forever — so the counter never reaches 0 but
|
||||
// gotConn fires synchronously from the dispatcher goroutine. A
|
||||
// select on both races those outcomes without real-time padding.
|
||||
//
|
||||
// testing/synctest is not usable here: gVisor's sleep package calls
|
||||
// the runtime's gopark directly rather than via the standard
|
||||
// library, so synctest.Wait() cannot observe those goroutines
|
||||
// becoming durably blocked and hangs indefinitely.
|
||||
inFlightZero := make(chan struct{})
|
||||
go func() {
|
||||
for {
|
||||
impl.mu.Lock()
|
||||
n := impl.connsInFlightByClient[client]
|
||||
impl.mu.Unlock()
|
||||
if n == 0 {
|
||||
close(inFlightZero)
|
||||
return
|
||||
}
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-gotConn:
|
||||
t.Fatalf("forwardDialFunc was called for quad-100:853; acceptTCP fell through to forwardTCP instead of sending RST. This means stray traffic to quad-100 on unserved ports is being redirected to the host's loopback at the same port.")
|
||||
case <-inFlightZero:
|
||||
// acceptTCP returned cleanly; the RST guard fired.
|
||||
case <-time.After(5 * time.Second):
|
||||
// Safety net so a regression in the in-flight counter plumbing
|
||||
// doesn't hang the whole test run; both outcomes above should
|
||||
// fire within milliseconds in practice.
|
||||
t.Fatal("timed out waiting for acceptTCP to dispatch quad-100:853 SYN")
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldSendToHost(t *testing.T) {
|
||||
var (
|
||||
selfIP4 = netip.MustParseAddr("100.64.1.2")
|
||||
|
||||
Reference in New Issue
Block a user