Files
tailscale/util/linuxfw/linuxfw.go
T
Mike O'Driscoll 48919f708b util/linuxfw: fix nftables endianness and add connmark conditional check (#19725)
Fix the following issues:

1. Endianness Bug: The nftables runner used hardcoded
   big-endian byte arrays for firewall mark values (0xff0000, etc.), breaking
   bitwise operations on little-endian systems (all x86/x64, ARM). This caused
   connmark save/restore rules to silently fail. Fixed by using
   binary.NativeEndian to generate correct byte order for the host system.

2. Connmark Restore Conditional Check: The connmark restore
   mechanism unconditionally overwrote packet marks, even when Tailscale
   hadn't set any mark bits in conntrack. This destroyed mark bits set by
   other systems (VPNs, policy routing, vendor flags), breaking coexistence.
   Fixed by adding a conditional check to only restore when (ct mark &
   0xff0000) != 0, preventing the worst case of wiping all marks to zero.

Changes:
- util/linuxfw/linuxfw.go: Added nativeEndianUint32() helper and updated
  all mask functions to use native byte order instead of hardcoded bytes
- util/linuxfw/nftables_runner.go: Added conditional check in
  makeConnmarkRestoreExprs() to only restore when ct mark has Tailscale
  bits set; added detailed comment about bit preservation limitations
- util/linuxfw/iptables_runner.go: Added conditional check using -m
  connmark ! --mark to match nftables behavior
- Tests updated: Fixed byte-level regression tests to expect little-endian
  byte sequences and verify the new conditional check

Note: Perfect bit preservation in nftables remains challenging
due to nftables expression VM limitations. The current implementation
prevents the critical case of wiping marks with zero.

Updates #3310
Fixes #11803
Related to #8555

Signed-off-by: Mike O'Driscoll <mikeo@tailscale.com>
2026-05-14 09:11:24 -04:00

201 lines
5.7 KiB
Go

// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause

//go:build linux

// Package linuxfw returns the kind of firewall being used by the kernel.
package linuxfw
import (
"encoding/binary"
"errors"
"fmt"
"os"
"strconv"
"strings"
"github.com/tailscale/netlink"
"tailscale.com/feature"
"tailscale.com/tsconst"
"tailscale.com/types/logger"
)
// MatchDecision is the decision the firewall makes for a packet matched by a
// rule. addMatchSubnetRouteMarkRule uses it to choose between accepting and
// masquerading the packet.
type MatchDecision int

// Possible MatchDecision values.
const (
	// Accept means the matched packet is accepted.
	Accept = MatchDecision(iota)
	// Masq means the matched packet is masqueraded.
	Masq
)
// FWModeNotSupportedError reports that a firewall mode cannot be used,
// together with the underlying reason.
type FWModeNotSupportedError struct {
	Mode FirewallMode // the firewall mode that is not supported
	Err  error        // the underlying cause; returned by Unwrap
}

// Error implements the error interface. The message quotes the mode and
// appends the underlying cause.
func (e FWModeNotSupportedError) Error() string {
	mode, cause := e.Mode, e.Err
	return fmt.Sprintf("firewall mode %q not supported: %v", mode, cause)
}

// Is reports whether target is a FWModeNotSupportedError value. The Mode and
// Err fields are not compared, so errors.Is matches on the error type alone.
// A *FWModeNotSupportedError target does not match.
func (e FWModeNotSupportedError) Is(target error) bool {
	switch target.(type) {
	case FWModeNotSupportedError:
		return true
	default:
		return false
	}
}

// Unwrap returns the underlying cause, which may be nil.
func (e FWModeNotSupportedError) Unwrap() error { return e.Err }
// FirewallMode names a kind of firewall implementation.
type FirewallMode string

// Known FirewallMode values.
const (
	// FirewallModeIPTables identifies iptables.
	FirewallModeIPTables = FirewallMode("iptables")
	// FirewallModeNfTables identifies nftables.
	FirewallModeNfTables = FirewallMode("nftables")
)
// CGNATMode is a string-typed setting with two known values, DROP and RETURN.
// NOTE(review): presumably the verdict applied to CGNAT-range traffic —
// confirm against the firewall runners that consume it.
type CGNATMode string

// Known CGNATMode values.
const (
	// CGNATModeDrop is the "DROP" mode.
	CGNATModeDrop = CGNATMode("DROP")
	// CGNATModeReturn is the "RETURN" mode.
	CGNATModeReturn = CGNATMode("RETURN")
)
// The following bits are added to packet marks for Tailscale use.
//
// We tried to pick bits sufficiently out of the way that it's
// unlikely to collide with existing uses. We have 4 bytes of mark
// bits to play with. We leave the lower byte alone on the assumption
// that sysadmins would use those. Kubernetes uses a few bits in the
// second byte, so we steer clear of that too.
//
// Empirically, most of the documentation on packet marks on the
// internet gives the impression that the marks are 16 bits
// wide. Based on this, we theorize that the upper two bytes are
// relatively unused in the wild, and so we consume bits 16:23 (the
// third byte).
//
// The constants are in the iptables/iproute2 string format for
// matching and setting the bits, so they can be directly embedded in
// commands.
const (
// Each mark is aliased from tsconst in two forms. The *Num variants are the
// ones this file uses where a number is required (uint32 conversion, netlink
// rule marks). NOTE(review): presumably the un-suffixed names are the string
// forms of the same values — confirm in tsconst.

// Mask over the mark bits reserved for Tailscale use.
fwmarkMask = tsconst.LinuxFwmarkMask
fwmarkMaskNum = tsconst.LinuxFwmarkMaskNum
// Mark for subnet-route traffic.
subnetRouteMark = tsconst.LinuxSubnetRouteMark
subnetRouteMarkNum = tsconst.LinuxSubnetRouteMarkNum
// Bypass mark; the numeric form is used for the probe rule in
// CheckIPRuleSupportsV6.
bypassMark = tsconst.LinuxBypassMark
bypassMarkNum = tsconst.LinuxBypassMarkNum
)
// getTailscaleFwmarkMaskNeg returns the negation of TailscaleFwmarkMask
// in native byte order.
func getTailscaleFwmarkMaskNeg() []byte {
return nativeEndianUint32(^uint32(fwmarkMaskNum))
}
// getTailscaleFwmarkMask returns the TailscaleFwmarkMask in native byte order.
func getTailscaleFwmarkMask() []byte {
return nativeEndianUint32(fwmarkMaskNum)
}
// getTailscaleSubnetRouteMark returns the TailscaleSubnetRouteMark
// in native byte order.
func getTailscaleSubnetRouteMark() []byte {
return nativeEndianUint32(subnetRouteMarkNum)
}
// nativeEndianUint32 encodes v into a newly allocated 4-byte slice using the
// host's native byte order. Each call returns independent storage.
func nativeEndianUint32(v uint32) []byte {
	var buf [4]byte
	binary.NativeEndian.PutUint32(buf[:], v)
	return buf[:]
}
// checkIPv6ForTest can be set in tests. When non-nil, CheckIPv6 calls it with
// its logf argument and returns its result instead of inspecting the system.
var checkIPv6ForTest func(logger.Logf) error
// CheckIPv6 checks whether the system appears to have a working IPv6
// network stack. It returns an error explaining what looks wrong or
// missing, or nil if nothing does. It does not check that IPv6 is
// currently functional or that there's a global address, just that the
// system would support IPv6 if it were on an IPv6 network.
//
// If checkIPv6ForTest is non-nil, it is called with logf and its result is
// returned instead. Otherwise logf is passed on to CheckIPRuleSupportsV6.
func CheckIPv6(logf logger.Logf) error {
	if f := checkIPv6ForTest; f != nil {
		return f(logf)
	}

	// A missing /proc/sys/net/ipv6 is reported as-is. Any other Stat error
	// falls through to the ReadFile below.
	if _, err := os.Stat("/proc/sys/net/ipv6"); errors.Is(err, os.ErrNotExist) {
		return err
	}

	bs, err := os.ReadFile("/proc/sys/net/ipv6/conf/all/disable_ipv6")
	if err != nil {
		// Be conservative if we can't find the IPv6 configuration knob.
		return err
	}
	disabled, err := strconv.ParseBool(strings.TrimSpace(string(bs)))
	if err != nil {
		// Keep the parse failure in the chain so the offending value is visible.
		return fmt.Errorf("disable_ipv6 has invalid bool: %w", err)
	}
	if disabled {
		return errors.New("disable_ipv6 is set")
	}

	// Older kernels don't support IPv6 policy routing. Some kernels
	// support policy routing but don't have this knob, so absence of
	// the knob is not fatal.
	bs, err = os.ReadFile("/proc/sys/net/ipv6/conf/all/disable_policy")
	if err == nil {
		disabled, err = strconv.ParseBool(strings.TrimSpace(string(bs)))
		if err != nil {
			return fmt.Errorf("disable_policy has invalid bool: %w", err)
		}
		if disabled {
			return errors.New("disable_policy is set")
		}
	}

	if err := CheckIPRuleSupportsV6(logf); err != nil {
		return fmt.Errorf("kernel doesn't support IPv6 policy routing: %w", err)
	}
	return nil
}
// CheckIPRuleSupportsV6 checks whether the kernel supports IPv6 policy
// routing rules, returning nil if it does.
//
// It first lists the existing IPv6 rules. A listing failure is returned
// wrapped; finding at least one rule is taken as proof of support and is
// logged via logf. When the list is empty it probes by adding a temporary
// rule and returns the result of that add.
func CheckIPRuleSupportsV6(logf logger.Logf) error {
	// Prefer a read-only query so that, ideally, no state has to be modified.
	existing, err := netlink.RuleList(netlink.FAMILY_V6)
	if err != nil {
		return fmt.Errorf("querying IPv6 policy routing rules: %w", err)
	}
	if n := len(existing); n > 0 {
		logf("[v1] kernel supports IPv6 policy routing (found %d rules)", n)
		return nil
	}

	// Nothing to observe, so actually create and delete a rule as a test.
	probe := netlink.NewRule()
	probe.Family = netlink.FAMILY_V6
	probe.Table = 52
	probe.Mark = bypassMarkNum
	probe.Priority = 1234

	// Clear out any matching rule that might already be there. The error is
	// deliberately not checked: this is best-effort cleanup.
	netlink.RuleDel(probe)

	// Remove the probe rule again on the way out.
	defer netlink.RuleDel(probe)

	return netlink.RuleAdd(probe)
}
// hookIPTablesCleanup holds the function that IPTablesCleanUp calls, if one
// has been registered. NOTE(review): presumably registered by whichever
// package provides the iptables implementation — confirm where it is set.
var hookIPTablesCleanup feature.Hook[func(logger.Logf)]
// IPTablesCleanUp removes all Tailscale added iptables rules by invoking the
// function held in hookIPTablesCleanup with logf. Any errors that occur are
// logged to the provided logf. If no function has been registered in the
// hook, IPTablesCleanUp does nothing.
func IPTablesCleanUp(logf logger.Logf) {
	cleanup, ok := hookIPTablesCleanup.GetOk()
	if !ok {
		return
	}
	cleanup(logf)
}