48919f708b
Fix the following issues: 1. Endianness Bug: The nftables runner used hardcoded big-endian byte arrays for firewall mark values (0xff0000, etc.), breaking bitwise operations on little-endian systems (all x86/x64, ARM). This caused connmark save/restore rules to silently fail. Fixed by using binary.NativeEndian to generate correct byte order for the host system. 2. Connmark Restore Conditional Check: The connmark restore mechanism unconditionally overwrote packet marks, even when Tailscale hadn't set any mark bits in conntrack. This destroyed mark bits set by other systems (VPNs, policy routing, vendor flags), breaking coexistence. Fixed by adding a conditional check to only restore when (ct mark & 0xff0000) != 0, preventing the worst case of wiping all marks to zero. Changes: - util/linuxfw/linuxfw.go: Added nativeEndianUint32() helper and updated all mask functions to use native byte order instead of hardcoded bytes - util/linuxfw/nftables_runner.go: Added conditional check in makeConnmarkRestoreExprs() to only restore when ct mark has Tailscale bits set; added detailed comment about bit preservation limitations - util/linuxfw/iptables_runner.go: Added conditional check using -m connmark ! --mark to match nftables behavior - Tests updated: Fixed byte-level regression tests to expect little-endian byte sequences and verify the new conditional check Note: Perfect bit preservation in nftables remains challenging due to nftables expression VM limitations. The current implementation prevents the critical case of wiping marks with zero. Updates #3310 Fixes #11803 Related to #8555 Signed-off-by: Mike O'Driscoll <mikeo@tailscale.com>
201 lines
5.7 KiB
Go
201 lines
5.7 KiB
Go
// Copyright (c) Tailscale Inc & contributors
|
|
// SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
//go:build linux
|
|
|
|
// Package linuxfw provides helpers for working with the Linux kernel firewall (iptables or nftables).
|
|
package linuxfw
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/tailscale/netlink"
|
|
"tailscale.com/feature"
|
|
"tailscale.com/tsconst"
|
|
"tailscale.com/types/logger"
|
|
)
|
|
|
|
// MatchDecision describes what the firewall does with a packet that a rule
// has matched. addMatchSubnetRouteMarkRule uses it to choose between
// accepting and masquerading the packet.
type MatchDecision int

const (
	// Accept means the matched packet is accepted. It is the zero value.
	Accept MatchDecision = iota
	// Masq means the matched packet is masqueraded.
	Masq
)
|
|
|
|
type FWModeNotSupportedError struct {
|
|
Mode FirewallMode
|
|
Err error
|
|
}
|
|
|
|
func (e FWModeNotSupportedError) Error() string {
|
|
return fmt.Sprintf("firewall mode %q not supported: %v", e.Mode, e.Err)
|
|
}
|
|
|
|
func (e FWModeNotSupportedError) Is(target error) bool {
|
|
_, ok := target.(FWModeNotSupportedError)
|
|
return ok
|
|
}
|
|
|
|
func (e FWModeNotSupportedError) Unwrap() error {
|
|
return e.Err
|
|
}
|
|
|
|
// FirewallMode names a firewall implementation.
type FirewallMode string

// The firewall modes declared by this package.
const (
	// FirewallModeIPTables is the "iptables" mode.
	FirewallModeIPTables FirewallMode = "iptables"
	// FirewallModeNfTables is the "nftables" mode.
	FirewallModeNfTables FirewallMode = "nftables"
)
|
|
|
|
// CGNATMode is a string-valued setting with the two values declared below.
// NOTE(review): the values look like iptables verdicts applied to CGNAT-range
// traffic; confirm against the firewall runners that consume this type.
type CGNATMode string

// The CGNAT modes declared by this package.
const (
	// CGNATModeDrop is the "DROP" mode.
	CGNATModeDrop CGNATMode = "DROP"
	// CGNATModeReturn is the "RETURN" mode.
	CGNATModeReturn CGNATMode = "RETURN"
)
|
|
|
|
// The following bits are added to packet marks for Tailscale use.
|
|
//
|
|
// We tried to pick bits sufficiently out of the way that it's
|
|
// unlikely to collide with existing uses. We have 4 bytes of mark
|
|
// bits to play with. We leave the lower byte alone on the assumption
|
|
// that sysadmins would use those. Kubernetes uses a few bits in the
|
|
// second byte, so we steer clear of that too.
|
|
//
|
|
// Empirically, most of the documentation on packet marks on the
|
|
// internet gives the impression that the marks are 16 bits
|
|
// wide. Based on this, we theorize that the upper two bytes are
|
|
// relatively unused in the wild, and so we consume bits 16:23 (the
|
|
// third byte).
|
|
//
|
|
// The constants are in the iptables/iproute2 string format for
|
|
// matching and setting the bits, so they can be directly embedded in
|
|
// commands.
|
|
const (
|
|
fwmarkMask = tsconst.LinuxFwmarkMask
|
|
fwmarkMaskNum = tsconst.LinuxFwmarkMaskNum
|
|
subnetRouteMark = tsconst.LinuxSubnetRouteMark
|
|
subnetRouteMarkNum = tsconst.LinuxSubnetRouteMarkNum
|
|
bypassMark = tsconst.LinuxBypassMark
|
|
bypassMarkNum = tsconst.LinuxBypassMarkNum
|
|
)
|
|
|
|
// getTailscaleFwmarkMaskNeg returns the negation of TailscaleFwmarkMask
|
|
// in native byte order.
|
|
func getTailscaleFwmarkMaskNeg() []byte {
|
|
return nativeEndianUint32(^uint32(fwmarkMaskNum))
|
|
}
|
|
|
|
// getTailscaleFwmarkMask returns the TailscaleFwmarkMask in native byte order.
|
|
func getTailscaleFwmarkMask() []byte {
|
|
return nativeEndianUint32(fwmarkMaskNum)
|
|
}
|
|
|
|
// getTailscaleSubnetRouteMark returns the TailscaleSubnetRouteMark
|
|
// in native byte order.
|
|
func getTailscaleSubnetRouteMark() []byte {
|
|
return nativeEndianUint32(subnetRouteMarkNum)
|
|
}
|
|
|
|
// nativeEndianUint32 encodes v into a freshly allocated 4-byte slice using
// the byte order of the host.
func nativeEndianUint32(v uint32) []byte {
	return binary.NativeEndian.AppendUint32(make([]byte, 0, 4), v)
}
|
|
|
|
// checkIPv6ForTest can be set in tests.
|
|
var checkIPv6ForTest func(logger.Logf) error
|
|
|
|
// checkIPv6 checks whether the system appears to have a working IPv6
|
|
// network stack. It returns an error explaining what looks wrong or
|
|
// missing. It does not check that IPv6 is currently functional or
|
|
// that there's a global address, just that the system would support
|
|
// IPv6 if it were on an IPv6 network.
|
|
func CheckIPv6(logf logger.Logf) error {
|
|
if f := checkIPv6ForTest; f != nil {
|
|
return f(logf)
|
|
}
|
|
|
|
_, err := os.Stat("/proc/sys/net/ipv6")
|
|
if os.IsNotExist(err) {
|
|
return err
|
|
}
|
|
bs, err := os.ReadFile("/proc/sys/net/ipv6/conf/all/disable_ipv6")
|
|
if err != nil {
|
|
// Be conservative if we can't find the IPv6 configuration knob.
|
|
return err
|
|
}
|
|
disabled, err := strconv.ParseBool(strings.TrimSpace(string(bs)))
|
|
if err != nil {
|
|
return errors.New("disable_ipv6 has invalid bool")
|
|
}
|
|
if disabled {
|
|
return errors.New("disable_ipv6 is set")
|
|
}
|
|
|
|
// Older kernels don't support IPv6 policy routing. Some kernels
|
|
// support policy routing but don't have this knob, so absence of
|
|
// the knob is not fatal.
|
|
bs, err = os.ReadFile("/proc/sys/net/ipv6/conf/all/disable_policy")
|
|
if err == nil {
|
|
disabled, err = strconv.ParseBool(strings.TrimSpace(string(bs)))
|
|
if err != nil {
|
|
return errors.New("disable_policy has invalid bool")
|
|
}
|
|
if disabled {
|
|
return errors.New("disable_policy is set")
|
|
}
|
|
}
|
|
|
|
if err := CheckIPRuleSupportsV6(logf); err != nil {
|
|
return fmt.Errorf("kernel doesn't support IPv6 policy routing: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func CheckIPRuleSupportsV6(logf logger.Logf) error {
|
|
// First try just a read-only operation to ideally avoid
|
|
// having to modify any state.
|
|
if rules, err := netlink.RuleList(netlink.FAMILY_V6); err != nil {
|
|
return fmt.Errorf("querying IPv6 policy routing rules: %w", err)
|
|
} else {
|
|
if len(rules) > 0 {
|
|
logf("[v1] kernel supports IPv6 policy routing (found %d rules)", len(rules))
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// Try to actually create & delete one as a test.
|
|
rule := netlink.NewRule()
|
|
rule.Priority = 1234
|
|
rule.Mark = bypassMarkNum
|
|
rule.Table = 52
|
|
rule.Family = netlink.FAMILY_V6
|
|
// First delete the rule unconditionally, and don't check for
|
|
// errors. This is just cleaning up anything that might be already
|
|
// there.
|
|
netlink.RuleDel(rule)
|
|
// And clean up on exit.
|
|
defer netlink.RuleDel(rule)
|
|
return netlink.RuleAdd(rule)
|
|
}
|
|
|
|
var hookIPTablesCleanup feature.Hook[func(logger.Logf)]
|
|
|
|
// IPTablesCleanUp removes all Tailscale added iptables rules.
|
|
// Any errors that occur are logged to the provided logf.
|
|
func IPTablesCleanUp(logf logger.Logf) {
|
|
if f, ok := hookIPTablesCleanup.GetOk(); ok {
|
|
f(logf)
|
|
}
|
|
}
|