util/linuxfw,wgengine/router: add connmark rules for rp_filter workaround (#18860)

When a Linux system acts as an exit node or subnet router with strict
reverse path filtering (rp_filter=1), reply packets may
be dropped because they fail the RPF check. Reply packets arrive on the
WAN interface but the routing table indicates they should have arrived
on the Tailscale interface, causing the kernel to drop them.

This adds firewall rules in the mangle table to save outbound packet
marks to conntrack and restore them on reply packets before the routing
decision. When reply packets have their marks restored, the kernel uses
the correct routing table (based on the mark) and the packets pass the
rp_filter check.

Implementation adds two rules per address family (IPv4/IPv6):

- mangle/OUTPUT: Save packet marks to conntrack for NEW connections
with non-zero marks in the Tailscale fwmark range (0xff0000)

- mangle/PREROUTING: Restore marks from conntrack to packets for
ESTABLISHED,RELATED connections before routing decision and rp_filter
check

The workaround is automatically enabled when UseConnmarkForRPFilter is
set in the router configuration, which happens when subnet routes are
advertised on Linux systems.

Both iptables and nftables implementations are provided, with automatic
backend detection.

Fixes #3310
Fixes #14409
Fixes #12022
Fixes #15815
Fixes #9612

Signed-off-by: Mike O'Driscoll <mikeo@tailscale.com>
This commit is contained in:
Mike O'Driscoll
2026-03-04 14:09:11 -05:00
committed by GitHub
parent dab8922fcf
commit 26ef46bf81
6 changed files with 814 additions and 12 deletions
+245
View File
@@ -521,6 +521,15 @@ type NetfilterRunner interface {
// using conntrack.
DelStatefulRule(tunname string) error
// AddConnmarkSaveRule adds conntrack marking rules that save packet marks to
// conntrack and restore them on reply packets.
// The save rule runs in mangle/OUTPUT and the restore rule in mangle/PREROUTING,
// so reply packets regain their mark before rp_filter checks, enabling proper
// routing table lookups for exit nodes and subnet routers.
AddConnmarkSaveRule() error
// DelConnmarkSaveRule removes conntrack marking rules added by AddConnmarkSaveRule.
DelConnmarkSaveRule() error
// HasIPV6 reports true if the system supports IPv6.
HasIPV6() bool
@@ -1950,6 +1959,242 @@ func (n *nftablesRunner) DelStatefulRule(tunname string) error {
return nil
}
// makeConnmarkRestoreExprs creates nftables expressions to restore mark from conntrack.
// Implements: ct state established,related ct mark & 0xff0000 != 0 meta mark set ct mark & 0xff0000
//
// The expressions are evaluated in order; a failing comparison ends evaluation
// of the rule for that packet, so the final "meta mark set" only runs for
// established/related packets whose connection carries a Tailscale mark.
//
// NOTE: "meta mark set" writes the whole packet mark from register 1, which at
// that point holds only the masked conntrack mark. Bits outside the Tailscale
// mask are therefore cleared on packets that match this rule.
func makeConnmarkRestoreExprs() []expr.Any {
	return []expr.Any{
		// Load conntrack state into register 1
		&expr.Ct{
			Register: 1,
			Key:      expr.CtKeySTATE,
		},
		// Check if state is ESTABLISHED or RELATED
		&expr.Bitwise{
			SourceRegister: 1,
			DestRegister:   1,
			Len:            4,
			Mask: nativeUint32(
				expr.CtStateBitESTABLISHED |
					expr.CtStateBitRELATED),
			Xor: nativeUint32(0),
		},
		&expr.Cmp{
			Op:       expr.CmpOpNeq,
			Register: 1,
			Data:     []byte{0, 0, 0, 0},
		},
		// Load conntrack mark into register 1
		&expr.Ct{
			Register: 1,
			Key:      expr.CtKeyMARK,
		},
		// Mask to Tailscale mark bits (0xff0000)
		&expr.Bitwise{
			SourceRegister: 1,
			DestRegister:   1,
			Len:            4,
			Mask:           getTailscaleFwmarkMask(),
			Xor:            []byte{0x00, 0x00, 0x00, 0x00},
		},
		// Check if the masked conntrack mark is non-zero. Without this check,
		// replies on connections that never carried a Tailscale mark would
		// have their packet mark overwritten with zero.
		&expr.Cmp{
			Op:       expr.CmpOpNeq,
			Register: 1,
			Data:     []byte{0, 0, 0, 0},
		},
		// Set packet mark from register 1 (still holding the masked ct mark)
		&expr.Meta{
			Key:            expr.MetaKeyMARK,
			SourceRegister: true,
			Register:       1,
		},
	}
}
// makeConnmarkSaveExprs builds the nftables expression list that copies the
// Tailscale bits of a packet's mark into the connection's conntrack mark.
// Implements: ct state new meta mark & 0xff0000 != 0 ct mark set meta mark & 0xff0000
//
// Every step works on register 1. The packet mark is loaded and masked twice:
// once to test it against zero and once more to produce the value that is
// stored in conntrack.
func makeConnmarkSaveExprs() []expr.Any {
	// loadPacketMark reads the packet mark into register 1.
	loadPacketMark := func() expr.Any {
		return &expr.Meta{
			Key:      expr.MetaKeyMARK,
			Register: 1,
		}
	}
	// keepTailscaleBits masks register 1 down to the Tailscale mark bits (0xff0000).
	keepTailscaleBits := func() expr.Any {
		return &expr.Bitwise{
			SourceRegister: 1,
			DestRegister:   1,
			Len:            4,
			Mask:           getTailscaleFwmarkMask(),
			Xor:            []byte{0x00, 0x00, 0x00, 0x00},
		}
	}
	// requireNonZero only lets the rule continue when register 1 is non-zero.
	requireNonZero := func() expr.Any {
		return &expr.Cmp{
			Op:       expr.CmpOpNeq,
			Register: 1,
			Data:     []byte{0, 0, 0, 0},
		}
	}

	saveExprs := []expr.Any{
		// ct state -> register 1
		&expr.Ct{
			Register: 1,
			Key:      expr.CtKeySTATE,
		},
		// Keep only the NEW state bit, then require it to be set.
		&expr.Bitwise{
			SourceRegister: 1,
			DestRegister:   1,
			Len:            4,
			Mask:           nativeUint32(expr.CtStateBitNEW),
			Xor:            nativeUint32(0),
		},
		requireNonZero(),
		// meta mark & 0xff0000 != 0
		loadPacketMark(),
		keepTailscaleBits(),
		requireNonZero(),
		// Recompute meta mark & 0xff0000 as the value to store.
		loadPacketMark(),
		keepTailscaleBits(),
		// ct mark set <register 1>
		&expr.Ct{
			Key:            expr.CtKeyMARK,
			SourceRegister: true,
			Register:       1,
		},
	}
	return saveExprs
}
// AddConnmarkSaveRule adds conntrack marking rules to save and restore marks.
// These rules run in mangle/PREROUTING (to restore marks from conntrack) and
// mangle/OUTPUT (to save marks to conntrack) before rp_filter checks, enabling
// proper routing table lookups for exit nodes and subnet routers.
//
// The call is idempotent per address family and per chain: each rule is
// identified by its UserData tag ("ts-connmark-restore" / "ts-connmark-save")
// and is only inserted where it is missing. A family or chain that lacks its
// rule is therefore repaired even when the other rules are already present.
//
// All lookups are done before any change is queued on the connection, so an
// error while inspecting the current ruleset returns without leaving
// half-built, unflushed messages behind. It returns an error if existing rules
// cannot be listed or if flushing the queued changes fails.
func (n *nftablesRunner) AddConnmarkSaveRule() error {
	conn := n.conn

	// pendingRule is one connmark rule that still has to be installed.
	type pendingRule struct {
		want     *nftables.Chain // chain definition, used to create the chain if absent
		existing *nftables.Chain // chain returned by getChainFromTable; nil if absent
		userData string          // tag identifying the rule
		exprs    []expr.Any
	}

	// Phase 1: read-only inspection of every (family, chain) pair.
	var pending []pendingRule
	for _, table := range n.getTables() {
		mangleTable := &nftables.Table{
			Family: table.Proto,
			Name:   "mangle",
		}
		wanted := []pendingRule{
			{
				// PREROUTING rule restores the mark from conntrack.
				want: &nftables.Chain{
					Name:     "PREROUTING",
					Table:    mangleTable,
					Type:     nftables.ChainTypeFilter,
					Hooknum:  nftables.ChainHookPrerouting,
					Priority: nftables.ChainPriorityMangle,
				},
				userData: "ts-connmark-restore",
				exprs:    makeConnmarkRestoreExprs(),
			},
			{
				// OUTPUT rule saves the mark to conntrack.
				want: &nftables.Chain{
					Name:     "OUTPUT",
					Table:    mangleTable,
					Type:     nftables.ChainTypeFilter,
					Hooknum:  nftables.ChainHookOutput,
					Priority: nftables.ChainPriorityMangle,
				},
				userData: "ts-connmark-save",
				exprs:    makeConnmarkSaveExprs(),
			},
		}

		for _, w := range wanted {
			chain, err := getChainFromTable(conn, mangleTable, w.want.Name)
			if err != nil {
				// Chain lookup failed; treat it as absent so it gets created.
				pending = append(pending, w)
				continue
			}

			rules, err := conn.GetRules(chain.Table, chain)
			if err != nil {
				// Not knowing what is installed risks inserting duplicates.
				return fmt.Errorf("list rules in mangle/%s: %w", w.want.Name, err)
			}
			present := false
			for _, rule := range rules {
				if string(rule.UserData) == w.userData {
					present = true
					break
				}
			}
			if present {
				continue
			}
			w.existing = chain
			pending = append(pending, w)
		}
	}

	if len(pending) == 0 {
		// Every rule already exists, nothing to flush.
		return nil
	}

	// Phase 2: queue the missing tables, chains and rules.
	var lastTable *nftables.Table
	for _, p := range pending {
		if p.want.Table != lastTable {
			// Get or create mangle table (once per family).
			conn.AddTable(p.want.Table)
			lastTable = p.want.Table
		}
		chain := p.existing
		if chain == nil {
			chain = conn.AddChain(p.want)
		}
		conn.InsertRule(&nftables.Rule{
			Table:    p.want.Table,
			Chain:    chain,
			Exprs:    p.exprs,
			UserData: []byte(p.userData),
		})
	}

	if err := conn.Flush(); err != nil {
		return fmt.Errorf("flush add connmark rules: %w", err)
	}
	return nil
}
// DelConnmarkSaveRule removes conntrack marking rules added by AddConnmarkSaveRule.
//
// Rules are located by their UserData tag: "ts-connmark-restore" in
// mangle/PREROUTING and "ts-connmark-save" in mangle/OUTPUT, for every table
// returned by n.getTables(). Every rule carrying the tag is removed, not just
// the first one, so duplicates do not survive.
//
// Removal is best-effort across chains: a chain that cannot be looked up is
// skipped, and a failure to list one chain does not stop the remaining chains
// from being cleaned. The first listing or flush error encountered is
// returned, so callers can tell when rules may still be installed.
func (n *nftablesRunner) DelConnmarkSaveRule() error {
	conn := n.conn

	targets := []struct {
		chainName string
		userData  string
	}{
		{"PREROUTING", "ts-connmark-restore"},
		{"OUTPUT", "ts-connmark-save"},
	}

	var firstErr error
	queued := false
	for _, table := range n.getTables() {
		mangleTable := &nftables.Table{
			Family: table.Proto,
			Name:   "mangle",
		}

		for _, target := range targets {
			chain, err := getChainFromTable(conn, mangleTable, target.chainName)
			if err != nil {
				// Chain lookup failed; there is nothing we can remove from it.
				continue
			}

			rules, err := conn.GetRules(chain.Table, chain)
			if err != nil {
				if firstErr == nil {
					firstErr = fmt.Errorf("list rules in mangle/%s: %w", target.chainName, err)
				}
				continue
			}
			for _, rule := range rules {
				if string(rule.UserData) == target.userData {
					conn.DelRule(rule)
					queued = true
				}
			}
		}
	}

	if !queued {
		// Nothing matched, so there is nothing to flush.
		return firstErr
	}

	// Only rules that were just found are queued, so a flush failure means
	// the removal really failed and must be reported.
	if err := conn.Flush(); err != nil && firstErr == nil {
		firstErr = fmt.Errorf("flush del connmark rules: %w", err)
	}
	return firstErr
}
// cleanupChain removes a jump rule from hookChainName to tsChainName, and then
// the entire chain tsChainName. Errors are logged, but attempts to remove both
// the jump rule and chain continue even if one errors.