util/linuxfw,wgengine/router: add connmark rules for rp_filter workaround (#18860)
When a Linux system acts as an exit node or subnet router with strict reverse path filtering (rp_filter=1), reply packets may be dropped because they fail the RPF check. Reply packets arrive on the WAN interface but the routing table indicates they should have arrived on the Tailscale interface, causing the kernel to drop them. This adds firewall rules in the mangle table to save outbound packet marks to conntrack and restore them on reply packets before the routing decision. When reply packets have their marks restored, the kernel uses the correct routing table (based on the mark) and the packets pass the rp_filter check. Implementation adds two rules per address family (IPv4/IPv6): - mangle/OUTPUT: Save packet marks to conntrack for NEW connections with non-zero marks in the Tailscale fwmark range (0xff0000) - mangle/PREROUTING: Restore marks from conntrack to packets for ESTABLISHED,RELATED connections before routing decision and rp_filter check The workaround is automatically enabled when UseConnmarkForRPFilter is set in the router configuration, which happens when subnet routes are advertised on Linux systems. Both iptables and nftables implementations are provided, with automatic backend detection. Fixes #3310 Fixes #14409 Fixes #12022 Fixes #15815 Fixes #9612 Signed-off-by: Mike O'Driscoll <mikeo@tailscale.com>
This commit is contained in:
@@ -521,6 +521,15 @@ type NetfilterRunner interface {
|
||||
// using conntrack.
|
||||
DelStatefulRule(tunname string) error
|
||||
|
||||
// AddConnmarkSaveRule adds conntrack marking rules to save marks from packets.
|
||||
// These rules run in mangle/PREROUTING and mangle/OUTPUT to mark connections
|
||||
// and restore marks on reply packets before rp_filter checks, enabling proper
|
||||
// routing table lookups for exit nodes and subnet routers.
|
||||
AddConnmarkSaveRule() error
|
||||
|
||||
// DelConnmarkSaveRule removes conntrack marking rules added by AddConnmarkSaveRule.
|
||||
DelConnmarkSaveRule() error
|
||||
|
||||
// HasIPV6 reports true if the system supports IPv6.
|
||||
HasIPV6() bool
|
||||
|
||||
@@ -1950,6 +1959,242 @@ func (n *nftablesRunner) DelStatefulRule(tunname string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// makeConnmarkRestoreExprs creates nftables expressions to restore mark from conntrack.
|
||||
// Implements: ct state established,related ct mark & 0xff0000 != 0 meta mark set ct mark & 0xff0000
|
||||
func makeConnmarkRestoreExprs() []expr.Any {
|
||||
return []expr.Any{
|
||||
// Load conntrack state into register 1
|
||||
&expr.Ct{
|
||||
Register: 1,
|
||||
Key: expr.CtKeySTATE,
|
||||
},
|
||||
// Check if state is ESTABLISHED or RELATED
|
||||
&expr.Bitwise{
|
||||
SourceRegister: 1,
|
||||
DestRegister: 1,
|
||||
Len: 4,
|
||||
Mask: nativeUint32(
|
||||
expr.CtStateBitESTABLISHED |
|
||||
expr.CtStateBitRELATED),
|
||||
Xor: nativeUint32(0),
|
||||
},
|
||||
&expr.Cmp{
|
||||
Op: expr.CmpOpNeq,
|
||||
Register: 1,
|
||||
Data: []byte{0, 0, 0, 0},
|
||||
},
|
||||
// Load conntrack mark into register 1
|
||||
&expr.Ct{
|
||||
Register: 1,
|
||||
Key: expr.CtKeyMARK,
|
||||
},
|
||||
// Mask to Tailscale mark bits (0xff0000)
|
||||
&expr.Bitwise{
|
||||
SourceRegister: 1,
|
||||
DestRegister: 1,
|
||||
Len: 4,
|
||||
Mask: getTailscaleFwmarkMask(),
|
||||
Xor: []byte{0x00, 0x00, 0x00, 0x00},
|
||||
},
|
||||
// Set packet mark from register 1
|
||||
&expr.Meta{
|
||||
Key: expr.MetaKeyMARK,
|
||||
SourceRegister: true,
|
||||
Register: 1,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// makeConnmarkSaveExprs creates nftables expressions to save mark to conntrack.
|
||||
// Implements: ct state new meta mark & 0xff0000 != 0 ct mark set meta mark & 0xff0000
|
||||
func makeConnmarkSaveExprs() []expr.Any {
|
||||
return []expr.Any{
|
||||
// Load conntrack state into register 1
|
||||
&expr.Ct{
|
||||
Register: 1,
|
||||
Key: expr.CtKeySTATE,
|
||||
},
|
||||
// Check if state is NEW
|
||||
&expr.Bitwise{
|
||||
SourceRegister: 1,
|
||||
DestRegister: 1,
|
||||
Len: 4,
|
||||
Mask: nativeUint32(expr.CtStateBitNEW),
|
||||
Xor: nativeUint32(0),
|
||||
},
|
||||
&expr.Cmp{
|
||||
Op: expr.CmpOpNeq,
|
||||
Register: 1,
|
||||
Data: []byte{0, 0, 0, 0},
|
||||
},
|
||||
// Load packet mark into register 1
|
||||
&expr.Meta{
|
||||
Key: expr.MetaKeyMARK,
|
||||
Register: 1,
|
||||
},
|
||||
// Mask to Tailscale mark bits (0xff0000)
|
||||
&expr.Bitwise{
|
||||
SourceRegister: 1,
|
||||
DestRegister: 1,
|
||||
Len: 4,
|
||||
Mask: getTailscaleFwmarkMask(),
|
||||
Xor: []byte{0x00, 0x00, 0x00, 0x00},
|
||||
},
|
||||
// Check if mark is non-zero
|
||||
&expr.Cmp{
|
||||
Op: expr.CmpOpNeq,
|
||||
Register: 1,
|
||||
Data: []byte{0, 0, 0, 0},
|
||||
},
|
||||
// Load packet mark again for saving
|
||||
&expr.Meta{
|
||||
Key: expr.MetaKeyMARK,
|
||||
Register: 1,
|
||||
},
|
||||
// Mask again
|
||||
&expr.Bitwise{
|
||||
SourceRegister: 1,
|
||||
DestRegister: 1,
|
||||
Len: 4,
|
||||
Mask: getTailscaleFwmarkMask(),
|
||||
Xor: []byte{0x00, 0x00, 0x00, 0x00},
|
||||
},
|
||||
// Set conntrack mark from register 1
|
||||
&expr.Ct{
|
||||
Key: expr.CtKeyMARK,
|
||||
SourceRegister: true,
|
||||
Register: 1,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// AddConnmarkSaveRule adds conntrack marking rules to save and restore marks.
|
||||
// These rules run in mangle/PREROUTING (to restore marks from conntrack) and
|
||||
// mangle/OUTPUT (to save marks to conntrack) before rp_filter checks, enabling
|
||||
// proper routing table lookups for exit nodes and subnet routers.
|
||||
func (n *nftablesRunner) AddConnmarkSaveRule() error {
|
||||
conn := n.conn
|
||||
|
||||
// Check if rules already exist (idempotency)
|
||||
for _, table := range n.getTables() {
|
||||
mangleTable := &nftables.Table{
|
||||
Family: table.Proto,
|
||||
Name: "mangle",
|
||||
}
|
||||
|
||||
// Check PREROUTING chain for restore rule
|
||||
preroutingChain, err := getChainFromTable(conn, mangleTable, "PREROUTING")
|
||||
if err == nil {
|
||||
rules, _ := conn.GetRules(preroutingChain.Table, preroutingChain)
|
||||
for _, rule := range rules {
|
||||
if string(rule.UserData) == "ts-connmark-restore" {
|
||||
// Rules already exist, skip adding
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add rules for both IPv4 and IPv6
|
||||
for _, table := range n.getTables() {
|
||||
// Get or create mangle table
|
||||
mangleTable := &nftables.Table{
|
||||
Family: table.Proto,
|
||||
Name: "mangle",
|
||||
}
|
||||
conn.AddTable(mangleTable)
|
||||
|
||||
// Get or create PREROUTING chain
|
||||
preroutingChain, err := getChainFromTable(conn, mangleTable, "PREROUTING")
|
||||
if err != nil {
|
||||
// Chain doesn't exist, create it
|
||||
preroutingChain = conn.AddChain(&nftables.Chain{
|
||||
Name: "PREROUTING",
|
||||
Table: mangleTable,
|
||||
Type: nftables.ChainTypeFilter,
|
||||
Hooknum: nftables.ChainHookPrerouting,
|
||||
Priority: nftables.ChainPriorityMangle,
|
||||
})
|
||||
}
|
||||
|
||||
// Add PREROUTING rule to restore mark from conntrack
|
||||
conn.InsertRule(&nftables.Rule{
|
||||
Table: mangleTable,
|
||||
Chain: preroutingChain,
|
||||
Exprs: makeConnmarkRestoreExprs(),
|
||||
UserData: []byte("ts-connmark-restore"),
|
||||
})
|
||||
|
||||
// Get or create OUTPUT chain
|
||||
outputChain, err := getChainFromTable(conn, mangleTable, "OUTPUT")
|
||||
if err != nil {
|
||||
// Chain doesn't exist, create it
|
||||
outputChain = conn.AddChain(&nftables.Chain{
|
||||
Name: "OUTPUT",
|
||||
Table: mangleTable,
|
||||
Type: nftables.ChainTypeFilter,
|
||||
Hooknum: nftables.ChainHookOutput,
|
||||
Priority: nftables.ChainPriorityMangle,
|
||||
})
|
||||
}
|
||||
|
||||
// Add OUTPUT rule to save mark to conntrack
|
||||
conn.InsertRule(&nftables.Rule{
|
||||
Table: mangleTable,
|
||||
Chain: outputChain,
|
||||
Exprs: makeConnmarkSaveExprs(),
|
||||
UserData: []byte("ts-connmark-save"),
|
||||
})
|
||||
}
|
||||
|
||||
if err := conn.Flush(); err != nil {
|
||||
return fmt.Errorf("flush add connmark rules: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// DelConnmarkSaveRule removes conntrack marking rules added by AddConnmarkSaveRule.
|
||||
func (n *nftablesRunner) DelConnmarkSaveRule() error {
|
||||
conn := n.conn
|
||||
|
||||
for _, table := range n.getTables() {
|
||||
mangleTable := &nftables.Table{
|
||||
Family: table.Proto,
|
||||
Name: "mangle",
|
||||
}
|
||||
|
||||
// Remove PREROUTING rule - look for restore-mark rule by UserData
|
||||
preroutingChain, err := getChainFromTable(conn, mangleTable, "PREROUTING")
|
||||
if err == nil {
|
||||
rules, _ := conn.GetRules(preroutingChain.Table, preroutingChain)
|
||||
for _, rule := range rules {
|
||||
if string(rule.UserData) == "ts-connmark-restore" {
|
||||
conn.DelRule(rule)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove OUTPUT rule - look for save-mark rule by UserData
|
||||
outputChain, err := getChainFromTable(conn, mangleTable, "OUTPUT")
|
||||
if err == nil {
|
||||
rules, _ := conn.GetRules(outputChain.Table, outputChain)
|
||||
for _, rule := range rules {
|
||||
if string(rule.UserData) == "ts-connmark-save" {
|
||||
conn.DelRule(rule)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ignore errors during deletion - rules might not exist
|
||||
conn.Flush()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupChain removes a jump rule from hookChainName to tsChainName, and then
|
||||
// the entire chain tsChainName. Errors are logged, but attempts to remove both
|
||||
// the jump rule and chain continue even if one errors.
|
||||
|
||||
Reference in New Issue
Block a user