wgengine, all: remove LazyWG, use wireguard-go callback API for on-demand peers

Replace the UAPI text protocol-based wireguard configuration with
wireguard-go's new direct callback API (SetPeerLookupFunc,
SetPeerByIPPacketFunc, RemoveMatchingPeers, SetPrivateKey).

Instead of computing a trimmed wireguard config ahead of time upon
control plane updates and pushing it via UAPI, install callbacks so
wireguard-go creates peers on demand when packets arrive. This removes
all the LazyWG trimming machinery: idle peer tracking, activity maps,
noteRecvActivity callbacks, the KeepFullWGConfig control knob, and the
ts_omit_lazywg build tag.

For incoming packets, PeerLookupFunc answers wireguard-go's questions
about unknown public keys by looking up the peer in the full config.
For outgoing packets, PeerByIPPacketFunc (installed from
LocalBackend.lookupPeerByIP) maps destination IPs to node public keys
using the existing nodeByAddr index.

Updates tailscale/corp#12345

Change-Id: I4cba80979ac49a1231d00a01fdba5f0c2af95dd8
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick
2026-04-15 00:49:12 +00:00
committed by Brad Fitzpatrick
parent b313bffbe7
commit f343b496c3
28 changed files with 354 additions and 1437 deletions
-5
View File
@@ -530,11 +530,6 @@ func (de *endpoint) noteRecvActivity(src epAddr, now mono.Time) bool {
elapsed := now.Sub(de.lastRecvWG.LoadAtomic())
if elapsed > 10*time.Second {
de.lastRecvWG.StoreAtomic(now)
if de.c.noteRecvActivity == nil {
return false
}
de.c.noteRecvActivity(de.publicKey)
return true
}
return false
+3 -32
View File
@@ -164,7 +164,6 @@ type Conn struct {
derpActiveFunc func()
idleFunc func() time.Duration // nil means unknown
testOnlyPacketListener nettype.PacketListener
noteRecvActivity func(key.NodePublic) // or nil, see Options.NoteRecvActivity
onDERPRecv func(int, key.NodePublic, []byte) bool // or nil, see Options.OnDERPRecv
netMon *netmon.Monitor // must be non-nil
health *health.Tracker // or nil
@@ -457,19 +456,6 @@ type Options struct {
// Only used by tests.
TestOnlyPacketListener nettype.PacketListener
// NoteRecvActivity, if provided, is a func for magicsock to call
// whenever it receives a packet from a peer if it's been more
// than ~10 seconds since the last one. (10 seconds is somewhat
// arbitrary; the sole user, lazy WireGuard configuration,
// just doesn't need or want it called on
// every packet, just every minute or two for WireGuard timeouts,
// and 10 seconds seems like a good trade-off between often enough
// and not too often.)
// The provided func is likely to call back into
// Conn.ParseEndpoint, which acquires Conn.mu. As such, you should
// not hold Conn.mu while calling it.
NoteRecvActivity func(key.NodePublic)
// NetMon is the network monitor to use.
// It must be non-nil.
NetMon *netmon.Monitor
@@ -648,7 +634,6 @@ func NewConn(opts Options) (*Conn, error) {
c.derpActiveFunc = opts.derpActiveFunc()
c.idleFunc = opts.IdleFunc
c.testOnlyPacketListener = opts.TestOnlyPacketListener
c.noteRecvActivity = opts.NoteRecvActivity
c.onDERPRecv = opts.OnDERPRecv
// Set up publishers and subscribers. Subscribe calls must return before
@@ -4270,16 +4255,10 @@ var _ conn.Endpoint = (*lazyEndpoint)(nil)
// InitiationMessagePublicKey implements [conn.InitiationAwareEndpoint].
// wireguard-go calls us here if we passed it a [*lazyEndpoint] for an
// initiation message, for which it might not have the relevant peer configured,
// enabling us to just-in-time configure it and note its activity via
// [*endpoint.noteRecvActivity], before it performs peer lookup and attempts
// decryption.
// initiation message, for which it might not have the relevant peer configured.
// wireguard-go's PeerLookupFunc handles on-demand peer creation.
//
// Reception of all other WireGuard message types implies pre-existing knowledge
// of the peer by wireguard-go for it to do useful work. See
// [userspaceEngine.maybeReconfigWireguardLocked] &
// [userspaceEngine.noteRecvActivity] for more details around just-in-time
// wireguard-go peer (de)configuration.
// We still update endpoint activity tracking for bestAddr management.
func (le *lazyEndpoint) InitiationMessagePublicKey(peerPublicKey [32]byte) {
pubKey := key.NodePublicFromRaw32(mem.B(peerPublicKey[:]))
if le.maybeEP != nil && pubKey.Compare(le.maybeEP.publicKey) == 0 {
@@ -4287,9 +4266,6 @@ func (le *lazyEndpoint) InitiationMessagePublicKey(peerPublicKey [32]byte) {
}
le.c.mu.Lock()
ep, ok := le.c.peerMap.endpointForNodeKey(pubKey)
// [Conn.mu] must not be held while [Conn.noteRecvActivity] is called, which
// [endpoint.noteRecvActivity] can end up calling. See
// [Options.NoteRecvActivity] docs.
le.c.mu.Unlock()
if !ok {
return
@@ -4297,11 +4273,6 @@ func (le *lazyEndpoint) InitiationMessagePublicKey(peerPublicKey [32]byte) {
now := mono.Now()
ep.lastRecvUDPAny.StoreAtomic(now)
ep.noteRecvActivity(le.src, now)
// [ep.noteRecvActivity] may end up JIT configuring the peer, but we don't
// update [peerMap] as wireguard-go hasn't decrypted the initiation
// message yet. wireguard-go will call us below in [lazyEndpoint.FromPeer]
// if it successfully decrypts the message, at which point it's safe to
// insert le.src into the [peerMap] for ep.
}
func (le *lazyEndpoint) ClearSrc() {}
+94 -127
View File
@@ -242,6 +242,25 @@ func newMagicStackWithKey(t testing.TB, logf logger.Logf, ln nettype.PacketListe
// Reconfig applies cfg to this test stack. It forwards cfg to the tun
// wrapper and the WireGuard logger, installs a destination-IP to peer-key
// lookup on the device, sets the device's private key from cfg, and then
// returns the result of wgcfg.ReconfigDevice.
func (s *magicStack) Reconfig(cfg *wgcfg.Config) error {
	s.tsTun.SetWGConfig(cfg)
	s.wgLogger.SetPeers(cfg.Peers)

	// Production code gets its PeerByIPPacketFunc from LocalBackend (via
	// Engine.SetPeerByIPPacketFunc). Tests that skip LocalBackend have no
	// such hook, so build an equivalent one here so that outbound packets
	// can be routed to a peer.
	//
	// Only single-IP AllowedIPs prefixes are indexed; wider prefixes are
	// not consulted by this lookup.
	peerKeyByAddr := make(map[netip.Addr]device.NoisePublicKey, len(cfg.Peers))
	for _, peer := range cfg.Peers {
		rawKey := peer.PublicKey.Raw32()
		for _, allowed := range peer.AllowedIPs {
			if !allowed.IsSingleIP() {
				continue
			}
			peerKeyByAddr[allowed.Addr()] = rawKey
		}
	}

	// Resolve the peer purely from the destination address; the source
	// address and packet bytes are ignored.
	lookupByDst := func(_, dst netip.Addr, _ []byte) (device.NoisePublicKey, bool) {
		peerKey, found := peerKeyByAddr[dst]
		return peerKey, found
	}
	s.dev.SetPeerByIPPacketFunc(lookupByDst)

	devKey := key.NodePrivateAs[device.NoisePrivateKey](cfg.PrivateKey)
	s.dev.SetPrivateKey(devKey)

	return wgcfg.ReconfigDevice(s.dev, cfg, s.conn.logf)
}
@@ -1442,13 +1461,8 @@ func TestDiscoStringLogRace(t *testing.T) {
}
func Test32bitAlignment(t *testing.T) {
// Need an associated conn with non-nil noteRecvActivity to
// trigger interesting work on the atomics in endpoint.
called := 0
de := endpoint{
c: &Conn{
noteRecvActivity: func(key.NodePublic) { called++ },
},
c: &Conn{},
}
if off := unsafe.Offsetof(de.lastRecvWG); off%8 != 0 {
@@ -1456,13 +1470,7 @@ func Test32bitAlignment(t *testing.T) {
}
de.noteRecvActivity(epAddr{}, mono.Now()) // verify this doesn't panic on 32-bit
if called != 1 {
t.Fatal("expected call to noteRecvActivity")
}
de.noteRecvActivity(epAddr{}, mono.Now())
if called != 1 {
t.Error("expected no second call to noteRecvActivity")
}
de.noteRecvActivity(epAddr{}, mono.Now()) // second call should be throttled
}
// newTestConn returns a new Conn.
@@ -3957,60 +3965,55 @@ func TestConn_receiveIP(t *testing.T) {
// If [*endpoint] then we expect 'got' to be the same [*endpoint]. If
// [*lazyEndpoint] and [*lazyEndpoint.maybeEP] is non-nil, we expect
// got.maybeEP to also be non-nil. Must not be reused across tests.
wantEndpointType wgconn.Endpoint
wantSize int
wantIsGeneveEncap bool
wantOk bool
wantMetricInc *clientmetric.Metric
wantNoteRecvActivityCalled bool
wantEndpointType wgconn.Endpoint
wantSize int
wantIsGeneveEncap bool
wantOk bool
wantMetricInc *clientmetric.Metric
}{
{
name: "naked-disco",
b: looksLikeNakedDisco,
ipp: netip.MustParseAddrPort("127.0.0.1:7777"),
cache: &epAddrEndpointCache{},
wantEndpointType: nil,
wantSize: 0,
wantIsGeneveEncap: false,
wantOk: false,
wantMetricInc: metricRecvDiscoBadPeer,
wantNoteRecvActivityCalled: false,
name: "naked-disco",
b: looksLikeNakedDisco,
ipp: netip.MustParseAddrPort("127.0.0.1:7777"),
cache: &epAddrEndpointCache{},
wantEndpointType: nil,
wantSize: 0,
wantIsGeneveEncap: false,
wantOk: false,
wantMetricInc: metricRecvDiscoBadPeer,
},
{
name: "geneve-encap-disco",
b: looksLikeGeneveDisco,
ipp: netip.MustParseAddrPort("127.0.0.1:7777"),
cache: &epAddrEndpointCache{},
wantEndpointType: nil,
wantSize: 0,
wantIsGeneveEncap: false,
wantOk: false,
wantMetricInc: metricRecvDiscoBadPeer,
wantNoteRecvActivityCalled: false,
name: "geneve-encap-disco",
b: looksLikeGeneveDisco,
ipp: netip.MustParseAddrPort("127.0.0.1:7777"),
cache: &epAddrEndpointCache{},
wantEndpointType: nil,
wantSize: 0,
wantIsGeneveEncap: false,
wantOk: false,
wantMetricInc: metricRecvDiscoBadPeer,
},
{
name: "STUN-binding",
b: looksLikeSTUNBinding,
ipp: netip.MustParseAddrPort("127.0.0.1:7777"),
cache: &epAddrEndpointCache{},
wantEndpointType: nil,
wantSize: 0,
wantIsGeneveEncap: false,
wantOk: false,
wantMetricInc: findMetricByName("netcheck_stun_recv_ipv4"),
wantNoteRecvActivityCalled: false,
name: "STUN-binding",
b: looksLikeSTUNBinding,
ipp: netip.MustParseAddrPort("127.0.0.1:7777"),
cache: &epAddrEndpointCache{},
wantEndpointType: nil,
wantSize: 0,
wantIsGeneveEncap: false,
wantOk: false,
wantMetricInc: findMetricByName("netcheck_stun_recv_ipv4"),
},
{
name: "naked-WireGuard-init-lazyEndpoint-empty-peerMap",
b: looksLikeNakedWireGuardInit,
ipp: netip.MustParseAddrPort("127.0.0.1:7777"),
cache: &epAddrEndpointCache{},
wantEndpointType: &lazyEndpoint{},
wantSize: len(looksLikeNakedWireGuardInit),
wantIsGeneveEncap: false,
wantOk: true,
wantMetricInc: nil,
wantNoteRecvActivityCalled: false,
name: "naked-WireGuard-init-lazyEndpoint-empty-peerMap",
b: looksLikeNakedWireGuardInit,
ipp: netip.MustParseAddrPort("127.0.0.1:7777"),
cache: &epAddrEndpointCache{},
wantEndpointType: &lazyEndpoint{},
wantSize: len(looksLikeNakedWireGuardInit),
wantIsGeneveEncap: false,
wantOk: true,
wantMetricInc: nil,
},
{
name: "naked-WireGuard-init-endpoint-matching-peerMap-entry",
@@ -4024,19 +4027,17 @@ func TestConn_receiveIP(t *testing.T) {
wantIsGeneveEncap: false,
wantOk: true,
wantMetricInc: nil,
wantNoteRecvActivityCalled: true,
},
{
name: "geneve-WireGuard-init-lazyEndpoint-empty-peerMap",
b: looksLikeGeneveWireGuardInit,
ipp: netip.MustParseAddrPort("127.0.0.1:7777"),
cache: &epAddrEndpointCache{},
wantEndpointType: &lazyEndpoint{},
wantSize: len(looksLikeGeneveWireGuardInit) - packet.GeneveFixedHeaderLength,
wantIsGeneveEncap: true,
wantOk: true,
wantMetricInc: nil,
wantNoteRecvActivityCalled: false,
name: "geneve-WireGuard-init-lazyEndpoint-empty-peerMap",
b: looksLikeGeneveWireGuardInit,
ipp: netip.MustParseAddrPort("127.0.0.1:7777"),
cache: &epAddrEndpointCache{},
wantEndpointType: &lazyEndpoint{},
wantSize: len(looksLikeGeneveWireGuardInit) - packet.GeneveFixedHeaderLength,
wantIsGeneveEncap: true,
wantOk: true,
wantMetricInc: nil,
},
{
name: "geneve-WireGuard-init-lazyEndpoint-matching-peerMap-activity-noted",
@@ -4048,11 +4049,10 @@ func TestConn_receiveIP(t *testing.T) {
wantEndpointType: &lazyEndpoint{
maybeEP: newPeerMapInsertableEndpoint(0),
},
wantSize: len(looksLikeGeneveWireGuardInit) - packet.GeneveFixedHeaderLength,
wantIsGeneveEncap: true,
wantOk: true,
wantMetricInc: nil,
wantNoteRecvActivityCalled: true,
wantSize: len(looksLikeGeneveWireGuardInit) - packet.GeneveFixedHeaderLength,
wantIsGeneveEncap: true,
wantOk: true,
wantMetricInc: nil,
},
{
name: "geneve-WireGuard-init-lazyEndpoint-matching-peerMap-no-activity-noted",
@@ -4064,17 +4064,15 @@ func TestConn_receiveIP(t *testing.T) {
wantEndpointType: &lazyEndpoint{
maybeEP: newPeerMapInsertableEndpoint(mono.Now().Add(time.Hour * 24)),
},
wantSize: len(looksLikeGeneveWireGuardInit) - packet.GeneveFixedHeaderLength,
wantIsGeneveEncap: true,
wantOk: true,
wantMetricInc: nil,
wantNoteRecvActivityCalled: false,
wantSize: len(looksLikeGeneveWireGuardInit) - packet.GeneveFixedHeaderLength,
wantIsGeneveEncap: true,
wantOk: true,
wantMetricInc: nil,
},
// TODO(jwhited): verify cache.de is used when conditions permit
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
noteRecvActivityCalled := false
metricBefore := int64(0)
if tt.wantMetricInc != nil {
metricBefore = tt.wantMetricInc.Value()
@@ -4087,9 +4085,6 @@ func TestConn_receiveIP(t *testing.T) {
peerMap: newPeerMap(),
}
c.havePrivateKey.Store(true)
c.noteRecvActivity = func(public key.NodePublic) {
noteRecvActivityCalled = true
}
var counts netlogtype.CountsByConnection
c.SetConnectionCounter(counts.Add)
@@ -4144,10 +4139,6 @@ func TestConn_receiveIP(t *testing.T) {
if tt.wantMetricInc != nil && tt.wantMetricInc.Value() != metricBefore+1 {
t.Errorf("receiveIP() metric %v not incremented", tt.wantMetricInc.Name())
}
if tt.wantNoteRecvActivityCalled != noteRecvActivityCalled {
t.Errorf("receiveIP() noteRecvActivityCalled = %v, want %v", noteRecvActivityCalled, tt.wantNoteRecvActivityCalled)
}
if tt.cache.de != nil {
switch ep := got.(type) {
case *endpoint:
@@ -4199,34 +4190,29 @@ func TestConn_receiveIP(t *testing.T) {
func Test_lazyEndpoint_InitiationMessagePublicKey(t *testing.T) {
tests := []struct {
name string
callWithPeerMapKey bool
maybeEPMatchingKey bool
wantNoteRecvActivityCalled bool
name string
callWithPeerMapKey bool
maybeEPMatchingKey bool
}{
{
name: "noteRecvActivity-called",
callWithPeerMapKey: true,
maybeEPMatchingKey: false,
wantNoteRecvActivityCalled: true,
name: "noteRecvActivity-called",
callWithPeerMapKey: true,
maybeEPMatchingKey: false,
},
{
name: "maybeEP-early-return",
callWithPeerMapKey: true,
maybeEPMatchingKey: true,
wantNoteRecvActivityCalled: false,
name: "maybeEP-early-return",
callWithPeerMapKey: true,
maybeEPMatchingKey: true,
},
{
name: "not-in-peerMap-early-return",
callWithPeerMapKey: false,
maybeEPMatchingKey: false,
wantNoteRecvActivityCalled: false,
name: "not-in-peerMap-early-return",
callWithPeerMapKey: false,
maybeEPMatchingKey: false,
},
{
name: "not-in-peerMap-maybeEP-early-return",
callWithPeerMapKey: false,
maybeEPMatchingKey: true,
wantNoteRecvActivityCalled: false,
name: "not-in-peerMap-maybeEP-early-return",
callWithPeerMapKey: false,
maybeEPMatchingKey: true,
},
}
for _, tt := range tests {
@@ -4239,19 +4225,7 @@ func Test_lazyEndpoint_InitiationMessagePublicKey(t *testing.T) {
key: key.NewDisco().Public(),
})
var noteRecvActivityCalledFor key.NodePublic
conn := newConn(t.Logf)
conn.noteRecvActivity = func(public key.NodePublic) {
// wireguard-go will call into ParseEndpoint if the "real"
// noteRecvActivity ends up JIT configuring the peer. Mimic that
// to ensure there are no deadlocks around conn.mu.
// See tailscale/tailscale#16651 & http://go/corp#30836
_, err := conn.ParseEndpoint(ep.publicKey.UntypedHexString())
if err != nil {
t.Fatalf("ParseEndpoint() err: %v", err)
}
noteRecvActivityCalledFor = public
}
ep.c = conn
var pubKey [32]byte
@@ -4267,13 +4241,6 @@ func Test_lazyEndpoint_InitiationMessagePublicKey(t *testing.T) {
le.maybeEP = ep
}
le.InitiationMessagePublicKey(pubKey)
want := key.NodePublic{}
if tt.wantNoteRecvActivityCalled {
want = ep.publicKey
}
if noteRecvActivityCalledFor.Compare(want) != 0 {
t.Fatalf("noteRecvActivityCalledFor = %v, want %v", noteRecvActivityCalledFor, want)
}
})
}
}