magicsock, ipnlocal: revert eventbus-based node/filter updates, remove Synchronize hack

Restore synchronous method calls from LocalBackend to magicsock.Conn
for node views, filter, and delta mutations. The eventbus delivery
introduced in 8e6f63cf1 was invalid for these updates because
subsequent operations in the same call chain depend on magicsock
already having the current state. The Synchronize/settleEventBus
workaround was fragile and kept requiring more workarounds and
introducing new mystery bugs.

Since eventbus was added, we've since learned more about when to use
eventbus, and this wasn't one of the cases.

We can take another swing at using eventbus for netmap changes in a
future change.

Fixes #16369
Updates #18575 (likely fixes)

Change-Id: I79057cc9259993368bb1e350ff0e073adf6b9a8f
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick
2026-02-08 18:07:33 +00:00
committed by Brad Fitzpatrick
parent 086968c15b
commit dc1d811d48
5 changed files with 95 additions and 217 deletions
+14 -22
View File
@@ -1562,22 +1562,6 @@ func (b *LocalBackend) GetFilterForTest() *filter.Filter {
return nb.filterAtomic.Load()
}
func (b *LocalBackend) settleEventBus() {
// The move to eventbus made some things racy that
// weren't before so we have to wait for it to all be settled
// before we call certain things.
// See https://github.com/tailscale/tailscale/issues/16369
// But we can't do this while holding b.mu without deadlocks,
// (https://github.com/tailscale/tailscale/pull/17804#issuecomment-3514426485) so
// now we just do it in lots of places before acquiring b.mu.
// Is this winning??
if b.sys != nil {
if ms, ok := b.sys.MagicSock.GetOK(); ok {
ms.Synchronize()
}
}
}
// SetControlClientStatus is the callback invoked by the control client whenever it posts a new status.
// Among other things, this is where we update the netmap, packet filters, DNS and DERP maps.
func (b *LocalBackend) SetControlClientStatus(c controlclient.Client, st controlclient.Status) {
@@ -2115,16 +2099,16 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo
}
}()
// Gross. See https://github.com/tailscale/tailscale/issues/16369
b.settleEventBus()
defer b.settleEventBus()
b.mu.Lock()
defer b.mu.Unlock()
cn := b.currentNode()
cn.UpdateNetmapDelta(muts)
if ms, ok := b.sys.MagicSock.GetOK(); ok {
ms.UpdateNetmapDelta(muts)
}
// If auto exit nodes are enabled and our exit node went offline,
// we need to schedule picking a new one.
// TODO(nickkhyl): move the auto exit node logic to a feature package.
@@ -2440,7 +2424,6 @@ func (b *LocalBackend) initOnce() {
// actually a supported operation (it should be, but it's very unclear
// from the following whether or not that is a safe transition).
func (b *LocalBackend) Start(opts ipn.Options) error {
defer b.settleEventBus() // with b.mu unlocked
b.mu.Lock()
defer b.mu.Unlock()
return b.startLocked(opts)
@@ -2936,6 +2919,9 @@ func packetFilterPermitsUnlockedNodes(peers map[tailcfg.NodeID]tailcfg.NodeView,
func (b *LocalBackend) setFilter(f *filter.Filter) {
b.currentNode().setFilter(f)
b.e.SetFilter(f)
if ms, ok := b.sys.MagicSock.GetOK(); ok {
ms.SetFilter(f)
}
}
var removeFromDefaultRoute = []netip.Prefix{
@@ -4352,7 +4338,6 @@ func (b *LocalBackend) EditPrefsAs(mp *ipn.MaskedPrefs, actor ipnauth.Actor) (ip
if mp.SetsInternal() {
return ipn.PrefsView{}, errors.New("can't set Internal fields")
}
defer b.settleEventBus()
b.mu.Lock()
defer b.mu.Unlock()
@@ -6264,6 +6249,13 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) {
login = cmp.Or(profileFromView(nm.UserProfiles[nm.User()]).LoginName, "<missing-profile>")
}
b.currentNode().SetNetMap(nm)
if ms, ok := b.sys.MagicSock.GetOK(); ok {
if nm != nil {
ms.SetNetworkMap(nm.SelfNode, nm.Peers)
} else {
ms.SetNetworkMap(tailcfg.NodeView{}, nil)
}
}
if login != b.activeLogin {
b.logf("active login: %v", login)
b.activeLogin = login
-18
View File
@@ -31,7 +31,6 @@ import (
"tailscale.com/util/mak"
"tailscale.com/util/slicesx"
"tailscale.com/wgengine/filter"
"tailscale.com/wgengine/magicsock"
)
// nodeBackend is node-specific [LocalBackend] state. It is usually the current node.
@@ -79,9 +78,6 @@ type nodeBackend struct {
// initialized once and immutable
eventClient *eventbus.Client
filterPub *eventbus.Publisher[magicsock.FilterUpdate]
nodeViewsPub *eventbus.Publisher[magicsock.NodeViewsUpdate]
nodeMutsPub *eventbus.Publisher[magicsock.NodeMutationsUpdate]
derpMapViewPub *eventbus.Publisher[tailcfg.DERPMapView]
// TODO(nickkhyl): maybe use sync.RWMutex?
@@ -122,11 +118,7 @@ func newNodeBackend(ctx context.Context, logf logger.Logf, bus *eventbus.Bus) *n
// Default filter blocks everything and logs nothing.
noneFilter := filter.NewAllowNone(logger.Discard, &netipx.IPSet{})
nb.filterAtomic.Store(noneFilter)
nb.filterPub = eventbus.Publish[magicsock.FilterUpdate](nb.eventClient)
nb.nodeViewsPub = eventbus.Publish[magicsock.NodeViewsUpdate](nb.eventClient)
nb.nodeMutsPub = eventbus.Publish[magicsock.NodeMutationsUpdate](nb.eventClient)
nb.derpMapViewPub = eventbus.Publish[tailcfg.DERPMapView](nb.eventClient)
nb.filterPub.Publish(magicsock.FilterUpdate{Filter: nb.filterAtomic.Load()})
return nb
}
@@ -436,15 +428,11 @@ func (nb *nodeBackend) SetNetMap(nm *netmap.NetworkMap) {
nb.netMap = nm
nb.updateNodeByAddrLocked()
nb.updatePeersLocked()
nv := magicsock.NodeViewsUpdate{}
if nm != nil {
nv.SelfNode = nm.SelfNode
nv.Peers = nm.Peers
nb.derpMapViewPub.Publish(nm.DERPMap.View())
} else {
nb.derpMapViewPub.Publish(tailcfg.DERPMapView{})
}
nb.nodeViewsPub.Publish(nv)
}
func (nb *nodeBackend) updateNodeByAddrLocked() {
@@ -520,9 +508,6 @@ func (nb *nodeBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo
// call (e.g. its endpoints + online status both change)
var mutableNodes map[tailcfg.NodeID]*tailcfg.Node
update := magicsock.NodeMutationsUpdate{
Mutations: make([]netmap.NodeMutation, 0, len(muts)),
}
for _, m := range muts {
n, ok := mutableNodes[m.NodeIDBeingMutated()]
if !ok {
@@ -533,14 +518,12 @@ func (nb *nodeBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo
}
n = nv.AsStruct()
mak.Set(&mutableNodes, nv.ID(), n)
update.Mutations = append(update.Mutations, m)
}
m.Apply(n)
}
for nid, n := range mutableNodes {
nb.peers[nid] = n.View()
}
nb.nodeMutsPub.Publish(update)
return true
}
@@ -562,7 +545,6 @@ func (nb *nodeBackend) filter() *filter.Filter {
func (nb *nodeBackend) setFilter(f *filter.Filter) {
nb.filterAtomic.Store(f)
nb.filterPub.Publish(magicsock.FilterUpdate{Filter: f})
}
func (nb *nodeBackend) dnsConfigForNetmap(prefs ipn.PrefsView, selfExpired bool, versionOS string) *dns.Config {
-5
View File
@@ -1600,11 +1600,6 @@ func TestEngineReconfigOnStateChange(t *testing.T) {
tt.steps(t, lb, cc)
}
// TODO(bradfitz): this whole event bus settling thing
// should be unnecessary once the bogus uses of eventbus
// are removed. (https://github.com/tailscale/tailscale/issues/16369)
lb.settleEventBus()
if gotState := lb.State(); gotState != tt.wantState {
t.Errorf("State: got %v; want %v", gotState, tt.wantState)
}