ipn/ipnlocal: signal nodeBackend readiness and shutdown

We update LocalBackend to shut down the current nodeBackend
when switching to a different node, and to mark the new node's
nodeBackend as ready when the switch completes.

Updates tailscale/corp#28014
Updates tailscale/corp#29543
Updates #12614

Signed-off-by: Nick Khyl <nickk@tailscale.com>
This commit is contained in:
Nick Khyl
2025-06-13 12:51:40 -05:00
committed by Nick Khyl
parent fe391d5694
commit 733bfaeffe
3 changed files with 230 additions and 16 deletions
+32 -11
View File
@@ -168,6 +168,17 @@ type watchSession struct {
var metricCaptivePortalDetected = clientmetric.NewCounter("captiveportal_detected")
var (
// errShutdown indicates that the [LocalBackend.Shutdown] was called.
errShutdown = errors.New("shutting down")
// errNodeContextChanged indicates that [LocalBackend] has switched
// to a different [localNodeContext], usually due to a profile change.
// It is used as a context cancellation cause for the old context
// and can be returned when an operation is performed on it.
errNodeContextChanged = errors.New("profile changed")
)
// LocalBackend is the glue between the major pieces of the Tailscale
// network software: the cloud control plane (via controlclient), the
// network data plane (via wgengine), and the user-facing UIs and CLIs
@@ -180,11 +191,11 @@ var metricCaptivePortalDetected = clientmetric.NewCounter("captiveportal_detecte
// state machine generates events back out to zero or more components.
type LocalBackend struct {
// Elements that are thread-safe or constant after construction.
ctx context.Context // canceled by [LocalBackend.Shutdown]
ctxCancel context.CancelFunc // cancels ctx
logf logger.Logf // general logging
keyLogf logger.Logf // for printing list of peers on change
statsLogf logger.Logf // for printing peers stats on change
ctx context.Context // canceled by [LocalBackend.Shutdown]
ctxCancel context.CancelCauseFunc // cancels ctx
logf logger.Logf // general logging
keyLogf logger.Logf // for printing list of peers on change
statsLogf logger.Logf // for printing peers stats on change
sys *tsd.System
health *health.Tracker // always non-nil
metrics metrics
@@ -463,7 +474,7 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
envknob.LogCurrent(logf)
ctx, cancel := context.WithCancel(context.Background())
ctx, cancel := context.WithCancelCause(context.Background())
clock := tstime.StdClock{}
// Until we transition to a Running state, use a canceled context for
@@ -503,7 +514,10 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
captiveCancel: nil, // so that we start checkCaptivePortalLoop when Running
needsCaptiveDetection: make(chan bool),
}
b.currentNodeAtomic.Store(newNodeBackend())
nb := newNodeBackend(ctx)
b.currentNodeAtomic.Store(nb)
nb.ready()
mConn.SetNetInfoCallback(b.setNetInfo)
if sys.InitialConfig != nil {
@@ -586,8 +600,10 @@ func (b *LocalBackend) currentNode() *nodeBackend {
return v
}
// Auto-init one in tests for LocalBackend created without the NewLocalBackend constructor...
v := newNodeBackend()
b.currentNodeAtomic.CompareAndSwap(nil, v)
v := newNodeBackend(cmp.Or(b.ctx, context.Background()))
if b.currentNodeAtomic.CompareAndSwap(nil, v) {
v.ready()
}
return b.currentNodeAtomic.Load()
}
@@ -1089,8 +1105,9 @@ func (b *LocalBackend) Shutdown() {
if cc != nil {
cc.Shutdown()
}
b.ctxCancel(errShutdown)
b.currentNode().shutdown(errShutdown)
extHost.Shutdown()
b.ctxCancel()
b.e.Close()
<-b.e.Done()
b.awaitNoGoroutinesInTest()
@@ -6992,7 +7009,11 @@ func (b *LocalBackend) resetForProfileChangeLockedOnEntry(unlock unlockOnce) err
// down, so no need to do any work.
return nil
}
b.currentNodeAtomic.Store(newNodeBackend())
newNode := newNodeBackend(b.ctx)
if oldNode := b.currentNodeAtomic.Swap(newNode); oldNode != nil {
oldNode.shutdown(errNodeContextChanged)
}
defer newNode.ready()
b.setNetMapLocked(nil) // Reset netmap.
b.updateFilterLocked(ipn.PrefsView{})
// Reset the NetworkMap in the engine