health: avoid publishing health.Change when warnable visibility remains unchanged

Warnables with a non-zero TimeToVisible are only published on the eventbus when
they remain unhealthy long enough to become visible.

However, we still publish a health.Change when a warning that was never visible
(and was never published to the eventbus) becomes healthy.

This PR fixes that and reduces churn when there is no actual state change. In
particular, it avoids unnecessary IPN bus notifications sent to GUI/CLI clients,
captive portal detection, etc.

Updates tailscale/corp#39759 (noticed while working on it)

Signed-off-by: Nick Khyl <nickk@tailscale.com>
This commit is contained in:
Nick Khyl
2026-05-13 16:51:18 -05:00
committed by Nick Khyl
parent 41286c2b56
commit 4d68493144
2 changed files with 109 additions and 48 deletions
+23 -5
View File
@@ -492,7 +492,12 @@ func (t *Tracker) SetHealthy(w *Warnable) {
}
func (t *Tracker) setHealthyLocked(w *Warnable) {
if !buildfeatures.HasHealth || t.warnableVal[w] == nil {
if !buildfeatures.HasHealth {
return
}
ws := t.warnableVal[w]
if ws == nil {
// Nothing to remove
return
}
@@ -501,15 +506,28 @@ func (t *Tracker) setHealthyLocked(w *Warnable) {
// Stop any pending visiblity timers for this Warnable
if canc, ok := t.pendingVisibleTimers[w]; ok {
// We removed the warningState for this Warnable,
// and we hold the lock, so even if the timer callback
// has already started, it won't find a warningState
// for this Warnable and won't publish any changes.
canc.Stop()
delete(t.pendingVisibleTimers, w)
}
change := Change{
WarnableChanged: true,
Warnable: w,
// Only publish a change if the Warnable was unhealthy long
// enough to become visible to the user. Otherwise, it would
// not have been published as unhealthy, so there is no need
// to publish it as healthy. This prevents eventbus (and by
// extension the IPN bus) churn for Warnables that are marked
// unhealthy and then healthy again. Notably, this includes
// warnables touched by [Tracker.updateBuiltinWarnablesLocked].
if w.IsVisible(ws, t.now) {
change := Change{
WarnableChanged: true,
Warnable: w,
}
t.changePub.Publish(change)
}
t.changePub.Publish(change)
}
// notifyWatchersControlChangedLocked calls each watcher to signal that control
+86 -43
View File
@@ -271,47 +271,91 @@ func TestSetUnhealthyWithTimeToVisible(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(*testing.T) {
bus := eventbustest.NewBus(t)
ht := NewTracker(bus)
mw := Register(&Warnable{
Code: "test-warnable-3-secs-to-visible",
Title: "Test Warnable with 3 seconds to visible",
Text: StaticMessage("Hello world"),
TimeToVisible: 2 * time.Second,
ImpactsConnectivity: true,
synctest.Test(t, func(t *testing.T) {
clock := tstest.NewClock(tstest.ClockOpts{
Start: time.Unix(123, 0),
FollowRealTime: false,
})
bus := eventbustest.NewBus(t)
ht := NewTracker(bus)
ht.testClock = clock
mw := Register(&Warnable{
Code: "test-warnable-3-secs-to-visible",
Title: "Test Warnable with 3 seconds to visible",
Text: StaticMessage("Hello world"),
TimeToVisible: 2 * time.Second,
ImpactsConnectivity: true,
})
defer unregister(mw)
becameUnhealthy := make(chan struct{})
becameHealthy := make(chan struct{})
watchFunc := func(c Change) {
w := c.Warnable
us := c.UnhealthyState
if w != mw {
t.Fatalf("watcherFunc was called, but with an unexpected Warnable: %v, want: %v", w, w)
}
if us != nil {
becameUnhealthy <- struct{}{}
} else {
becameHealthy <- struct{}{}
}
}
tt.preFunc(t, ht, bus, watchFunc)
ht.SetUnhealthy(mw, Args{ArgError: "Hello world"})
// Advance time by half of the TimeToVisible duration.
clock.Advance(mw.TimeToVisible / 2)
select {
case <-becameUnhealthy:
// Test failed because the watcher got notified of an unhealthy state
t.Fatalf("watcherFunc was called with an unhealthy state")
case <-becameHealthy:
// Test failed because the watcher got of a healthy state
t.Fatalf("watcherFunc was called with a healthy state")
default:
// As expected, watcherFunc still had not been called
// after mw.TimeToVisible / 2.
}
// Advance time to get past the the TimeToVisible duration.
// The watcher should be notified of the unhealthy state.
clock.Advance(mw.TimeToVisible/2 + 1)
<-becameUnhealthy
// Reset the warnable to neutral / healthy state before
// the next part of the test.
ht.SetHealthy(mw)
<-becameHealthy
// Mark the warnable unhealthy and then immediately healthy
// before the TimeToVisible duration elapses.
// The watcher should not be notified of either change
// because the warnable never became visible.
ht.SetUnhealthy(mw, Args{ArgError: "Hello world"})
ht.SetHealthy(mw)
// Advance to get past the the TimeToVisible delay.
clock.Advance(mw.TimeToVisible * 2)
synctest.Wait()
select {
case <-becameUnhealthy:
// Test failed because the watcher got notified of an unhealthy state
t.Fatalf("watcherFunc was called with an unhealthy state")
case <-becameHealthy:
// Test failed because the watcher got of a healthy state
t.Fatalf("watcherFunc was called with a healthy state")
default:
// As expected, watcherFunc was not called after marking
// the warnable healthy again as it never became visible.
}
})
becameUnhealthy := make(chan struct{})
becameHealthy := make(chan struct{})
watchFunc := func(c Change) {
w := c.Warnable
us := c.UnhealthyState
if w != mw {
t.Fatalf("watcherFunc was called, but with an unexpected Warnable: %v, want: %v", w, w)
}
if us != nil {
becameUnhealthy <- struct{}{}
} else {
becameHealthy <- struct{}{}
}
}
tt.preFunc(t, ht, bus, watchFunc)
ht.SetUnhealthy(mw, Args{ArgError: "Hello world"})
select {
case <-becameUnhealthy:
// Test failed because the watcher got notified of an unhealthy state
t.Fatalf("watcherFunc was called with an unhealthy state")
case <-becameHealthy:
// Test failed because the watcher got of a healthy state
t.Fatalf("watcherFunc was called with a healthy state")
case <-time.After(1 * time.Second):
// As expected, watcherFunc still had not been called after 1 second
}
unregister(mw)
})
}
}
@@ -739,12 +783,11 @@ func TestControlHealthNotifies(t *testing.T) {
ht.SetIPNState("NeedsLogin", true)
ht.GotStreamedMapResponse()
// Expect events at starup, before doing anything else, skip unstable
// event and no warning event as they show up at different times.
// Expect events at starup, before doing anything else, skip
// the warming up events.
synctest.Wait()
if err := eventbustest.Expect(tw,
CompareWarnableCode(t, tsconst.HealthWarnableWarmingUp),
CompareWarnableCode(t, tsconst.HealthWarnableNotInMapPoll),
CompareWarnableCode(t, tsconst.HealthWarnableWarmingUp),
); err != nil {
t.Errorf("startup error: %v", err)