prober: export a metric with the number of in-flight probes

Updates tailscale/corp#37049

Signed-off-by: Anton Tolchanov <anton@tailscale.com>
main
Authored by Anton Tolchanov 2 months ago; committed by Anton Tolchanov
parent 8e39a0aa0f
commit 45db3691b9
  1. prober/prober.go (7 lines changed)
  2. prober/prober_test.go (19 lines changed)

@ -161,6 +161,7 @@ func newProbe(p *Prober, name string, interval time.Duration, lg prometheus.Labe
mEndTime: prometheus.NewDesc("end_secs", "Latest probe end time (seconds since epoch)", nil, lg), mEndTime: prometheus.NewDesc("end_secs", "Latest probe end time (seconds since epoch)", nil, lg),
mLatency: prometheus.NewDesc("latency_millis", "Latest probe latency (ms)", nil, lg), mLatency: prometheus.NewDesc("latency_millis", "Latest probe latency (ms)", nil, lg),
mResult: prometheus.NewDesc("result", "Latest probe result (1 = success, 0 = failure)", nil, lg), mResult: prometheus.NewDesc("result", "Latest probe result (1 = success, 0 = failure)", nil, lg),
mInFlight: prometheus.NewDesc("in_flight", "Number of probes currently running", nil, lg),
mAttempts: prometheus.NewCounterVec(prometheus.CounterOpts{ mAttempts: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "attempts_total", Help: "Total number of probing attempts", ConstLabels: lg, Name: "attempts_total", Help: "Total number of probing attempts", ConstLabels: lg,
}, []string{"status"}), }, []string{"status"}),
@ -261,10 +262,12 @@ type Probe struct {
mEndTime *prometheus.Desc mEndTime *prometheus.Desc
mLatency *prometheus.Desc mLatency *prometheus.Desc
mResult *prometheus.Desc mResult *prometheus.Desc
mInFlight *prometheus.Desc
mAttempts *prometheus.CounterVec mAttempts *prometheus.CounterVec
mSeconds *prometheus.CounterVec mSeconds *prometheus.CounterVec
mu sync.Mutex mu sync.Mutex
inFlight int // number of currently running probes
start time.Time // last time doProbe started start time.Time // last time doProbe started
end time.Time // last time doProbe returned end time.Time // last time doProbe returned
latency time.Duration // last successful probe latency latency time.Duration // last successful probe latency
@ -392,11 +395,13 @@ func (p *Probe) run() (pi ProbeInfo, err error) {
func (p *Probe) recordStart() { func (p *Probe) recordStart() {
p.mu.Lock() p.mu.Lock()
p.start = p.prober.now() p.start = p.prober.now()
p.inFlight++
p.mu.Unlock() p.mu.Unlock()
} }
func (p *Probe) recordEndLocked(err error) { func (p *Probe) recordEndLocked(err error) {
end := p.prober.now() end := p.prober.now()
p.inFlight--
p.end = end p.end = end
p.succeeded = err == nil p.succeeded = err == nil
p.lastErr = err p.lastErr = err
@ -649,6 +654,7 @@ func (p *Probe) Describe(ch chan<- *prometheus.Desc) {
ch <- p.mStartTime ch <- p.mStartTime
ch <- p.mEndTime ch <- p.mEndTime
ch <- p.mResult ch <- p.mResult
ch <- p.mInFlight
ch <- p.mLatency ch <- p.mLatency
p.mAttempts.Describe(ch) p.mAttempts.Describe(ch)
p.mSeconds.Describe(ch) p.mSeconds.Describe(ch)
@ -664,6 +670,7 @@ func (p *Probe) Collect(ch chan<- prometheus.Metric) {
p.mu.Lock() p.mu.Lock()
defer p.mu.Unlock() defer p.mu.Unlock()
ch <- prometheus.MustNewConstMetric(p.mInterval, prometheus.GaugeValue, p.interval.Seconds()) ch <- prometheus.MustNewConstMetric(p.mInterval, prometheus.GaugeValue, p.interval.Seconds())
ch <- prometheus.MustNewConstMetric(p.mInFlight, prometheus.GaugeValue, float64(p.inFlight))
if !p.start.IsZero() { if !p.start.IsZero() {
ch <- prometheus.MustNewConstMetric(p.mStartTime, prometheus.GaugeValue, float64(p.start.Unix())) ch <- prometheus.MustNewConstMetric(p.mStartTime, prometheus.GaugeValue, float64(p.start.Unix()))
} }

@ -213,6 +213,14 @@ func TestProberConcurrency(t *testing.T) {
if got, want := ran.Load(), int64(3); got != want { if got, want := ran.Load(), int64(3); got != want {
return fmt.Errorf("expected %d probes to run concurrently, got %d", want, got) return fmt.Errorf("expected %d probes to run concurrently, got %d", want, got)
} }
wantMetrics := `
# HELP prober_in_flight Number of probes currently running
# TYPE prober_in_flight gauge
prober_in_flight{class="",name="foo"} 3
`
if err := testutil.GatherAndCompare(p.metrics, strings.NewReader(wantMetrics), "prober_in_flight"); err != nil {
return fmt.Errorf("unexpected metrics: %w", err)
}
return nil return nil
}); err != nil { }); err != nil {
t.Fatal(err) t.Fatal(err)
@ -308,9 +316,12 @@ probe_end_secs{class="",label="value",name="testprobe"} %d
# HELP probe_result Latest probe result (1 = success, 0 = failure) # HELP probe_result Latest probe result (1 = success, 0 = failure)
# TYPE probe_result gauge # TYPE probe_result gauge
probe_result{class="",label="value",name="testprobe"} 0 probe_result{class="",label="value",name="testprobe"} 0
# HELP probe_in_flight Number of probes currently running
# TYPE probe_in_flight gauge
probe_in_flight{class="",label="value",name="testprobe"} 0
`, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix()) `, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix())
return testutil.GatherAndCompare(p.metrics, strings.NewReader(want), return testutil.GatherAndCompare(p.metrics, strings.NewReader(want),
"probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_result") "probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_result", "probe_in_flight")
}) })
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
@ -338,9 +349,13 @@ probe_latency_millis{class="",label="value",name="testprobe"} %d
# HELP probe_result Latest probe result (1 = success, 0 = failure) # HELP probe_result Latest probe result (1 = success, 0 = failure)
# TYPE probe_result gauge # TYPE probe_result gauge
probe_result{class="",label="value",name="testprobe"} 1 probe_result{class="",label="value",name="testprobe"} 1
# HELP probe_in_flight Number of probes currently running
# TYPE probe_in_flight gauge
probe_in_flight{class="",label="value",name="testprobe"} 0
`, probeInterval.Seconds(), start.Unix(), end.Unix(), aFewMillis.Milliseconds()) `, probeInterval.Seconds(), start.Unix(), end.Unix(), aFewMillis.Milliseconds())
return testutil.GatherAndCompare(p.metrics, strings.NewReader(want), return testutil.GatherAndCompare(p.metrics, strings.NewReader(want),
"probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_latency_millis", "probe_result") "probe_interval_secs", "probe_start_secs", "probe_end_secs",
"probe_latency_millis", "probe_result", "probe_in_flight")
}) })
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)

Loading…
Cancel
Save