prober: export a metric with the number of in-flight probes
Updates tailscale/corp#37049 Signed-off-by: Anton Tolchanov <anton@tailscale.com>
This commit is contained in:
committed by
Anton Tolchanov
parent
8e39a0aa0f
commit
45db3691b9
@@ -161,6 +161,7 @@ func newProbe(p *Prober, name string, interval time.Duration, lg prometheus.Labe
|
||||
mEndTime: prometheus.NewDesc("end_secs", "Latest probe end time (seconds since epoch)", nil, lg),
|
||||
mLatency: prometheus.NewDesc("latency_millis", "Latest probe latency (ms)", nil, lg),
|
||||
mResult: prometheus.NewDesc("result", "Latest probe result (1 = success, 0 = failure)", nil, lg),
|
||||
mInFlight: prometheus.NewDesc("in_flight", "Number of probes currently running", nil, lg),
|
||||
mAttempts: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "attempts_total", Help: "Total number of probing attempts", ConstLabels: lg,
|
||||
}, []string{"status"}),
|
||||
@@ -261,10 +262,12 @@ type Probe struct {
|
||||
mEndTime *prometheus.Desc
|
||||
mLatency *prometheus.Desc
|
||||
mResult *prometheus.Desc
|
||||
mInFlight *prometheus.Desc
|
||||
mAttempts *prometheus.CounterVec
|
||||
mSeconds *prometheus.CounterVec
|
||||
|
||||
mu sync.Mutex
|
||||
inFlight int // number of currently running probes
|
||||
start time.Time // last time doProbe started
|
||||
end time.Time // last time doProbe returned
|
||||
latency time.Duration // last successful probe latency
|
||||
@@ -392,11 +395,13 @@ func (p *Probe) run() (pi ProbeInfo, err error) {
|
||||
func (p *Probe) recordStart() {
|
||||
p.mu.Lock()
|
||||
p.start = p.prober.now()
|
||||
p.inFlight++
|
||||
p.mu.Unlock()
|
||||
}
|
||||
|
||||
func (p *Probe) recordEndLocked(err error) {
|
||||
end := p.prober.now()
|
||||
p.inFlight--
|
||||
p.end = end
|
||||
p.succeeded = err == nil
|
||||
p.lastErr = err
|
||||
@@ -649,6 +654,7 @@ func (p *Probe) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- p.mStartTime
|
||||
ch <- p.mEndTime
|
||||
ch <- p.mResult
|
||||
ch <- p.mInFlight
|
||||
ch <- p.mLatency
|
||||
p.mAttempts.Describe(ch)
|
||||
p.mSeconds.Describe(ch)
|
||||
@@ -664,6 +670,7 @@ func (p *Probe) Collect(ch chan<- prometheus.Metric) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
ch <- prometheus.MustNewConstMetric(p.mInterval, prometheus.GaugeValue, p.interval.Seconds())
|
||||
ch <- prometheus.MustNewConstMetric(p.mInFlight, prometheus.GaugeValue, float64(p.inFlight))
|
||||
if !p.start.IsZero() {
|
||||
ch <- prometheus.MustNewConstMetric(p.mStartTime, prometheus.GaugeValue, float64(p.start.Unix()))
|
||||
}
|
||||
|
||||
+17
-2
@@ -213,6 +213,14 @@ func TestProberConcurrency(t *testing.T) {
|
||||
if got, want := ran.Load(), int64(3); got != want {
|
||||
return fmt.Errorf("expected %d probes to run concurrently, got %d", want, got)
|
||||
}
|
||||
wantMetrics := `
|
||||
# HELP prober_in_flight Number of probes currently running
|
||||
# TYPE prober_in_flight gauge
|
||||
prober_in_flight{class="",name="foo"} 3
|
||||
`
|
||||
if err := testutil.GatherAndCompare(p.metrics, strings.NewReader(wantMetrics), "prober_in_flight"); err != nil {
|
||||
return fmt.Errorf("unexpected metrics: %w", err)
|
||||
}
|
||||
return nil
|
||||
}); err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -308,9 +316,12 @@ probe_end_secs{class="",label="value",name="testprobe"} %d
|
||||
# HELP probe_result Latest probe result (1 = success, 0 = failure)
|
||||
# TYPE probe_result gauge
|
||||
probe_result{class="",label="value",name="testprobe"} 0
|
||||
# HELP probe_in_flight Number of probes currently running
|
||||
# TYPE probe_in_flight gauge
|
||||
probe_in_flight{class="",label="value",name="testprobe"} 0
|
||||
`, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix())
|
||||
return testutil.GatherAndCompare(p.metrics, strings.NewReader(want),
|
||||
"probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_result")
|
||||
"probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_result", "probe_in_flight")
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -338,9 +349,13 @@ probe_latency_millis{class="",label="value",name="testprobe"} %d
|
||||
# HELP probe_result Latest probe result (1 = success, 0 = failure)
|
||||
# TYPE probe_result gauge
|
||||
probe_result{class="",label="value",name="testprobe"} 1
|
||||
# HELP probe_in_flight Number of probes currently running
|
||||
# TYPE probe_in_flight gauge
|
||||
probe_in_flight{class="",label="value",name="testprobe"} 0
|
||||
`, probeInterval.Seconds(), start.Unix(), end.Unix(), aFewMillis.Milliseconds())
|
||||
return testutil.GatherAndCompare(p.metrics, strings.NewReader(want),
|
||||
"probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_latency_millis", "probe_result")
|
||||
"probe_interval_secs", "probe_start_secs", "probe_end_secs",
|
||||
"probe_latency_millis", "probe_result", "probe_in_flight")
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
||||
Reference in New Issue
Block a user