diff --git a/prober/prober.go b/prober/prober.go index 16c262bc8..3a43401a1 100644 --- a/prober/prober.go +++ b/prober/prober.go @@ -161,6 +161,7 @@ func newProbe(p *Prober, name string, interval time.Duration, lg prometheus.Labe mEndTime: prometheus.NewDesc("end_secs", "Latest probe end time (seconds since epoch)", nil, lg), mLatency: prometheus.NewDesc("latency_millis", "Latest probe latency (ms)", nil, lg), mResult: prometheus.NewDesc("result", "Latest probe result (1 = success, 0 = failure)", nil, lg), + mInFlight: prometheus.NewDesc("in_flight", "Number of probes currently running", nil, lg), mAttempts: prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "attempts_total", Help: "Total number of probing attempts", ConstLabels: lg, }, []string{"status"}), @@ -261,10 +262,12 @@ type Probe struct { mEndTime *prometheus.Desc mLatency *prometheus.Desc mResult *prometheus.Desc + mInFlight *prometheus.Desc mAttempts *prometheus.CounterVec mSeconds *prometheus.CounterVec mu sync.Mutex + inFlight int // number of currently running probes start time.Time // last time doProbe started end time.Time // last time doProbe returned latency time.Duration // last successful probe latency @@ -392,11 +395,13 @@ func (p *Probe) run() (pi ProbeInfo, err error) { func (p *Probe) recordStart() { p.mu.Lock() p.start = p.prober.now() + p.inFlight++ p.mu.Unlock() } func (p *Probe) recordEndLocked(err error) { end := p.prober.now() + p.inFlight-- p.end = end p.succeeded = err == nil p.lastErr = err @@ -649,6 +654,7 @@ func (p *Probe) Describe(ch chan<- *prometheus.Desc) { ch <- p.mStartTime ch <- p.mEndTime ch <- p.mResult + ch <- p.mInFlight ch <- p.mLatency p.mAttempts.Describe(ch) p.mSeconds.Describe(ch) @@ -664,6 +670,7 @@ func (p *Probe) Collect(ch chan<- prometheus.Metric) { p.mu.Lock() defer p.mu.Unlock() ch <- prometheus.MustNewConstMetric(p.mInterval, prometheus.GaugeValue, p.interval.Seconds()) + ch <- prometheus.MustNewConstMetric(p.mInFlight, prometheus.GaugeValue, float64(p.inFlight)) if !p.start.IsZero() { ch <- prometheus.MustNewConstMetric(p.mStartTime, prometheus.GaugeValue, float64(p.start.Unix())) } diff --git a/prober/prober_test.go b/prober/prober_test.go index c945f617a..8da512787 100644 --- a/prober/prober_test.go +++ b/prober/prober_test.go @@ -213,6 +213,14 @@ func TestProberConcurrency(t *testing.T) { if got, want := ran.Load(), int64(3); got != want { return fmt.Errorf("expected %d probes to run concurrently, got %d", want, got) } + wantMetrics := ` + # HELP prober_in_flight Number of probes currently running + # TYPE prober_in_flight gauge + prober_in_flight{class="",name="foo"} 3 + ` + if err := testutil.GatherAndCompare(p.metrics, strings.NewReader(wantMetrics), "prober_in_flight"); err != nil { + return fmt.Errorf("unexpected metrics: %w", err) + } return nil }); err != nil { t.Fatal(err) @@ -308,9 +316,12 @@ probe_end_secs{class="",label="value",name="testprobe"} %d # HELP probe_result Latest probe result (1 = success, 0 = failure) # TYPE probe_result gauge probe_result{class="",label="value",name="testprobe"} 0 +# HELP probe_in_flight Number of probes currently running +# TYPE probe_in_flight gauge +probe_in_flight{class="",label="value",name="testprobe"} 0 `, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix()) return testutil.GatherAndCompare(p.metrics, strings.NewReader(want), - "probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_result") + "probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_result", "probe_in_flight") }) if err != nil { t.Fatal(err) @@ -338,9 +349,13 @@ probe_latency_millis{class="",label="value",name="testprobe"} %d # HELP probe_result Latest probe result (1 = success, 0 = failure) # TYPE probe_result gauge probe_result{class="",label="value",name="testprobe"} 1 +# HELP probe_in_flight Number of probes currently running +# TYPE probe_in_flight gauge +probe_in_flight{class="",label="value",name="testprobe"} 0 `, probeInterval.Seconds(), start.Unix(), end.Unix(), aFewMillis.Milliseconds()) return testutil.GatherAndCompare(p.metrics, strings.NewReader(want), - "probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_latency_millis", "probe_result") + "probe_interval_secs", "probe_start_secs", "probe_end_secs", + "probe_latency_millis", "probe_result", "probe_in_flight") }) if err != nil { t.Fatal(err)