prober: export a metric with the number of in-flight probes
Updates tailscale/corp#37049 Signed-off-by: Anton Tolchanov <anton@tailscale.com>
This commit is contained in:
committed by
Anton Tolchanov
parent
8e39a0aa0f
commit
45db3691b9
@@ -161,6 +161,7 @@ func newProbe(p *Prober, name string, interval time.Duration, lg prometheus.Labe
|
|||||||
mEndTime: prometheus.NewDesc("end_secs", "Latest probe end time (seconds since epoch)", nil, lg),
|
mEndTime: prometheus.NewDesc("end_secs", "Latest probe end time (seconds since epoch)", nil, lg),
|
||||||
mLatency: prometheus.NewDesc("latency_millis", "Latest probe latency (ms)", nil, lg),
|
mLatency: prometheus.NewDesc("latency_millis", "Latest probe latency (ms)", nil, lg),
|
||||||
mResult: prometheus.NewDesc("result", "Latest probe result (1 = success, 0 = failure)", nil, lg),
|
mResult: prometheus.NewDesc("result", "Latest probe result (1 = success, 0 = failure)", nil, lg),
|
||||||
|
mInFlight: prometheus.NewDesc("in_flight", "Number of probes currently running", nil, lg),
|
||||||
mAttempts: prometheus.NewCounterVec(prometheus.CounterOpts{
|
mAttempts: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||||
Name: "attempts_total", Help: "Total number of probing attempts", ConstLabels: lg,
|
Name: "attempts_total", Help: "Total number of probing attempts", ConstLabels: lg,
|
||||||
}, []string{"status"}),
|
}, []string{"status"}),
|
||||||
@@ -261,10 +262,12 @@ type Probe struct {
|
|||||||
mEndTime *prometheus.Desc
|
mEndTime *prometheus.Desc
|
||||||
mLatency *prometheus.Desc
|
mLatency *prometheus.Desc
|
||||||
mResult *prometheus.Desc
|
mResult *prometheus.Desc
|
||||||
|
mInFlight *prometheus.Desc
|
||||||
mAttempts *prometheus.CounterVec
|
mAttempts *prometheus.CounterVec
|
||||||
mSeconds *prometheus.CounterVec
|
mSeconds *prometheus.CounterVec
|
||||||
|
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
|
inFlight int // number of currently running probes
|
||||||
start time.Time // last time doProbe started
|
start time.Time // last time doProbe started
|
||||||
end time.Time // last time doProbe returned
|
end time.Time // last time doProbe returned
|
||||||
latency time.Duration // last successful probe latency
|
latency time.Duration // last successful probe latency
|
||||||
@@ -392,11 +395,13 @@ func (p *Probe) run() (pi ProbeInfo, err error) {
|
|||||||
func (p *Probe) recordStart() {
|
func (p *Probe) recordStart() {
|
||||||
p.mu.Lock()
|
p.mu.Lock()
|
||||||
p.start = p.prober.now()
|
p.start = p.prober.now()
|
||||||
|
p.inFlight++
|
||||||
p.mu.Unlock()
|
p.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Probe) recordEndLocked(err error) {
|
func (p *Probe) recordEndLocked(err error) {
|
||||||
end := p.prober.now()
|
end := p.prober.now()
|
||||||
|
p.inFlight--
|
||||||
p.end = end
|
p.end = end
|
||||||
p.succeeded = err == nil
|
p.succeeded = err == nil
|
||||||
p.lastErr = err
|
p.lastErr = err
|
||||||
@@ -649,6 +654,7 @@ func (p *Probe) Describe(ch chan<- *prometheus.Desc) {
|
|||||||
ch <- p.mStartTime
|
ch <- p.mStartTime
|
||||||
ch <- p.mEndTime
|
ch <- p.mEndTime
|
||||||
ch <- p.mResult
|
ch <- p.mResult
|
||||||
|
ch <- p.mInFlight
|
||||||
ch <- p.mLatency
|
ch <- p.mLatency
|
||||||
p.mAttempts.Describe(ch)
|
p.mAttempts.Describe(ch)
|
||||||
p.mSeconds.Describe(ch)
|
p.mSeconds.Describe(ch)
|
||||||
@@ -664,6 +670,7 @@ func (p *Probe) Collect(ch chan<- prometheus.Metric) {
|
|||||||
p.mu.Lock()
|
p.mu.Lock()
|
||||||
defer p.mu.Unlock()
|
defer p.mu.Unlock()
|
||||||
ch <- prometheus.MustNewConstMetric(p.mInterval, prometheus.GaugeValue, p.interval.Seconds())
|
ch <- prometheus.MustNewConstMetric(p.mInterval, prometheus.GaugeValue, p.interval.Seconds())
|
||||||
|
ch <- prometheus.MustNewConstMetric(p.mInFlight, prometheus.GaugeValue, float64(p.inFlight))
|
||||||
if !p.start.IsZero() {
|
if !p.start.IsZero() {
|
||||||
ch <- prometheus.MustNewConstMetric(p.mStartTime, prometheus.GaugeValue, float64(p.start.Unix()))
|
ch <- prometheus.MustNewConstMetric(p.mStartTime, prometheus.GaugeValue, float64(p.start.Unix()))
|
||||||
}
|
}
|
||||||
|
|||||||
+17
-2
@@ -213,6 +213,14 @@ func TestProberConcurrency(t *testing.T) {
|
|||||||
if got, want := ran.Load(), int64(3); got != want {
|
if got, want := ran.Load(), int64(3); got != want {
|
||||||
return fmt.Errorf("expected %d probes to run concurrently, got %d", want, got)
|
return fmt.Errorf("expected %d probes to run concurrently, got %d", want, got)
|
||||||
}
|
}
|
||||||
|
wantMetrics := `
|
||||||
|
# HELP prober_in_flight Number of probes currently running
|
||||||
|
# TYPE prober_in_flight gauge
|
||||||
|
prober_in_flight{class="",name="foo"} 3
|
||||||
|
`
|
||||||
|
if err := testutil.GatherAndCompare(p.metrics, strings.NewReader(wantMetrics), "prober_in_flight"); err != nil {
|
||||||
|
return fmt.Errorf("unexpected metrics: %w", err)
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
@@ -308,9 +316,12 @@ probe_end_secs{class="",label="value",name="testprobe"} %d
|
|||||||
# HELP probe_result Latest probe result (1 = success, 0 = failure)
|
# HELP probe_result Latest probe result (1 = success, 0 = failure)
|
||||||
# TYPE probe_result gauge
|
# TYPE probe_result gauge
|
||||||
probe_result{class="",label="value",name="testprobe"} 0
|
probe_result{class="",label="value",name="testprobe"} 0
|
||||||
|
# HELP probe_in_flight Number of probes currently running
|
||||||
|
# TYPE probe_in_flight gauge
|
||||||
|
probe_in_flight{class="",label="value",name="testprobe"} 0
|
||||||
`, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix())
|
`, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix())
|
||||||
return testutil.GatherAndCompare(p.metrics, strings.NewReader(want),
|
return testutil.GatherAndCompare(p.metrics, strings.NewReader(want),
|
||||||
"probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_result")
|
"probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_result", "probe_in_flight")
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
@@ -338,9 +349,13 @@ probe_latency_millis{class="",label="value",name="testprobe"} %d
|
|||||||
# HELP probe_result Latest probe result (1 = success, 0 = failure)
|
# HELP probe_result Latest probe result (1 = success, 0 = failure)
|
||||||
# TYPE probe_result gauge
|
# TYPE probe_result gauge
|
||||||
probe_result{class="",label="value",name="testprobe"} 1
|
probe_result{class="",label="value",name="testprobe"} 1
|
||||||
|
# HELP probe_in_flight Number of probes currently running
|
||||||
|
# TYPE probe_in_flight gauge
|
||||||
|
probe_in_flight{class="",label="value",name="testprobe"} 0
|
||||||
`, probeInterval.Seconds(), start.Unix(), end.Unix(), aFewMillis.Milliseconds())
|
`, probeInterval.Seconds(), start.Unix(), end.Unix(), aFewMillis.Milliseconds())
|
||||||
return testutil.GatherAndCompare(p.metrics, strings.NewReader(want),
|
return testutil.GatherAndCompare(p.metrics, strings.NewReader(want),
|
||||||
"probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_latency_millis", "probe_result")
|
"probe_interval_secs", "probe_start_secs", "probe_end_secs",
|
||||||
|
"probe_latency_millis", "probe_result", "probe_in_flight")
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
|
|||||||
Reference in New Issue
Block a user