logtail: add metrics (#18184)
Add metrics about logtail uploading and underlying buffer. Add metrics to the in-memory buffer implementation. Updates tailscale/corp#21363 Signed-off-by: Joe Tsai <joetsai@digital-static.net>
This commit is contained in:
+42
-1
@@ -8,8 +8,10 @@ package logtail
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"errors"
|
"errors"
|
||||||
|
"expvar"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
"tailscale.com/metrics"
|
||||||
"tailscale.com/syncs"
|
"tailscale.com/syncs"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -39,12 +41,42 @@ type memBuffer struct {
|
|||||||
|
|
||||||
dropMu syncs.Mutex
|
dropMu syncs.Mutex
|
||||||
dropCount int
|
dropCount int
|
||||||
|
|
||||||
|
// Metrics (see [memBuffer.ExpVar] for details).
|
||||||
|
writeCalls expvar.Int
|
||||||
|
readCalls expvar.Int
|
||||||
|
writeBytes expvar.Int
|
||||||
|
readBytes expvar.Int
|
||||||
|
droppedBytes expvar.Int
|
||||||
|
storedBytes expvar.Int
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExpVar returns a [metrics.Set] with metrics about the buffer.
|
||||||
|
//
|
||||||
|
// - counter_write_calls: Total number of write calls.
|
||||||
|
// - counter_read_calls: Total number of read calls.
|
||||||
|
// - counter_write_bytes: Total number of bytes written.
|
||||||
|
// - counter_read_bytes: Total number of bytes read.
|
||||||
|
// - counter_dropped_bytes: Total number of bytes dropped.
|
||||||
|
// - gauge_stored_bytes: Current number of bytes stored in memory.
|
||||||
|
func (b *memBuffer) ExpVar() expvar.Var {
|
||||||
|
m := new(metrics.Set)
|
||||||
|
m.Set("counter_write_calls", &b.writeCalls)
|
||||||
|
m.Set("counter_read_calls", &b.readCalls)
|
||||||
|
m.Set("counter_write_bytes", &b.writeBytes)
|
||||||
|
m.Set("counter_read_bytes", &b.readBytes)
|
||||||
|
m.Set("counter_dropped_bytes", &b.droppedBytes)
|
||||||
|
m.Set("gauge_stored_bytes", &b.storedBytes)
|
||||||
|
return m
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *memBuffer) TryReadLine() ([]byte, error) {
|
func (m *memBuffer) TryReadLine() ([]byte, error) {
|
||||||
|
m.readCalls.Add(1)
|
||||||
if m.next != nil {
|
if m.next != nil {
|
||||||
msg := m.next
|
msg := m.next
|
||||||
m.next = nil
|
m.next = nil
|
||||||
|
m.readBytes.Add(int64(len(msg)))
|
||||||
|
m.storedBytes.Add(-int64(len(msg)))
|
||||||
return msg, nil
|
return msg, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -52,8 +84,13 @@ func (m *memBuffer) TryReadLine() ([]byte, error) {
|
|||||||
case ent := <-m.pending:
|
case ent := <-m.pending:
|
||||||
if ent.dropCount > 0 {
|
if ent.dropCount > 0 {
|
||||||
m.next = ent.msg
|
m.next = ent.msg
|
||||||
return fmt.Appendf(nil, "----------- %d logs dropped ----------", ent.dropCount), nil
|
b := fmt.Appendf(nil, "----------- %d logs dropped ----------", ent.dropCount)
|
||||||
|
m.writeBytes.Add(int64(len(b))) // indicate pseudo-injected log message
|
||||||
|
m.readBytes.Add(int64(len(b)))
|
||||||
|
return b, nil
|
||||||
}
|
}
|
||||||
|
m.readBytes.Add(int64(len(ent.msg)))
|
||||||
|
m.storedBytes.Add(-int64(len(ent.msg)))
|
||||||
return ent.msg, nil
|
return ent.msg, nil
|
||||||
default:
|
default:
|
||||||
return nil, nil
|
return nil, nil
|
||||||
@@ -61,6 +98,7 @@ func (m *memBuffer) TryReadLine() ([]byte, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *memBuffer) Write(b []byte) (int, error) {
|
func (m *memBuffer) Write(b []byte) (int, error) {
|
||||||
|
m.writeCalls.Add(1)
|
||||||
m.dropMu.Lock()
|
m.dropMu.Lock()
|
||||||
defer m.dropMu.Unlock()
|
defer m.dropMu.Unlock()
|
||||||
|
|
||||||
@@ -70,10 +108,13 @@ func (m *memBuffer) Write(b []byte) (int, error) {
|
|||||||
}
|
}
|
||||||
select {
|
select {
|
||||||
case m.pending <- ent:
|
case m.pending <- ent:
|
||||||
|
m.writeBytes.Add(int64(len(b)))
|
||||||
|
m.storedBytes.Add(+int64(len(b)))
|
||||||
m.dropCount = 0
|
m.dropCount = 0
|
||||||
return len(b), nil
|
return len(b), nil
|
||||||
default:
|
default:
|
||||||
m.dropCount++
|
m.dropCount++
|
||||||
|
m.droppedBytes.Add(int64(len(b)))
|
||||||
return 0, errBufferFull
|
return 0, errBufferFull
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import (
|
|||||||
"slices"
|
"slices"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"tailscale.com/metrics"
|
||||||
"tailscale.com/util/must"
|
"tailscale.com/util/must"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -88,7 +89,7 @@ type Filch struct {
|
|||||||
storedBytes expvar.Int
|
storedBytes expvar.Int
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExpVar report metrics about the buffer.
|
// ExpVar returns a [metrics.Set] with metrics about the buffer.
|
||||||
//
|
//
|
||||||
// - counter_write_calls: Total number of calls to [Filch.Write]
|
// - counter_write_calls: Total number of calls to [Filch.Write]
|
||||||
// (excludes calls when file is closed).
|
// (excludes calls when file is closed).
|
||||||
@@ -114,7 +115,7 @@ type Filch struct {
|
|||||||
//
|
//
|
||||||
// - gauge_stored_bytes: Current number of bytes stored on disk.
|
// - gauge_stored_bytes: Current number of bytes stored on disk.
|
||||||
func (f *Filch) ExpVar() expvar.Var {
|
func (f *Filch) ExpVar() expvar.Var {
|
||||||
m := new(expvar.Map)
|
m := new(metrics.Set)
|
||||||
m.Set("counter_write_calls", &f.writeCalls)
|
m.Set("counter_write_calls", &f.writeCalls)
|
||||||
m.Set("counter_read_calls", &f.readCalls)
|
m.Set("counter_read_calls", &f.readCalls)
|
||||||
m.Set("counter_rotate_calls", &f.rotateCalls)
|
m.Set("counter_rotate_calls", &f.rotateCalls)
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"crypto/rand"
|
"crypto/rand"
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
|
"expvar"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
@@ -28,6 +29,7 @@ import (
|
|||||||
"github.com/creachadair/msync/trigger"
|
"github.com/creachadair/msync/trigger"
|
||||||
"github.com/go-json-experiment/json/jsontext"
|
"github.com/go-json-experiment/json/jsontext"
|
||||||
"tailscale.com/envknob"
|
"tailscale.com/envknob"
|
||||||
|
"tailscale.com/metrics"
|
||||||
"tailscale.com/net/netmon"
|
"tailscale.com/net/netmon"
|
||||||
"tailscale.com/net/sockstats"
|
"tailscale.com/net/sockstats"
|
||||||
"tailscale.com/tstime"
|
"tailscale.com/tstime"
|
||||||
@@ -180,6 +182,12 @@ type Logger struct {
|
|||||||
shutdownStartMu sync.Mutex // guards the closing of shutdownStart
|
shutdownStartMu sync.Mutex // guards the closing of shutdownStart
|
||||||
shutdownStart chan struct{} // closed when shutdown begins
|
shutdownStart chan struct{} // closed when shutdown begins
|
||||||
shutdownDone chan struct{} // closed when shutdown complete
|
shutdownDone chan struct{} // closed when shutdown complete
|
||||||
|
|
||||||
|
// Metrics (see [Logger.ExpVar] for details).
|
||||||
|
uploadCalls expvar.Int
|
||||||
|
failedCalls expvar.Int
|
||||||
|
uploadedBytes expvar.Int
|
||||||
|
uploadingTime expvar.Int
|
||||||
}
|
}
|
||||||
|
|
||||||
type atomicSocktatsLabel struct{ p atomic.Uint32 }
|
type atomicSocktatsLabel struct{ p atomic.Uint32 }
|
||||||
@@ -477,6 +485,9 @@ func (lg *Logger) awaitInternetUp(ctx context.Context) {
|
|||||||
// origlen indicates the pre-compression body length.
|
// origlen indicates the pre-compression body length.
|
||||||
// origlen of -1 indicates that the body is not compressed.
|
// origlen of -1 indicates that the body is not compressed.
|
||||||
func (lg *Logger) upload(ctx context.Context, body []byte, origlen int) (retryAfter time.Duration, err error) {
|
func (lg *Logger) upload(ctx context.Context, body []byte, origlen int) (retryAfter time.Duration, err error) {
|
||||||
|
lg.uploadCalls.Add(1)
|
||||||
|
startUpload := time.Now()
|
||||||
|
|
||||||
const maxUploadTime = 45 * time.Second
|
const maxUploadTime = 45 * time.Second
|
||||||
ctx = sockstats.WithSockStats(ctx, lg.sockstatsLabel.Load(), lg.Logf)
|
ctx = sockstats.WithSockStats(ctx, lg.sockstatsLabel.Load(), lg.Logf)
|
||||||
ctx, cancel := context.WithTimeout(ctx, maxUploadTime)
|
ctx, cancel := context.WithTimeout(ctx, maxUploadTime)
|
||||||
@@ -516,15 +527,20 @@ func (lg *Logger) upload(ctx context.Context, body []byte, origlen int) (retryAf
|
|||||||
lg.httpDoCalls.Add(1)
|
lg.httpDoCalls.Add(1)
|
||||||
resp, err := lg.httpc.Do(req)
|
resp, err := lg.httpc.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
lg.failedCalls.Add(1)
|
||||||
return 0, fmt.Errorf("log upload of %d bytes %s failed: %v", len(body), compressedNote, err)
|
return 0, fmt.Errorf("log upload of %d bytes %s failed: %v", len(body), compressedNote, err)
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
lg.failedCalls.Add(1)
|
||||||
n, _ := strconv.Atoi(resp.Header.Get("Retry-After"))
|
n, _ := strconv.Atoi(resp.Header.Get("Retry-After"))
|
||||||
b, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<10))
|
b, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<10))
|
||||||
return time.Duration(n) * time.Second, fmt.Errorf("log upload of %d bytes %s failed %d: %s", len(body), compressedNote, resp.StatusCode, bytes.TrimSpace(b))
|
return time.Duration(n) * time.Second, fmt.Errorf("log upload of %d bytes %s failed %d: %s", len(body), compressedNote, resp.StatusCode, bytes.TrimSpace(b))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lg.uploadedBytes.Add(int64(len(body)))
|
||||||
|
lg.uploadingTime.Add(int64(time.Since(startUpload)))
|
||||||
return 0, nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -546,6 +562,30 @@ func (lg *Logger) StartFlush() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ExpVar report metrics about the logger.
|
||||||
|
//
|
||||||
|
// - counter_upload_calls: Total number of upload attempts.
|
||||||
|
//
|
||||||
|
// - counter_upload_errors: Total number of upload attempts that failed.
|
||||||
|
//
|
||||||
|
// - counter_uploaded_bytes: Total number of bytes successfully uploaded
|
||||||
|
// (which is calculated after compression is applied).
|
||||||
|
//
|
||||||
|
// - counter_uploading_nsecs: Total number of nanoseconds spent uploading.
|
||||||
|
//
|
||||||
|
// - buffer: An optional [metrics.Set] with metrics for the [Buffer].
|
||||||
|
func (lg *Logger) ExpVar() expvar.Var {
|
||||||
|
m := new(metrics.Set)
|
||||||
|
m.Set("counter_upload_calls", &lg.uploadCalls)
|
||||||
|
m.Set("counter_upload_errors", &lg.failedCalls)
|
||||||
|
m.Set("counter_uploaded_bytes", &lg.uploadedBytes)
|
||||||
|
m.Set("counter_uploading_nsecs", &lg.uploadingTime)
|
||||||
|
if v, ok := lg.buffer.(interface{ ExpVar() expvar.Var }); ok {
|
||||||
|
m.Set("buffer", v.ExpVar())
|
||||||
|
}
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
// logtailDisabled is whether logtail uploads to logcatcher are disabled.
|
// logtailDisabled is whether logtail uploads to logcatcher are disabled.
|
||||||
var logtailDisabled atomic.Bool
|
var logtailDisabled atomic.Bool
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user