You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
442 lines
15 KiB
442 lines
15 KiB
// Copyright (c) Tailscale Inc & contributors
|
|
// SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
// Package tundevstats provides a mechanism for exposing TUN device statistics
|
|
// via clientmetrics.
|
|
package tundevstats
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"runtime"
|
|
"sync"
|
|
"time"
|
|
"unsafe"
|
|
|
|
"github.com/mdlayher/netlink"
|
|
"github.com/tailscale/wireguard-go/tun"
|
|
"golang.org/x/sys/unix"
|
|
"tailscale.com/feature"
|
|
"tailscale.com/net/tstun"
|
|
"tailscale.com/util/clientmetric"
|
|
)
|
|
|
|
func init() {
|
|
feature.Register("tundevstats")
|
|
if runtime.GOOS != "linux" {
|
|
// Exclude Android for now. There's no reason this shouldn't work on
|
|
// Android, but it needs to be tested, and justified from a battery
|
|
// cost perspective.
|
|
return
|
|
}
|
|
tstun.HookPollTUNDevStats.Set(newPoller)
|
|
}
|
|
|
|
// poller polls TUN device stats via netlink, and surfaces them via
|
|
// [tailscale.com/util/clientmetric].
|
|
type poller struct {
|
|
conn *netlink.Conn
|
|
ifIndex uint32
|
|
closeCh chan struct{}
|
|
closeOnce sync.Once
|
|
wg sync.WaitGroup
|
|
lastTXQDrops uint64
|
|
}
|
|
|
|
// getIfIndex returns the interface index for ifName via ioctl.
|
|
func getIfIndex(ifName string) (uint32, error) {
|
|
ifr, err := unix.NewIfreq(ifName)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
fd, err := unix.Socket(
|
|
unix.AF_INET,
|
|
unix.SOCK_DGRAM|unix.SOCK_CLOEXEC,
|
|
0,
|
|
)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
defer unix.Close(fd)
|
|
err = unix.IoctlIfreq(fd, unix.SIOCGIFINDEX, ifr)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return ifr.Uint32(), nil
|
|
}
|
|
|
|
// netlinkDialFn is the signature of a function that opens a netlink connection
// for the given family and config. [netlink.Dial] is passed as this type by
// newPoller.
type netlinkDialFn func(family int, config *netlink.Config) (*netlink.Conn, error)
|
|
|
|
// newPollerWithNetlinkDialer exists to allow swapping [netlinkDialFn] in tests,
|
|
// but newPoller, which calls with [netlink.Dial], is what gets set as a
|
|
// [feature.Hook] in tstun.
|
|
func newPollerWithNetlinkDialer(tdev tun.Device, netlinkDialFn netlinkDialFn) (io.Closer, error) {
|
|
ifName, err := tdev.Name()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error getting device name: %w", err)
|
|
}
|
|
ifIndex, err := getIfIndex(ifName)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error getting ifIndex: %w", err)
|
|
}
|
|
conn, err := netlinkDialFn(unix.NETLINK_ROUTE, nil)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error opening netlink socket: %w", err)
|
|
}
|
|
p := &poller{
|
|
conn: conn,
|
|
ifIndex: ifIndex,
|
|
closeCh: make(chan struct{}),
|
|
}
|
|
p.wg.Go(p.run)
|
|
return p, nil
|
|
}
|
|
|
|
// newPoller starts polling device stats for tdev, returning an [io.Closer]
|
|
// that halts polling operations.
|
|
func newPoller(tdev tun.Device) (io.Closer, error) {
|
|
return newPollerWithNetlinkDialer(tdev, netlink.Dial)
|
|
}
|
|
|
|
// pollInterval is the period between successive TUN device statistics polls
// performed by [poller]. It is chosen to mirror
// [tailscale.com/util/clientmetric.minMetricEncodeInterval], the minimum
// interval between clientmetrics emissions.
const pollInterval = 15 * time.Second
|
|
|
|
var (
|
|
registerMetricOnce sync.Once
|
|
txQueueDrops *clientmetric.Metric
|
|
)
|
|
|
|
// getTXQDropsMetric returns the TX queue drops clientmetric. It must not be
|
|
// called until device stats have been successfully polled via netlink since it
|
|
// sets the metric value to zero. A nil or absent clientmetric has meaning when
|
|
// polling fails, vs a misleading zero value.
|
|
func getTXQDropsMetric() *clientmetric.Metric {
|
|
registerMetricOnce.Do(func() {
|
|
txQueueDrops = clientmetric.NewCounter("tundev_txq_drops")
|
|
})
|
|
return txQueueDrops
|
|
}
|
|
|
|
func (p *poller) poll() error {
|
|
stats, err := getStats(p.conn, p.ifIndex)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
m := getTXQDropsMetric()
|
|
delta := stats.txDropped - p.lastTXQDrops
|
|
m.Add(int64(delta))
|
|
p.lastTXQDrops = stats.txDropped
|
|
return nil
|
|
}
|
|
|
|
// run polls immediately and every [pollInterval] returning when [poller.poll]
|
|
// returns an error, or [poller.closeCh] is closed via [poller.Close].
|
|
func (p *poller) run() {
|
|
ticker := time.NewTicker(pollInterval)
|
|
defer ticker.Stop()
|
|
err := p.poll() // poll immediately
|
|
if err != nil {
|
|
return
|
|
}
|
|
for {
|
|
select {
|
|
case <-p.closeCh:
|
|
return
|
|
case <-ticker.C:
|
|
err = p.poll()
|
|
if err != nil {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Close halts polling operations.
|
|
func (p *poller) Close() error {
|
|
p.closeOnce.Do(func() {
|
|
p.conn.Close()
|
|
close(p.closeCh)
|
|
p.wg.Wait()
|
|
})
|
|
return nil
|
|
}
|
|
|
|
// ifStatsMsg mirrors struct if_stats_msg from uapi/linux/if_link.h. It is the
// fixed-size header of an RTM_GETSTATS request and precedes the attributes in
// the corresponding response.
type ifStatsMsg struct {
	family     uint8  // address family
	pad1       uint8  // padding
	pad2       uint16 // padding
	ifIndex    uint32 // index of the interface being queried
	filterMask uint32 // bitmask selecting which stats attributes to return
}

// encode returns the in-memory bytes of i, for use as the payload of an
// RTM_GETSTATS netlink request. The returned slice is a view over i's own
// memory rather than a copy, so it reflects i's current field values in host
// byte order.
func (i *ifStatsMsg) encode() []byte {
	const size = unsafe.Sizeof(ifStatsMsg{})
	first := (*byte)(unsafe.Pointer(i))
	return unsafe.Slice(first, size)
}
|
|
|
|
// iflaStatsLink64 is IFLA_STATS_LINK_64 from uapi/linux/if_link.h, the netlink
// attribute type carrying the 64-bit link statistics.
const iflaStatsLink64 = 1

// iflaStatsLink64FilterMask is the filter mask bit that requests the
// iflaStatsLink64 attribute: bit (attribute type - 1).
const iflaStatsLink64FilterMask = 1 << (iflaStatsLink64 - 1)
|
|
|
|
// getStats returns [rtnlLinkStats64] via netlink RTM_GETSTATS over the provided
|
|
// conn for the provided ifIndex.
|
|
func getStats(conn *netlink.Conn, ifIndex uint32) (rtnlLinkStats64, error) {
|
|
reqData := ifStatsMsg{
|
|
family: unix.AF_UNSPEC,
|
|
ifIndex: ifIndex,
|
|
filterMask: iflaStatsLink64FilterMask,
|
|
}
|
|
req := netlink.Message{
|
|
Header: netlink.Header{
|
|
Flags: netlink.Request,
|
|
Type: unix.RTM_GETSTATS,
|
|
},
|
|
Data: reqData.encode(),
|
|
}
|
|
msgs, err := conn.Execute(req)
|
|
if err != nil {
|
|
return rtnlLinkStats64{}, err
|
|
}
|
|
if len(msgs) != 1 {
|
|
return rtnlLinkStats64{}, fmt.Errorf("expected one netlink response message, got: %d", len(msgs))
|
|
}
|
|
msg := msgs[0]
|
|
if msg.Header.Type != unix.RTM_NEWSTATS {
|
|
return rtnlLinkStats64{}, fmt.Errorf("expected RTM_NEWSTATS (%d) netlink response, got: %d", unix.RTM_NEWSTATS, msg.Header.Type)
|
|
}
|
|
sizeOfIfStatsMsg := int(unsafe.Sizeof(ifStatsMsg{}))
|
|
if len(msg.Data) < sizeOfIfStatsMsg {
|
|
return rtnlLinkStats64{}, fmt.Errorf("length of netlink response data < %d, got: %d", sizeOfIfStatsMsg, len(msg.Data))
|
|
}
|
|
ad, err := netlink.NewAttributeDecoder(msg.Data[sizeOfIfStatsMsg:])
|
|
if err != nil {
|
|
return rtnlLinkStats64{}, err
|
|
}
|
|
for ad.Next() {
|
|
if ad.Type() == iflaStatsLink64 {
|
|
stats := rtnlLinkStats64{}
|
|
ad.Do(func(b []byte) error {
|
|
return stats.decode(b)
|
|
})
|
|
if ad.Err() != nil {
|
|
return rtnlLinkStats64{}, ad.Err()
|
|
}
|
|
return stats, nil
|
|
}
|
|
}
|
|
if err = ad.Err(); err != nil {
|
|
return rtnlLinkStats64{}, err
|
|
}
|
|
return rtnlLinkStats64{}, errors.New("no stats found in netlink response")
|
|
}
|
|
|
|
// rtnlLinkStats64 is struct rtnl_link_stats64 from uapi/linux/if_link.h up to
// the addition of the RTM_GETSTATS netlink message (Linux commit 10c9ead9f3c6).
// Newer fields are omitted. Since we expect this type in response to RTM_GETSTATS,
// we marry them together from a minimum kernel version perspective (Linux v4.7).
// Field documentation is copied from the kernel verbatim.
type rtnlLinkStats64 struct {
	// rxPackets is the number of good packets received by the interface.
	// For hardware interfaces counts all good packets received from the device
	// by the host, including packets which host had to drop at various stages
	// of processing (even in the driver).
	rxPackets uint64

	// txPackets is the number of packets successfully transmitted.
	// For hardware interfaces counts packets which host was able to successfully
	// hand over to the device, which does not necessarily mean that packets
	// had been successfully transmitted out of the device, only that device
	// acknowledged it copied them out of host memory.
	txPackets uint64

	// rxBytes is the number of good received bytes, corresponding to rxPackets.
	// For IEEE 802.3 devices should count the length of Ethernet Frames
	// excluding the FCS.
	rxBytes uint64

	// txBytes is the number of good transmitted bytes, corresponding to txPackets.
	// For IEEE 802.3 devices should count the length of Ethernet Frames
	// excluding the FCS.
	txBytes uint64

	// rxErrors is the total number of bad packets received on this network device.
	// This counter must include events counted by rxLengthErrors,
	// rxCRCErrors, rxFrameErrors and other errors not otherwise counted.
	rxErrors uint64

	// txErrors is the total number of transmit problems.
	// This counter must include events counted by txAbortedErrors,
	// txCarrierErrors, txFIFOErrors, txHeartbeatErrors,
	// txWindowErrors and other errors not otherwise counted.
	txErrors uint64

	// rxDropped is the number of packets received but not processed,
	// e.g. due to lack of resources or unsupported protocol.
	// For hardware interfaces this counter may include packets discarded
	// due to L2 address filtering but should not include packets dropped
	// by the device due to buffer exhaustion which are counted separately in
	// rxMissedErrors (since procfs folds those two counters together).
	rxDropped uint64

	// txDropped is the number of packets dropped on their way to transmission,
	// e.g. due to lack of resources.
	txDropped uint64

	// multicast is the number of multicast packets received.
	// For hardware interfaces this statistic is commonly calculated
	// at the device level (unlike rxPackets) and therefore may include
	// packets which did not reach the host.
	// For IEEE 802.3 devices this counter may be equivalent to:
	// - 30.3.1.1.21 aMulticastFramesReceivedOK
	multicast uint64

	// collisions is the number of collisions during packet transmissions.
	collisions uint64

	// rxLengthErrors is the number of packets dropped due to invalid length.
	// Part of aggregate "frame" errors in /proc/net/dev.
	// For IEEE 802.3 devices this counter should be equivalent to a sum of:
	// - 30.3.1.1.23 aInRangeLengthErrors
	// - 30.3.1.1.24 aOutOfRangeLengthField
	// - 30.3.1.1.25 aFrameTooLongErrors
	rxLengthErrors uint64

	// rxOverErrors is the receiver FIFO overflow event counter.
	// Historically the count of overflow events. Such events may be reported
	// in the receive descriptors or via interrupts, and may not correspond
	// one-to-one with dropped packets.
	// The recommended interpretation for high speed interfaces is the number
	// of packets dropped because they did not fit into buffers provided by the
	// host, e.g. packets larger than MTU or next buffer in the ring was not
	// available for a scatter transfer.
	// Part of aggregate "frame" errors in /proc/net/dev.
	// This statistic corresponds to hardware events and is not commonly used
	// on software devices.
	rxOverErrors uint64

	// rxCRCErrors is the number of packets received with a CRC error.
	// Part of aggregate "frame" errors in /proc/net/dev.
	// For IEEE 802.3 devices this counter must be equivalent to:
	// - 30.3.1.1.6 aFrameCheckSequenceErrors
	rxCRCErrors uint64

	// rxFrameErrors is the receiver frame alignment errors.
	// Part of aggregate "frame" errors in /proc/net/dev.
	// For IEEE 802.3 devices this counter should be equivalent to:
	// - 30.3.1.1.7 aAlignmentErrors
	rxFrameErrors uint64

	// rxFIFOErrors is the receiver FIFO error counter.
	// Historically the count of overflow events. Those events may be reported
	// in the receive descriptors or via interrupts, and may not correspond
	// one-to-one with dropped packets.
	// This statistic is used on software devices, e.g. to count software
	// packet queue overflow (can) or sequencing errors (GRE).
	rxFIFOErrors uint64

	// rxMissedErrors is the count of packets missed by the host.
	// Folded into the "drop" counter in /proc/net/dev.
	// Counts number of packets dropped by the device due to lack of buffer
	// space. This usually indicates that the host interface is slower than
	// the network interface, or host is not keeping up with the receive
	// packet rate.
	// This statistic corresponds to hardware events and is not used on
	// software devices.
	rxMissedErrors uint64

	// txAbortedErrors is part of aggregate "carrier" errors in /proc/net/dev.
	// For IEEE 802.3 devices capable of half-duplex operation this counter
	// must be equivalent to:
	// - 30.3.1.1.11 aFramesAbortedDueToXSColls
	// High speed interfaces may use this counter as a general device discard
	// counter.
	txAbortedErrors uint64

	// txCarrierErrors is the number of frame transmission errors due to loss
	// of carrier during transmission.
	// Part of aggregate "carrier" errors in /proc/net/dev.
	// For IEEE 802.3 devices this counter must be equivalent to:
	// - 30.3.1.1.13 aCarrierSenseErrors
	txCarrierErrors uint64

	// txFIFOErrors is the number of frame transmission errors due to device
	// FIFO underrun / underflow. This condition occurs when the device begins
	// transmission of a frame but is unable to deliver the entire frame to
	// the transmitter in time for transmission.
	// Part of aggregate "carrier" errors in /proc/net/dev.
	txFIFOErrors uint64

	// txHeartbeatErrors is the number of Heartbeat / SQE Test errors for
	// old half-duplex Ethernet.
	// Part of aggregate "carrier" errors in /proc/net/dev.
	// For IEEE 802.3 devices possibly equivalent to:
	// - 30.3.2.1.4 aSQETestErrors
	txHeartbeatErrors uint64

	// txWindowErrors is the number of frame transmission errors due to late
	// collisions (for Ethernet - after the first 64B of transmission).
	// Part of aggregate "carrier" errors in /proc/net/dev.
	// For IEEE 802.3 devices this counter must be equivalent to:
	// - 30.3.1.1.10 aLateCollisions
	txWindowErrors uint64

	// rxCompressed is the number of correctly received compressed packets.
	// This counter is only meaningful for interfaces which support packet
	// compression (e.g. CSLIP, PPP).
	rxCompressed uint64

	// txCompressed is the number of transmitted compressed packets.
	// This counter is only meaningful for interfaces which support packet
	// compression (e.g. CSLIP, PPP).
	txCompressed uint64

	// rxNoHandler is the number of packets received on the interface but
	// dropped by the networking stack because the device is not designated
	// to receive packets (e.g. backup link in a bond).
	rxNoHandler uint64
}

// decode fills s from b, the raw payload of a netlink attribute such as
// IFLA_STATS_LINK_64. The payload is read as consecutive 8-byte counters in
// the same order as the struct's fields, using binary.NativeEndian because the
// kernel writes the struct in host byte order.
//
// b must hold at least one 8-byte word per field; otherwise an error is
// returned and s is left unmodified. Bytes beyond the last known field are
// ignored, which allows for future kernel additions.
func (s *rtnlLinkStats64) decode(b []byte) error {
	const wordSize = 8

	// fields lists the destination of each counter in wire order.
	fields := [...]*uint64{
		&s.rxPackets,
		&s.txPackets,
		&s.rxBytes,
		&s.txBytes,
		&s.rxErrors,
		&s.txErrors,
		&s.rxDropped,
		&s.txDropped,
		&s.multicast,
		&s.collisions,
		&s.rxLengthErrors,
		&s.rxOverErrors,
		&s.rxCRCErrors,
		&s.rxFrameErrors,
		&s.rxFIFOErrors,
		&s.rxMissedErrors,
		&s.txAbortedErrors,
		&s.txCarrierErrors,
		&s.txFIFOErrors,
		&s.txHeartbeatErrors,
		&s.txWindowErrors,
		&s.rxCompressed,
		&s.txCompressed,
		&s.rxNoHandler,
	}

	minSize := len(fields) * wordSize
	if len(b) < minSize {
		return fmt.Errorf("rtnlLinkStats64.decode: buffer too short: got %d bytes, want at least %d", len(b), minSize)
	}
	for idx, dst := range fields {
		*dst = binary.NativeEndian.Uint64(b[idx*wordSize:])
	}
	return nil
}
|
|
|