net/udprelay: add tailscaled_peer_relay_endpoints gauge (#18265)
The new gauge reflects endpoint state via labels: - open, when both peers are connected and ready to talk, and - connecting, when at least one peer hasn't connected yet. Corresponding client metrics are logged as - udprelay_endpoints_connecting - udprelay_endpoints_open Updates tailscale/corp#30820 Change-Id: Idb1baa90a38c97847e14f9b2390093262ad0ea23 Signed-off-by: Alex Valiushko <alexvaliushko@tailscale.com>
This commit is contained in:
+71
-21
@@ -122,6 +122,7 @@ type serverEndpoint struct {
|
||||
allocatedAt mono.Time
|
||||
|
||||
mu sync.Mutex // guards the following fields
|
||||
closed bool // signals that no new data should be accepted
|
||||
inProgressGeneration [2]uint32 // or zero if a handshake has never started, or has just completed
|
||||
boundAddrPorts [2]netip.AddrPort // or zero value if a handshake has never completed for that relay leg
|
||||
lastSeen [2]mono.Time
|
||||
@@ -151,9 +152,15 @@ func blakeMACFromBindMsg(blakeKey [blake2s.Size]byte, src netip.AddrPort, msg di
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex int, discoMsg disco.Message, serverDisco key.DiscoPublic, macSecrets views.Slice[[blake2s.Size]byte], now mono.Time) (write []byte, to netip.AddrPort) {
|
||||
func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex int, discoMsg disco.Message, serverDisco key.DiscoPublic, macSecrets views.Slice[[blake2s.Size]byte], now mono.Time, m endpointUpdater) (write []byte, to netip.AddrPort) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
lastState := e.stateLocked()
|
||||
|
||||
if lastState == endpointClosed {
|
||||
// endpoint was closed in [Server.endpointGC]
|
||||
return nil, netip.AddrPort{}
|
||||
}
|
||||
|
||||
if senderIndex != 0 && senderIndex != 1 {
|
||||
return nil, netip.AddrPort{}
|
||||
@@ -230,6 +237,7 @@ func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex
|
||||
if bytes.Equal(mac[:], discoMsg.Challenge[:]) {
|
||||
// Handshake complete. Update the binding for this sender.
|
||||
e.boundAddrPorts[senderIndex] = from
|
||||
m.updateEndpoint(lastState, e.stateLocked())
|
||||
e.lastSeen[senderIndex] = now // record last seen as bound time
|
||||
e.inProgressGeneration[senderIndex] = 0 // reset to zero, which indicates there is no in-progress handshake
|
||||
return nil, netip.AddrPort{}
|
||||
@@ -243,7 +251,7 @@ func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex
|
||||
}
|
||||
}
|
||||
|
||||
func (e *serverEndpoint) handleSealedDiscoControlMsg(from netip.AddrPort, b []byte, serverDisco key.DiscoPublic, macSecrets views.Slice[[blake2s.Size]byte], now mono.Time) (write []byte, to netip.AddrPort) {
|
||||
func (e *serverEndpoint) handleSealedDiscoControlMsg(from netip.AddrPort, b []byte, serverDisco key.DiscoPublic, macSecrets views.Slice[[blake2s.Size]byte], now mono.Time, m endpointUpdater) (write []byte, to netip.AddrPort) {
|
||||
senderRaw, isDiscoMsg := disco.Source(b)
|
||||
if !isDiscoMsg {
|
||||
// Not a Disco message
|
||||
@@ -274,7 +282,7 @@ func (e *serverEndpoint) handleSealedDiscoControlMsg(from netip.AddrPort, b []by
|
||||
return nil, netip.AddrPort{}
|
||||
}
|
||||
|
||||
return e.handleDiscoControlMsg(from, senderIndex, discoMsg, serverDisco, macSecrets, now)
|
||||
return e.handleDiscoControlMsg(from, senderIndex, discoMsg, serverDisco, macSecrets, now, m)
|
||||
}
|
||||
|
||||
func (e *serverEndpoint) handleDataPacket(from netip.AddrPort, b []byte, now mono.Time) (write []byte, to netip.AddrPort) {
|
||||
@@ -284,6 +292,10 @@ func (e *serverEndpoint) handleDataPacket(from netip.AddrPort, b []byte, now mon
|
||||
// not a control packet, but serverEndpoint isn't bound
|
||||
return nil, netip.AddrPort{}
|
||||
}
|
||||
if e.stateLocked() == endpointClosed {
|
||||
// endpoint was closed in [Server.endpointGC]
|
||||
return nil, netip.AddrPort{}
|
||||
}
|
||||
switch {
|
||||
case from == e.boundAddrPorts[0]:
|
||||
e.lastSeen[0] = now
|
||||
@@ -301,9 +313,21 @@ func (e *serverEndpoint) handleDataPacket(from netip.AddrPort, b []byte, now mon
|
||||
}
|
||||
}
|
||||
|
||||
func (e *serverEndpoint) isExpired(now mono.Time, bindLifetime, steadyStateLifetime time.Duration) bool {
|
||||
// maybeExpire checks if the endpoint has expired according to the provided timeouts and sets its closed state accordingly.
|
||||
// True is returned if the endpoint was expired and closed.
|
||||
func (e *serverEndpoint) maybeExpire(now mono.Time, bindLifetime, steadyStateLifetime time.Duration, m endpointUpdater) bool {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
before := e.stateLocked()
|
||||
if e.isExpiredLocked(now, bindLifetime, steadyStateLifetime) {
|
||||
e.closed = true
|
||||
m.updateEndpoint(before, e.stateLocked())
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (e *serverEndpoint) isExpiredLocked(now mono.Time, bindLifetime, steadyStateLifetime time.Duration) bool {
|
||||
if !e.isBoundLocked() {
|
||||
if now.Sub(e.allocatedAt) > bindLifetime {
|
||||
return true
|
||||
@@ -323,6 +347,31 @@ func (e *serverEndpoint) isBoundLocked() bool {
|
||||
e.boundAddrPorts[1].IsValid()
|
||||
}
|
||||
|
||||
// stateLocked returns current endpointState according to the
|
||||
// peers handshake status.
|
||||
func (e *serverEndpoint) stateLocked() endpointState {
|
||||
switch {
|
||||
case e == nil, e.closed:
|
||||
return endpointClosed
|
||||
case e.boundAddrPorts[0].IsValid() && e.boundAddrPorts[1].IsValid():
|
||||
return endpointOpen
|
||||
default:
|
||||
return endpointConnecting
|
||||
}
|
||||
}
|
||||
|
||||
// endpointState is the canonical name of an endpoint's state, as computed by
// [serverEndpoint.stateLocked].
//
// It is declared as a string enum because usermetrics can't handle Stringer.
type endpointState string

const (
	// endpointClosed: unallocated, not tracked in metrics.
	endpointClosed endpointState = "closed"

	// endpointConnecting: at least one peer has not completed handshake.
	endpointConnecting endpointState = "connecting"

	// endpointOpen: ready to forward.
	endpointOpen endpointState = "open"
)
|
||||
|
||||
// NewServer constructs a [Server] listening on port. If port is zero, then
|
||||
// port selection is left up to the host networking stack. If
|
||||
// onlyStaticAddrPorts is true, then dynamic addr:port discovery will be
|
||||
@@ -703,33 +752,33 @@ func (s *Server) Close() error {
|
||||
clear(s.serverEndpointByDisco)
|
||||
s.closed = true
|
||||
s.bus.Close()
|
||||
deregisterMetrics()
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Server) endpointGC(bindLifetime, steadyStateLifetime time.Duration) {
|
||||
now := mono.Now()
|
||||
// TODO: consider performance implications of scanning all endpoints and
|
||||
// holding s.mu for the duration. Keep it simple (and slow) for now.
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
for k, v := range s.serverEndpointByDisco {
|
||||
if v.maybeExpire(now, bindLifetime, steadyStateLifetime, s.metrics) {
|
||||
delete(s.serverEndpointByDisco, k)
|
||||
s.serverEndpointByVNI.Delete(v.vni)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) endpointGCLoop() {
|
||||
defer s.wg.Done()
|
||||
ticker := time.NewTicker(s.bindLifetime)
|
||||
defer ticker.Stop()
|
||||
|
||||
gc := func() {
|
||||
now := mono.Now()
|
||||
// TODO: consider performance implications of scanning all endpoints and
|
||||
// holding s.mu for the duration. Keep it simple (and slow) for now.
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
for k, v := range s.serverEndpointByDisco {
|
||||
if v.isExpired(now, s.bindLifetime, s.steadyStateLifetime) {
|
||||
delete(s.serverEndpointByDisco, k)
|
||||
s.serverEndpointByVNI.Delete(v.vni)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
gc()
|
||||
s.endpointGC(s.bindLifetime, s.steadyStateLifetime)
|
||||
case <-s.closeCh:
|
||||
return
|
||||
}
|
||||
@@ -773,7 +822,7 @@ func (s *Server) handlePacket(from netip.AddrPort, b []byte) (write []byte, to n
|
||||
}
|
||||
msg := b[packet.GeneveFixedHeaderLength:]
|
||||
secrets := s.getMACSecrets(now)
|
||||
write, to = e.(*serverEndpoint).handleSealedDiscoControlMsg(from, msg, s.discoPublic, secrets, now)
|
||||
write, to = e.(*serverEndpoint).handleSealedDiscoControlMsg(from, msg, s.discoPublic, secrets, now, s.metrics)
|
||||
isDataPacket = false
|
||||
return
|
||||
}
|
||||
@@ -1015,6 +1064,7 @@ func (s *Server) AllocateEndpoint(discoA, discoB key.DiscoPublic) (endpoint.Serv
|
||||
s.serverEndpointByVNI.Store(e.vni, e)
|
||||
|
||||
s.logf("allocated endpoint vni=%d lamportID=%d disco[0]=%v disco[1]=%v", e.vni, e.lamportID, pair.Get()[0].ShortString(), pair.Get()[1].ShortString())
|
||||
s.metrics.updateEndpoint(endpointClosed, endpointConnecting)
|
||||
return endpoint.ServerEndpoint{
|
||||
ServerDisco: s.discoPublic,
|
||||
ClientDisco: pair.Get(),
|
||||
|
||||
Reference in New Issue
Block a user