@ -22,12 +22,47 @@ import (
"tailscale.com/tailcfg"
"tailscale.com/types/key"
"tailscale.com/types/netmap"
"tailscale.com/util/clientmetric"
"tailscale.com/wgengine/filter"
"tailscale.com/wgengine/router"
"tailscale.com/wgengine/wgcfg"
"tailscale.com/wgengine/wgint"
)
type watchdogEvent string
const (
Any watchdogEvent = "Any"
Reconfig watchdogEvent = "Reconfig"
ResetAndStop watchdogEvent = "ResetAndStop"
SetFilter watchdogEvent = "SetFilter"
SetJailedFilter watchdogEvent = "SetJailedFilter"
SetStatusCallback watchdogEvent = "SetStatusCallback"
UpdateStatus watchdogEvent = "UpdateStatus"
RequestStatus watchdogEvent = "RequestStatus"
SetNetworkMap watchdogEvent = "SetNetworkMap"
Ping watchdogEvent = "Ping"
Close watchdogEvent = "Close"
PeerForIPEvent watchdogEvent = "PeerForIP"
)
var (
watchdogMetrics = map [ watchdogEvent ] * clientmetric . Metric {
Any : clientmetric . NewCounter ( "watchdog_timeout_any_total" ) ,
Reconfig : clientmetric . NewCounter ( "watchdog_timeout_reconfig" ) ,
ResetAndStop : clientmetric . NewCounter ( "watchdog_timeout_resetandstop" ) ,
SetFilter : clientmetric . NewCounter ( "watchdog_timeout_setfilter" ) ,
SetJailedFilter : clientmetric . NewCounter ( "watchdog_timeout_setjailedfilter" ) ,
SetStatusCallback : clientmetric . NewCounter ( "watchdog_timeout_setstatuscallback" ) ,
UpdateStatus : clientmetric . NewCounter ( "watchdog_timeout_updatestatus" ) ,
RequestStatus : clientmetric . NewCounter ( "watchdog_timeout_requeststatus" ) ,
SetNetworkMap : clientmetric . NewCounter ( "watchdog_timeout_setnetworkmap" ) ,
Ping : clientmetric . NewCounter ( "watchdog_timeout_ping" ) ,
Close : clientmetric . NewCounter ( "watchdog_timeout_close" ) ,
PeerForIPEvent : clientmetric . NewCounter ( "watchdog_timeout_peerforipevent" ) ,
}
)
// NewWatchdog wraps an Engine and makes sure that all methods complete
// within a reasonable amount of time.
//
@ -46,7 +81,7 @@ func NewWatchdog(e Engine) Engine {
}
type inFlightKey struct {
op string
op watchdogEvent
ctr uint64
}
@ -62,12 +97,13 @@ type watchdogEngine struct {
inFlightCtr uint64
}
func ( e * watchdogEngine ) watchdogErr ( name string , fn func ( ) error ) error {
func ( e * watchdogEngine ) watchdogErr ( event watchdogEvent , fn func ( ) error ) error {
// Track all in-flight operations so we can print more useful error
// messages on watchdog failure
e . inFlightMu . Lock ( )
key := inFlightKey {
op : name ,
op : event ,
ctr : e . inFlightCtr ,
}
e . inFlightCtr ++
@ -93,7 +129,6 @@ func (e *watchdogEngine) watchdogErr(name string, fn func() error) error {
buf := new ( strings . Builder )
pprof . Lookup ( "goroutine" ) . WriteTo ( buf , 1 )
e . logf ( "wgengine watchdog stacks:\n%s" , buf . String ( ) )
// Collect the list of in-flight operations for debugging.
var (
b [ ] byte
@ -104,64 +139,92 @@ func (e *watchdogEngine) watchdogErr(name string, fn func() error) error {
dur := now . Sub ( t ) . Round ( time . Millisecond )
b = fmt . Appendf ( b , "in-flight[%d]: name=%s duration=%v start=%s\n" , k . ctr , k . op , dur , t . Format ( time . RFC3339Nano ) )
}
e . recordEvent ( event )
e . inFlightMu . Unlock ( )
// Print everything as a single string to avoid log
// rate limits.
e . logf ( "wgengine watchdog in-flight:\n%s" , b )
e . fatalf ( "wgengine: watchdog timeout on %s" , name )
e . fatalf ( "wgengine: watchdog timeout on %s" , event )
return nil
}
}
func ( e * watchdogEngine ) watchdog ( name string , fn func ( ) ) {
e . watchdogErr ( name , func ( ) error {
func ( e * watchdogEngine ) recordEvent ( event watchdogEvent ) {
if watchdogMetrics == nil {
return
}
mEvent , ok := watchdogMetrics [ event ]
if ok {
mEvent . Add ( 1 )
}
mAny , ok := watchdogMetrics [ Any ]
if ok {
mAny . Add ( 1 )
}
}
func ( e * watchdogEngine ) watchdog ( event watchdogEvent , fn func ( ) ) {
e . watchdogErr ( event , func ( ) error {
fn ( )
return nil
} )
}
func ( e * watchdogEngine ) Reconfig ( cfg * wgcfg . Config , routerCfg * router . Config , dnsCfg * dns . Config ) error {
return e . watchdogErr ( "Reconfig" , func ( ) error { return e . wrap . Reconfig ( cfg , routerCfg , dnsCfg ) } )
return e . watchdogErr ( Reconfig , func ( ) error { return e . wrap . Reconfig ( cfg , routerCfg , dnsCfg ) } )
}
func ( e * watchdogEngine ) ResetAndStop ( ) ( st * Status , err error ) {
e . watchdog ( "ResetAndStop" , func ( ) {
e . watchdog ( ResetAndStop , func ( ) {
st , err = e . wrap . ResetAndStop ( )
} )
return st , err
}
func ( e * watchdogEngine ) GetFilter ( ) * filter . Filter {
return e . wrap . GetFilter ( )
}
func ( e * watchdogEngine ) SetFilter ( filt * filter . Filter ) {
e . watchdog ( "SetFilter" , func ( ) { e . wrap . SetFilter ( filt ) } )
e . watchdog ( SetFilter , func ( ) { e . wrap . SetFilter ( filt ) } )
}
func ( e * watchdogEngine ) GetJailedFilter ( ) * filter . Filter {
return e . wrap . GetJailedFilter ( )
}
func ( e * watchdogEngine ) SetJailedFilter ( filt * filter . Filter ) {
e . watchdog ( "SetJailedFilter" , func ( ) { e . wrap . SetJailedFilter ( filt ) } )
e . watchdog ( SetJailedFilter , func ( ) { e . wrap . SetJailedFilter ( filt ) } )
}
func ( e * watchdogEngine ) SetStatusCallback ( cb StatusCallback ) {
e . watchdog ( "SetStatusCallback" , func ( ) { e . wrap . SetStatusCallback ( cb ) } )
e . watchdog ( SetStatusCallback , func ( ) { e . wrap . SetStatusCallback ( cb ) } )
}
func ( e * watchdogEngine ) UpdateStatus ( sb * ipnstate . StatusBuilder ) {
e . watchdog ( "UpdateStatus" , func ( ) { e . wrap . UpdateStatus ( sb ) } )
e . watchdog ( UpdateStatus , func ( ) { e . wrap . UpdateStatus ( sb ) } )
}
func ( e * watchdogEngine ) RequestStatus ( ) {
e . watchdog ( "RequestStatus" , func ( ) { e . wrap . RequestStatus ( ) } )
e . watchdog ( RequestStatus , func ( ) { e . wrap . RequestStatus ( ) } )
}
func ( e * watchdogEngine ) SetNetworkMap ( nm * netmap . NetworkMap ) {
e . watchdog ( "SetNetworkMap" , func ( ) { e . wrap . SetNetworkMap ( nm ) } )
e . watchdog ( SetNetworkMap , func ( ) { e . wrap . SetNetworkMap ( nm ) } )
}
func ( e * watchdogEngine ) Ping ( ip netip . Addr , pingType tailcfg . PingType , size int , cb func ( * ipnstate . PingResult ) ) {
e . watchdog ( "Ping" , func ( ) { e . wrap . Ping ( ip , pingType , size , cb ) } )
e . watchdog ( Ping , func ( ) { e . wrap . Ping ( ip , pingType , size , cb ) } )
}
func ( e * watchdogEngine ) Close ( ) {
e . watchdog ( "Close" , e . wrap . Close )
e . watchdog ( Close , e . wrap . Close )
}
func ( e * watchdogEngine ) PeerForIP ( ip netip . Addr ) ( ret PeerForIP , ok bool ) {
e . watchdog ( "PeerForIP" , func ( ) { ret , ok = e . wrap . PeerForIP ( ip ) } )
e . watchdog ( PeerForIPEvent , func ( ) { ret , ok = e . wrap . PeerForIP ( ip ) } )
return ret , ok
}