From fa13f83375680d223d411604b7360b0325936b19 Mon Sep 17 00:00:00 2001 From: James Tucker Date: Sat, 28 Feb 2026 15:53:09 -0800 Subject: [PATCH] tsnet: fix deadlock in Server.Close during shutdown Server.Close held s.mu for the entire shutdown duration, including netstack.Close (which waits for gVisor goroutines to exit) and lb.Shutdown. gVisor callbacks like getTCPHandlerForFlow acquire s.mu via listenerForDstAddr, so any in-flight gVisor goroutine attempting that callback during stack shutdown would deadlock with Close. Replace the mu-guarded closed bool with a sync.Once, and release s.mu after closing listeners but before the heavy shutdown operations. Also cancel shutdownCtx before netstack.Close so pending handlers observe cancellation rather than contending on the lock. Updates #18423 Signed-off-by: James Tucker --- tsnet/tsnet.go | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/tsnet/tsnet.go b/tsnet/tsnet.go index ccea22d16..416c90750 100644 --- a/tsnet/tsnet.go +++ b/tsnet/tsnet.go @@ -198,7 +198,7 @@ type Server struct { listeners map[listenKey]*listener fallbackTCPHandlers set.HandleSet[FallbackTCPHandler] dialer *tsdial.Dialer - closed bool + closeOnce sync.Once } // FallbackTCPHandler describes the callback which @@ -439,11 +439,29 @@ func (s *Server) Up(ctx context.Context) (*ipnstate.Status, error) { // // It must not be called before or concurrently with Start. func (s *Server) Close() error { - s.mu.Lock() - defer s.mu.Unlock() - if s.closed { + didClose := false + s.closeOnce.Do(func() { + didClose = true + s.close() + }) + if !didClose { return fmt.Errorf("tsnet: %w", net.ErrClosed) } + return nil +} + +func (s *Server) close() { + // Close listeners under s.mu, then release before the heavy shutdown + // operations. We must not hold s.mu during netstack.Close, lb.Shutdown, + // etc. because callbacks from gVisor (e.g. getTCPHandlerForFlow) + // acquire s.mu, and waiting for those goroutines while holding the lock + // would deadlock. + s.mu.Lock() + for _, ln := range s.listeners { + ln.closeLocked() + } + s.mu.Unlock() + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() var wg sync.WaitGroup @@ -466,13 +484,12 @@ func (s *Server) Close() error { } }() - if s.netstack != nil { - s.netstack.Close() - s.netstack = nil - } if s.shutdownCancel != nil { s.shutdownCancel() } + if s.netstack != nil { + s.netstack.Close() + } if s.lb != nil { s.lb.Shutdown() } @@ -489,13 +506,8 @@ func (s *Server) Close() error { s.loopbackListener.Close() } - for _, ln := range s.listeners { - ln.closeLocked() - } wg.Wait() s.sys.Bus.Get().Close() - s.closed = true - return nil } func (s *Server) doInit() {