From c52b406afa192daf3e680cf60dcd620efe942903 Mon Sep 17 00:00:00 2001 From: Maycon Santos Date: Wed, 4 Sep 2024 19:22:33 +0200 Subject: [PATCH] [client] Avoid deadlock when auto connect and early exit (#2528) --- client/internal/connect.go | 13 ++++++++----- client/server/server.go | 33 ++++++++++++++++++++------------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/client/internal/connect.go b/client/internal/connect.go index 62fd3c61d..6e1994f96 100644 --- a/client/internal/connect.go +++ b/client/internal/connect.go @@ -61,9 +61,9 @@ func (c *ConnectClient) Run() error { // RunWithProbes runs the client's main logic with probes attached func (c *ConnectClient) RunWithProbes( probes *ProbeHolder, - runningWg *sync.WaitGroup, + runningChan chan error, ) error { - return c.run(MobileDependency{}, probes, runningWg) + return c.run(MobileDependency{}, probes, runningChan) } // RunOnAndroid with main logic on mobile system @@ -104,7 +104,7 @@ func (c *ConnectClient) RunOniOS( func (c *ConnectClient) run( mobileDependency MobileDependency, probes *ProbeHolder, - runningWg *sync.WaitGroup, + runningChan chan error, ) error { defer func() { if r := recover(); r != nil { @@ -195,6 +195,7 @@ func (c *ConnectClient) run( log.Debug(err) if s, ok := gstatus.FromError(err); ok && (s.Code() == codes.PermissionDenied) { state.Set(StatusNeedsLogin) + _ = c.Stop() return backoff.Permanent(wrapErr(err)) // unrecoverable error } return wrapErr(err) @@ -263,8 +264,9 @@ func (c *ConnectClient) run( log.Infof("Netbird engine started, the IP is: %s", peerConfig.GetAddress()) state.Set(StatusConnected) - if runningWg != nil { - runningWg.Done() + if runningChan != nil { + runningChan <- nil + close(runningChan) } <-engineCtx.Done() @@ -287,6 +289,7 @@ func (c *ConnectClient) run( log.Debugf("exiting client retry loop due to unrecoverable error: %s", err) if s, ok := gstatus.FromError(err); ok && (s.Code() == codes.PermissionDenied) { state.Set(StatusNeedsLogin) + _ = c.Stop() } return err } diff --git a/client/server/server.go b/client/server/server.go index ce6e90864..d8d32e1ce 100644 --- a/client/server/server.go +++ b/client/server/server.go @@ -142,13 +142,11 @@ func (s *Server) Start() error { s.sessionWatcher.SetOnExpireListener(s.onSessionExpire) } - runningWg := sync.WaitGroup{} - runningWg.Add(1) - if !config.DisableAutoConnect { - go s.connectWithRetryRuns(ctx, config, s.statusRecorder, &runningWg) + if config.DisableAutoConnect { + return nil } - runningWg.Wait() + go s.connectWithRetryRuns(ctx, config, s.statusRecorder, nil) return nil } @@ -157,7 +155,7 @@ func (s *Server) Start() error { // mechanism to keep the client connected even when the connection is lost. // we cancel retry if the client receive a stop or down command, or if disable auto connect is configured. func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Config, statusRecorder *peer.Status, - runningWg *sync.WaitGroup, + runningChan chan error, ) { backOff := getConnectWithBackoff(ctx) retryStarted := false @@ -196,7 +194,7 @@ func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Conf WgProbe: s.wgProbe, } - err := s.connectClient.RunWithProbes(&probes, runningWg) + err := s.connectClient.RunWithProbes(&probes, runningChan) if err != nil { log.Debugf("run client connection exited with error: %v. Will retry in the background", err) } @@ -587,13 +585,22 @@ func (s *Server) Up(callerCtx context.Context, _ *proto.UpRequest) (*proto.UpRes s.statusRecorder.UpdateManagementAddress(s.config.ManagementURL.String()) s.statusRecorder.UpdateRosenpass(s.config.RosenpassEnabled, s.config.RosenpassPermissive) - runningWg := sync.WaitGroup{} - runningWg.Add(1) - go s.connectWithRetryRuns(ctx, s.config, s.statusRecorder, &runningWg) + runningChan := make(chan error) + go s.connectWithRetryRuns(ctx, s.config, s.statusRecorder, runningChan) - runningWg.Wait() - - return &proto.UpResponse{}, nil + for { + select { + case err := <-runningChan: + if err != nil { + log.Debugf("waiting for engine to become ready failed: %s", err) + } else { + return &proto.UpResponse{}, nil + } + case <-callerCtx.Done(): + log.Debug("context done, stopping the wait for engine to become ready") + return nil, callerCtx.Err() + } + } } // Down engine work in the daemon.