[client] Avoid deadlock when auto connect and early exit (#2528)

Maycon Santos 2024-09-04 19:22:33 +02:00 committed by GitHub
parent 1ff7a953a0
commit c52b406afa
2 changed files with 28 additions and 18 deletions
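For context on the title: before this change the daemon waited for engine start-up with a sync.WaitGroup, and runningWg.Done() was only called once the engine reached the connected state. If the client exited early instead (for example on a permission-denied login error), Done() was never called and the waiter blocked forever on runningWg.Wait(). The sketch below is illustrative only, not NetBird code (startEngine and readyCh are made-up names); it shows the replacement pattern this commit adopts: the worker reports success or failure on a chan error, and the waiter selects on that channel and on context cancellation, so neither an early exit nor a cancelled caller can wedge it.

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// startEngine stands in for the real engine start-up. Instead of calling
// WaitGroup.Done, it reports the outcome on readyCh, so the waiter is woken
// up on failure as well as on success.
func startEngine(ctx context.Context, readyCh chan error) {
	select {
	case <-time.After(100 * time.Millisecond): // pretend start-up work
	case <-ctx.Done():
		return
	}
	// Simulate an early, unrecoverable exit (e.g. permission denied).
	// With a WaitGroup, this path would simply never call Done().
	readyCh <- errors.New("login required")
	close(readyCh)
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	readyCh := make(chan error)
	go startEngine(ctx, readyCh)

	// The waiter always makes progress: engine ready, engine failed,
	// or the caller's context expired.
	select {
	case err := <-readyCh:
		if err != nil {
			fmt.Println("engine failed to start:", err)
			return
		}
		fmt.Println("engine is ready")
	case <-ctx.Done():
		fmt.Println("gave up waiting:", ctx.Err())
	}
}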

Changed file 1 of 2

@@ -61,9 +61,9 @@ func (c *ConnectClient) Run() error {
 // RunWithProbes runs the client's main logic with probes attached
 func (c *ConnectClient) RunWithProbes(
 	probes *ProbeHolder,
-	runningWg *sync.WaitGroup,
+	runningChan chan error,
 ) error {
-	return c.run(MobileDependency{}, probes, runningWg)
+	return c.run(MobileDependency{}, probes, runningChan)
 }
 
 // RunOnAndroid with main logic on mobile system
@@ -104,7 +104,7 @@ func (c *ConnectClient) RunOniOS(
 func (c *ConnectClient) run(
 	mobileDependency MobileDependency,
 	probes *ProbeHolder,
-	runningWg *sync.WaitGroup,
+	runningChan chan error,
 ) error {
 	defer func() {
 		if r := recover(); r != nil {
@@ -195,6 +195,7 @@ func (c *ConnectClient) run(
 		log.Debug(err)
 		if s, ok := gstatus.FromError(err); ok && (s.Code() == codes.PermissionDenied) {
 			state.Set(StatusNeedsLogin)
+			_ = c.Stop()
 			return backoff.Permanent(wrapErr(err)) // unrecoverable error
 		}
 		return wrapErr(err)
@@ -263,8 +264,9 @@ func (c *ConnectClient) run(
 	log.Infof("Netbird engine started, the IP is: %s", peerConfig.GetAddress())
 	state.Set(StatusConnected)
 
-	if runningWg != nil {
-		runningWg.Done()
+	if runningChan != nil {
+		runningChan <- nil
+		close(runningChan)
 	}
 
 	<-engineCtx.Done()
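The send-then-close above gives the readiness signal one-shot semantics: the first receiver gets the explicit nil "engine is up" value, and because the channel is closed right after, any later receive returns the zero value immediately instead of blocking. A tiny standalone illustration of that property (toy code, not from the repository):

package main

import "fmt"

func main() {
	ch := make(chan error)
	go func() {
		ch <- nil // first receive gets the explicit "ready" signal
		close(ch) // every later receive returns the zero value without blocking
	}()
	fmt.Println(<-ch) // <nil>, the sent value
	fmt.Println(<-ch) // <nil>, zero value from the closed channel
}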
@@ -287,6 +289,7 @@ func (c *ConnectClient) run(
 		log.Debugf("exiting client retry loop due to unrecoverable error: %s", err)
 		if s, ok := gstatus.FromError(err); ok && (s.Code() == codes.PermissionDenied) {
 			state.Set(StatusNeedsLogin)
+			_ = c.Stop()
 		}
 		return err
 	}

Changed file 2 of 2

@@ -142,13 +142,11 @@ func (s *Server) Start() error {
 		s.sessionWatcher.SetOnExpireListener(s.onSessionExpire)
 	}
 
-	runningWg := sync.WaitGroup{}
-	runningWg.Add(1)
-	if !config.DisableAutoConnect {
-		go s.connectWithRetryRuns(ctx, config, s.statusRecorder, &runningWg)
+	if config.DisableAutoConnect {
+		return nil
 	}
-	runningWg.Wait()
 
+	go s.connectWithRetryRuns(ctx, config, s.statusRecorder, nil)
 
 	return nil
 }
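With auto-connect disabled, Start() now returns before launching anything; with it enabled, connectWithRetryRuns is started with a nil channel because nothing in Start() waits for readiness. That is only safe because the send site shown earlier is guarded by if runningChan != nil: in Go, a send on a nil channel blocks forever. A small demonstration of the guard, with assumed names rather than repository code:

package main

import "fmt"

// notifyReady reports readiness only when a listener registered a channel;
// an unguarded send on a nil channel would block this goroutine forever.
func notifyReady(readyCh chan error) {
	if readyCh != nil {
		readyCh <- nil
		close(readyCh)
	}
}

func main() {
	// Fire-and-forget path (like Server.Start): no listener, nothing blocks.
	notifyReady(nil)
	fmt.Println("start path returned immediately")

	// Waiting path (like Server.Up): the caller receives the signal.
	readyCh := make(chan error)
	go notifyReady(readyCh)
	fmt.Println("up path received:", <-readyCh)
}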
@@ -157,7 +155,7 @@ func (s *Server) Start() error {
 // mechanism to keep the client connected even when the connection is lost.
 // we cancel retry if the client receive a stop or down command, or if disable auto connect is configured.
 func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Config, statusRecorder *peer.Status,
-	runningWg *sync.WaitGroup,
+	runningChan chan error,
 ) {
 	backOff := getConnectWithBackoff(ctx)
 	retryStarted := false
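The comment above states the retry contract: keep reconnecting with backoff until the context is cancelled (a stop/down command, or auto-connect disabled), and give up permanently on unrecoverable errors such as permission denied. A condensed sketch of that shape using the github.com/cenkalti/backoff/v4 package (the backoff.Permanent call in the client hunk above suggests this family of API, though the exact import is not shown here); connectOnce and errNeedsLogin are hypothetical stand-ins:

package main

import (
	"context"
	"errors"
	"log"

	"github.com/cenkalti/backoff/v4"
)

var errNeedsLogin = errors.New("permission denied: needs login")

// connectOnce stands in for one connection attempt of the real client.
func connectOnce() error { return errNeedsLogin }

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Exponential backoff that stops as soon as ctx is cancelled
	// (e.g. a stop/down command received by the daemon).
	bo := backoff.WithContext(backoff.NewExponentialBackOff(), ctx)

	err := backoff.Retry(func() error {
		if err := connectOnce(); err != nil {
			if errors.Is(err, errNeedsLogin) {
				// Unrecoverable: stop retrying instead of looping forever.
				return backoff.Permanent(err)
			}
			return err // transient: retry after the next backoff interval
		}
		return nil
	}, bo)
	if err != nil {
		log.Printf("client retry loop exited: %v", err)
	}
}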
@@ -196,7 +194,7 @@ func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Conf
 			WgProbe: s.wgProbe,
 		}
 
-		err := s.connectClient.RunWithProbes(&probes, runningWg)
+		err := s.connectClient.RunWithProbes(&probes, runningChan)
 		if err != nil {
 			log.Debugf("run client connection exited with error: %v. Will retry in the background", err)
 		}
@@ -587,13 +585,22 @@ func (s *Server) Up(callerCtx context.Context, _ *proto.UpRequest) (*proto.UpRes
 	s.statusRecorder.UpdateManagementAddress(s.config.ManagementURL.String())
 	s.statusRecorder.UpdateRosenpass(s.config.RosenpassEnabled, s.config.RosenpassPermissive)
 
-	runningWg := sync.WaitGroup{}
-	runningWg.Add(1)
-	go s.connectWithRetryRuns(ctx, s.config, s.statusRecorder, &runningWg)
+	runningChan := make(chan error)
+	go s.connectWithRetryRuns(ctx, s.config, s.statusRecorder, runningChan)
 
-	runningWg.Wait()
-	return &proto.UpResponse{}, nil
+	for {
+		select {
+		case err := <-runningChan:
+			if err != nil {
+				log.Debugf("waiting for engine to become ready failed: %s", err)
+			} else {
+				return &proto.UpResponse{}, nil
+			}
+		case <-callerCtx.Done():
+			log.Debug("context done, stopping the wait for engine to become ready")
+			return nil, callerCtx.Err()
+		}
+	}
 }
 
 // Down engine work in the daemon.
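The reworked Up() changes the handler's blocking behaviour in two ways: a failed start attempt no longer ends the wait (the loop keeps listening so a later retry can still succeed), and cancellation of the caller's gRPC context now unblocks the handler, whereas runningWg.Wait() could previously park it indefinitely. A compact sketch of that wait-until-ready-or-cancelled loop, with hypothetical names:

package main

import (
	"context"
	"log"
)

// waitForEngine blocks until the engine reports a successful start or the
// caller gives up. Failed attempts are logged and the wait continues, which
// mirrors how the retry loop keeps feeding resultCh until a start succeeds.
func waitForEngine(ctx context.Context, resultCh <-chan error) error {
	for {
		select {
		case err := <-resultCh:
			if err != nil {
				log.Printf("start attempt failed, waiting for retry: %v", err)
				continue
			}
			return nil // engine is up
		case <-ctx.Done():
			return ctx.Err() // caller cancelled or timed out
		}
	}
}

func main() {
	resultCh := make(chan error, 1)
	resultCh <- nil // pretend the first attempt succeeded
	if err := waitForEngine(context.Background(), resultCh); err == nil {
		log.Println("engine ready")
	}
}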