[client] Avoid deadlock when auto connect and early exit (#2528)

This commit is contained in:
Maycon Santos 2024-09-04 19:22:33 +02:00 committed by GitHub
parent 1ff7a953a0
commit c52b406afa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 28 additions and 18 deletions

View File

@ -61,9 +61,9 @@ func (c *ConnectClient) Run() error {
// RunWithProbes runs the client's main logic with probes attached // RunWithProbes runs the client's main logic with probes attached
func (c *ConnectClient) RunWithProbes( func (c *ConnectClient) RunWithProbes(
probes *ProbeHolder, probes *ProbeHolder,
runningWg *sync.WaitGroup, runningChan chan error,
) error { ) error {
return c.run(MobileDependency{}, probes, runningWg) return c.run(MobileDependency{}, probes, runningChan)
} }
// RunOnAndroid with main logic on mobile system // RunOnAndroid with main logic on mobile system
@ -104,7 +104,7 @@ func (c *ConnectClient) RunOniOS(
func (c *ConnectClient) run( func (c *ConnectClient) run(
mobileDependency MobileDependency, mobileDependency MobileDependency,
probes *ProbeHolder, probes *ProbeHolder,
runningWg *sync.WaitGroup, runningChan chan error,
) error { ) error {
defer func() { defer func() {
if r := recover(); r != nil { if r := recover(); r != nil {
@ -195,6 +195,7 @@ func (c *ConnectClient) run(
log.Debug(err) log.Debug(err)
if s, ok := gstatus.FromError(err); ok && (s.Code() == codes.PermissionDenied) { if s, ok := gstatus.FromError(err); ok && (s.Code() == codes.PermissionDenied) {
state.Set(StatusNeedsLogin) state.Set(StatusNeedsLogin)
_ = c.Stop()
return backoff.Permanent(wrapErr(err)) // unrecoverable error return backoff.Permanent(wrapErr(err)) // unrecoverable error
} }
return wrapErr(err) return wrapErr(err)
@ -263,8 +264,9 @@ func (c *ConnectClient) run(
log.Infof("Netbird engine started, the IP is: %s", peerConfig.GetAddress()) log.Infof("Netbird engine started, the IP is: %s", peerConfig.GetAddress())
state.Set(StatusConnected) state.Set(StatusConnected)
if runningWg != nil { if runningChan != nil {
runningWg.Done() runningChan <- nil
close(runningChan)
} }
<-engineCtx.Done() <-engineCtx.Done()
@ -287,6 +289,7 @@ func (c *ConnectClient) run(
log.Debugf("exiting client retry loop due to unrecoverable error: %s", err) log.Debugf("exiting client retry loop due to unrecoverable error: %s", err)
if s, ok := gstatus.FromError(err); ok && (s.Code() == codes.PermissionDenied) { if s, ok := gstatus.FromError(err); ok && (s.Code() == codes.PermissionDenied) {
state.Set(StatusNeedsLogin) state.Set(StatusNeedsLogin)
_ = c.Stop()
} }
return err return err
} }

View File

@ -142,13 +142,11 @@ func (s *Server) Start() error {
s.sessionWatcher.SetOnExpireListener(s.onSessionExpire) s.sessionWatcher.SetOnExpireListener(s.onSessionExpire)
} }
runningWg := sync.WaitGroup{} if config.DisableAutoConnect {
runningWg.Add(1) return nil
if !config.DisableAutoConnect {
go s.connectWithRetryRuns(ctx, config, s.statusRecorder, &runningWg)
} }
runningWg.Wait() go s.connectWithRetryRuns(ctx, config, s.statusRecorder, nil)
return nil return nil
} }
@ -157,7 +155,7 @@ func (s *Server) Start() error {
// mechanism to keep the client connected even when the connection is lost. // mechanism to keep the client connected even when the connection is lost.
// we cancel retry if the client receive a stop or down command, or if disable auto connect is configured. // we cancel retry if the client receive a stop or down command, or if disable auto connect is configured.
func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Config, statusRecorder *peer.Status, func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Config, statusRecorder *peer.Status,
runningWg *sync.WaitGroup, runningChan chan error,
) { ) {
backOff := getConnectWithBackoff(ctx) backOff := getConnectWithBackoff(ctx)
retryStarted := false retryStarted := false
@ -196,7 +194,7 @@ func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Conf
WgProbe: s.wgProbe, WgProbe: s.wgProbe,
} }
err := s.connectClient.RunWithProbes(&probes, runningWg) err := s.connectClient.RunWithProbes(&probes, runningChan)
if err != nil { if err != nil {
log.Debugf("run client connection exited with error: %v. Will retry in the background", err) log.Debugf("run client connection exited with error: %v. Will retry in the background", err)
} }
@ -587,13 +585,22 @@ func (s *Server) Up(callerCtx context.Context, _ *proto.UpRequest) (*proto.UpRes
s.statusRecorder.UpdateManagementAddress(s.config.ManagementURL.String()) s.statusRecorder.UpdateManagementAddress(s.config.ManagementURL.String())
s.statusRecorder.UpdateRosenpass(s.config.RosenpassEnabled, s.config.RosenpassPermissive) s.statusRecorder.UpdateRosenpass(s.config.RosenpassEnabled, s.config.RosenpassPermissive)
runningWg := sync.WaitGroup{} runningChan := make(chan error)
runningWg.Add(1) go s.connectWithRetryRuns(ctx, s.config, s.statusRecorder, runningChan)
go s.connectWithRetryRuns(ctx, s.config, s.statusRecorder, &runningWg)
runningWg.Wait() for {
select {
return &proto.UpResponse{}, nil case err := <-runningChan:
if err != nil {
log.Debugf("waiting for engine to become ready failed: %s", err)
} else {
return &proto.UpResponse{}, nil
}
case <-callerCtx.Done():
log.Debug("context done, stopping the wait for engine to become ready")
return nil, callerCtx.Err()
}
}
} }
// Down engine work in the daemon. // Down engine work in the daemon.