mirror of
https://github.com/netbirdio/netbird.git
synced 2025-08-15 01:32:56 +02:00
[client] Destory WG interface on down timeout (#2435)
wait on engine down to not only wait for the interface to be down but completely removed. If the waiting loop reaches the timeout we will trigger an interface destroy. On the up command, it now waits until the engine is fully running before sending the response to the CLI. Includes a small refactor of probes to comply with sonar rules about parameter count in the function call
This commit is contained in:
@ -12,7 +12,6 @@ import (
|
||||
|
||||
"github.com/cenkalti/backoff/v4"
|
||||
"golang.org/x/exp/maps"
|
||||
|
||||
"google.golang.org/protobuf/types/known/durationpb"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
@ -143,10 +142,14 @@ func (s *Server) Start() error {
|
||||
s.sessionWatcher.SetOnExpireListener(s.onSessionExpire)
|
||||
}
|
||||
|
||||
runningWg := sync.WaitGroup{}
|
||||
runningWg.Add(1)
|
||||
if !config.DisableAutoConnect {
|
||||
go s.connectWithRetryRuns(ctx, config, s.statusRecorder, s.mgmProbe, s.signalProbe, s.relayProbe, s.wgProbe)
|
||||
go s.connectWithRetryRuns(ctx, config, s.statusRecorder, &runningWg)
|
||||
}
|
||||
|
||||
runningWg.Wait()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -154,7 +157,7 @@ func (s *Server) Start() error {
|
||||
// mechanism to keep the client connected even when the connection is lost.
|
||||
// we cancel retry if the client receive a stop or down command, or if disable auto connect is configured.
|
||||
func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Config, statusRecorder *peer.Status,
|
||||
mgmProbe *internal.Probe, signalProbe *internal.Probe, relayProbe *internal.Probe, wgProbe *internal.Probe,
|
||||
runningWg *sync.WaitGroup,
|
||||
) {
|
||||
backOff := getConnectWithBackoff(ctx)
|
||||
retryStarted := false
|
||||
@ -185,7 +188,15 @@ func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Conf
|
||||
runOperation := func() error {
|
||||
log.Tracef("running client connection")
|
||||
s.connectClient = internal.NewConnectClient(ctx, config, statusRecorder)
|
||||
err := s.connectClient.RunWithProbes(mgmProbe, signalProbe, relayProbe, wgProbe)
|
||||
|
||||
probes := internal.ProbeHolder{
|
||||
MgmProbe: s.mgmProbe,
|
||||
SignalProbe: s.signalProbe,
|
||||
RelayProbe: s.relayProbe,
|
||||
WgProbe: s.wgProbe,
|
||||
}
|
||||
|
||||
err := s.connectClient.RunWithProbes(&probes, runningWg)
|
||||
if err != nil {
|
||||
log.Debugf("run client connection exited with error: %v. Will retry in the background", err)
|
||||
}
|
||||
@ -576,7 +587,11 @@ func (s *Server) Up(callerCtx context.Context, _ *proto.UpRequest) (*proto.UpRes
|
||||
s.statusRecorder.UpdateManagementAddress(s.config.ManagementURL.String())
|
||||
s.statusRecorder.UpdateRosenpass(s.config.RosenpassEnabled, s.config.RosenpassPermissive)
|
||||
|
||||
go s.connectWithRetryRuns(ctx, s.config, s.statusRecorder, s.mgmProbe, s.signalProbe, s.relayProbe, s.wgProbe)
|
||||
runningWg := sync.WaitGroup{}
|
||||
runningWg.Add(1)
|
||||
go s.connectWithRetryRuns(ctx, s.config, s.statusRecorder, &runningWg)
|
||||
|
||||
runningWg.Wait()
|
||||
|
||||
return &proto.UpResponse{}, nil
|
||||
}
|
||||
@ -590,28 +605,19 @@ func (s *Server) Down(ctx context.Context, _ *proto.DownRequest) (*proto.DownRes
|
||||
return nil, fmt.Errorf("service is not up")
|
||||
}
|
||||
s.actCancel()
|
||||
|
||||
err := s.connectClient.Stop()
|
||||
if err != nil {
|
||||
log.Errorf("failed to shut down properly: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
state := internal.CtxGetState(s.rootCtx)
|
||||
state.Set(internal.StatusIdle)
|
||||
|
||||
maxWaitTime := 5 * time.Second
|
||||
timeout := time.After(maxWaitTime)
|
||||
log.Infof("service is down")
|
||||
|
||||
engine := s.connectClient.Engine()
|
||||
|
||||
for {
|
||||
if !engine.IsWGIfaceUp() {
|
||||
return &proto.DownResponse{}, nil
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return &proto.DownResponse{}, nil
|
||||
case <-timeout:
|
||||
return nil, fmt.Errorf("failed to shut down properly")
|
||||
default:
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
return &proto.DownResponse{}, nil
|
||||
}
|
||||
|
||||
// Status returns the daemon status
|
||||
|
Reference in New Issue
Block a user