[client] Destroy WG interface on down timeout (#2435)

On engine down, wait not only for the WireGuard interface to be down but for it to be completely removed. If the waiting loop reaches the timeout, an interface destroy is triggered. On the up command, the daemon now waits until the engine is fully running before sending the response to the CLI. Includes a small refactor of the probes to comply with Sonar rules about the parameter count of a function call.
This commit is contained in:
pascal-fischer
2024-09-02 19:19:14 +02:00
committed by GitHub
parent 95174d4619
commit 13e7198046
16 changed files with 222 additions and 116 deletions

@@ -12,7 +12,6 @@ import (
"github.com/cenkalti/backoff/v4"
"golang.org/x/exp/maps"
"google.golang.org/protobuf/types/known/durationpb"
log "github.com/sirupsen/logrus"
@@ -143,10 +142,14 @@ func (s *Server) Start() error {
s.sessionWatcher.SetOnExpireListener(s.onSessionExpire)
}
runningWg := sync.WaitGroup{}
runningWg.Add(1)
if !config.DisableAutoConnect {
go s.connectWithRetryRuns(ctx, config, s.statusRecorder, s.mgmProbe, s.signalProbe, s.relayProbe, s.wgProbe)
go s.connectWithRetryRuns(ctx, config, s.statusRecorder, &runningWg)
}
runningWg.Wait()
return nil
}
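
The hunk above introduces a readiness handshake: Start() (and Up() further down) now block on a sync.WaitGroup until the engine goroutine reports that it is fully running, instead of returning immediately. Below is a minimal, self-contained sketch of that pattern; where exactly Done() fires inside RunWithProbes is an assumption, since that code is not part of this excerpt.

package main

import (
	"fmt"
	"sync"
	"time"
)

// startEngine stands in for connectWithRetryRuns/RunWithProbes: it does its
// startup work and then signals readiness exactly once.
func startEngine(running *sync.WaitGroup) {
	time.Sleep(200 * time.Millisecond) // placeholder for the real engine startup
	running.Done()                     // assumed to happen once the engine reports "running"
	// ... keep the connection alive, retry on failures, etc. ...
}

func main() {
	running := sync.WaitGroup{}
	running.Add(1)
	go startEngine(&running)
	running.Wait() // Start()/Up() only answer the CLI after this unblocks
	fmt.Println("engine is fully running")
}
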
@@ -154,7 +157,7 @@ func (s *Server) Start() error {
// mechanism to keep the client connected even when the connection is lost.
// we cancel retry if the client receive a stop or down command, or if disable auto connect is configured.
func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Config, statusRecorder *peer.Status,
mgmProbe *internal.Probe, signalProbe *internal.Probe, relayProbe *internal.Probe, wgProbe *internal.Probe,
runningWg *sync.WaitGroup,
) {
backOff := getConnectWithBackoff(ctx)
retryStarted := false
@@ -185,7 +188,15 @@ func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Conf
runOperation := func() error {
log.Tracef("running client connection")
s.connectClient = internal.NewConnectClient(ctx, config, statusRecorder)
err := s.connectClient.RunWithProbes(mgmProbe, signalProbe, relayProbe, wgProbe)
probes := internal.ProbeHolder{
MgmProbe: s.mgmProbe,
SignalProbe: s.signalProbe,
RelayProbe: s.relayProbe,
WgProbe: s.wgProbe,
}
err := s.connectClient.RunWithProbes(&probes, runningWg)
if err != nil {
log.Debugf("run client connection exited with error: %v. Will retry in the background", err)
}
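
The ProbeHolder change is the parameter-count refactor mentioned in the commit message: the four probe arguments are grouped into one struct so the call stays under Sonar's parameter limit. Below is a sketch of the shape, assuming ProbeHolder is a plain struct in the internal package; the field names come from the hunk above, while the Probe type and the function body are placeholders.

package internal

import "sync"

// Probe is a stand-in for the real probe type used by the daemon.
type Probe struct{}

// ProbeHolder bundles the probes so callers pass one argument instead of four.
type ProbeHolder struct {
	MgmProbe    *Probe
	SignalProbe *Probe
	RelayProbe  *Probe
	WgProbe     *Probe
}

// RunWithProbes now takes the holder plus the readiness WaitGroup instead of
// four separate probe parameters.
func RunWithProbes(probes *ProbeHolder, running *sync.WaitGroup) error {
	_ = probes     // placeholder: the real method wires the probes into the engine
	running.Done() // assumed to signal readiness once the engine is running
	return nil
}
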
@@ -576,7 +587,11 @@ func (s *Server) Up(callerCtx context.Context, _ *proto.UpRequest) (*proto.UpRes
s.statusRecorder.UpdateManagementAddress(s.config.ManagementURL.String())
s.statusRecorder.UpdateRosenpass(s.config.RosenpassEnabled, s.config.RosenpassPermissive)
go s.connectWithRetryRuns(ctx, s.config, s.statusRecorder, s.mgmProbe, s.signalProbe, s.relayProbe, s.wgProbe)
runningWg := sync.WaitGroup{}
runningWg.Add(1)
go s.connectWithRetryRuns(ctx, s.config, s.statusRecorder, &runningWg)
runningWg.Wait()
return &proto.UpResponse{}, nil
}
@@ -590,28 +605,19 @@ func (s *Server) Down(ctx context.Context, _ *proto.DownRequest) (*proto.DownRes
return nil, fmt.Errorf("service is not up")
}
s.actCancel()
err := s.connectClient.Stop()
if err != nil {
log.Errorf("failed to shut down properly: %v", err)
return nil, err
}
state := internal.CtxGetState(s.rootCtx)
state.Set(internal.StatusIdle)
maxWaitTime := 5 * time.Second
timeout := time.After(maxWaitTime)
log.Infof("service is down")
engine := s.connectClient.Engine()
for {
if !engine.IsWGIfaceUp() {
return &proto.DownResponse{}, nil
}
select {
case <-ctx.Done():
return &proto.DownResponse{}, nil
case <-timeout:
return nil, fmt.Errorf("failed to shut down properly")
default:
time.Sleep(100 * time.Millisecond)
}
}
return &proto.DownResponse{}, nil
}
// Status returns the daemon status
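
The Down handler's old wait loop only checked IsWGIfaceUp() and failed with an error when the timeout hit. The commit message describes the replacement behavior: wait until the interface is completely removed, and destroy it if the timeout is reached. Below is a sketch of that shutdown wait, using hypothetical wgIfaceExists and destroyWGIface helpers; the real engine method names are not part of this excerpt.

package main

import (
	"context"
	"fmt"
	"log"
	"time"
)

// waitForIfaceRemoval polls until the WireGuard interface is gone. If it is
// still present when the grace period ends, it force-removes it instead of
// failing the Down call.
func waitForIfaceRemoval(ctx context.Context, wgIfaceExists func() bool, destroyWGIface func() error) error {
	timeout := time.After(5 * time.Second)
	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()

	for {
		if !wgIfaceExists() {
			return nil // interface fully removed, clean shutdown
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-timeout:
			log.Print("interface still present after timeout, destroying it")
			return destroyWGIface()
		case <-ticker.C:
			// poll again
		}
	}
}

func main() {
	remaining := 3 // pretend removal completes after a few polls
	err := waitForIfaceRemoval(context.Background(),
		func() bool { remaining--; return remaining > 0 },
		func() error { return nil })
	fmt.Println("down finished:", err)
}
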