[client] Destroy WG interface on down timeout (#2435)

On engine down, wait not only for the WireGuard interface to be down but for it to be completely removed. If the waiting loop reaches the timeout, an interface destroy is triggered. On the up command, the daemon now waits until the engine is fully running before sending the response to the CLI. Includes a small refactor of the probes to comply with Sonar rules about the parameter count of a function call.
This commit is contained in:
pascal-fischer
2024-09-02 19:19:14 +02:00
committed by GitHub
parent 95174d4619
commit 13e7198046
16 changed files with 222 additions and 116 deletions

@@ -12,7 +12,6 @@ import (
"github.com/cenkalti/backoff/v4"
"golang.org/x/exp/maps"
"google.golang.org/protobuf/types/known/durationpb"
log "github.com/sirupsen/logrus"
@@ -143,10 +142,14 @@ func (s *Server) Start() error {
s.sessionWatcher.SetOnExpireListener(s.onSessionExpire)
}
runningWg := sync.WaitGroup{}
runningWg.Add(1)
if !config.DisableAutoConnect {
go s.connectWithRetryRuns(ctx, config, s.statusRecorder, s.mgmProbe, s.signalProbe, s.relayProbe, s.wgProbe)
go s.connectWithRetryRuns(ctx, config, s.statusRecorder, &runningWg)
}
runningWg.Wait()
return nil
}
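
The hunk above introduces a readiness handshake: Start() (and Up() further down) now block on a sync.WaitGroup until the engine goroutine reports that it is fully running, instead of returning immediately. Below is a minimal, self-contained sketch of that pattern; where exactly Done() fires inside RunWithProbes is an assumption, since that code is not part of this excerpt.

package main

import (
	"fmt"
	"sync"
	"time"
)

// startEngine stands in for connectWithRetryRuns/RunWithProbes: it does its
// startup work and then signals readiness exactly once.
func startEngine(running *sync.WaitGroup) {
	time.Sleep(200 * time.Millisecond) // placeholder for the real engine startup
	running.Done()                     // assumed to happen once the engine reports "running"
	// ... keep the connection alive, retry on failures, etc. ...
}

func main() {
	running := sync.WaitGroup{}
	running.Add(1)
	go startEngine(&running)
	running.Wait() // Start()/Up() only answer the CLI after this unblocks
	fmt.Println("engine is fully running")
}
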
@@ -154,7 +157,7 @@ func (s *Server) Start() error {
// mechanism to keep the client connected even when the connection is lost.
// we cancel retry if the client receive a stop or down command, or if disable auto connect is configured.
func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Config, statusRecorder *peer.Status,
mgmProbe *internal.Probe, signalProbe *internal.Probe, relayProbe *internal.Probe, wgProbe *internal.Probe,
runningWg *sync.WaitGroup,
) {
backOff := getConnectWithBackoff(ctx)
retryStarted := false
@@ -185,7 +188,15 @@ func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Conf
runOperation := func() error {
log.Tracef("running client connection")
s.connectClient = internal.NewConnectClient(ctx, config, statusRecorder)
err := s.connectClient.RunWithProbes(mgmProbe, signalProbe, relayProbe, wgProbe)
probes := internal.ProbeHolder{
MgmProbe: s.mgmProbe,
SignalProbe: s.signalProbe,
RelayProbe: s.relayProbe,
WgProbe: s.wgProbe,
}
err := s.connectClient.RunWithProbes(&probes, runningWg)
if err != nil {
log.Debugf("run client connection exited with error: %v. Will retry in the background", err)
}
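
The ProbeHolder change is the parameter-count refactor mentioned in the commit message: the four probe arguments are grouped into one struct so the call stays under Sonar's parameter limit. Below is a sketch of the shape, assuming ProbeHolder is a plain struct in the internal package; the field names come from the hunk above, while the Probe type and the function body are placeholders.

package internal

import "sync"

// Probe is a stand-in for the real probe type used by the daemon.
type Probe struct{}

// ProbeHolder bundles the probes so callers pass one argument instead of four.
type ProbeHolder struct {
	MgmProbe    *Probe
	SignalProbe *Probe
	RelayProbe  *Probe
	WgProbe     *Probe
}

// RunWithProbes now takes the holder plus the readiness WaitGroup instead of
// four separate probe parameters.
func RunWithProbes(probes *ProbeHolder, running *sync.WaitGroup) error {
	_ = probes     // placeholder: the real method wires the probes into the engine
	running.Done() // assumed to signal readiness once the engine is running
	return nil
}
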
@@ -576,7 +587,11 @@ func (s *Server) Up(callerCtx context.Context, _ *proto.UpRequest) (*proto.UpRes
s.statusRecorder.UpdateManagementAddress(s.config.ManagementURL.String())
s.statusRecorder.UpdateRosenpass(s.config.RosenpassEnabled, s.config.RosenpassPermissive)
go s.connectWithRetryRuns(ctx, s.config, s.statusRecorder, s.mgmProbe, s.signalProbe, s.relayProbe, s.wgProbe)
runningWg := sync.WaitGroup{}
runningWg.Add(1)
go s.connectWithRetryRuns(ctx, s.config, s.statusRecorder, &runningWg)
runningWg.Wait()
return &proto.UpResponse{}, nil
}
@@ -590,28 +605,19 @@ func (s *Server) Down(ctx context.Context, _ *proto.DownRequest) (*proto.DownRes
return nil, fmt.Errorf("service is not up")
}
s.actCancel()
err := s.connectClient.Stop()
if err != nil {
log.Errorf("failed to shut down properly: %v", err)
return nil, err
}
state := internal.CtxGetState(s.rootCtx)
state.Set(internal.StatusIdle)
maxWaitTime := 5 * time.Second
timeout := time.After(maxWaitTime)
log.Infof("service is down")
engine := s.connectClient.Engine()
for {
if !engine.IsWGIfaceUp() {
return &proto.DownResponse{}, nil
}
select {
case <-ctx.Done():
return &proto.DownResponse{}, nil
case <-timeout:
return nil, fmt.Errorf("failed to shut down properly")
default:
time.Sleep(100 * time.Millisecond)
}
}
return &proto.DownResponse{}, nil
}
// Status returns the daemon status
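
The Down handler's old wait loop only checked IsWGIfaceUp() and failed with an error when the timeout hit. The commit message describes the replacement behavior: wait until the interface is completely removed, and destroy it if the timeout is reached. Below is a sketch of that shutdown wait, using hypothetical wgIfaceExists and destroyWGIface helpers; the real engine method names are not part of this excerpt.

package main

import (
	"context"
	"fmt"
	"log"
	"time"
)

// waitForIfaceRemoval polls until the WireGuard interface is gone. If it is
// still present when the grace period ends, it force-removes it instead of
// failing the Down call.
func waitForIfaceRemoval(ctx context.Context, wgIfaceExists func() bool, destroyWGIface func() error) error {
	timeout := time.After(5 * time.Second)
	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()

	for {
		if !wgIfaceExists() {
			return nil // interface fully removed, clean shutdown
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-timeout:
			log.Print("interface still present after timeout, destroying it")
			return destroyWGIface()
		case <-ticker.C:
			// poll again
		}
	}
}

func main() {
	remaining := 3 // pretend removal completes after a few polls
	err := waitForIfaceRemoval(context.Background(),
		func() bool { remaining--; return remaining > 0 },
		func() error { return nil })
	fmt.Println("down finished:", err)
}
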