mirror of
https://github.com/netbirdio/netbird.git
synced 2025-08-15 17:52:47 +02:00
[client, relay] Fix/wg watch (#3261)
Fix WireGuard watcher-related issues: - Fix race handling between TURN and Relayed reconnection - Move the WgWatcher logic to a separate struct - Handle timeouts in a more defensive way - Fix the initial Relay client reconnection to the home server
This commit is contained in:
@ -14,8 +14,9 @@ var (
|
||||
|
||||
// Guard manages the reconnection attempts to the Relay server after a disconnection event.
|
||||
type Guard struct {
|
||||
// OnNewRelayClient is a channel that is used to notify the relay manager about a new relay client instance.
|
||||
// NewGuard creates this channel with a buffer of one element.
|
||||
OnNewRelayClient chan *Client
|
||||
// OnReconnected is signalled by notifyReconnected, which StartReconnectTrys calls after a
// successful tryToQuickReconnect. NewGuard creates it with a buffer of one element and the
// send is non-blocking, so a signal is dropped when the channel cannot accept it.
OnReconnected chan struct{}
|
||||
// serverPicker is used by retry to pick a new Relay server (PickServer).
serverPicker *ServerPicker
|
||||
}
|
||||
|
||||
@ -23,6 +24,7 @@ type Guard struct {
|
||||
func NewGuard(sp *ServerPicker) *Guard {
|
||||
g := &Guard{
|
||||
OnNewRelayClient: make(chan *Client, 1),
|
||||
OnReconnected: make(chan struct{}, 1),
|
||||
serverPicker: sp,
|
||||
}
|
||||
return g
|
||||
@ -39,14 +41,13 @@ func NewGuard(sp *ServerPicker) *Guard {
|
||||
// - relayClient: The relay client instance that was disconnected.
|
||||
// TODO: prevent multiple concurrent reconnection instances. With the current usage this should not happen, but it is better to guard against it explicitly.
|
||||
func (g *Guard) StartReconnectTrys(ctx context.Context, relayClient *Client) {
|
||||
if relayClient == nil {
|
||||
goto RETRY
|
||||
}
|
||||
if g.isServerURLStillValid(relayClient) && g.quickReconnect(ctx, relayClient) {
|
||||
// try to reconnect to the same server
|
||||
if ok := g.tryToQuickReconnect(ctx, relayClient); ok {
|
||||
g.notifyReconnected()
|
||||
return
|
||||
}
|
||||
|
||||
RETRY:
|
||||
// start a ticker to pick a new server
|
||||
ticker := exponentTicker(ctx)
|
||||
defer ticker.Stop()
|
||||
|
||||
@ -64,6 +65,28 @@ RETRY:
|
||||
}
|
||||
}
|
||||
|
||||
func (g *Guard) tryToQuickReconnect(parentCtx context.Context, rc *Client) bool {
|
||||
if rc == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if !g.isServerURLStillValid(rc) {
|
||||
return false
|
||||
}
|
||||
|
||||
if cancelled := waiteBeforeRetry(parentCtx); !cancelled {
|
||||
return false
|
||||
}
|
||||
|
||||
log.Infof("try to reconnect to Relay server: %s", rc.connectionURL)
|
||||
|
||||
if err := rc.Connect(); err != nil {
|
||||
log.Errorf("failed to reconnect to relay server: %s", err)
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (g *Guard) retry(ctx context.Context) error {
|
||||
log.Infof("try to pick up a new Relay server")
|
||||
relayClient, err := g.serverPicker.PickServer(ctx)
|
||||
@ -78,23 +101,6 @@ func (g *Guard) retry(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (g *Guard) quickReconnect(parentCtx context.Context, rc *Client) bool {
|
||||
ctx, cancel := context.WithTimeout(parentCtx, 1500*time.Millisecond)
|
||||
defer cancel()
|
||||
<-ctx.Done()
|
||||
|
||||
if parentCtx.Err() != nil {
|
||||
return false
|
||||
}
|
||||
log.Infof("try to reconnect to Relay server: %s", rc.connectionURL)
|
||||
|
||||
if err := rc.Connect(); err != nil {
|
||||
log.Errorf("failed to reconnect to relay server: %s", err)
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (g *Guard) drainRelayClientChan() {
|
||||
select {
|
||||
case <-g.OnNewRelayClient:
|
||||
@ -111,6 +117,13 @@ func (g *Guard) isServerURLStillValid(rc *Client) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (g *Guard) notifyReconnected() {
|
||||
select {
|
||||
case g.OnReconnected <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
func exponentTicker(ctx context.Context) *backoff.Ticker {
|
||||
bo := backoff.WithContext(&backoff.ExponentialBackOff{
|
||||
InitialInterval: 2 * time.Second,
|
||||
@ -121,3 +134,15 @@ func exponentTicker(ctx context.Context) *backoff.Ticker {
|
||||
|
||||
return backoff.NewTicker(bo)
|
||||
}
|
||||
|
||||
// waiteBeforeRetry blocks for 1.5 seconds or until ctx is done, whichever
// happens first.
//
// It returns true when the full pause elapsed and false when ctx finished
// before the timer fired.
func waiteBeforeRetry(ctx context.Context) bool {
	const pause = 1500 * time.Millisecond

	delay := time.NewTimer(pause)
	defer delay.Stop()

	elapsed := false
	select {
	case <-ctx.Done():
		// Interrupted by the context; elapsed stays false.
	case <-delay.C:
		elapsed = true
	}
	return elapsed
}
|
||||
|
Reference in New Issue
Block a user