Enhance DNS failover reliability (#1637)

* Fix using wrong array index in log to avoid potential panic

* Increase gRPC connection timeout and add the timeout resolv.conf option

This makes sure the dns client is able to failover to a second
configured nameserver, if present. That is the case then when using the
dns `file` manager and a resolv.conf file generated for netbird.

* On file backup restore, remove the first NS if it's the netbird NS

* Bump dns mangager discovery message from debug to info to ease debugging
This commit is contained in:
Viktor Liu
2024-03-01 15:17:35 +01:00
committed by GitHub
parent a4b9e93217
commit 17b1099032
7 changed files with 224 additions and 15 deletions

View File

@@ -26,6 +26,8 @@ import (
"github.com/netbirdio/netbird/management/proto"
)
const ConnectTimeout = 10 * time.Second
// ConnStateNotifier is a wrapper interface of the status recorders
type ConnStateNotifier interface {
MarkManagementDisconnected(error)
@@ -49,7 +51,7 @@ func NewClient(ctx context.Context, addr string, ourPrivateKey wgtypes.Key, tlsE
transportOption = grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{}))
}
mgmCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
mgmCtx, cancel := context.WithTimeout(ctx, ConnectTimeout)
defer cancel()
conn, err := grpc.DialContext(
mgmCtx,
@@ -318,7 +320,7 @@ func (c *GrpcClient) login(serverKey wgtypes.Key, req *proto.LoginRequest) (*pro
log.Errorf("failed to encrypt message: %s", err)
return nil, err
}
mgmCtx, cancel := context.WithTimeout(c.ctx, 5*time.Second)
mgmCtx, cancel := context.WithTimeout(c.ctx, ConnectTimeout)
defer cancel()
resp, err := c.realClient.Login(mgmCtx, &proto.EncryptedMessage{
WgPubKey: c.key.PublicKey().String(),