mirror of
https://github.com/netbirdio/netbird.git
synced 2025-08-09 07:15:15 +02:00
Monitor network changes and restart engine on detection (#1904)
This commit is contained in:
@ -2,6 +2,7 @@ package internal
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"net"
|
||||
@ -21,6 +22,7 @@ import (
|
||||
"github.com/netbirdio/netbird/client/firewall/manager"
|
||||
"github.com/netbirdio/netbird/client/internal/acl"
|
||||
"github.com/netbirdio/netbird/client/internal/dns"
|
||||
"github.com/netbirdio/netbird/client/internal/networkmonitor"
|
||||
"github.com/netbirdio/netbird/client/internal/peer"
|
||||
"github.com/netbirdio/netbird/client/internal/relay"
|
||||
"github.com/netbirdio/netbird/client/internal/rosenpass"
|
||||
@ -60,6 +62,9 @@ type EngineConfig struct {
|
||||
// WgPrivateKey is a Wireguard private key of our peer (it MUST never leave the machine)
|
||||
WgPrivateKey wgtypes.Key
|
||||
|
||||
// NetworkMonitor is a flag to enable network monitoring
|
||||
NetworkMonitor bool
|
||||
|
||||
// IFaceBlackList is a list of network interfaces to ignore when discovering connection candidates (ICE related)
|
||||
IFaceBlackList []string
|
||||
DisableIPv6Discovery bool
|
||||
@ -114,9 +119,11 @@ type Engine struct {
|
||||
// clientRoutes is the most recent list of clientRoutes received from the Management Service
|
||||
clientRoutes route.HAMap
|
||||
|
||||
cancel context.CancelFunc
|
||||
clientCtx context.Context
|
||||
clientCancel context.CancelFunc
|
||||
|
||||
ctx context.Context
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
|
||||
wgInterface *iface.WGIface
|
||||
wgProxyFactory *wgproxy.Factory
|
||||
@ -126,6 +133,8 @@ type Engine struct {
|
||||
// networkSerial is the latest CurrentSerial (state ID) of the network sent by the Management service
|
||||
networkSerial uint64
|
||||
|
||||
networkWatcher *networkmonitor.NetworkWatcher
|
||||
|
||||
sshServerFunc func(hostKeyPEM []byte, addr string) (nbssh.Server, error)
|
||||
sshServer nbssh.Server
|
||||
|
||||
@ -151,8 +160,8 @@ type Peer struct {
|
||||
|
||||
// NewEngine creates a new Connection Engine
|
||||
func NewEngine(
|
||||
ctx context.Context,
|
||||
cancel context.CancelFunc,
|
||||
clientCtx context.Context,
|
||||
clientCancel context.CancelFunc,
|
||||
signalClient signal.Client,
|
||||
mgmClient mgm.Client,
|
||||
config *EngineConfig,
|
||||
@ -160,8 +169,8 @@ func NewEngine(
|
||||
statusRecorder *peer.Status,
|
||||
) *Engine {
|
||||
return NewEngineWithProbes(
|
||||
ctx,
|
||||
cancel,
|
||||
clientCtx,
|
||||
clientCancel,
|
||||
signalClient,
|
||||
mgmClient,
|
||||
config,
|
||||
@ -176,8 +185,8 @@ func NewEngine(
|
||||
|
||||
// NewEngineWithProbes creates a new Connection Engine with probes attached
|
||||
func NewEngineWithProbes(
|
||||
ctx context.Context,
|
||||
cancel context.CancelFunc,
|
||||
clientCtx context.Context,
|
||||
clientCancel context.CancelFunc,
|
||||
signalClient signal.Client,
|
||||
mgmClient mgm.Client,
|
||||
config *EngineConfig,
|
||||
@ -188,9 +197,10 @@ func NewEngineWithProbes(
|
||||
relayProbe *Probe,
|
||||
wgProbe *Probe,
|
||||
) *Engine {
|
||||
|
||||
return &Engine{
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
clientCtx: clientCtx,
|
||||
clientCancel: clientCancel,
|
||||
signal: signalClient,
|
||||
mgmClient: mgmClient,
|
||||
peerConns: make(map[string]*peer.Conn),
|
||||
@ -202,7 +212,7 @@ func NewEngineWithProbes(
|
||||
networkSerial: 0,
|
||||
sshServerFunc: nbssh.DefaultSSHServer,
|
||||
statusRecorder: statusRecorder,
|
||||
wgProxyFactory: wgproxy.NewFactory(config.WgPort),
|
||||
networkWatcher: networkmonitor.New(),
|
||||
mgmProbe: mgmProbe,
|
||||
signalProbe: signalProbe,
|
||||
relayProbe: relayProbe,
|
||||
@ -214,6 +224,13 @@ func (e *Engine) Stop() error {
|
||||
e.syncMsgMux.Lock()
|
||||
defer e.syncMsgMux.Unlock()
|
||||
|
||||
if e.cancel != nil {
|
||||
e.cancel()
|
||||
}
|
||||
|
||||
// stopping network monitor first to avoid starting the engine again
|
||||
e.networkWatcher.Stop()
|
||||
|
||||
err := e.removeAllPeers()
|
||||
if err != nil {
|
||||
return err
|
||||
@ -222,7 +239,7 @@ func (e *Engine) Stop() error {
|
||||
e.clientRoutes = nil
|
||||
|
||||
// very ugly but we want to remove peers from the WireGuard interface first before removing interface.
|
||||
// Removing peers happens in the conn.CLose() asynchronously
|
||||
// Removing peers happens in the conn.Close() asynchronously
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
|
||||
e.close()
|
||||
@ -237,6 +254,13 @@ func (e *Engine) Start() error {
|
||||
e.syncMsgMux.Lock()
|
||||
defer e.syncMsgMux.Unlock()
|
||||
|
||||
if e.cancel != nil {
|
||||
e.cancel()
|
||||
}
|
||||
e.ctx, e.cancel = context.WithCancel(e.clientCtx)
|
||||
|
||||
e.wgProxyFactory = wgproxy.NewFactory(e.clientCtx, e.config.WgPort)
|
||||
|
||||
wgIface, err := e.newWgIface()
|
||||
if err != nil {
|
||||
log.Errorf("failed creating wireguard interface instance %s: [%s]", e.config.WgIfaceName, err)
|
||||
@ -320,6 +344,21 @@ func (e *Engine) Start() error {
|
||||
e.receiveManagementEvents()
|
||||
e.receiveProbeEvents()
|
||||
|
||||
if e.config.NetworkMonitor {
|
||||
// starting network monitor at the very last to avoid disruptions
|
||||
go e.networkWatcher.Start(e.ctx, func() {
|
||||
log.Infof("Network monitor detected network change, restarting engine")
|
||||
if err := e.Stop(); err != nil {
|
||||
log.Errorf("Failed to stop engine: %v", err)
|
||||
}
|
||||
if err := e.Start(); err != nil {
|
||||
log.Errorf("Failed to start engine: %v", err)
|
||||
}
|
||||
})
|
||||
} else {
|
||||
log.Infof("Network monitor is disabled, not starting")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -588,12 +627,12 @@ func (e *Engine) updateConfig(conf *mgmProto.PeerConfig) error {
|
||||
// E.g. when a new peer has been registered and we are allowed to connect to it.
|
||||
func (e *Engine) receiveManagementEvents() {
|
||||
go func() {
|
||||
err := e.mgmClient.Sync(e.handleSync)
|
||||
err := e.mgmClient.Sync(e.ctx, e.handleSync)
|
||||
if err != nil {
|
||||
// happens if management is unavailable for a long time.
|
||||
// We want to cancel the operation of the whole client
|
||||
_ = CtxGetState(e.ctx).Wrap(ErrResetConnection)
|
||||
e.cancel()
|
||||
e.clientCancel()
|
||||
return
|
||||
}
|
||||
log.Debugf("stopped receiving updates from Management Service")
|
||||
@ -869,11 +908,12 @@ func (e *Engine) connWorker(conn *peer.Conn, peerKey string) {
|
||||
conn.UpdateStunTurn(append(e.STUNs, e.TURNs...))
|
||||
e.syncMsgMux.Unlock()
|
||||
|
||||
err := conn.Open()
|
||||
err := conn.Open(e.ctx)
|
||||
if err != nil {
|
||||
log.Debugf("connection to peer %s failed: %v", peerKey, err)
|
||||
switch err.(type) {
|
||||
case *peer.ConnectionClosedError:
|
||||
var connectionClosedError *peer.ConnectionClosedError
|
||||
switch {
|
||||
case errors.As(err, &connectionClosedError):
|
||||
// conn has been forced to close, so we exit the loop
|
||||
return
|
||||
default:
|
||||
@ -984,7 +1024,7 @@ func (e *Engine) createPeerConn(pubKey string, allowedIPs string) (*peer.Conn, e
|
||||
func (e *Engine) receiveSignalEvents() {
|
||||
go func() {
|
||||
// connect to a stream of messages coming from the signal server
|
||||
err := e.signal.Receive(func(msg *sProto.Message) error {
|
||||
err := e.signal.Receive(e.ctx, func(msg *sProto.Message) error {
|
||||
e.syncMsgMux.Lock()
|
||||
defer e.syncMsgMux.Unlock()
|
||||
|
||||
@ -1058,7 +1098,7 @@ func (e *Engine) receiveSignalEvents() {
|
||||
// happens if signal is unavailable for a long time.
|
||||
// We want to cancel the operation of the whole client
|
||||
_ = CtxGetState(e.ctx).Wrap(ErrResetConnection)
|
||||
e.cancel()
|
||||
e.clientCancel()
|
||||
return
|
||||
}
|
||||
}()
|
||||
@ -1119,13 +1159,16 @@ func (e *Engine) parseNATExternalIPMappings() []string {
|
||||
}
|
||||
|
||||
func (e *Engine) close() {
|
||||
if err := e.wgProxyFactory.Free(); err != nil {
|
||||
log.Errorf("failed closing ebpf proxy: %s", err)
|
||||
if e.wgProxyFactory != nil {
|
||||
if err := e.wgProxyFactory.Free(); err != nil {
|
||||
log.Errorf("failed closing ebpf proxy: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
// stop/restore DNS first so dbus and friends don't complain because of a missing interface
|
||||
if e.dnsServer != nil {
|
||||
e.dnsServer.Stop()
|
||||
e.dnsServer = nil
|
||||
}
|
||||
|
||||
if e.routeManager != nil {
|
||||
|
Reference in New Issue
Block a user