mirror of
https://github.com/netbirdio/netbird.git
synced 2025-07-21 00:13:26 +02:00
- Refactor the network monitoring to handle one event and then return - On engine restart, cancel the upper-layer context; stopping the engine becomes the responsibility of the upper layer - Before triggering a restart, the engine checks whether the state is already down. This helps avoid unnecessary delayed network restart events.
133 lines
2.8 KiB
Go
133 lines
2.8 KiB
Go
//go:build !ios && !android
|
|
|
|
package networkmonitor
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"net/netip"
|
|
"runtime/debug"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/cenkalti/backoff/v4"
|
|
log "github.com/sirupsen/logrus"
|
|
|
|
"github.com/netbirdio/netbird/client/internal/routemanager/systemops"
|
|
)
|
|
|
|
// debounceTime is the quiet period Listen waits after the most recent change
// event before it returns; every further event restarts the wait.
const debounceTime = 2 * time.Second
|
|
|
|
// checkChangeFn is the function checkChanges calls to wait for a network
// change: a nil return is treated as "a change happened", a non-nil return
// ends the watch loop. It is held in a package-level variable rather than
// called directly — presumably so tests can substitute it; confirm against
// the test files.
var checkChangeFn = checkChange
|
|
|
|
// NetworkMonitor watches the host's default routes and reports when the
// network configuration changes. Create one with New, block on Listen to wait
// for a change, and call Stop to abort a running Listen.
type NetworkMonitor struct {
	// cancel aborts the context of the running Listen call. It is nil until
	// Listen has been started.
	cancel context.CancelFunc
	// wg tracks the running Listen call so that Stop can wait for it to return.
	wg sync.WaitGroup
	// mu guards cancel and serialises Listen start-up against Stop.
	mu sync.Mutex
}
|
|
|
|
// New creates a new network monitor.
|
|
func New() *NetworkMonitor {
|
|
return &NetworkMonitor{}
|
|
}
|
|
|
|
// Listen begins monitoring network changes. When a change is detected, this function will return without error.
|
|
func (nw *NetworkMonitor) Listen(ctx context.Context) (err error) {
|
|
nw.mu.Lock()
|
|
if nw.cancel != nil {
|
|
nw.mu.Unlock()
|
|
return errors.New("network monitor already started")
|
|
}
|
|
|
|
ctx, nw.cancel = context.WithCancel(ctx)
|
|
defer nw.cancel()
|
|
nw.wg.Add(1)
|
|
nw.mu.Unlock()
|
|
|
|
defer nw.wg.Done()
|
|
|
|
var nexthop4, nexthop6 systemops.Nexthop
|
|
|
|
operation := func() error {
|
|
var errv4, errv6 error
|
|
nexthop4, errv4 = systemops.GetNextHop(netip.IPv4Unspecified())
|
|
nexthop6, errv6 = systemops.GetNextHop(netip.IPv6Unspecified())
|
|
|
|
if errv4 != nil && errv6 != nil {
|
|
return errors.New("failed to get default next hops")
|
|
}
|
|
|
|
if errv4 == nil {
|
|
log.Debugf("Network monitor: IPv4 default route: %s, interface: %s", nexthop4.IP, nexthop4.Intf.Name)
|
|
}
|
|
if errv6 == nil {
|
|
log.Debugf("Network monitor: IPv6 default route: %s, interface: %s", nexthop6.IP, nexthop6.Intf.Name)
|
|
}
|
|
|
|
// continue if either route was found
|
|
return nil
|
|
}
|
|
|
|
expBackOff := backoff.WithContext(backoff.NewExponentialBackOff(), ctx)
|
|
|
|
if err := backoff.Retry(operation, expBackOff); err != nil {
|
|
return fmt.Errorf("failed to get default next hops: %w", err)
|
|
}
|
|
|
|
// recover in case sys ops panic
|
|
defer func() {
|
|
if r := recover(); r != nil {
|
|
err = fmt.Errorf("panic occurred: %v, stack trace: %s", r, debug.Stack())
|
|
}
|
|
}()
|
|
|
|
event := make(chan struct{}, 1)
|
|
go nw.checkChanges(ctx, event, nexthop4, nexthop6)
|
|
|
|
// debounce changes
|
|
timer := time.NewTimer(0)
|
|
timer.Stop()
|
|
for {
|
|
select {
|
|
case <-event:
|
|
timer.Reset(debounceTime)
|
|
case <-timer.C:
|
|
return nil
|
|
case <-ctx.Done():
|
|
timer.Stop()
|
|
return ctx.Err()
|
|
}
|
|
}
|
|
}
|
|
|
|
// Stop stops the network monitor.
|
|
func (nw *NetworkMonitor) Stop() {
|
|
nw.mu.Lock()
|
|
defer nw.mu.Unlock()
|
|
|
|
if nw.cancel == nil {
|
|
return
|
|
}
|
|
|
|
nw.cancel()
|
|
nw.wg.Wait()
|
|
}
|
|
|
|
func (nw *NetworkMonitor) checkChanges(ctx context.Context, event chan struct{}, nexthop4 systemops.Nexthop, nexthop6 systemops.Nexthop) {
|
|
for {
|
|
if err := checkChangeFn(ctx, nexthop4, nexthop6); err != nil {
|
|
close(event)
|
|
return
|
|
}
|
|
// prevent blocking
|
|
select {
|
|
case event <- struct{}{}:
|
|
default:
|
|
}
|
|
}
|
|
}
|