2022-09-05 09:06:35 +02:00
package routemanager
import (
"context"
"fmt"
2024-07-16 10:19:01 +02:00
"reflect"
2024-04-09 21:20:02 +02:00
"time"
2022-12-08 13:19:55 +01:00
2024-06-13 13:24:24 +02:00
"github.com/hashicorp/go-multierror"
2023-03-03 19:49:18 +01:00
log "github.com/sirupsen/logrus"
2024-06-13 13:24:24 +02:00
nberrors "github.com/netbirdio/netbird/client/errors"
2024-10-02 18:24:22 +02:00
"github.com/netbirdio/netbird/client/iface"
2024-08-02 18:43:00 +02:00
nbdns "github.com/netbirdio/netbird/client/internal/dns"
2022-09-05 09:06:35 +02:00
"github.com/netbirdio/netbird/client/internal/peer"
2024-06-13 13:24:24 +02:00
"github.com/netbirdio/netbird/client/internal/routemanager/dynamic"
"github.com/netbirdio/netbird/client/internal/routemanager/refcounter"
"github.com/netbirdio/netbird/client/internal/routemanager/static"
2022-09-05 09:06:35 +02:00
"github.com/netbirdio/netbird/route"
)
// routerPeerStatus is the per-route snapshot of a routing peer's connection
// properties that the route selection logic scores against.
type routerPeerStatus struct {
	// connected reports whether the peer is in the connected state;
	// relayed reports whether that connection goes through a relay.
	connected, relayed bool
	// latency is the latency recorded for the peer; zero means no
	// measurement is available.
	latency time.Duration
}
// routesUpdate is the message delivered to a client network watcher when the
// set of routes for its network changes.
type routesUpdate struct {
	// updateSerial orders updates; the watcher ignores an update whose
	// serial is lower than the last one it applied.
	updateSerial uint64
	// routes is the complete new set of routes for the network.
	routes []*route.Route
}
2024-06-13 13:24:24 +02:00
// RouteHandler is the set of operations the client network watcher needs from
// a route implementation. In this file it is satisfied by the values returned
// from static.NewRoute and dynamic.NewRoute.
type RouteHandler interface {
	// Stringer provides String() string, used as the handler's textual
	// identity in log messages and when recording peer state routes.
	fmt.Stringer

	// AddRoute installs the route; ctx is the watcher's context.
	AddRoute(ctx context.Context) error
	// RemoveRoute undoes AddRoute.
	RemoveRoute() error

	// AddAllowedIPs associates the route with the peer identified by peerKey.
	AddAllowedIPs(peerKey string) error
	// RemoveAllowedIPs undoes AddAllowedIPs.
	RemoveAllowedIPs() error
}
2022-09-05 09:06:35 +02:00
type clientNetwork struct {
ctx context . Context
2024-06-13 13:24:24 +02:00
cancel context . CancelFunc
2023-03-03 19:49:18 +01:00
statusRecorder * peer . Status
2024-08-29 21:31:19 +02:00
wgInterface iface . IWGIface
2024-05-06 14:47:49 +02:00
routes map [ route . ID ] * route . Route
2022-09-05 09:06:35 +02:00
routeUpdate chan routesUpdate
peerStateUpdate chan struct { }
routePeersNotifiers map [ string ] chan struct { }
2024-06-13 13:24:24 +02:00
currentChosen * route . Route
handler RouteHandler
2022-09-05 09:06:35 +02:00
updateSerial uint64
}
2024-08-29 21:31:19 +02:00
func newClientNetworkWatcher ( ctx context . Context , dnsRouteInterval time . Duration , wgInterface iface . IWGIface , statusRecorder * peer . Status , rt * route . Route , routeRefCounter * refcounter . RouteRefCounter , allowedIPsRefCounter * refcounter . AllowedIPsRefCounter ) * clientNetwork {
2022-09-05 09:06:35 +02:00
ctx , cancel := context . WithCancel ( ctx )
2024-04-08 18:56:52 +02:00
2022-09-05 09:06:35 +02:00
client := & clientNetwork {
ctx : ctx ,
2024-06-13 13:24:24 +02:00
cancel : cancel ,
2022-09-05 09:06:35 +02:00
statusRecorder : statusRecorder ,
wgInterface : wgInterface ,
2024-05-06 14:47:49 +02:00
routes : make ( map [ route . ID ] * route . Route ) ,
2022-09-05 09:06:35 +02:00
routePeersNotifiers : make ( map [ string ] chan struct { } ) ,
routeUpdate : make ( chan routesUpdate ) ,
peerStateUpdate : make ( chan struct { } ) ,
2024-08-02 18:43:00 +02:00
handler : handlerFromRoute ( rt , routeRefCounter , allowedIPsRefCounter , dnsRouteInterval , statusRecorder , wgInterface ) ,
2022-09-05 09:06:35 +02:00
}
return client
}
2024-05-06 14:47:49 +02:00
func ( c * clientNetwork ) getRouterPeerStatuses ( ) map [ route . ID ] routerPeerStatus {
routePeerStatuses := make ( map [ route . ID ] routerPeerStatus )
2022-09-05 09:06:35 +02:00
for _ , r := range c . routes {
peerStatus , err := c . statusRecorder . GetPeer ( r . Peer )
if err != nil {
log . Debugf ( "couldn't fetch peer state: %v" , err )
continue
}
routePeerStatuses [ r . ID ] = routerPeerStatus {
2023-03-03 19:49:18 +01:00
connected : peerStatus . ConnStatus == peer . StatusConnected ,
2022-09-05 09:06:35 +02:00
relayed : peerStatus . Relayed ,
2024-04-09 21:20:02 +02:00
latency : peerStatus . Latency ,
2022-09-05 09:06:35 +02:00
}
}
return routePeerStatuses
}
2024-04-08 18:56:52 +02:00
// getBestRouteFromStatuses determines the most optimal route from the available routes
// within a clientNetwork, taking into account peer connection status, route metrics, and
// preference for non-relayed and direct connections.
//
// It follows these prioritization rules:
// * Connected peers: Only routes with connected peers are considered.
// * Metric: Routes with lower metrics (better) are prioritized.
// * Non-relayed: Routes without relays are preferred.
2024-04-09 21:20:02 +02:00
// * Latency: Routes with lower latency are prioritized.
2024-09-08 12:06:14 +02:00
// * we compare the current score + 10ms to the chosen score to avoid flapping between routes
2024-06-13 13:24:24 +02:00
// * Stability: In case of equal scores, the currently active route (if any) is maintained.
2024-04-08 18:56:52 +02:00
//
// It returns the ID of the selected optimal route.
2024-05-06 14:47:49 +02:00
func ( c * clientNetwork ) getBestRouteFromStatuses ( routePeerStatuses map [ route . ID ] routerPeerStatus ) route . ID {
chosen := route . ID ( "" )
2024-04-09 21:20:02 +02:00
chosenScore := float64 ( 0 )
currScore := float64 ( 0 )
2022-09-05 09:06:35 +02:00
2024-05-06 14:47:49 +02:00
currID := route . ID ( "" )
2024-06-13 13:24:24 +02:00
if c . currentChosen != nil {
currID = c . currentChosen . ID
2022-09-05 09:06:35 +02:00
}
for _ , r := range c . routes {
2024-04-09 21:20:02 +02:00
tempScore := float64 ( 0 )
2022-09-05 09:06:35 +02:00
peerStatus , found := routePeerStatuses [ r . ID ]
if ! found || ! peerStatus . connected {
continue
}
2023-06-01 16:00:44 +02:00
2022-09-05 09:06:35 +02:00
if r . Metric < route . MaxMetric {
metricDiff := route . MaxMetric - r . Metric
2024-04-09 21:20:02 +02:00
tempScore = float64 ( metricDiff ) * 10
2022-09-05 09:06:35 +02:00
}
2023-06-01 16:00:44 +02:00
2024-11-11 10:53:57 +01:00
// in some temporal cases, latency can be 0, so we set it to 999ms to not block but try to avoid this route
latency := 999 * time . Millisecond
2024-04-09 21:20:02 +02:00
if peerStatus . latency != 0 {
latency = peerStatus . latency
} else {
2024-11-11 10:53:57 +01:00
log . Tracef ( "peer %s has 0 latency, range %s" , r . Peer , c . handler )
2024-04-09 21:20:02 +02:00
}
2024-11-11 10:53:57 +01:00
// avoid negative tempScore on the higher latency calculation
if latency > 1 * time . Second {
latency = 999 * time . Millisecond
}
// higher latency is worse score
2024-04-09 21:20:02 +02:00
tempScore += 1 - latency . Seconds ( )
2022-09-05 09:06:35 +02:00
if ! peerStatus . relayed {
tempScore ++
}
2023-06-01 16:00:44 +02:00
2024-04-09 21:20:02 +02:00
if tempScore > chosenScore || ( tempScore == chosenScore && chosen == "" ) {
2023-06-01 16:00:44 +02:00
chosen = r . ID
chosenScore = tempScore
}
if chosen == "" && currID == "" {
2022-09-05 09:06:35 +02:00
chosen = r . ID
chosenScore = tempScore
}
2024-04-09 21:20:02 +02:00
if r . ID == currID {
currScore = tempScore
}
2022-09-05 09:06:35 +02:00
}
2024-11-11 10:53:57 +01:00
log . Debugf ( "chosen route: %s, chosen score: %f, current route: %s, current score: %f" , chosen , chosenScore , currID , currScore )
2024-04-09 21:20:02 +02:00
switch {
case chosen == "" :
2022-09-05 09:06:35 +02:00
var peers [ ] string
for _ , r := range c . routes {
peers = append ( peers , r . Peer )
}
2023-06-01 16:00:44 +02:00
2024-06-13 13:24:24 +02:00
log . Warnf ( "The network [%v] has not been assigned a routing peer as no peers from the list %s are currently connected" , c . handler , peers )
2024-04-09 21:20:02 +02:00
case chosen != currID :
2024-05-02 11:51:03 +02:00
// we compare the current score + 10ms to the chosen score to avoid flapping between routes
if currScore != 0 && currScore + 0.01 > chosenScore {
2024-06-13 13:24:24 +02:00
log . Debugf ( "Keeping current routing peer because the score difference with latency is less than 0.01(10ms), current: %f, new: %f" , currScore , chosenScore )
2024-04-09 21:20:02 +02:00
return currID
}
2024-05-02 11:51:03 +02:00
var p string
if rt := c . routes [ chosen ] ; rt != nil {
p = rt . Peer
}
2024-06-13 13:24:24 +02:00
log . Infof ( "New chosen route is %s with peer %s with score %f for network [%v]" , chosen , p , chosenScore , c . handler )
2022-09-05 09:06:35 +02:00
}
return chosen
}
func ( c * clientNetwork ) watchPeerStatusChanges ( ctx context . Context , peerKey string , peerStateUpdate chan struct { } , closer chan struct { } ) {
for {
select {
case <- ctx . Done ( ) :
return
case <- closer :
return
case <- c . statusRecorder . GetPeerStateChangeNotifier ( peerKey ) :
state , err := c . statusRecorder . GetPeer ( peerKey )
2023-03-03 19:49:18 +01:00
if err != nil || state . ConnStatus == peer . StatusConnecting {
2022-09-05 09:06:35 +02:00
continue
}
peerStateUpdate <- struct { } { }
log . Debugf ( "triggered route state update for Peer %s, state: %s" , peerKey , state . ConnStatus )
}
}
}
func ( c * clientNetwork ) startPeersStatusChangeWatcher ( ) {
for _ , r := range c . routes {
_ , found := c . routePeersNotifiers [ r . Peer ]
2024-11-11 10:53:57 +01:00
if found {
continue
2022-09-05 09:06:35 +02:00
}
2024-11-11 10:53:57 +01:00
closerChan := make ( chan struct { } )
c . routePeersNotifiers [ r . Peer ] = closerChan
go c . watchPeerStatusChanges ( c . ctx , r . Peer , c . peerStateUpdate , closerChan )
2022-09-05 09:06:35 +02:00
}
}
2024-11-11 10:53:57 +01:00
func ( c * clientNetwork ) removeRouteFromWireGuardPeer ( ) error {
if err := c . statusRecorder . RemovePeerStateRoute ( c . currentChosen . Peer , c . handler . String ( ) ) ; err != nil {
log . Warnf ( "Failed to update peer state: %v" , err )
}
2022-09-05 09:06:35 +02:00
2024-06-13 13:24:24 +02:00
if err := c . handler . RemoveAllowedIPs ( ) ; err != nil {
return fmt . Errorf ( "remove allowed IPs: %w" , err )
2022-09-05 09:06:35 +02:00
}
return nil
}
func ( c * clientNetwork ) removeRouteFromPeerAndSystem ( ) error {
2024-06-13 13:24:24 +02:00
if c . currentChosen == nil {
return nil
}
2024-04-08 18:56:52 +02:00
2024-06-13 13:24:24 +02:00
var merr * multierror . Error
2024-11-11 10:53:57 +01:00
if err := c . removeRouteFromWireGuardPeer ( ) ; err != nil {
2024-06-13 13:24:24 +02:00
merr = multierror . Append ( merr , fmt . Errorf ( "remove allowed IPs for peer %s: %w" , c . currentChosen . Peer , err ) )
2022-09-05 09:06:35 +02:00
}
2024-06-13 13:24:24 +02:00
if err := c . handler . RemoveRoute ( ) ; err != nil {
merr = multierror . Append ( merr , fmt . Errorf ( "remove route: %w" , err ) )
}
return nberrors . FormatErrorOrNil ( merr )
2022-09-05 09:06:35 +02:00
}
func ( c * clientNetwork ) recalculateRouteAndUpdatePeerAndSystem ( ) error {
routerPeerStatuses := c . getRouterPeerStatuses ( )
2024-06-13 13:24:24 +02:00
newChosenID := c . getBestRouteFromStatuses ( routerPeerStatuses )
2024-04-08 18:56:52 +02:00
// If no route is chosen, remove the route from the peer and system
2024-06-13 13:24:24 +02:00
if newChosenID == "" {
2024-04-08 18:56:52 +02:00
if err := c . removeRouteFromPeerAndSystem ( ) ; err != nil {
2024-06-13 13:24:24 +02:00
return fmt . Errorf ( "remove route for peer %s: %w" , c . currentChosen . Peer , err )
2022-09-05 09:06:35 +02:00
}
2024-06-13 13:24:24 +02:00
c . currentChosen = nil
2022-09-05 09:06:35 +02:00
return nil
}
2024-04-08 18:56:52 +02:00
// If the chosen route is the same as the current route, do nothing
2024-06-13 13:24:24 +02:00
if c . currentChosen != nil && c . currentChosen . ID == newChosenID &&
c . currentChosen . IsEqual ( c . routes [ newChosenID ] ) {
return nil
2022-09-05 09:06:35 +02:00
}
2024-06-13 13:24:24 +02:00
if c . currentChosen == nil {
// If they were not previously assigned to another peer, add routes to the system first
if err := c . handler . AddRoute ( c . ctx ) ; err != nil {
return fmt . Errorf ( "add route: %w" , err )
2022-09-05 09:06:35 +02:00
}
} else {
2024-06-13 13:24:24 +02:00
// Otherwise, remove the allowed IPs from the previous peer first
2024-11-11 10:53:57 +01:00
if err := c . removeRouteFromWireGuardPeer ( ) ; err != nil {
2024-06-13 13:24:24 +02:00
return fmt . Errorf ( "remove allowed IPs for peer %s: %w" , c . currentChosen . Peer , err )
2022-09-05 09:06:35 +02:00
}
}
2024-06-13 13:24:24 +02:00
c . currentChosen = c . routes [ newChosenID ]
if err := c . handler . AddAllowedIPs ( c . currentChosen . Peer ) ; err != nil {
return fmt . Errorf ( "add allowed IPs for peer %s: %w" , c . currentChosen . Peer , err )
}
2024-11-11 10:53:57 +01:00
err := c . statusRecorder . AddPeerStateRoute ( c . currentChosen . Peer , c . handler . String ( ) )
2024-06-13 13:24:24 +02:00
if err != nil {
2024-11-11 10:53:57 +01:00
return fmt . Errorf ( "add peer state route: %w" , err )
2024-06-13 13:24:24 +02:00
}
2024-11-11 10:53:57 +01:00
return nil
2022-09-05 09:06:35 +02:00
}
// sendUpdateToClientNetworkWatcher hands update to the watcher loop without
// blocking the caller: the send happens on a separate goroutine.
//
// routeUpdate is unbuffered, so the goroutine also watches c.ctx. If the
// watcher has been cancelled and nobody will ever receive, the update is
// dropped and the goroutine exits instead of blocking forever.
func (c *clientNetwork) sendUpdateToClientNetworkWatcher(update routesUpdate) {
	go func() {
		select {
		case c.routeUpdate <- update:
		case <-c.ctx.Done():
		}
	}()
}
2024-07-16 10:19:01 +02:00
func ( c * clientNetwork ) handleUpdate ( update routesUpdate ) bool {
isUpdateMapDifferent := false
2024-05-06 14:47:49 +02:00
updateMap := make ( map [ route . ID ] * route . Route )
2022-09-05 09:06:35 +02:00
for _ , r := range update . routes {
updateMap [ r . ID ] = r
}
2024-07-16 10:19:01 +02:00
if len ( c . routes ) != len ( updateMap ) {
isUpdateMapDifferent = true
}
2022-09-05 09:06:35 +02:00
for id , r := range c . routes {
_ , found := updateMap [ id ]
if ! found {
close ( c . routePeersNotifiers [ r . Peer ] )
delete ( c . routePeersNotifiers , r . Peer )
2024-07-16 10:19:01 +02:00
isUpdateMapDifferent = true
continue
}
if ! reflect . DeepEqual ( c . routes [ id ] , updateMap [ id ] ) {
isUpdateMapDifferent = true
2022-09-05 09:06:35 +02:00
}
}
c . routes = updateMap
2024-07-16 10:19:01 +02:00
return isUpdateMapDifferent
2022-09-05 09:06:35 +02:00
}
// peersStateAndUpdateWatcher is the main point of reacting on client network routing events.
// All the processing related to the client network should be done here. Thread-safe.
func ( c * clientNetwork ) peersStateAndUpdateWatcher ( ) {
for {
select {
case <- c . ctx . Done ( ) :
2024-06-13 13:24:24 +02:00
log . Debugf ( "Stopping watcher for network [%v]" , c . handler )
if err := c . removeRouteFromPeerAndSystem ( ) ; err != nil {
log . Errorf ( "Failed to remove routes for [%v]: %v" , c . handler , err )
2022-09-05 09:06:35 +02:00
}
return
case <- c . peerStateUpdate :
err := c . recalculateRouteAndUpdatePeerAndSystem ( )
if err != nil {
2024-06-13 13:24:24 +02:00
log . Errorf ( "Failed to recalculate routes for network [%v]: %v" , c . handler , err )
2022-09-05 09:06:35 +02:00
}
case update := <- c . routeUpdate :
if update . updateSerial < c . updateSerial {
2024-06-13 13:24:24 +02:00
log . Warnf ( "Received a routes update with smaller serial number (%d -> %d), ignoring it" , c . updateSerial , update . updateSerial )
2022-09-05 09:06:35 +02:00
continue
}
2024-06-13 13:24:24 +02:00
log . Debugf ( "Received a new client network route update for [%v]" , c . handler )
2022-09-05 09:06:35 +02:00
2024-07-16 10:19:01 +02:00
// hash update somehow
isTrueRouteUpdate := c . handleUpdate ( update )
2022-09-05 09:06:35 +02:00
c . updateSerial = update . updateSerial
2024-07-16 10:19:01 +02:00
if isTrueRouteUpdate {
log . Debug ( "Client network update contains different routes, recalculating routes" )
err := c . recalculateRouteAndUpdatePeerAndSystem ( )
if err != nil {
log . Errorf ( "Failed to recalculate routes for network [%v]: %v" , c . handler , err )
}
} else {
log . Debug ( "Route update is not different, skipping route recalculation" )
2022-09-05 09:06:35 +02:00
}
c . startPeersStatusChangeWatcher ( )
}
}
}
2024-04-26 16:37:27 +02:00
2024-08-29 21:31:19 +02:00
func handlerFromRoute ( rt * route . Route , routeRefCounter * refcounter . RouteRefCounter , allowedIPsRefCounter * refcounter . AllowedIPsRefCounter , dnsRouterInteval time . Duration , statusRecorder * peer . Status , wgInterface iface . IWGIface ) RouteHandler {
2024-06-13 13:24:24 +02:00
if rt . IsDynamic ( ) {
2024-08-02 18:43:00 +02:00
dns := nbdns . NewServiceViaMemory ( wgInterface )
return dynamic . NewRoute ( rt , routeRefCounter , allowedIPsRefCounter , dnsRouterInteval , statusRecorder , wgInterface , fmt . Sprintf ( "%s:%d" , dns . RuntimeIP ( ) , dns . RuntimePort ( ) ) )
2024-04-26 16:37:27 +02:00
}
2024-06-13 13:24:24 +02:00
return static . NewRoute ( rt , routeRefCounter , allowedIPsRefCounter )
2024-04-26 16:37:27 +02:00
}