2022-09-05 09:06:35 +02:00
package routemanager
import (
"context"
"fmt"
2024-04-26 16:37:27 +02:00
"net"
2022-12-08 13:19:55 +01:00
"net/netip"
2024-04-09 21:20:02 +02:00
"time"
2022-12-08 13:19:55 +01:00
2023-03-03 19:49:18 +01:00
log "github.com/sirupsen/logrus"
2022-09-05 09:06:35 +02:00
"github.com/netbirdio/netbird/client/internal/peer"
"github.com/netbirdio/netbird/iface"
"github.com/netbirdio/netbird/route"
)
2023-11-24 11:31:22 +01:00
// minRangeBits is not referenced in this part of the file.
// NOTE(review): presumably the smallest prefix length (in bits) accepted for a
// routed network — confirm against its users.
const minRangeBits = 7
2022-09-05 09:06:35 +02:00
// routerPeerStatus is a snapshot of the connection properties of a routing
// peer that are taken into account when ranking routes.
type routerPeerStatus struct {
	// connected is true when the recorded connection status of the peer is
	// peer.StatusConnected.
	connected bool
	// relayed mirrors the Relayed flag of the recorded peer state.
	relayed bool
	// direct mirrors the Direct flag of the recorded peer state.
	direct bool
	// latency mirrors the Latency value of the recorded peer state.
	latency time.Duration
}
// routesUpdate is the message delivered to a clientNetwork watcher when the
// routes of its network change.
type routesUpdate struct {
	// updateSerial orders updates: the watcher ignores an update whose serial
	// is lower than the one it applied last.
	updateSerial uint64
	// routes is the new set of routes; it replaces the previous set entirely.
	routes []*route.Route
}
type clientNetwork struct {
ctx context . Context
stop context . CancelFunc
2023-03-03 19:49:18 +01:00
statusRecorder * peer . Status
2022-09-05 09:06:35 +02:00
wgInterface * iface . WGIface
2024-05-06 14:47:49 +02:00
routes map [ route . ID ] * route . Route
2022-09-05 09:06:35 +02:00
routeUpdate chan routesUpdate
peerStateUpdate chan struct { }
routePeersNotifiers map [ string ] chan struct { }
chosenRoute * route . Route
network netip . Prefix
updateSerial uint64
}
2023-03-03 19:49:18 +01:00
func newClientNetworkWatcher ( ctx context . Context , wgInterface * iface . WGIface , statusRecorder * peer . Status , network netip . Prefix ) * clientNetwork {
2022-09-05 09:06:35 +02:00
ctx , cancel := context . WithCancel ( ctx )
2024-04-08 18:56:52 +02:00
2022-09-05 09:06:35 +02:00
client := & clientNetwork {
ctx : ctx ,
stop : cancel ,
statusRecorder : statusRecorder ,
wgInterface : wgInterface ,
2024-05-06 14:47:49 +02:00
routes : make ( map [ route . ID ] * route . Route ) ,
2022-09-05 09:06:35 +02:00
routePeersNotifiers : make ( map [ string ] chan struct { } ) ,
routeUpdate : make ( chan routesUpdate ) ,
peerStateUpdate : make ( chan struct { } ) ,
network : network ,
}
return client
}
2024-05-06 14:47:49 +02:00
func ( c * clientNetwork ) getRouterPeerStatuses ( ) map [ route . ID ] routerPeerStatus {
routePeerStatuses := make ( map [ route . ID ] routerPeerStatus )
2022-09-05 09:06:35 +02:00
for _ , r := range c . routes {
peerStatus , err := c . statusRecorder . GetPeer ( r . Peer )
if err != nil {
log . Debugf ( "couldn't fetch peer state: %v" , err )
continue
}
routePeerStatuses [ r . ID ] = routerPeerStatus {
2023-03-03 19:49:18 +01:00
connected : peerStatus . ConnStatus == peer . StatusConnected ,
2022-09-05 09:06:35 +02:00
relayed : peerStatus . Relayed ,
direct : peerStatus . Direct ,
2024-04-09 21:20:02 +02:00
latency : peerStatus . Latency ,
2022-09-05 09:06:35 +02:00
}
}
return routePeerStatuses
}
2024-04-08 18:56:52 +02:00
// getBestRouteFromStatuses determines the most optimal route from the available routes
// within a clientNetwork, taking into account peer connection status, route metrics, and
// preference for non-relayed and direct connections.
//
// It follows these prioritization rules:
// * Connected peers: Only routes with connected peers are considered.
// * Metric: Routes with lower metrics (better) are prioritized.
// * Non-relayed: Routes without relays are preferred.
// * Direct connections: Routes with direct peer connections are favored.
// * Stability: In case of equal scores, the currently active route (if any) is maintained.
2024-04-09 21:20:02 +02:00
// * Latency: Routes with lower latency are prioritized.
2024-04-08 18:56:52 +02:00
//
// It returns the ID of the selected optimal route.
2024-05-06 14:47:49 +02:00
func ( c * clientNetwork ) getBestRouteFromStatuses ( routePeerStatuses map [ route . ID ] routerPeerStatus ) route . ID {
chosen := route . ID ( "" )
2024-04-09 21:20:02 +02:00
chosenScore := float64 ( 0 )
currScore := float64 ( 0 )
2022-09-05 09:06:35 +02:00
2024-05-06 14:47:49 +02:00
currID := route . ID ( "" )
2022-09-05 09:06:35 +02:00
if c . chosenRoute != nil {
currID = c . chosenRoute . ID
}
for _ , r := range c . routes {
2024-04-09 21:20:02 +02:00
tempScore := float64 ( 0 )
2022-09-05 09:06:35 +02:00
peerStatus , found := routePeerStatuses [ r . ID ]
if ! found || ! peerStatus . connected {
continue
}
2023-06-01 16:00:44 +02:00
2022-09-05 09:06:35 +02:00
if r . Metric < route . MaxMetric {
metricDiff := route . MaxMetric - r . Metric
2024-04-09 21:20:02 +02:00
tempScore = float64 ( metricDiff ) * 10
2022-09-05 09:06:35 +02:00
}
2023-06-01 16:00:44 +02:00
2024-04-09 21:20:02 +02:00
// in some temporal cases, latency can be 0, so we set it to 1s to not block but try to avoid this route
latency := time . Second
if peerStatus . latency != 0 {
latency = peerStatus . latency
} else {
log . Warnf ( "peer %s has 0 latency" , r . Peer )
}
tempScore += 1 - latency . Seconds ( )
2022-09-05 09:06:35 +02:00
if ! peerStatus . relayed {
tempScore ++
}
2023-06-01 16:00:44 +02:00
if peerStatus . direct {
2022-09-05 09:06:35 +02:00
tempScore ++
}
2023-06-01 16:00:44 +02:00
2024-04-09 21:20:02 +02:00
if tempScore > chosenScore || ( tempScore == chosenScore && chosen == "" ) {
2023-06-01 16:00:44 +02:00
chosen = r . ID
chosenScore = tempScore
}
if chosen == "" && currID == "" {
2022-09-05 09:06:35 +02:00
chosen = r . ID
chosenScore = tempScore
}
2024-04-09 21:20:02 +02:00
if r . ID == currID {
currScore = tempScore
}
2022-09-05 09:06:35 +02:00
}
2024-04-09 21:20:02 +02:00
switch {
case chosen == "" :
2022-09-05 09:06:35 +02:00
var peers [ ] string
for _ , r := range c . routes {
peers = append ( peers , r . Peer )
}
2023-06-01 16:00:44 +02:00
log . Warnf ( "the network %s has not been assigned a routing peer as no peers from the list %s are currently connected" , c . network , peers )
2024-04-09 21:20:02 +02:00
case chosen != currID :
2024-05-02 11:51:03 +02:00
// we compare the current score + 10ms to the chosen score to avoid flapping between routes
if currScore != 0 && currScore + 0.01 > chosenScore {
log . Debugf ( "keeping current routing peer because the score difference with latency is less than 0.01(10ms), current: %f, new: %f" , currScore , chosenScore )
2024-04-09 21:20:02 +02:00
return currID
}
2024-05-02 11:51:03 +02:00
var p string
if rt := c . routes [ chosen ] ; rt != nil {
p = rt . Peer
}
log . Infof ( "new chosen route is %s with peer %s with score %f for network %s" , chosen , p , chosenScore , c . network )
2022-09-05 09:06:35 +02:00
}
return chosen
}
func ( c * clientNetwork ) watchPeerStatusChanges ( ctx context . Context , peerKey string , peerStateUpdate chan struct { } , closer chan struct { } ) {
for {
select {
case <- ctx . Done ( ) :
return
case <- closer :
return
case <- c . statusRecorder . GetPeerStateChangeNotifier ( peerKey ) :
state , err := c . statusRecorder . GetPeer ( peerKey )
2023-03-03 19:49:18 +01:00
if err != nil || state . ConnStatus == peer . StatusConnecting {
2022-09-05 09:06:35 +02:00
continue
}
peerStateUpdate <- struct { } { }
log . Debugf ( "triggered route state update for Peer %s, state: %s" , peerKey , state . ConnStatus )
}
}
}
// startPeersStatusChangeWatcher makes sure every routing peer of the known
// routes has a status watcher goroutine. Peers that already have an entry in
// routePeersNotifiers are skipped; for the others a closer channel is
// registered and a watcher goroutine is started.
func (c *clientNetwork) startPeersStatusChangeWatcher() {
	for _, rt := range c.routes {
		if _, watching := c.routePeersNotifiers[rt.Peer]; watching {
			continue
		}

		closer := make(chan struct{})
		c.routePeersNotifiers[rt.Peer] = closer
		go c.watchPeerStatusChanges(c.ctx, rt.Peer, c.peerStateUpdate, closer)
	}
}
func ( c * clientNetwork ) removeRouteFromWireguardPeer ( peerKey string ) error {
state , err := c . statusRecorder . GetPeer ( peerKey )
2023-09-04 17:03:44 +02:00
if err != nil {
2024-04-08 18:56:52 +02:00
return fmt . Errorf ( "get peer state: %v" , err )
2023-09-04 17:03:44 +02:00
}
2024-03-12 19:06:16 +01:00
2024-04-11 22:12:23 +02:00
state . DeleteRoute ( c . network . String ( ) )
2024-03-12 19:06:16 +01:00
if err := c . statusRecorder . UpdatePeerState ( state ) ; err != nil {
log . Warnf ( "Failed to update peer state: %v" , err )
}
2023-09-04 17:03:44 +02:00
if state . ConnStatus != peer . StatusConnected {
2022-09-05 09:06:35 +02:00
return nil
}
err = c . wgInterface . RemoveAllowedIP ( peerKey , c . network . String ( ) )
if err != nil {
2024-04-08 18:56:52 +02:00
return fmt . Errorf ( "remove allowed IP %s removed for peer %s, err: %v" ,
2022-09-05 09:06:35 +02:00
c . network , c . chosenRoute . Peer , err )
}
return nil
}
func ( c * clientNetwork ) removeRouteFromPeerAndSystem ( ) error {
if c . chosenRoute != nil {
2024-04-26 16:37:27 +02:00
if err := removeVPNRoute ( c . network , c . getAsInterface ( ) ) ; err != nil {
2024-04-08 18:56:52 +02:00
return fmt . Errorf ( "remove route %s from system, err: %v" , c . network , err )
2022-09-05 09:06:35 +02:00
}
2024-04-08 18:56:52 +02:00
if err := c . removeRouteFromWireguardPeer ( c . chosenRoute . Peer ) ; err != nil {
return fmt . Errorf ( "remove route: %v" , err )
2022-09-05 09:06:35 +02:00
}
}
return nil
}
func ( c * clientNetwork ) recalculateRouteAndUpdatePeerAndSystem ( ) error {
routerPeerStatuses := c . getRouterPeerStatuses ( )
chosen := c . getBestRouteFromStatuses ( routerPeerStatuses )
2024-04-08 18:56:52 +02:00
// If no route is chosen, remove the route from the peer and system
2022-09-05 09:06:35 +02:00
if chosen == "" {
2024-04-08 18:56:52 +02:00
if err := c . removeRouteFromPeerAndSystem ( ) ; err != nil {
return fmt . Errorf ( "remove route from peer and system: %v" , err )
2022-09-05 09:06:35 +02:00
}
c . chosenRoute = nil
return nil
}
2024-04-08 18:56:52 +02:00
// If the chosen route is the same as the current route, do nothing
2022-09-05 09:06:35 +02:00
if c . chosenRoute != nil && c . chosenRoute . ID == chosen {
if c . chosenRoute . IsEqual ( c . routes [ chosen ] ) {
return nil
}
}
if c . chosenRoute != nil {
2024-04-08 18:56:52 +02:00
// If a previous route exists, remove it from the peer
if err := c . removeRouteFromWireguardPeer ( c . chosenRoute . Peer ) ; err != nil {
return fmt . Errorf ( "remove route from peer: %v" , err )
2022-09-05 09:06:35 +02:00
}
} else {
2024-04-08 18:56:52 +02:00
// otherwise add the route to the system
2024-04-26 16:37:27 +02:00
if err := addVPNRoute ( c . network , c . getAsInterface ( ) ) ; err != nil {
2022-09-05 09:06:35 +02:00
return fmt . Errorf ( "route %s couldn't be added for peer %s, err: %v" ,
2023-02-13 18:34:56 +01:00
c . network . String ( ) , c . wgInterface . Address ( ) . IP . String ( ) , err )
2022-09-05 09:06:35 +02:00
}
}
c . chosenRoute = c . routes [ chosen ]
2024-03-12 19:06:16 +01:00
state , err := c . statusRecorder . GetPeer ( c . chosenRoute . Peer )
if err != nil {
log . Errorf ( "Failed to get peer state: %v" , err )
} else {
2024-04-11 22:12:23 +02:00
state . AddRoute ( c . network . String ( ) )
2024-03-12 19:06:16 +01:00
if err := c . statusRecorder . UpdatePeerState ( state ) ; err != nil {
log . Warnf ( "Failed to update peer state: %v" , err )
}
}
2024-04-08 18:56:52 +02:00
if err := c . wgInterface . AddAllowedIP ( c . chosenRoute . Peer , c . network . String ( ) ) ; err != nil {
2022-09-05 09:06:35 +02:00
log . Errorf ( "couldn't add allowed IP %s added for peer %s, err: %v" ,
c . network , c . chosenRoute . Peer , err )
}
return nil
}
// sendUpdateToClientNetworkWatcher hands update to the watcher loop without
// blocking the caller: the delivery happens on a separate goroutine.
//
// The delivery is abandoned when the watcher context is cancelled, so the
// goroutine does not leak if nothing receives from routeUpdate any more.
func (c *clientNetwork) sendUpdateToClientNetworkWatcher(update routesUpdate) {
	go func() {
		select {
		case c.routeUpdate <- update:
		case <-c.ctx.Done():
		}
	}()
}
func ( c * clientNetwork ) handleUpdate ( update routesUpdate ) {
2024-05-06 14:47:49 +02:00
updateMap := make ( map [ route . ID ] * route . Route )
2022-09-05 09:06:35 +02:00
for _ , r := range update . routes {
updateMap [ r . ID ] = r
}
for id , r := range c . routes {
_ , found := updateMap [ id ]
if ! found {
close ( c . routePeersNotifiers [ r . Peer ] )
delete ( c . routePeersNotifiers , r . Peer )
}
}
c . routes = updateMap
}
// peersStateAndUpdateWatcher is the main point of reacting on client network routing events.
// All the processing related to the client network should be done here. Thread-safe.
func ( c * clientNetwork ) peersStateAndUpdateWatcher ( ) {
for {
select {
case <- c . ctx . Done ( ) :
log . Debugf ( "stopping watcher for network %s" , c . network )
err := c . removeRouteFromPeerAndSystem ( )
if err != nil {
2024-04-08 18:56:52 +02:00
log . Errorf ( "Couldn't remove route from peer and system for network %s: %v" , c . network , err )
2022-09-05 09:06:35 +02:00
}
return
case <- c . peerStateUpdate :
err := c . recalculateRouteAndUpdatePeerAndSystem ( )
if err != nil {
2024-04-08 18:56:52 +02:00
log . Errorf ( "Couldn't recalculate route and update peer and system: %v" , err )
2022-09-05 09:06:35 +02:00
}
case update := <- c . routeUpdate :
if update . updateSerial < c . updateSerial {
2024-04-08 18:56:52 +02:00
log . Warnf ( "Received a routes update with smaller serial number, ignoring it" )
2022-09-05 09:06:35 +02:00
continue
}
2024-04-08 18:56:52 +02:00
log . Debugf ( "Received a new client network route update for %s" , c . network )
2022-09-05 09:06:35 +02:00
c . handleUpdate ( update )
c . updateSerial = update . updateSerial
err := c . recalculateRouteAndUpdatePeerAndSystem ( )
if err != nil {
2024-04-08 18:56:52 +02:00
log . Errorf ( "Couldn't recalculate route and update peer and system for network %s: %v" , c . network , err )
2022-09-05 09:06:35 +02:00
}
c . startPeersStatusChangeWatcher ( )
}
}
}
2024-04-26 16:37:27 +02:00
// getAsInterface resolves the WireGuard interface of this watcher to a
// *net.Interface by name. When the lookup fails, a warning is logged and an
// interface value carrying only the name is returned instead.
func (c *clientNetwork) getAsInterface() *net.Interface {
	resolved, lookupErr := net.InterfaceByName(c.wgInterface.Name())
	if lookupErr == nil {
		return resolved
	}

	log.Warnf("Couldn't get interface by name %s: %v", c.wgInterface.Name(), lookupErr)
	return &net.Interface{Name: c.wgInterface.Name()}
}