mirror of
https://github.com/netbirdio/netbird.git
synced 2025-06-25 12:12:12 +02:00
[client] Handle lazy routing peers that are part of HA groups (#3943)
* Activate new lazy routing peers if the HA group is active * Prevent lazy peers going to idle if HA group members are active (#3948)
This commit is contained in:
parent
83457f8b99
commit
2a51609436
@ -175,7 +175,7 @@ func (e *ConnMgr) AddPeerConn(ctx context.Context, peerKey string, conn *peer.Co
|
|||||||
PeerConnID: conn.ConnID(),
|
PeerConnID: conn.ConnID(),
|
||||||
Log: conn.Log,
|
Log: conn.Log,
|
||||||
}
|
}
|
||||||
excluded, err := e.lazyConnMgr.AddPeer(lazyPeerCfg)
|
excluded, err := e.lazyConnMgr.AddPeer(e.lazyCtx, lazyPeerCfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
conn.Log.Errorf("failed to add peer to lazyconn manager: %v", err)
|
conn.Log.Errorf("failed to add peer to lazyconn manager: %v", err)
|
||||||
if err := conn.Open(ctx); err != nil {
|
if err := conn.Open(ctx); err != nil {
|
||||||
|
@ -68,3 +68,8 @@ func (i *Monitor) PauseTimer() {
|
|||||||
func (i *Monitor) ResetTimer() {
|
func (i *Monitor) ResetTimer() {
|
||||||
i.timer.Reset(i.inactivityThreshold)
|
i.timer.Reset(i.inactivityThreshold)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (i *Monitor) ResetMonitor(ctx context.Context, timeoutChan chan peer.ConnID) {
|
||||||
|
i.Stop()
|
||||||
|
go i.Start(ctx, timeoutChan)
|
||||||
|
}
|
||||||
|
@ -58,7 +58,7 @@ type Manager struct {
|
|||||||
// Route HA group management
|
// Route HA group management
|
||||||
peerToHAGroups map[string][]route.HAUniqueID // peer ID -> HA groups they belong to
|
peerToHAGroups map[string][]route.HAUniqueID // peer ID -> HA groups they belong to
|
||||||
haGroupToPeers map[route.HAUniqueID][]string // HA group -> peer IDs in the group
|
haGroupToPeers map[route.HAUniqueID][]string // HA group -> peer IDs in the group
|
||||||
routesMu sync.RWMutex // protects route mappings
|
routesMu sync.RWMutex
|
||||||
|
|
||||||
onInactive chan peerid.ConnID
|
onInactive chan peerid.ConnID
|
||||||
}
|
}
|
||||||
@ -146,7 +146,7 @@ func (m *Manager) Start(ctx context.Context) {
|
|||||||
case peerConnID := <-m.activityManager.OnActivityChan:
|
case peerConnID := <-m.activityManager.OnActivityChan:
|
||||||
m.onPeerActivity(ctx, peerConnID)
|
m.onPeerActivity(ctx, peerConnID)
|
||||||
case peerConnID := <-m.onInactive:
|
case peerConnID := <-m.onInactive:
|
||||||
m.onPeerInactivityTimedOut(peerConnID)
|
m.onPeerInactivityTimedOut(ctx, peerConnID)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -197,7 +197,7 @@ func (m *Manager) ExcludePeer(ctx context.Context, peerConfigs []lazyconn.PeerCo
|
|||||||
return added
|
return added
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Manager) AddPeer(peerCfg lazyconn.PeerConfig) (bool, error) {
|
func (m *Manager) AddPeer(ctx context.Context, peerCfg lazyconn.PeerConfig) (bool, error) {
|
||||||
m.managedPeersMu.Lock()
|
m.managedPeersMu.Lock()
|
||||||
defer m.managedPeersMu.Unlock()
|
defer m.managedPeersMu.Unlock()
|
||||||
|
|
||||||
@ -225,6 +225,13 @@ func (m *Manager) AddPeer(peerCfg lazyconn.PeerConfig) (bool, error) {
|
|||||||
peerCfg: &peerCfg,
|
peerCfg: &peerCfg,
|
||||||
expectedWatcher: watcherActivity,
|
expectedWatcher: watcherActivity,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if this peer should be activated because its HA group peers are active
|
||||||
|
if group, ok := m.shouldActivateNewPeer(peerCfg.PublicKey); ok {
|
||||||
|
peerCfg.Log.Debugf("peer belongs to active HA group %s, will activate immediately", group)
|
||||||
|
m.activateNewPeerInActiveGroup(ctx, peerCfg)
|
||||||
|
}
|
||||||
|
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -315,36 +322,38 @@ func (m *Manager) activateSinglePeer(ctx context.Context, cfg *lazyconn.PeerConf
|
|||||||
|
|
||||||
// activateHAGroupPeers activates all peers in HA groups that the given peer belongs to
|
// activateHAGroupPeers activates all peers in HA groups that the given peer belongs to
|
||||||
func (m *Manager) activateHAGroupPeers(ctx context.Context, triggerPeerID string) {
|
func (m *Manager) activateHAGroupPeers(ctx context.Context, triggerPeerID string) {
|
||||||
|
var peersToActivate []string
|
||||||
|
|
||||||
m.routesMu.RLock()
|
m.routesMu.RLock()
|
||||||
haGroups := m.peerToHAGroups[triggerPeerID]
|
haGroups := m.peerToHAGroups[triggerPeerID]
|
||||||
m.routesMu.RUnlock()
|
|
||||||
|
|
||||||
if len(haGroups) == 0 {
|
if len(haGroups) == 0 {
|
||||||
|
m.routesMu.RUnlock()
|
||||||
log.Debugf("peer %s is not part of any HA groups", triggerPeerID)
|
log.Debugf("peer %s is not part of any HA groups", triggerPeerID)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
activatedCount := 0
|
|
||||||
for _, haGroup := range haGroups {
|
for _, haGroup := range haGroups {
|
||||||
m.routesMu.RLock()
|
|
||||||
peers := m.haGroupToPeers[haGroup]
|
peers := m.haGroupToPeers[haGroup]
|
||||||
m.routesMu.RUnlock()
|
|
||||||
|
|
||||||
for _, peerID := range peers {
|
for _, peerID := range peers {
|
||||||
if peerID == triggerPeerID {
|
if peerID != triggerPeerID {
|
||||||
continue
|
peersToActivate = append(peersToActivate, peerID)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.routesMu.RUnlock()
|
||||||
|
|
||||||
cfg, mp := m.getPeerForActivation(peerID)
|
activatedCount := 0
|
||||||
if cfg == nil {
|
for _, peerID := range peersToActivate {
|
||||||
continue
|
cfg, mp := m.getPeerForActivation(peerID)
|
||||||
}
|
if cfg == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
if m.activateSinglePeer(ctx, cfg, mp) {
|
if m.activateSinglePeer(ctx, cfg, mp) {
|
||||||
activatedCount++
|
activatedCount++
|
||||||
cfg.Log.Infof("activated peer as part of HA group %s (triggered by %s)", haGroup, triggerPeerID)
|
cfg.Log.Infof("activated peer as part of HA group (triggered by %s)", triggerPeerID)
|
||||||
m.peerStore.PeerConnOpen(m.engineCtx, cfg.PublicKey)
|
m.peerStore.PeerConnOpen(m.engineCtx, cfg.PublicKey)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -354,6 +363,51 @@ func (m *Manager) activateHAGroupPeers(ctx context.Context, triggerPeerID string
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// shouldActivateNewPeer checks if a newly added peer should be activated
|
||||||
|
// because other peers in its HA groups are already active
|
||||||
|
func (m *Manager) shouldActivateNewPeer(peerID string) (route.HAUniqueID, bool) {
|
||||||
|
m.routesMu.RLock()
|
||||||
|
defer m.routesMu.RUnlock()
|
||||||
|
|
||||||
|
haGroups := m.peerToHAGroups[peerID]
|
||||||
|
if len(haGroups) == 0 {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, haGroup := range haGroups {
|
||||||
|
peers := m.haGroupToPeers[haGroup]
|
||||||
|
for _, groupPeerID := range peers {
|
||||||
|
if groupPeerID == peerID {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, ok := m.managedPeers[groupPeerID]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if mp, ok := m.managedPeersByConnID[cfg.PeerConnID]; ok && mp.expectedWatcher == watcherInactivity {
|
||||||
|
return haGroup, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
// activateNewPeerInActiveGroup activates a newly added peer that should be active due to HA group
|
||||||
|
func (m *Manager) activateNewPeerInActiveGroup(ctx context.Context, peerCfg lazyconn.PeerConfig) {
|
||||||
|
mp, ok := m.managedPeersByConnID[peerCfg.PeerConnID]
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if !m.activateSinglePeer(ctx, &peerCfg, mp) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
peerCfg.Log.Infof("activated newly added peer due to active HA group peers")
|
||||||
|
m.peerStore.PeerConnOpen(m.engineCtx, peerCfg.PublicKey)
|
||||||
|
}
|
||||||
|
|
||||||
func (m *Manager) addActivePeer(ctx context.Context, peerCfg lazyconn.PeerConfig) error {
|
func (m *Manager) addActivePeer(ctx context.Context, peerCfg lazyconn.PeerConfig) error {
|
||||||
if _, ok := m.managedPeers[peerCfg.PublicKey]; ok {
|
if _, ok := m.managedPeers[peerCfg.PublicKey]; ok {
|
||||||
peerCfg.Log.Warnf("peer already managed")
|
peerCfg.Log.Warnf("peer already managed")
|
||||||
@ -415,6 +469,48 @@ func (m *Manager) close() {
|
|||||||
log.Infof("lazy connection manager closed")
|
log.Infof("lazy connection manager closed")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// shouldDeferIdleForHA checks if peer should stay connected due to HA group requirements
|
||||||
|
func (m *Manager) shouldDeferIdleForHA(peerID string) bool {
|
||||||
|
m.routesMu.RLock()
|
||||||
|
defer m.routesMu.RUnlock()
|
||||||
|
|
||||||
|
haGroups := m.peerToHAGroups[peerID]
|
||||||
|
if len(haGroups) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, haGroup := range haGroups {
|
||||||
|
groupPeers := m.haGroupToPeers[haGroup]
|
||||||
|
|
||||||
|
for _, groupPeerID := range groupPeers {
|
||||||
|
if groupPeerID == peerID {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, ok := m.managedPeers[groupPeerID]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
groupMp, ok := m.managedPeersByConnID[cfg.PeerConnID]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if groupMp.expectedWatcher != watcherInactivity {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Other member is still connected, defer idle
|
||||||
|
if peer, ok := m.peerStore.PeerConn(groupPeerID); ok && peer.IsConnected() {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func (m *Manager) onPeerActivity(ctx context.Context, peerConnID peerid.ConnID) {
|
func (m *Manager) onPeerActivity(ctx context.Context, peerConnID peerid.ConnID) {
|
||||||
m.managedPeersMu.Lock()
|
m.managedPeersMu.Lock()
|
||||||
defer m.managedPeersMu.Unlock()
|
defer m.managedPeersMu.Unlock()
|
||||||
@ -441,7 +537,7 @@ func (m *Manager) onPeerActivity(ctx context.Context, peerConnID peerid.ConnID)
|
|||||||
m.peerStore.PeerConnOpen(m.engineCtx, mp.peerCfg.PublicKey)
|
m.peerStore.PeerConnOpen(m.engineCtx, mp.peerCfg.PublicKey)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Manager) onPeerInactivityTimedOut(peerConnID peerid.ConnID) {
|
func (m *Manager) onPeerInactivityTimedOut(ctx context.Context, peerConnID peerid.ConnID) {
|
||||||
m.managedPeersMu.Lock()
|
m.managedPeersMu.Lock()
|
||||||
defer m.managedPeersMu.Unlock()
|
defer m.managedPeersMu.Unlock()
|
||||||
|
|
||||||
@ -456,6 +552,17 @@ func (m *Manager) onPeerInactivityTimedOut(peerConnID peerid.ConnID) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if m.shouldDeferIdleForHA(mp.peerCfg.PublicKey) {
|
||||||
|
iw, ok := m.inactivityMonitors[peerConnID]
|
||||||
|
if ok {
|
||||||
|
mp.peerCfg.Log.Debugf("resetting inactivity timer due to HA group requirements")
|
||||||
|
iw.ResetMonitor(ctx, m.onInactive)
|
||||||
|
} else {
|
||||||
|
mp.peerCfg.Log.Errorf("inactivity monitor not found for HA defer reset")
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
mp.peerCfg.Log.Infof("connection timed out")
|
mp.peerCfg.Log.Infof("connection timed out")
|
||||||
|
|
||||||
// this is blocking operation, potentially can be optimized
|
// this is blocking operation, potentially can be optimized
|
||||||
@ -489,7 +596,7 @@ func (m *Manager) onPeerConnected(peerConnID peerid.ConnID) {
|
|||||||
|
|
||||||
iw, ok := m.inactivityMonitors[mp.peerCfg.PeerConnID]
|
iw, ok := m.inactivityMonitors[mp.peerCfg.PeerConnID]
|
||||||
if !ok {
|
if !ok {
|
||||||
mp.peerCfg.Log.Errorf("inactivity monitor not found for peer")
|
mp.peerCfg.Log.Warnf("inactivity monitor not found for peer")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -317,12 +317,12 @@ func (conn *Conn) WgConfig() WgConfig {
|
|||||||
return conn.config.WgConfig
|
return conn.config.WgConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsConnected unit tests only
|
// IsConnected returns true if the peer is connected
|
||||||
// refactor unit test to use status recorder use refactor status recorded to manage connection status in peer.Conn
|
|
||||||
func (conn *Conn) IsConnected() bool {
|
func (conn *Conn) IsConnected() bool {
|
||||||
conn.mu.Lock()
|
conn.mu.Lock()
|
||||||
defer conn.mu.Unlock()
|
defer conn.mu.Unlock()
|
||||||
return conn.currentConnPriority != conntype.None
|
|
||||||
|
return conn.evalStatus() == StatusConnected
|
||||||
}
|
}
|
||||||
|
|
||||||
func (conn *Conn) GetKey() string {
|
func (conn *Conn) GetKey() string {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user