From 01a62047eed37ad6eccafaa1c973c175bd14aa99 Mon Sep 17 00:00:00 2001 From: Viktor Liu <17948409+lixmal@users.noreply.github.com> Date: Thu, 19 Jun 2025 09:28:48 +0200 Subject: [PATCH] [client] Prevent lazy peers going to idle if HA group members are active (#3948) --- .../lazyconn/inactivity/inactivity.go | 5 ++ client/internal/lazyconn/manager/manager.go | 61 +++++++++++++++++-- client/internal/peer/conn.go | 7 +-- 3 files changed, 64 insertions(+), 9 deletions(-) diff --git a/client/internal/lazyconn/inactivity/inactivity.go b/client/internal/lazyconn/inactivity/inactivity.go index a30c1846d..9b7c8511b 100644 --- a/client/internal/lazyconn/inactivity/inactivity.go +++ b/client/internal/lazyconn/inactivity/inactivity.go @@ -68,3 +68,8 @@ func (i *Monitor) PauseTimer() { func (i *Monitor) ResetTimer() { i.timer.Reset(i.inactivityThreshold) } + +func (i *Monitor) ResetMonitor(ctx context.Context, timeoutChan chan peer.ConnID) { + i.Stop() + go i.Start(ctx, timeoutChan) +} diff --git a/client/internal/lazyconn/manager/manager.go b/client/internal/lazyconn/manager/manager.go index 0aa7019c0..74ede50a7 100644 --- a/client/internal/lazyconn/manager/manager.go +++ b/client/internal/lazyconn/manager/manager.go @@ -58,7 +58,7 @@ type Manager struct { // Route HA group management peerToHAGroups map[string][]route.HAUniqueID // peer ID -> HA groups they belong to haGroupToPeers map[route.HAUniqueID][]string // HA group -> peer IDs in the group - routesMu sync.RWMutex // protects route mappings + routesMu sync.RWMutex onInactive chan peerid.ConnID } @@ -146,7 +146,7 @@ func (m *Manager) Start(ctx context.Context) { case peerConnID := <-m.activityManager.OnActivityChan: m.onPeerActivity(ctx, peerConnID) case peerConnID := <-m.onInactive: - m.onPeerInactivityTimedOut(peerConnID) + m.onPeerInactivityTimedOut(ctx, peerConnID) } } } @@ -469,6 +469,48 @@ func (m *Manager) close() { log.Infof("lazy connection manager closed") } +// shouldDeferIdleForHA checks if peer should stay connected due to HA group requirements +func (m *Manager) shouldDeferIdleForHA(peerID string) bool { + m.routesMu.RLock() + defer m.routesMu.RUnlock() + + haGroups := m.peerToHAGroups[peerID] + if len(haGroups) == 0 { + return false + } + + for _, haGroup := range haGroups { + groupPeers := m.haGroupToPeers[haGroup] + + for _, groupPeerID := range groupPeers { + if groupPeerID == peerID { + continue + } + + cfg, ok := m.managedPeers[groupPeerID] + if !ok { + continue + } + + groupMp, ok := m.managedPeersByConnID[cfg.PeerConnID] + if !ok { + continue + } + + if groupMp.expectedWatcher != watcherInactivity { + continue + } + + // Other member is still connected, defer idle + if peer, ok := m.peerStore.PeerConn(groupPeerID); ok && peer.IsConnected() { + return true + } + } + } + + return false +} + func (m *Manager) onPeerActivity(ctx context.Context, peerConnID peerid.ConnID) { m.managedPeersMu.Lock() defer m.managedPeersMu.Unlock() @@ -495,7 +537,7 @@ func (m *Manager) onPeerActivity(ctx context.Context, peerConnID peerid.ConnID) m.peerStore.PeerConnOpen(m.engineCtx, mp.peerCfg.PublicKey) } -func (m *Manager) onPeerInactivityTimedOut(peerConnID peerid.ConnID) { +func (m *Manager) onPeerInactivityTimedOut(ctx context.Context, peerConnID peerid.ConnID) { m.managedPeersMu.Lock() defer m.managedPeersMu.Unlock() @@ -510,6 +552,17 @@ func (m *Manager) onPeerInactivityTimedOut(peerConnID peerid.ConnID) { return } + if m.shouldDeferIdleForHA(mp.peerCfg.PublicKey) { + iw, ok := m.inactivityMonitors[peerConnID] + if ok { + mp.peerCfg.Log.Debugf("resetting inactivity timer due to HA group requirements") + iw.ResetMonitor(ctx, m.onInactive) + } else { + mp.peerCfg.Log.Errorf("inactivity monitor not found for HA defer reset") + } + return + } + mp.peerCfg.Log.Infof("connection timed out") // this is blocking operation, potentially can be optimized @@ -543,7 +596,7 @@ func (m *Manager) onPeerConnected(peerConnID peerid.ConnID) { iw, ok := m.inactivityMonitors[mp.peerCfg.PeerConnID] if !ok { - mp.peerCfg.Log.Errorf("inactivity monitor not found for peer") + mp.peerCfg.Log.Warnf("inactivity monitor not found for peer") return } diff --git a/client/internal/peer/conn.go b/client/internal/peer/conn.go index b33023873..bfca8a481 100644 --- a/client/internal/peer/conn.go +++ b/client/internal/peer/conn.go @@ -317,12 +317,9 @@ func (conn *Conn) WgConfig() WgConfig { return conn.config.WgConfig } -// IsConnected unit tests only -// refactor unit test to use status recorder use refactor status recorded to manage connection status in peer.Conn +// IsConnected returns true if the peer is connected func (conn *Conn) IsConnected() bool { - conn.mu.Lock() - defer conn.mu.Unlock() - return conn.currentConnPriority != conntype.None + return conn.evalStatus() == StatusConnected } func (conn *Conn) GetKey() string {