From af27aaf9af28124198fcf0b1a90ef3443f6aeb5d Mon Sep 17 00:00:00 2001 From: Zoltan Papp Date: Tue, 3 Jun 2025 09:20:33 +0200 Subject: [PATCH] [client] Refactor peer state change subscription mechanism (#3910) * Refactor peer state change subscription mechanism Because the code generated new channel for every single event, was easy to miss notification. Use single channel. * Fix lint * Avoid potential deadlock * Fix test * Add context * Fix test --- client/internal/peer/status.go | 81 +++++++++++++++++++++----- client/internal/peer/status_test.go | 13 +++-- client/internal/routemanager/client.go | 11 ++-- 3 files changed, 78 insertions(+), 27 deletions(-) diff --git a/client/internal/peer/status.go b/client/internal/peer/status.go index 40956e68e..0c6aac372 100644 --- a/client/internal/peer/status.go +++ b/client/internal/peer/status.go @@ -1,6 +1,7 @@ package peer import ( + "context" "errors" "net/netip" "slices" @@ -146,11 +147,31 @@ type FullStatus struct { LazyConnectionEnabled bool } +type StatusChangeSubscription struct { + peerID string + id string + eventsChan chan struct{} + ctx context.Context +} + +func newStatusChangeSubscription(ctx context.Context, peerID string) *StatusChangeSubscription { + return &StatusChangeSubscription{ + ctx: ctx, + peerID: peerID, + id: uuid.New().String(), + eventsChan: make(chan struct{}, 1), + } +} + +func (s *StatusChangeSubscription) Events() chan struct{} { + return s.eventsChan +} + // Status holds a state of peers, signal, management connections and relays type Status struct { mux sync.Mutex peers map[string]State - changeNotify map[string]chan struct{} + changeNotify map[string]map[string]*StatusChangeSubscription // map[peerID]map[subscriptionID]*StatusChangeSubscription signalState bool signalError error managementState bool @@ -187,7 +208,7 @@ type Status struct { func NewRecorder(mgmAddress string) *Status { return &Status{ peers: make(map[string]State), - changeNotify: make(map[string]chan struct{}), + changeNotify: make(map[string]map[string]*StatusChangeSubscription), eventStreams: make(map[string]chan *proto.SystemEvent), eventQueue: NewEventQueue(eventQueueSize), offlinePeers: make([]State, 0), @@ -312,7 +333,6 @@ func (d *Status) UpdatePeerState(receivedState State) error { // when we close the connection we will not notify the router manager if receivedState.ConnStatus == StatusIdle { d.notifyPeerStateChangeListeners(receivedState.PubKey) - } return nil } @@ -552,19 +572,41 @@ func (d *Status) FinishPeerListModifications() { d.notifyPeerListChanged() } -// GetPeerStateChangeNotifier returns a change notifier channel for a peer -func (d *Status) GetPeerStateChangeNotifier(peer string) <-chan struct{} { +func (d *Status) SubscribeToPeerStateChanges(ctx context.Context, peerID string) *StatusChangeSubscription { d.mux.Lock() defer d.mux.Unlock() - ch, found := d.changeNotify[peer] - if found { - return ch + sub := newStatusChangeSubscription(ctx, peerID) + if _, ok := d.changeNotify[peerID]; !ok { + d.changeNotify[peerID] = make(map[string]*StatusChangeSubscription) + } + d.changeNotify[peerID][sub.id] = sub + + return sub +} + +func (d *Status) UnsubscribePeerStateChanges(subscription *StatusChangeSubscription) { + d.mux.Lock() + defer d.mux.Unlock() + + if subscription == nil { + return } - ch = make(chan struct{}) - d.changeNotify[peer] = ch - return ch + channels, ok := d.changeNotify[subscription.peerID] + if !ok { + return + } + + sub, exists := channels[subscription.id] + if !exists { + return + } + + delete(channels, subscription.id) + if len(channels) == 0 { + delete(d.changeNotify, sub.peerID) + } } // GetLocalPeerState returns the local peer state @@ -939,13 +981,20 @@ func (d *Status) onConnectionChanged() { // notifyPeerStateChangeListeners notifies route manager about the change in peer state func (d *Status) notifyPeerStateChangeListeners(peerID string) { - ch, found := d.changeNotify[peerID] - if !found { + subs, ok := d.changeNotify[peerID] + if !ok { return } - - close(ch) - delete(d.changeNotify, peerID) + for _, sub := range subs { + // block the write because we do not want to miss notification + // must have to be sure we will run the GetPeerState() on separated thread + go func() { + select { + case sub.eventsChan <- struct{}{}: + case <-sub.ctx.Done(): + } + }() + } } func (d *Status) notifyPeerListChanged() { diff --git a/client/internal/peer/status_test.go b/client/internal/peer/status_test.go index 8f28a9862..272638750 100644 --- a/client/internal/peer/status_test.go +++ b/client/internal/peer/status_test.go @@ -1,6 +1,7 @@ package peer import ( + "context" "errors" "sync" "testing" @@ -86,8 +87,8 @@ func TestGetPeerStateChangeNotifierLogic(t *testing.T) { status := NewRecorder("https://mgm") _ = status.AddPeer(key, "abc.netbird", ip) - ch := status.GetPeerStateChangeNotifier(key) - assert.NotNil(t, ch, "channel shouldn't be nil") + sub := status.SubscribeToPeerStateChanges(context.Background(), key) + assert.NotNil(t, sub, "channel shouldn't be nil") peerState := State{ PubKey: key, @@ -99,10 +100,12 @@ func TestGetPeerStateChangeNotifierLogic(t *testing.T) { err := status.UpdatePeerRelayedStateToDisconnected(peerState) assert.NoError(t, err, "shouldn't return error") + timeoutCtx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() select { - case <-ch: - default: - t.Errorf("channel wasn't closed after update") + case <-sub.eventsChan: + case <-timeoutCtx.Done(): + t.Errorf("timed out waiting for event") } } diff --git a/client/internal/routemanager/client.go b/client/internal/routemanager/client.go index 137e00d31..bff954c27 100644 --- a/client/internal/routemanager/client.go +++ b/client/internal/routemanager/client.go @@ -224,19 +224,18 @@ func (c *clientNetwork) getBestRouteFromStatuses(routePeerStatuses map[route.ID] } func (c *clientNetwork) watchPeerStatusChanges(ctx context.Context, peerKey string, peerStateUpdate chan struct{}, closer chan struct{}) { + subscription := c.statusRecorder.SubscribeToPeerStateChanges(ctx, peerKey) + defer c.statusRecorder.UnsubscribePeerStateChanges(subscription) + for { select { case <-ctx.Done(): return case <-closer: return - case <-c.statusRecorder.GetPeerStateChangeNotifier(peerKey): - state, err := c.statusRecorder.GetPeer(peerKey) - if err != nil { - continue - } + case <-subscription.Events(): peerStateUpdate <- struct{}{} - log.Debugf("triggered route state update for Peer %s, state: %s", peerKey, state.ConnStatus) + log.Debugf("triggered route state update for Peer: %s", peerKey) } } }