[client] Refactor peer state change subscription mechanism (#3910)

* Refactor peer state change subscription mechanism

Because the code generated a new channel for every single event, it was easy to miss a notification.
Use a single channel instead.

* Fix lint

* Avoid potential deadlock

* Fix test

* Add context

* Fix test
This commit is contained in:
Zoltan Papp 2025-06-03 09:20:33 +02:00 committed by GitHub
parent 35287f8241
commit af27aaf9af
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 78 additions and 27 deletions

View File

@ -1,6 +1,7 @@
package peer
import (
"context"
"errors"
"net/netip"
"slices"
@ -146,11 +147,31 @@ type FullStatus struct {
LazyConnectionEnabled bool
}
// StatusChangeSubscription is the handle a caller holds for state-change
// events of a single peer.
type StatusChangeSubscription struct {
	// ctx is the context supplied when the subscription was created.
	ctx context.Context
	// eventsChan is the channel handed out to the subscriber via Events.
	eventsChan chan struct{}

	// id identifies this particular subscription.
	id string
	// peerID is the peer this subscription was created for.
	peerID string
}
// newStatusChangeSubscription builds a subscription for peerID that keeps ctx.
// The subscription gets a freshly generated UUID string as its id and an
// events channel with a buffer of one element.
func newStatusChangeSubscription(ctx context.Context, peerID string) *StatusChangeSubscription {
	sub := new(StatusChangeSubscription)
	sub.ctx = ctx
	sub.peerID = peerID
	sub.id = uuid.New().String()
	sub.eventsChan = make(chan struct{}, 1)
	return sub
}
// Events exposes the channel on which this subscription's events are
// delivered. The same channel is returned on every call.
func (s *StatusChangeSubscription) Events() chan struct{} {
	return s.eventsChan
}
// Status holds a state of peers, signal, management connections and relays
type Status struct {
mux sync.Mutex
peers map[string]State
changeNotify map[string]chan struct{}
changeNotify map[string]map[string]*StatusChangeSubscription // map[peerID]map[subscriptionID]*StatusChangeSubscription
signalState bool
signalError error
managementState bool
@ -187,7 +208,7 @@ type Status struct {
func NewRecorder(mgmAddress string) *Status {
return &Status{
peers: make(map[string]State),
changeNotify: make(map[string]chan struct{}),
changeNotify: make(map[string]map[string]*StatusChangeSubscription),
eventStreams: make(map[string]chan *proto.SystemEvent),
eventQueue: NewEventQueue(eventQueueSize),
offlinePeers: make([]State, 0),
@ -312,7 +333,6 @@ func (d *Status) UpdatePeerState(receivedState State) error {
// when we close the connection we will not notify the router manager
if receivedState.ConnStatus == StatusIdle {
d.notifyPeerStateChangeListeners(receivedState.PubKey)
}
return nil
}
@ -552,19 +572,41 @@ func (d *Status) FinishPeerListModifications() {
d.notifyPeerListChanged()
}
// GetPeerStateChangeNotifier returns a change notifier channel for a peer
func (d *Status) GetPeerStateChangeNotifier(peer string) <-chan struct{} {
// SubscribeToPeerStateChanges creates a new subscription for state changes of
// peerID, registers it in d.changeNotify under the peer ID and the
// subscription's own id while holding d.mux, and returns it. The given ctx is
// passed on to the subscription. A subscription is removed again with
// UnsubscribePeerStateChanges.
func (d *Status) SubscribeToPeerStateChanges(ctx context.Context, peerID string) *StatusChangeSubscription {
d.mux.Lock()
defer d.mux.Unlock()
// NOTE(review): the next three lines reference `peer`, which is not a
// parameter of this function; they look like remnants of the replaced
// GetPeerStateChangeNotifier body left by the diff view — confirm against the
// real file.
ch, found := d.changeNotify[peer]
if found {
return ch
sub := newStatusChangeSubscription(ctx, peerID)
// Create the per-peer subscription map on first use.
if _, ok := d.changeNotify[peerID]; !ok {
d.changeNotify[peerID] = make(map[string]*StatusChangeSubscription)
}
d.changeNotify[peerID][sub.id] = sub
return sub
}
// UnsubscribePeerStateChanges removes subscription from d.changeNotify while
// holding d.mux. It is a no-op when subscription is nil, when no subscriptions
// exist for its peer, or when its id is not registered. When the removed
// subscription was the last one for the peer, the peer's entry is deleted too.
func (d *Status) UnsubscribePeerStateChanges(subscription *StatusChangeSubscription) {
d.mux.Lock()
defer d.mux.Unlock()
if subscription == nil {
return
}
// NOTE(review): the next three lines use `ch` and `peer`, neither of which is
// declared in this function; they look like remnants of the replaced
// GetPeerStateChangeNotifier body left by the diff view — confirm against the
// real file.
ch = make(chan struct{})
d.changeNotify[peer] = ch
return ch
channels, ok := d.changeNotify[subscription.peerID]
if !ok {
return
}
sub, exists := channels[subscription.id]
if !exists {
return
}
delete(channels, subscription.id)
// Drop the now-empty per-peer map so d.changeNotify does not keep empty entries.
if len(channels) == 0 {
delete(d.changeNotify, sub.peerID)
}
}
// GetLocalPeerState returns the local peer state
@ -939,13 +981,20 @@ func (d *Status) onConnectionChanged() {
// notifyPeerStateChangeListeners signals every subscription registered for
// peerID. It does nothing when the peer has no subscriptions. For each
// subscription a goroutine is started that sends one event on the
// subscription's channel, or gives up when the subscription's context is done.
// NOTE(review): d.changeNotify is read here without taking d.mux; presumably
// callers already hold the lock — confirm.
func (d *Status) notifyPeerStateChangeListeners(peerID string) {
// NOTE(review): this lookup into `ch` and the close/delete further down look
// like remnants of the previous implementation left by the diff view — confirm
// against the real file.
ch, found := d.changeNotify[peerID]
if !found {
subs, ok := d.changeNotify[peerID]
if !ok {
return
}
close(ch)
delete(d.changeNotify, peerID)
for _, sub := range subs {
// The send blocks so that the notification is not dropped.
// It runs on its own goroutine so this method itself does not wait for the
// subscriber; presumably this avoids a deadlock with subscribers that call
// back into Status (the original note mentions GetPeerState) — confirm.
// NOTE(review): the closure captures the loop variable `sub`; each iteration
// only gets its own variable since Go 1.22 — confirm the module's Go version.
go func() {
select {
case sub.eventsChan <- struct{}{}:
case <-sub.ctx.Done():
}
}()
}
}
func (d *Status) notifyPeerListChanged() {

View File

@ -1,6 +1,7 @@
package peer
import (
"context"
"errors"
"sync"
"testing"
@ -86,8 +87,8 @@ func TestGetPeerStateChangeNotifierLogic(t *testing.T) {
status := NewRecorder("https://mgm")
_ = status.AddPeer(key, "abc.netbird", ip)
ch := status.GetPeerStateChangeNotifier(key)
assert.NotNil(t, ch, "channel shouldn't be nil")
sub := status.SubscribeToPeerStateChanges(context.Background(), key)
assert.NotNil(t, sub, "channel shouldn't be nil")
peerState := State{
PubKey: key,
@ -99,10 +100,12 @@ func TestGetPeerStateChangeNotifierLogic(t *testing.T) {
err := status.UpdatePeerRelayedStateToDisconnected(peerState)
assert.NoError(t, err, "shouldn't return error")
timeoutCtx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel()
select {
case <-ch:
default:
t.Errorf("channel wasn't closed after update")
case <-sub.eventsChan:
case <-timeoutCtx.Done():
t.Errorf("timed out waiting for event")
}
}

View File

@ -224,19 +224,18 @@ func (c *clientNetwork) getBestRouteFromStatuses(routePeerStatuses map[route.ID]
}
// watchPeerStatusChanges subscribes to state changes of peerKey on the status
// recorder and, for every event received, sends an empty value on
// peerStateUpdate and logs a debug message. It returns when ctx is done or when
// a receive on closer succeeds (a value was sent or the channel was closed).
// The subscription is removed on return.
func (c *clientNetwork) watchPeerStatusChanges(ctx context.Context, peerKey string, peerStateUpdate chan struct{}, closer chan struct{}) {
subscription := c.statusRecorder.SubscribeToPeerStateChanges(ctx, peerKey)
defer c.statusRecorder.UnsubscribePeerStateChanges(subscription)
for {
select {
case <-ctx.Done():
return
case <-closer:
return
// NOTE(review): the GetPeerStateChangeNotifier case with the GetPeer lookup
// below, and the first log line that uses `state`, look like remnants of the
// previous implementation left by the diff view — confirm against the real
// file.
case <-c.statusRecorder.GetPeerStateChangeNotifier(peerKey):
state, err := c.statusRecorder.GetPeer(peerKey)
if err != nil {
continue
}
case <-subscription.Events():
// NOTE(review): this send happens outside the select, so while it is blocked
// neither ctx nor closer is observed; whether it can block depends on how
// peerStateUpdate is created and drained — confirm with the caller.
peerStateUpdate <- struct{}{}
log.Debugf("triggered route state update for Peer %s, state: %s", peerKey, state.ConnStatus)
log.Debugf("triggered route state update for Peer: %s", peerKey)
}
}
}