diff --git a/management/server/account.go b/management/server/account.go index fbbd6800c..17d18f0fc 100644 --- a/management/server/account.go +++ b/management/server/account.go @@ -1580,7 +1580,6 @@ func (am *DefaultAccountManager) OnPeerDisconnected(ctx context.Context, account if err != nil { log.WithContext(ctx).Warnf("failed marking peer as disconnected %s %v", peerPubKey, err) } - am.loginFilter.removeLogin(peerPubKey) return nil } diff --git a/management/server/loginfilter.go b/management/server/loginfilter.go index 1b2a2a163..850ad94e5 100644 --- a/management/server/loginfilter.go +++ b/management/server/loginfilter.go @@ -2,6 +2,7 @@ package server import ( "hash/fnv" + "math" "sync" "time" @@ -9,44 +10,44 @@ import ( ) const ( - filterTimeout = 5 * time.Minute // Duration to secure the previous login information in the filter - reconnThreshold = 5 * time.Minute - blockDuration = 10 * time.Minute // Duration for which a peer is banned after exceeding the reconnection limit + baseBlockDuration = 10 * time.Minute // Duration for which a peer is banned after exceeding the reconnection limit reconnLimitForBan = 30 // Number of reconnections within the reconnTreshold that triggers a ban - metaChangeLim = 3 // Number of reconnections with different metadata that triggers a ban of one peer + metaChangeLimit = 3 // Number of reconnections with different metadata that triggers a ban of one peer ) type config struct { - filterTimeout time.Duration reconnThreshold time.Duration - blockDuration time.Duration + baseBlockDuration time.Duration reconnLimitForBan int - metaChangeLim int + metaChangeLimit int +} + +func initCfg() *config { + return &config{ + reconnThreshold: reconnThreshold, + baseBlockDuration: baseBlockDuration, + reconnLimitForBan: reconnLimitForBan, + metaChangeLimit: metaChangeLimit, + } } type loginFilter struct { mu sync.RWMutex cfg *config - logged map[string]metahash + logged map[string]*peerState } -type metahash struct { - hash uint64 - counter int - banned bool - firstLogin time.Time - lastSeen time.Time -} - -func initCfg() *config { - return &config{ - filterTimeout: filterTimeout, - reconnThreshold: reconnThreshold, - blockDuration: blockDuration, - reconnLimitForBan: reconnLimitForBan, - metaChangeLim: metaChangeLim, - } +type peerState struct { + currentHash uint64 + sessionCounter int + sessionStart time.Time + lastSeen time.Time + isBanned bool + banLevel int + banExpiresAt time.Time + metaChangeCounter int + metaChangeWindowStart time.Time } func newLoginFilter() *loginFilter { @@ -55,55 +56,94 @@ func newLoginFilter() *loginFilter { func newLoginFilterWithCfg(cfg *config) *loginFilter { return &loginFilter{ - logged: make(map[string]metahash), + logged: make(map[string]*peerState), cfg: cfg, } } -func (l *loginFilter) addLogin(wgPubKey string, metaHash uint64) { - l.mu.Lock() - defer l.mu.Unlock() - mh, ok := l.logged[wgPubKey] - if !ok || mh.banned || (mh.hash != metaHash && mh.counter > l.cfg.metaChangeLim) { - mh = metahash{ - hash: metaHash, - firstLogin: time.Now(), - } - } - mh.counter++ - mh.hash = metaHash - mh.lastSeen = time.Now() - if mh.counter > l.cfg.reconnLimitForBan && mh.lastSeen.Sub(mh.firstLogin) < l.cfg.reconnThreshold { - mh.banned = true - } - l.logged[wgPubKey] = mh -} - func (l *loginFilter) allowLogin(wgPubKey string, metaHash uint64) bool { l.mu.RLock() defer l.mu.RUnlock() - mh, ok := l.logged[wgPubKey] + state, ok := l.logged[wgPubKey] if !ok { return true } - if mh.banned && time.Since(mh.lastSeen) < l.cfg.blockDuration { + if state.isBanned && time.Now().Before(state.banExpiresAt) { return false } - if mh.hash != metaHash && time.Since(mh.lastSeen) < l.cfg.filterTimeout && mh.counter > l.cfg.metaChangeLim { - return false + if metaHash != state.currentHash { + if time.Now().Before(state.metaChangeWindowStart.Add(l.cfg.reconnThreshold)) && state.metaChangeCounter >= l.cfg.metaChangeLimit { + return false + } } return true } -func (l *loginFilter) removeLogin(wgPubKey string) { +func (l *loginFilter) addLogin(wgPubKey string, metaHash uint64) { l.mu.Lock() defer l.mu.Unlock() - delete(l.logged, wgPubKey) + + now := time.Now() + state, ok := l.logged[wgPubKey] + + if !ok { + l.logged[wgPubKey] = &peerState{ + currentHash: metaHash, + sessionCounter: 1, + sessionStart: now, + lastSeen: now, + metaChangeWindowStart: now, + metaChangeCounter: 1, + } + return + } + + if state.isBanned && now.After(state.banExpiresAt) { + state.isBanned = false + } + + if state.banLevel > 0 && now.Sub(state.lastSeen) > (2*l.cfg.baseBlockDuration) { + state.banLevel = 0 + } + + if metaHash != state.currentHash { + if now.After(state.metaChangeWindowStart.Add(l.cfg.reconnThreshold)) { + state.metaChangeWindowStart = now + state.metaChangeCounter = 1 + } else { + state.metaChangeCounter++ + } + state.currentHash = metaHash + state.sessionCounter = 1 + state.sessionStart = now + state.lastSeen = now + return + } + + state.sessionCounter++ + if state.sessionCounter > l.cfg.reconnLimitForBan && now.Sub(state.sessionStart) < l.cfg.reconnThreshold { + state.isBanned = true + state.banLevel++ + + backoffFactor := math.Pow(2, float64(state.banLevel-1)) + duration := time.Duration(float64(l.cfg.baseBlockDuration) * backoffFactor) + state.banExpiresAt = now.Add(duration) + + state.sessionCounter = 0 + state.sessionStart = now + } + state.lastSeen = now } func metaHash(meta nbpeer.PeerSystemMeta, pubip string) uint64 { h := fnv.New64a() + if len(meta.NetworkAddresses) != 0 { + for _, na := range meta.NetworkAddresses { + h.Write([]byte(na.Mac)) + } + } + h.Write([]byte(meta.WtVersion)) h.Write([]byte(meta.OSVersion)) h.Write([]byte(meta.KernelVersion)) diff --git a/management/server/loginfilter_test.go b/management/server/loginfilter_test.go index fe838e62d..3137dac87 100644 --- a/management/server/loginfilter_test.go +++ b/management/server/loginfilter_test.go @@ -2,6 +2,7 @@ package server import ( "hash/fnv" + "math" "strconv" "strings" "testing" @@ -12,13 +13,12 @@ import ( nbpeer "github.com/netbirdio/netbird/management/server/peer" ) -func testCfg() *config { +func testAdvancedCfg() *config { return &config{ - filterTimeout: 20 * time.Millisecond, reconnThreshold: 50 * time.Millisecond, - blockDuration: 100 * time.Millisecond, + baseBlockDuration: 100 * time.Millisecond, reconnLimitForBan: 3, - metaChangeLim: 1, + metaChangeLimit: 2, } } @@ -28,113 +28,131 @@ type LoginFilterTestSuite struct { } func (s *LoginFilterTestSuite) SetupTest() { - s.filter = newLoginFilterWithCfg(testCfg()) + s.filter = newLoginFilterWithCfg(testAdvancedCfg()) } func TestLoginFilterTestSuite(t *testing.T) { suite.Run(t, new(LoginFilterTestSuite)) } -func (s *LoginFilterTestSuite) TestFirstLogin() { +func (s *LoginFilterTestSuite) TestFirstLoginIsAlwaysAllowed() { pubKey := "PUB_KEY_A" - meta := uint64(4353457657645) + meta := uint64(1) - s.True(s.filter.allowLogin(pubKey, meta), "should allow a new peer") + s.True(s.filter.allowLogin(pubKey, meta)) s.filter.addLogin(pubKey, meta) s.Require().Contains(s.filter.logged, pubKey) - s.Equal(1, s.filter.logged[pubKey].counter) + s.Equal(1, s.filter.logged[pubKey].sessionCounter) } -func (s *LoginFilterTestSuite) TestFlappingPeerTriggersBan() { +func (s *LoginFilterTestSuite) TestFlappingSameHashTriggersBan() { pubKey := "PUB_KEY_A" - meta := uint64(4353457657645) + meta := uint64(1) limit := s.filter.cfg.reconnLimitForBan - for range limit { + for i := 0; i <= limit; i++ { s.filter.addLogin(pubKey, meta) } - s.True(s.filter.allowLogin(pubKey, meta), "should still allow login at the limit boundary") - - s.filter.addLogin(pubKey, meta) - - s.False(s.filter.allowLogin(pubKey, meta), "should deny login after exceeding the limit") - s.True(s.filter.logged[pubKey].banned, "peer should be marked as banned") + s.False(s.filter.allowLogin(pubKey, meta)) + s.Require().Contains(s.filter.logged, pubKey) + s.True(s.filter.logged[pubKey].isBanned) } -func (s *LoginFilterTestSuite) TestBannedPeerIsDenied() { +func (s *LoginFilterTestSuite) TestBanDurationIncreasesExponentially() { pubKey := "PUB_KEY_A" - meta := uint64(4353457657645) + meta := uint64(1) + limit := s.filter.cfg.reconnLimitForBan + baseBan := s.filter.cfg.baseBlockDuration - s.filter.logged[pubKey] = metahash{ - hash: meta, - banned: true, - lastSeen: time.Now(), + for i := 0; i <= limit; i++ { + s.filter.addLogin(pubKey, meta) } + s.Require().Contains(s.filter.logged, pubKey) + s.True(s.filter.logged[pubKey].isBanned) + s.Equal(1, s.filter.logged[pubKey].banLevel) + firstBanDuration := s.filter.logged[pubKey].banExpiresAt.Sub(s.filter.logged[pubKey].lastSeen) + s.InDelta(baseBan, firstBanDuration, float64(time.Millisecond)) - s.False(s.filter.allowLogin(pubKey, meta)) + s.filter.logged[pubKey].banExpiresAt = time.Now().Add(-time.Second) + s.filter.logged[pubKey].isBanned = false + + for i := 0; i <= limit; i++ { + s.filter.addLogin(pubKey, meta) + } + s.True(s.filter.logged[pubKey].isBanned) + s.Equal(2, s.filter.logged[pubKey].banLevel) + secondBanDuration := s.filter.logged[pubKey].banExpiresAt.Sub(s.filter.logged[pubKey].lastSeen) + expectedSecondDuration := time.Duration(float64(baseBan) * math.Pow(2, 1)) + s.InDelta(expectedSecondDuration, secondBanDuration, float64(time.Millisecond)) } func (s *LoginFilterTestSuite) TestPeerIsAllowedAfterBanExpires() { pubKey := "PUB_KEY_A" - meta := uint64(4353457657645) + meta := uint64(1) - s.filter.logged[pubKey] = metahash{ - hash: meta, - banned: true, - lastSeen: time.Now().Add(-(s.filter.cfg.blockDuration + time.Second)), + s.filter.logged[pubKey] = &peerState{ + isBanned: true, + banExpiresAt: time.Now().Add(-(s.filter.cfg.baseBlockDuration + time.Second)), } - s.True(s.filter.allowLogin(pubKey, meta), "should allow login after ban expires") - - s.filter.addLogin(pubKey, meta) - s.Require().Contains(s.filter.logged, pubKey) - entry := s.filter.logged[pubKey] - s.False(entry.banned, "ban should be lifted on new login") - s.Equal(1, entry.counter, "counter should be reset") -} - -func (s *LoginFilterTestSuite) TestDifferentHashIsBlockedWhenActive() { - pubKey := "PUB_KEY_A" - meta1 := uint64(23424223423) - meta2 := uint64(99878798987987) - - for range s.filter.cfg.metaChangeLim { - s.filter.addLogin(pubKey, meta1) - } - - s.filter.addLogin(pubKey, meta1) - - s.False(s.filter.allowLogin(pubKey, meta2)) -} - -func (s *LoginFilterTestSuite) TestDifferentHashIsAllowedAfterTimeout() { - pubKey := "PUB_KEY_A" - meta1 := uint64(23424223423) - meta2 := uint64(99878798987987) - - s.filter.addLogin(pubKey, meta1) - - s.Require().Contains(s.filter.logged, pubKey) - entry := s.filter.logged[pubKey] - entry.lastSeen = time.Now().Add(-(s.filter.cfg.filterTimeout + time.Second)) - s.filter.logged[pubKey] = entry - - s.True(s.filter.allowLogin(pubKey, meta2)) -} - -func (s *LoginFilterTestSuite) TestRemovedPeerCanLogin() { - pubKey := "PUB_KEY_A" - meta := uint64(4353457657645) - - s.filter.addLogin(pubKey, meta) - s.Require().Contains(s.filter.logged, pubKey) - - s.filter.removeLogin(pubKey) - s.NotContains(s.filter.logged, pubKey) - s.True(s.filter.allowLogin(pubKey, meta)) + + s.filter.addLogin(pubKey, meta) + s.Require().Contains(s.filter.logged, pubKey) + s.False(s.filter.logged[pubKey].isBanned) +} + +func (s *LoginFilterTestSuite) TestBanLevelResetsAfterGoodBehavior() { + pubKey := "PUB_KEY_A" + meta := uint64(1) + + s.filter.logged[pubKey] = &peerState{ + currentHash: meta, + banLevel: 3, + lastSeen: time.Now().Add(-3 * s.filter.cfg.baseBlockDuration), + } + + s.filter.addLogin(pubKey, meta) + s.Require().Contains(s.filter.logged, pubKey) + s.Equal(0, s.filter.logged[pubKey].banLevel) +} + +func (s *LoginFilterTestSuite) TestFlappingDifferentHashesTriggersBlock() { + pubKey := "PUB_KEY_A" + limit := s.filter.cfg.metaChangeLimit + + for i := range limit { + s.filter.addLogin(pubKey, uint64(i+1)) + } + + s.Require().Contains(s.filter.logged, pubKey) + s.Equal(limit, s.filter.logged[pubKey].metaChangeCounter) + + isAllowed := s.filter.allowLogin(pubKey, uint64(limit+1)) + + s.False(isAllowed, "should block new meta hash after limit is reached") +} + +func (s *LoginFilterTestSuite) TestMetaChangeIsAllowedAfterWindowResets() { + pubKey := "PUB_KEY_A" + meta1 := uint64(1) + meta2 := uint64(2) + meta3 := uint64(3) + + s.filter.addLogin(pubKey, meta1) + s.filter.addLogin(pubKey, meta2) + s.Require().Contains(s.filter.logged, pubKey) + s.Equal(s.filter.cfg.metaChangeLimit, s.filter.logged[pubKey].metaChangeCounter) + s.False(s.filter.allowLogin(pubKey, meta3), "should be blocked inside window") + + s.filter.logged[pubKey].metaChangeWindowStart = time.Now().Add(-(s.filter.cfg.reconnThreshold + time.Second)) + + s.True(s.filter.allowLogin(pubKey, meta3), "should be allowed after window expires") + + s.filter.addLogin(pubKey, meta3) + s.Equal(1, s.filter.logged[pubKey].metaChangeCounter, "meta change counter should reset") } func BenchmarkHashingMethods(b *testing.B) { @@ -182,6 +200,12 @@ func BenchmarkHashingMethods(b *testing.B) { func fnvHashToString(meta nbpeer.PeerSystemMeta, pubip string) string { h := fnv.New64a() + if len(meta.NetworkAddresses) != 0 { + for _, na := range meta.NetworkAddresses { + h.Write([]byte(na.Mac)) + } + } + h.Write([]byte(meta.WtVersion)) h.Write([]byte(meta.OSVersion)) h.Write([]byte(meta.KernelVersion)) @@ -193,8 +217,9 @@ func fnvHashToString(meta nbpeer.PeerSystemMeta, pubip string) string { } func builderString(meta nbpeer.PeerSystemMeta, pubip string) string { + mac := getMacAddress(meta.NetworkAddresses) estimatedSize := len(meta.WtVersion) + len(meta.OSVersion) + len(meta.KernelVersion) + len(meta.Hostname) + len(meta.SystemSerialNumber) + - len(pubip) + 5 + len(pubip) + len(mac) + 6 var b strings.Builder b.Grow(estimatedSize) @@ -213,3 +238,14 @@ func builderString(meta nbpeer.PeerSystemMeta, pubip string) string { return b.String() } + +func getMacAddress(nas []nbpeer.NetworkAddress) string { + if len(nas) == 0 { + return "" + } + macs := make([]string, 0, len(nas)) + for _, na := range nas { + macs = append(macs, na.Mac) + } + return strings.Join(macs, "/") +}