diff --git a/client/internal/peer/conn.go b/client/internal/peer/conn.go index dbc07d754..561437f04 100644 --- a/client/internal/peer/conn.go +++ b/client/internal/peer/conn.go @@ -336,13 +336,12 @@ func (conn *Conn) reconnectLoopWithRetry() { return } - // checks if there is peer connection is established via relay or ice and that it has a wireguard handshake and skip offer - conn.log.Tracef("ticker timedout, relay state: %s, ice state: %s", conn.statusRelay, conn.statusICE) + // skip sending a new offer when isConnected reports that the peer connection is already up via relay or ice + conn.log.Infof("ticker timedout, relay state: %s, ice state: %s", conn.statusRelay, conn.statusICE) if conn.isConnected() { continue } - conn.log.Debugf("ticker timed out, retry to do handshake") err := conn.handshaker.sendOffer() if err != nil { conn.log.Errorf("failed to do handshake: %v", err) @@ -722,18 +721,7 @@ func (conn *Conn) isConnected() bool { if conn.statusICE != StatusConnected && conn.statusICE != StatusConnecting { return false } - - wgStats, err := conn.config.WgConfig.WgInterface.GetStats(conn.config.Key) - if err != nil { - conn.log.Errorf("failed to get wg stats: %v", err) - return false - } - - if time.Since(wgStats.LastHandshake) > 2*time.Minute { - return false - } return true - } func isRosenpassEnabled(remoteRosenpassPubKey []byte) bool { diff --git a/client/internal/peer/worker_relay.go b/client/internal/peer/worker_relay.go index beea912aa..f4acdba6a 100644 --- a/client/internal/peer/worker_relay.go +++ b/client/internal/peer/worker_relay.go @@ -4,12 +4,19 @@ import ( "context" "errors" "net" + "time" log "github.com/sirupsen/logrus" + "github.com/netbirdio/netbird/iface" relayClient "github.com/netbirdio/netbird/relay/client" ) +var ( + wgHandshakePeriod = 2 * time.Minute + wgHandshakeOvertime = 30000 * time.Millisecond +) + type RelayConnInfo struct { relayedConn net.Conn rosenpassPubKey []byte @@ -25,11 +32,12 @@ type WorkerRelay struct { ctx context.Context log *log.Entry config ConnConfig 
- relayManager *relayClient.Manager + wgInterface iface.IWGIface + relayManager relayClient.ManagerService conn WorkerRelayCallbacks } -func NewWorkerRelay(ctx context.Context, log *log.Entry, config ConnConfig, relayManager *relayClient.Manager, callbacks WorkerRelayCallbacks) *WorkerRelay { +func NewWorkerRelay(ctx context.Context, log *log.Entry, config ConnConfig, relayManager relayClient.ManagerService, callbacks WorkerRelayCallbacks) *WorkerRelay { return &WorkerRelay{ ctx: ctx, log: log, @@ -48,7 +56,7 @@ func (w *WorkerRelay) OnNewOffer(remoteOfferAnswer *OfferAnswer) { // the relayManager will return with error in case if the connection has lost with relay server currentRelayAddress, err := w.relayManager.RelayInstanceAddress() if err != nil { - w.log.Infof("local Relay connection is lost, skipping connection attempt") + w.log.Errorf("failed to handle new offer: %s", err) return } @@ -61,10 +69,12 @@ func (w *WorkerRelay) OnNewOffer(remoteOfferAnswer *OfferAnswer) { w.log.Infof("do not need to reopen relay connection") return } - w.log.Infof("do not need to reopen relay connection: %s", err) + w.log.Errorf("failed to open connection via Relay: %s", err) return } + go w.wgStateCheck(relayedConn) + w.log.Debugf("Relay connection established with %s", srv) go w.conn.OnConnReady(RelayConnInfo{ relayedConn: relayedConn, @@ -85,6 +95,40 @@ func (w *WorkerRelay) RelayIsSupportedLocally() bool { return w.relayManager.HasRelayAddress() } +// wgStateCheck periodically reads the time of the last WireGuard handshake (see wgState). +// When the last handshake is older than wgHandshakePeriod it closes conn, calls w.conn.OnDisconnected and returns. +// The loop also ends when w.ctx is done. +func (w *WorkerRelay) wgStateCheck(conn net.Conn) { + timer := time.NewTimer(wgHandshakeOvertime) + defer timer.Stop() + + for { + select { + case <-timer.C: + lastHandshake, err := w.wgState() + if err != nil { + w.log.Errorf("failed to read wg stats: %v", err) + // re-arm the timer before retrying: it has already fired, so without a Reset timer.C never delivers again and the check would silently stop + timer.Reset(wgHandshakeOvertime) + continue + } + w.log.Infof("last handshake: %v", lastHandshake) + + if time.Since(lastHandshake) > wgHandshakePeriod { + w.log.Infof("Wireguard handshake timed out, closing relay 
connection") + _ = conn.Close() + w.conn.OnDisconnected() + return + } + // next check fires at lastHandshake + wgHandshakePeriod + wgHandshakeOvertime + resetTime := time.Until(lastHandshake.Add(wgHandshakeOvertime + wgHandshakePeriod)) + timer.Reset(resetTime) + case <-w.ctx.Done(): + return + } + } +} + func (w *WorkerRelay) isRelaySupported(answer *OfferAnswer) bool { if !w.relayManager.HasRelayAddress() { return false @@ -98,3 +142,12 @@ func (w *WorkerRelay) preferredRelayServer(myRelayAddress, remoteRelayAddress st } return remoteRelayAddress } + +// wgState returns the LastHandshake value of the WireGuard interface stats for w.config.Key +func (w *WorkerRelay) wgState() (time.Time, error) { + wgState, err := w.config.WgConfig.WgInterface.GetStats(w.config.Key) + if err != nil { + return time.Time{}, err + } + return wgState.LastHandshake, nil +} diff --git a/client/internal/wgproxy/proxy_ebpf.go b/client/internal/wgproxy/proxy_ebpf.go index 9d08a0d24..bbaafa756 100644 --- a/client/internal/wgproxy/proxy_ebpf.go +++ b/client/internal/wgproxy/proxy_ebpf.go @@ -181,7 +181,7 @@ func (p *WGEBPFProxy) proxyToRemote() { conn, ok := p.turnConnStore[uint16(addr.Port)] p.turnConnMutex.Unlock() if !ok { - log.Infof("turn conn not found by port: %d", addr.Port) + log.Infof("turn conn not found by port, exit form proxy: %d", addr.Port) return // todo replace it to return. For debug troubleshooting keep it } diff --git a/relay/client/manager.go b/relay/client/manager.go index ffc8faef0..e75455e47 100644 --- a/relay/client/manager.go +++ b/relay/client/manager.go @@ -30,6 +30,16 @@ func NewRelayTrack() *RelayTrack { return &RelayTrack{} } +// ManagerService is the relay manager interface that consumers such as the peer WorkerRelay depend on +type ManagerService interface { + Serve() error + OpenConn(serverAddress, peerKey string, onClosedListener func()) (net.Conn, error) + RelayInstanceAddress() (string, error) + ServerURL() string + HasRelayAddress() bool + UpdateToken(token *relayAuth.Token) +} + // Manager is a manager for the relay client. It establish one persistent connection to the given relay server. In case // of network error the manager will try to reconnect to the server. // The manager also manage temproary relay connection. 
If a client wants to communicate with an another client on a