Add reconnect logic

This commit is contained in:
Zoltan Papp
2024-05-28 01:00:25 +02:00
parent 645a1f31a7
commit 076ce69a24
2 changed files with 206 additions and 57 deletions

View File

@ -3,6 +3,7 @@ package client
import (
"context"
"fmt"
"github.com/netbirdio/netbird/relay/client/dialer/udp"
"io"
"net"
"sync"
@ -10,7 +11,6 @@ import (
log "github.com/sirupsen/logrus"
"github.com/netbirdio/netbird/relay/client/dialer/udp"
"github.com/netbirdio/netbird/relay/messages"
)
@ -19,6 +19,10 @@ const (
serverResponseTimeout = 8 * time.Second
)
var (
// reconnectingTimeout is how long reconnectGuard waits after a failed
// reconnect attempt before trying again.
// NOTE(review): declared as a var rather than a const — presumably so it can
// be overridden (e.g. shortened in tests); confirm.
reconnectingTimeout = 5 * time.Second
)
// Msg carries a single raw byte buffer.
// NOTE(review): presumably one message received from the relay server —
// confirm against the code that fills buf.
type Msg struct {
buf []byte
}
@ -35,56 +39,51 @@ type Client struct {
serverAddress string
hashedID []byte
conns map[string]*connContainer // todo handle it in thread safe way
relayConnIsEstablished bool
conns map[string]*connContainer
connsMutext sync.Mutex // protect conns and relayConnIsEstablished bool
relayConn net.Conn
relayConnState bool
wgRelayConn sync.WaitGroup
mu sync.Mutex
relayConn net.Conn
serviceIsRunning bool
wgRelayConn sync.WaitGroup
mu sync.Mutex
onDisconnected chan struct{}
}
// NewClient builds a relay Client for the given server address and peer ID.
//
// The supplied ctx is wrapped with context.WithCancel; both the derived
// context and its cancel function are stored on the client. The peer ID is
// hashed with messages.HashID: the byte form is kept as hashedID and the
// string form is attached to the client's logger under the "client_id" field.
// The conns map is initialised empty. This function only constructs the
// value; it does not call Connect.
func NewClient(ctx context.Context, serverAddress, peerID string) *Client {
ctx, ctxCancel := context.WithCancel(ctx)
hashedID, hashedStringId := messages.HashID(peerID)
return &Client{
log: log.WithField("client_id", hashedStringId),
ctx: ctx,
ctxCancel: ctxCancel,
serverAddress: serverAddress,
hashedID: hashedID,
conns: make(map[string]*connContainer),
log: log.WithField("client_id", hashedStringId),
ctx: ctx,
ctxCancel: ctxCancel,
serverAddress: serverAddress,
hashedID: hashedID,
conns: make(map[string]*connContainer),
onDisconnected: make(chan struct{}),
}
}
func (c *Client) Connect() error {
c.mu.Lock()
if c.relayConnState {
if c.serviceIsRunning {
c.mu.Unlock()
return nil
}
conn, err := udp.Dial(c.serverAddress)
err := c.connect()
if err != nil {
return err
}
c.relayConn = conn
err = c.handShake()
if err != nil {
cErr := conn.Close()
if cErr != nil {
log.Errorf("failed to close connection: %s", cErr)
}
c.relayConn = nil
c.mu.Unlock()
return err
}
c.relayConnState = true
c.mu.Unlock()
c.serviceIsRunning = true
c.wgRelayConn.Add(1)
go c.readLoop()
c.mu.Unlock()
go func() {
<-c.ctx.Done()
cErr := c.close()
@ -93,13 +92,50 @@ func (c *Client) Connect() error {
}
}()
go c.reconnectGuard()
return nil
}
// reconnectGuard re-establishes the relay connection whenever the read loop
// exits while the service is still marked as running.
//
// Each iteration blocks until the current readLoop goroutine has finished
// (wgRelayConn), then, holding c.mu, either stops because the service is no
// longer running or dials and handshakes again via connect. After a failed
// attempt it waits reconnectingTimeout before retrying; that wait is
// abandoned as soon as c.ctx is cancelled, so the goroutine does not outlive
// a shut-down client by a full back-off period.
func (c *Client) reconnectGuard() {
	for {
		// Returns immediately when no readLoop is registered, e.g. right
		// after a failed reconnect attempt.
		c.wgRelayConn.Wait()

		c.mu.Lock()
		if !c.serviceIsRunning {
			c.mu.Unlock()
			return
		}

		c.log.Infof("reconnecting to relay server")
		if err := c.connect(); err != nil {
			c.log.Errorf("failed to reconnect to relay server: %s", err)
			// Release the lock before backing off so other callers are not
			// blocked for the whole retry interval.
			c.mu.Unlock()

			retry := time.NewTimer(reconnectingTimeout)
			select {
			case <-c.ctx.Done():
				retry.Stop()
				return
			case <-retry.C:
			}
			continue
		}
		c.log.Infof("reconnected to relay server")

		// Register the new read loop while still holding c.mu.
		c.wgRelayConn.Add(1)
		go c.readLoop()
		c.mu.Unlock()
	}
}
func (c *Client) OpenConn(dstPeerID string) (net.Conn, error) {
c.mu.Lock()
defer c.mu.Unlock()
if !c.relayConnState {
c.connsMutext.Lock()
defer c.connsMutext.Unlock()
if !c.relayConnIsEstablished {
return nil, fmt.Errorf("relay connection is not established")
}
if !c.serviceIsRunning {
return nil, fmt.Errorf("relay connection is not established")
}
@ -120,26 +156,41 @@ func (c *Client) Close() error {
return c.close()
}
// connect dials the relay server at c.serverAddress and runs the handshake
// on the new connection.
//
// On success c.relayConn holds the new connection and
// relayConnIsEstablished is set to true. On handshake failure the connection
// is closed, c.relayConn is reset to nil and the handshake error is
// returned; a dial error is returned as-is.
//
// Both callers visible in this file (Connect and reconnectGuard) invoke it
// with c.mu held.
func (c *Client) connect() error {
	conn, err := udp.Dial(c.serverAddress)
	if err != nil {
		return err
	}

	// handShake takes no connection argument — presumably it talks over
	// c.relayConn, so the field is assigned before the call.
	c.relayConn = conn

	if err = c.handShake(); err != nil {
		if cErr := conn.Close(); cErr != nil {
			c.log.Errorf("failed to close connection: %s", cErr)
		}
		c.relayConn = nil
		return err
	}

	// relayConnIsEstablished is read and written under connsMutext elsewhere
	// (OpenConn, readLoop); take the same lock here to avoid a data race.
	c.connsMutext.Lock()
	c.relayConnIsEstablished = true
	c.connsMutext.Unlock()

	return nil
}
func (c *Client) close() error {
c.mu.Lock()
defer c.mu.Unlock()
if !c.relayConnState {
if !c.serviceIsRunning {
return nil
}
c.relayConnState = false
c.serviceIsRunning = false
err := c.relayConn.Close()
c.wgRelayConn.Wait()
// close all Conn types
for _, container := range c.conns {
close(container.messages)
}
c.conns = make(map[string]*connContainer)
return err
}
@ -189,17 +240,13 @@ func (c *Client) handShake() error {
}
func (c *Client) readLoop() {
defer func() {
c.log.Tracef("exit from read loop")
c.wgRelayConn.Done()
}()
var errExit error
var n int
for {
buf := make([]byte, bufferSize)
n, errExit = c.relayConn.Read(buf)
if errExit != nil {
if c.relayConnState {
if c.serviceIsRunning {
c.log.Debugf("failed to read message from relay server: %s", errExit)
}
break
@ -232,10 +279,20 @@ func (c *Client) readLoop() {
}
}
if c.relayConnState {
c.log.Errorf("failed to read message from relay server: %s", errExit)
if c.serviceIsRunning {
_ = c.relayConn.Close()
}
c.connsMutext.Lock()
c.relayConnIsEstablished = false
for _, container := range c.conns {
close(container.messages)
}
c.conns = make(map[string]*connContainer)
c.connsMutext.Unlock()
c.log.Tracef("exit from read loop")
c.wgRelayConn.Done()
}
func (c *Client) writeTo(id string, dstID []byte, payload []byte) (int, error) {
@ -275,6 +332,9 @@ func (c *Client) closeConn(id string) error {
c.mu.Lock()
defer c.mu.Unlock()
c.connsMutext.Lock()
defer c.connsMutext.Unlock()
conn, ok := c.conns[id]
if !ok {
return fmt.Errorf("connection already closed")