netbird/client/internal/connect.go

495 lines
14 KiB
Go
Raw Normal View History

package internal
import (
"context"
"errors"
2022-07-02 12:02:17 +02:00
"fmt"
"net"
"runtime"
2024-04-09 20:27:27 +02:00
"runtime/debug"
2022-07-02 12:02:17 +02:00
"strings"
2024-05-10 10:47:16 +02:00
"sync"
"time"
"github.com/cenkalti/backoff/v4"
log "github.com/sirupsen/logrus"
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
"google.golang.org/grpc/codes"
gstatus "google.golang.org/grpc/status"
"github.com/netbirdio/netbird/client/iface"
"github.com/netbirdio/netbird/client/iface/device"
"github.com/netbirdio/netbird/client/internal/dns"
"github.com/netbirdio/netbird/client/internal/listener"
"github.com/netbirdio/netbird/client/internal/peer"
"github.com/netbirdio/netbird/client/internal/stdnet"
"github.com/netbirdio/netbird/client/ssh"
"github.com/netbirdio/netbird/client/system"
mgm "github.com/netbirdio/netbird/management/client"
mgmProto "github.com/netbirdio/netbird/management/proto"
"github.com/netbirdio/netbird/relay/auth/hmac"
relayClient "github.com/netbirdio/netbird/relay/client"
signal "github.com/netbirdio/netbird/signal/client"
"github.com/netbirdio/netbird/util"
2023-10-25 00:47:40 +02:00
"github.com/netbirdio/netbird/version"
)
2024-05-10 10:47:16 +02:00
type ConnectClient struct {
ctx context.Context
config *Config
statusRecorder *peer.Status
engine *Engine
engineMutex sync.Mutex
}
2024-05-10 10:47:16 +02:00
func NewConnectClient(
ctx context.Context,
config *Config,
statusRecorder *peer.Status,
2024-05-10 10:47:16 +02:00
) *ConnectClient {
return &ConnectClient{
ctx: ctx,
config: config,
statusRecorder: statusRecorder,
engineMutex: sync.Mutex{},
}
}
// Run with main logic.
func (c *ConnectClient) Run() error {
return c.run(MobileDependency{}, nil, nil)
2024-05-10 10:47:16 +02:00
}
// RunWithProbes runs the client's main logic with probes attached
func (c *ConnectClient) RunWithProbes(probes *ProbeHolder, runningChan chan error) error {
return c.run(MobileDependency{}, probes, runningChan)
}
2024-05-10 10:47:16 +02:00
// RunOnAndroid with main logic on mobile system
func (c *ConnectClient) RunOnAndroid(
tunAdapter device.TunAdapter,
iFaceDiscover stdnet.ExternalIFaceDiscover,
networkChangeListener listener.NetworkChangeListener,
dnsAddresses []string,
dnsReadyListener dns.ReadyListener,
) error {
// in case of non Android os these variables will be nil
mobileDependency := MobileDependency{
TunAdapter: tunAdapter,
IFaceDiscover: iFaceDiscover,
NetworkChangeListener: networkChangeListener,
HostDNSAddresses: dnsAddresses,
DnsReadyListener: dnsReadyListener,
}
return c.run(mobileDependency, nil, nil)
}
2024-05-10 10:47:16 +02:00
func (c *ConnectClient) RunOniOS(
fileDescriptor int32,
networkChangeListener listener.NetworkChangeListener,
dnsManager dns.IosDnsManager,
) error {
// Set GC percent to 5% to reduce memory usage as iOS only allows 50MB of memory for the extension.
debug.SetGCPercent(5)
Feature/add iOS support (#1244) * starting engine by passing file descriptor on engine start * inject logger that does not compile * logger and first client * first working connection * support for routes and working connection * small refactor for better code quality in swift * trying to add DNS * fix * updated * fix route deletion * trying to bind the DNS resolver dialer to an interface * use dns.Client.Exchange * fix metadata send on startup * switching between client to query upstream * fix panic on no dns response * fix after merge changes * add engine ready listener * replace engine listener with connection listener * disable relay connection for iOS until proxy is refactored into bind * Extract private upstream for iOS and fix function headers for other OS * Update mock Server * Fix dns server and upstream tests * Fix engine null pointer with mobile dependencies for other OS * Revert back to disabling upstream on no response * Fix some of the remarks from the linter * Fix linter * re-arrange duration calculation * revert exported HostDNSConfig * remove unused engine listener * remove development logs * refactor dns code and interface name propagation * clean dns server test * disable upstream deactivation for iOS * remove files after merge * fix dns server darwin * fix server mock * fix build flags * move service listen back to initialize * add wgInterface to hostManager initialization on android * fix typo and remove unused function * extract upstream exchange for ios and rest * remove todo * separate upstream logic to ios file * Fix upstream test * use interface and embedded struct for upstream * set properly upstream client * remove placeholder * remove ios specific attributes * fix upstream test * merge ipc parser and wg configurer for mobile * fix build annotation * use json for DNS settings handover through gomobile * add logs for DNS json string * bring back check on ios for private upstream * remove wrong (and unused) line * fix wrongly updated comments on DNSSetting export --------- Co-authored-by: Maycon Santos <mlsmaycon@gmail.com>
2023-12-18 11:46:58 +01:00
mobileDependency := MobileDependency{
FileDescriptor: fileDescriptor,
NetworkChangeListener: networkChangeListener,
DnsManager: dnsManager,
}
return c.run(mobileDependency, nil, nil)
Feature/add iOS support (#1244) * starting engine by passing file descriptor on engine start * inject logger that does not compile * logger and first client * first working connection * support for routes and working connection * small refactor for better code quality in swift * trying to add DNS * fix * updated * fix route deletion * trying to bind the DNS resolver dialer to an interface * use dns.Client.Exchange * fix metadata send on startup * switching between client to query upstream * fix panic on no dns response * fix after merge changes * add engine ready listener * replace engine listener with connection listener * disable relay connection for iOS until proxy is refactored into bind * Extract private upstream for iOS and fix function headers for other OS * Update mock Server * Fix dns server and upstream tests * Fix engine null pointer with mobile dependencies for other OS * Revert back to disabling upstream on no response * Fix some of the remarks from the linter * Fix linter * re-arrange duration calculation * revert exported HostDNSConfig * remove unused engine listener * remove development logs * refactor dns code and interface name propagation * clean dns server test * disable upstream deactivation for iOS * remove files after merge * fix dns server darwin * fix server mock * fix build flags * move service listen back to initialize * add wgInterface to hostManager initialization on android * fix typo and remove unused function * extract upstream exchange for ios and rest * remove todo * separate upstream logic to ios file * Fix upstream test * use interface and embedded struct for upstream * set properly upstream client * remove placeholder * remove ios specific attributes * fix upstream test * merge ipc parser and wg configurer for mobile * fix build annotation * use json for DNS settings handover through gomobile * add logs for DNS json string * bring back check on ios for private upstream * remove wrong (and unused) line * fix wrongly updated comments on DNSSetting export --------- Co-authored-by: Maycon Santos <mlsmaycon@gmail.com>
2023-12-18 11:46:58 +01:00
}
func (c *ConnectClient) run(mobileDependency MobileDependency, probes *ProbeHolder, runningChan chan error) error {
2024-04-09 20:27:27 +02:00
defer func() {
if r := recover(); r != nil {
log.Panicf("Panic occurred: %v, stack trace: %s", r, string(debug.Stack()))
}
}()
log.Infof("starting NetBird client version %s on %s/%s", version.NetbirdVersion(), runtime.GOOS, runtime.GOARCH)
2023-10-25 00:47:40 +02:00
backOff := &backoff.ExponentialBackOff{
InitialInterval: time.Second,
RandomizationFactor: 1,
Multiplier: 1.7,
MaxInterval: 15 * time.Second,
MaxElapsedTime: 3 * 30 * 24 * time.Hour, // 3 months
Stop: backoff.Stop,
Clock: backoff.SystemClock,
}
2024-05-10 10:47:16 +02:00
state := CtxGetState(c.ctx)
defer func() {
s, err := state.Status()
if err != nil || s != StatusNeedsLogin {
state.Set(StatusIdle)
}
}()
wrapErr := state.Wrap
2024-05-10 10:47:16 +02:00
myPrivateKey, err := wgtypes.ParseKey(c.config.PrivateKey)
2022-07-02 12:02:17 +02:00
if err != nil {
2024-05-10 10:47:16 +02:00
log.Errorf("failed parsing Wireguard key %s: [%s]", c.config.PrivateKey, err.Error())
2022-07-02 12:02:17 +02:00
return wrapErr(err)
}
var mgmTlsEnabled bool
2024-05-10 10:47:16 +02:00
if c.config.ManagementURL.Scheme == "https" {
2022-07-02 12:02:17 +02:00
mgmTlsEnabled = true
}
2024-05-10 10:47:16 +02:00
publicSSHKey, err := ssh.GeneratePublicKey([]byte(c.config.SSHKey))
2022-07-02 12:02:17 +02:00
if err != nil {
return err
}
2024-05-10 10:47:16 +02:00
defer c.statusRecorder.ClientStop()
runningChanOpen := true
operation := func() error {
// if context cancelled we not start new backoff cycle
if c.isContextCancelled() {
return nil
}
state.Set(StatusConnecting)
2024-05-10 10:47:16 +02:00
engineCtx, cancel := context.WithCancel(c.ctx)
2022-07-02 12:02:17 +02:00
defer func() {
2024-05-10 10:47:16 +02:00
c.statusRecorder.MarkManagementDisconnected(state.err)
c.statusRecorder.CleanLocalPeerState()
2022-07-02 12:02:17 +02:00
cancel()
}()
2024-05-10 10:47:16 +02:00
log.Debugf("connecting to the Management service %s", c.config.ManagementURL.Host)
mgmClient, err := mgm.NewClient(engineCtx, c.config.ManagementURL.Host, myPrivateKey, mgmTlsEnabled)
if err != nil {
return wrapErr(gstatus.Errorf(codes.FailedPrecondition, "failed connecting to Management Service : %s", err))
}
2024-05-10 10:47:16 +02:00
mgmNotifier := statusRecorderToMgmConnStateNotifier(c.statusRecorder)
mgmClient.SetConnStateListener(mgmNotifier)
2024-05-10 10:47:16 +02:00
log.Debugf("connected to the Management service %s", c.config.ManagementURL.Host)
defer func() {
if err = mgmClient.Close(); err != nil {
log.Warnf("failed to close the Management service client %v", err)
}
}()
// connect (just a connection, no stream yet) and login to Management Service to get an initial global Wiretrustee config
loginResp, err := loginToManagement(engineCtx, mgmClient, publicSSHKey)
if err != nil {
log.Debug(err)
if s, ok := gstatus.FromError(err); ok && (s.Code() == codes.PermissionDenied) {
state.Set(StatusNeedsLogin)
_ = c.Stop()
return backoff.Permanent(wrapErr(err)) // unrecoverable error
}
return wrapErr(err)
}
2024-05-10 10:47:16 +02:00
c.statusRecorder.MarkManagementConnected()
2022-07-02 12:02:17 +02:00
localPeerState := peer.LocalPeerState{
2022-07-02 12:02:17 +02:00
IP: loginResp.GetPeerConfig().GetAddress(),
PubKey: myPrivateKey.PublicKey().String(),
KernelInterface: device.WireGuardModuleIsLoaded(),
FQDN: loginResp.GetPeerConfig().GetFqdn(),
2022-07-02 12:02:17 +02:00
}
2024-05-10 10:47:16 +02:00
c.statusRecorder.UpdateLocalPeerState(localPeerState)
2022-07-02 12:02:17 +02:00
signalURL := fmt.Sprintf("%s://%s",
strings.ToLower(loginResp.GetWiretrusteeConfig().GetSignal().GetProtocol().String()),
loginResp.GetWiretrusteeConfig().GetSignal().GetUri(),
)
2024-05-10 10:47:16 +02:00
c.statusRecorder.UpdateSignalAddress(signalURL)
2024-05-10 10:47:16 +02:00
c.statusRecorder.MarkSignalDisconnected(nil)
defer func() {
_, err := state.Status()
c.statusRecorder.MarkSignalDisconnected(err)
}()
// with the global Wiretrustee config in hand connect (just a connection, no stream yet) Signal
signalClient, err := connectToSignal(engineCtx, loginResp.GetWiretrusteeConfig(), myPrivateKey)
if err != nil {
log.Error(err)
return wrapErr(err)
}
defer func() {
err = signalClient.Close()
if err != nil {
log.Warnf("failed closing Signal service client %v", err)
}
}()
2024-05-10 10:47:16 +02:00
signalNotifier := statusRecorderToSignalConnStateNotifier(c.statusRecorder)
signalClient.SetConnStateListener(signalNotifier)
2024-05-10 10:47:16 +02:00
c.statusRecorder.MarkSignalConnected()
2022-07-02 12:02:17 +02:00
relayURLs, token := parseRelayInfo(loginResp)
relayManager := relayClient.NewManager(engineCtx, relayURLs, myPrivateKey.PublicKey().String())
if len(relayURLs) > 0 {
if token != nil {
if err := relayManager.UpdateToken(token); err != nil {
log.Errorf("failed to update token: %s", err)
return wrapErr(err)
}
}
log.Infof("connecting to the Relay service(s): %s", strings.Join(relayURLs, ", "))
if err = relayManager.Serve(); err != nil {
log.Error(err)
return wrapErr(err)
}
c.statusRecorder.SetRelayMgr(relayManager)
}
peerConfig := loginResp.GetPeerConfig()
2024-05-10 10:47:16 +02:00
engineConfig, err := createEngineConfig(myPrivateKey, c.config, peerConfig)
if err != nil {
log.Error(err)
return wrapErr(err)
}
Release 0.28.0 (#2092) * compile client under freebsd (#1620) Compile netbird client under freebsd and now support netstack and userspace modes. Refactoring linux specific code to share same code with FreeBSD, move to *_unix.go files. Not implemented yet: Kernel mode not supported DNS probably does not work yet Routing also probably does not work yet SSH support did not tested yet Lack of test environment for freebsd (dedicated VM for github runners under FreeBSD required) Lack of tests for freebsd specific code info reporting need to review and also implement, for example OS reported as GENERIC instead of FreeBSD (lack of FreeBSD icon in management interface) Lack of proper client setup under FreeBSD Lack of FreeBSD port/package * Add DNS routes (#1943) Given domains are resolved periodically and resolved IPs are replaced with the new ones. Unless the flag keep_route is set to true, then only new ones are added. This option is helpful if there are long-running connections that might still point to old IP addresses from changed DNS records. * Add process posture check (#1693) Introduces a process posture check to validate the existence and active status of specific binaries on peer systems. The check ensures that files are present at specified paths, and that corresponding processes are running. This check supports Linux, Windows, and macOS systems. Co-authored-by: Evgenii <mail@skillcoder.com> Co-authored-by: Pascal Fischer <pascal@netbird.io> Co-authored-by: Zoltan Papp <zoltan.pmail@gmail.com> Co-authored-by: Viktor Liu <17948409+lixmal@users.noreply.github.com> Co-authored-by: Bethuel Mmbaga <bethuelmbaga12@gmail.com>
2024-06-13 13:24:24 +02:00
checks := loginResp.GetChecks()
2024-05-10 10:47:16 +02:00
c.engineMutex.Lock()
c.engine = NewEngineWithProbes(engineCtx, cancel, signalClient, mgmClient, relayManager, engineConfig, mobileDependency, c.statusRecorder, probes, checks)
2024-05-10 10:47:16 +02:00
c.engineMutex.Unlock()
if err := c.engine.Start(); err != nil {
log.Errorf("error while starting Netbird Connection Engine: %s", err)
return wrapErr(err)
}
log.Infof("Netbird engine started, the IP is: %s", peerConfig.GetAddress())
state.Set(StatusConnected)
if runningChan != nil && runningChanOpen {
runningChan <- nil
close(runningChan)
runningChanOpen = false
}
<-engineCtx.Done()
c.engineMutex.Lock()
if c.engine != nil && c.engine.wgInterface != nil {
log.Infof("ensuring %s is removed, Netbird engine context cancelled", c.engine.wgInterface.Name())
if err := c.engine.Stop(); err != nil {
log.Errorf("Failed to stop engine: %v", err)
}
c.engine = nil
}
c.engineMutex.Unlock()
2024-05-10 10:47:16 +02:00
c.statusRecorder.ClientTeardown()
backOff.Reset()
log.Info("stopped NetBird client")
if _, err := state.Status(); errors.Is(err, ErrResetConnection) {
return err
}
return nil
}
2024-05-10 10:47:16 +02:00
c.statusRecorder.ClientStart()
2022-07-02 12:02:17 +02:00
err = backoff.Retry(operation, backOff)
if err != nil {
log.Debugf("exiting client retry loop due to unrecoverable error: %s", err)
if s, ok := gstatus.FromError(err); ok && (s.Code() == codes.PermissionDenied) {
state.Set(StatusNeedsLogin)
_ = c.Stop()
}
return err
}
return nil
}
func parseRelayInfo(loginResp *mgmProto.LoginResponse) ([]string, *hmac.Token) {
relayCfg := loginResp.GetWiretrusteeConfig().GetRelay()
if relayCfg == nil {
return nil, nil
}
token := &hmac.Token{
Payload: relayCfg.GetTokenPayload(),
Signature: relayCfg.GetTokenSignature(),
}
return relayCfg.GetUrls(), token
}
2024-05-10 10:47:16 +02:00
func (c *ConnectClient) Engine() *Engine {
if c == nil {
return nil
}
2024-05-10 10:47:16 +02:00
var e *Engine
c.engineMutex.Lock()
e = c.engine
c.engineMutex.Unlock()
return e
}
func (c *ConnectClient) Stop() error {
if c == nil {
return nil
}
c.engineMutex.Lock()
defer c.engineMutex.Unlock()
if c.engine == nil {
return nil
}
if err := c.engine.Stop(); err != nil {
return fmt.Errorf("stop engine: %w", err)
}
return nil
}
func (c *ConnectClient) isContextCancelled() bool {
select {
case <-c.ctx.Done():
return true
default:
return false
}
}
// createEngineConfig converts configuration received from Management Service to EngineConfig
func createEngineConfig(key wgtypes.Key, config *Config, peerConfig *mgmProto.PeerConfig) (*EngineConfig, error) {
2024-06-14 14:22:49 +02:00
nm := false
if config.NetworkMonitor != nil {
nm = *config.NetworkMonitor
}
engineConf := &EngineConfig{
WgIfaceName: config.WgIface,
WgAddr: peerConfig.Address,
IFaceBlackList: config.IFaceBlackList,
DisableIPv6Discovery: config.DisableIPv6Discovery,
WgPrivateKey: key,
WgPort: config.WgPort,
2024-06-14 14:22:49 +02:00
NetworkMonitor: nm,
SSHKey: []byte(config.SSHKey),
NATExternalIPs: config.NATExternalIPs,
CustomDNSAddress: config.CustomDNSAddress,
RosenpassEnabled: config.RosenpassEnabled,
RosenpassPermissive: config.RosenpassPermissive,
ServerSSHAllowed: util.ReturnBoolWithDefaultTrue(config.ServerSSHAllowed),
Release 0.28.0 (#2092) * compile client under freebsd (#1620) Compile netbird client under freebsd and now support netstack and userspace modes. Refactoring linux specific code to share same code with FreeBSD, move to *_unix.go files. Not implemented yet: Kernel mode not supported DNS probably does not work yet Routing also probably does not work yet SSH support did not tested yet Lack of test environment for freebsd (dedicated VM for github runners under FreeBSD required) Lack of tests for freebsd specific code info reporting need to review and also implement, for example OS reported as GENERIC instead of FreeBSD (lack of FreeBSD icon in management interface) Lack of proper client setup under FreeBSD Lack of FreeBSD port/package * Add DNS routes (#1943) Given domains are resolved periodically and resolved IPs are replaced with the new ones. Unless the flag keep_route is set to true, then only new ones are added. This option is helpful if there are long-running connections that might still point to old IP addresses from changed DNS records. * Add process posture check (#1693) Introduces a process posture check to validate the existence and active status of specific binaries on peer systems. The check ensures that files are present at specified paths, and that corresponding processes are running. This check supports Linux, Windows, and macOS systems. Co-authored-by: Evgenii <mail@skillcoder.com> Co-authored-by: Pascal Fischer <pascal@netbird.io> Co-authored-by: Zoltan Papp <zoltan.pmail@gmail.com> Co-authored-by: Viktor Liu <17948409+lixmal@users.noreply.github.com> Co-authored-by: Bethuel Mmbaga <bethuelmbaga12@gmail.com>
2024-06-13 13:24:24 +02:00
DNSRouteInterval: config.DNSRouteInterval,
}
if config.PreSharedKey != "" {
preSharedKey, err := wgtypes.ParseKey(config.PreSharedKey)
if err != nil {
return nil, err
}
engineConf.PreSharedKey = &preSharedKey
}
port, err := freePort(config.WgPort)
if err != nil {
return nil, err
}
if port != config.WgPort {
log.Infof("using %d as wireguard port: %d is in use", port, config.WgPort)
}
engineConf.WgPort = port
return engineConf, nil
}
// connectToSignal creates Signal Service client and established a connection
func connectToSignal(ctx context.Context, wtConfig *mgmProto.WiretrusteeConfig, ourPrivateKey wgtypes.Key) (*signal.GrpcClient, error) {
var sigTLSEnabled bool
if wtConfig.Signal.Protocol == mgmProto.HostConfig_HTTPS {
sigTLSEnabled = true
} else {
sigTLSEnabled = false
}
signalClient, err := signal.NewClient(ctx, wtConfig.Signal.Uri, ourPrivateKey, sigTLSEnabled)
if err != nil {
log.Errorf("error while connecting to the Signal Exchange Service %s: %s", wtConfig.Signal.Uri, err)
2022-07-02 12:02:17 +02:00
return nil, gstatus.Errorf(codes.FailedPrecondition, "failed connecting to Signal Service : %s", err)
}
return signalClient, nil
}
// loginToManagement creates Management Services client, establishes a connection, logs-in and gets a global Wiretrustee config (signal, turn, stun hosts, etc)
func loginToManagement(ctx context.Context, client mgm.Client, pubSSHKey []byte) (*mgmProto.LoginResponse, error) {
serverPublicKey, err := client.GetServerPublicKey()
if err != nil {
return nil, gstatus.Errorf(codes.FailedPrecondition, "failed while getting Management Service public key: %s", err)
}
sysInfo := system.GetInfo(ctx)
loginResp, err := client.Login(*serverPublicKey, sysInfo, pubSSHKey)
if err != nil {
return nil, err
}
return loginResp, nil
}
func statusRecorderToMgmConnStateNotifier(statusRecorder *peer.Status) mgm.ConnStateNotifier {
var sri interface{} = statusRecorder
mgmNotifier, _ := sri.(mgm.ConnStateNotifier)
return mgmNotifier
}
func statusRecorderToSignalConnStateNotifier(statusRecorder *peer.Status) signal.ConnStateNotifier {
var sri interface{} = statusRecorder
notifier, _ := sri.(signal.ConnStateNotifier)
return notifier
}
// freePort attempts to determine if the provided port is available, if not it will ask the system for a free port.
func freePort(initPort int) (int, error) {
addr := net.UDPAddr{}
if initPort == 0 {
initPort = iface.DefaultWgPort
}
addr.Port = initPort
conn, err := net.ListenUDP("udp", &addr)
if err == nil {
closeConnWithLog(conn)
return initPort, nil
}
// if the port is already in use, ask the system for a free port
addr.Port = 0
conn, err = net.ListenUDP("udp", &addr)
if err != nil {
return 0, fmt.Errorf("unable to get a free port: %v", err)
}
udpAddr, ok := conn.LocalAddr().(*net.UDPAddr)
if !ok {
return 0, errors.New("wrong address type when getting a free port")
}
closeConnWithLog(conn)
return udpAddr.Port, nil
}
func closeConnWithLog(conn *net.UDPConn) {
startClosing := time.Now()
err := conn.Close()
if err != nil {
log.Warnf("closing probe port %d failed: %v. NetBird will still attempt to use this port for connection.", conn.LocalAddr().(*net.UDPAddr).Port, err)
}
if time.Since(startClosing) > time.Second {
log.Warnf("closing the testing port %d took %s. Usually it is safe to ignore, but continuous warnings may indicate a problem.", conn.LocalAddr().(*net.UDPAddr).Port, time.Since(startClosing))
}
}