From 04e4407ea7aa402d1c1fad4af710adacb701f1cc Mon Sep 17 00:00:00 2001 From: Maycon Santos Date: Sun, 16 Oct 2022 13:33:46 +0200 Subject: [PATCH] Add anonymous usage metrics collection (#508) This will help us understand usage on self-hosted deployments The collection may be disabled by using the flag --disable-anonymous-metrics or NETBIRD_DISABLE_ANONYMOUS_METRICS in setup.env --- .goreleaser.yaml | 2 +- infrastructure_files/base.setup.env | 3 + infrastructure_files/docker-compose.yml.tmpl | 2 +- infrastructure_files/setup.env.example | 5 +- management/cmd/management.go | 32 +++ management/cmd/root.go | 2 + management/server/file_store.go | 20 +- management/server/metrics/metrics.go | 283 +++++++++++++++++++ management/server/store.go | 2 + management/server/updatechannel.go | 11 + 10 files changed, 358 insertions(+), 4 deletions(-) create mode 100644 management/server/metrics/metrics.go diff --git a/.goreleaser.yaml b/.goreleaser.yaml index 154609052..c6b0a4c4a 100644 --- a/.goreleaser.yaml +++ b/.goreleaser.yaml @@ -41,7 +41,7 @@ builds: - arm64 - arm ldflags: - - -s -w -X main.version={{.Version}} -X main.commit={{.Commit}} -X main.date={{.CommitDate}} -X main.builtBy=goreleaser + - -s -w -X github.com/netbirdio/netbird/client/system.version={{.Version}} -X main.commit={{.Commit}} -X main.date={{.CommitDate}} -X main.builtBy=goreleaser mod_timestamp: '{{ .CommitTimestamp }}' - id: netbird-signal diff --git a/infrastructure_files/base.setup.env b/infrastructure_files/base.setup.env index 3b8b4fdce..61efeffd8 100644 --- a/infrastructure_files/base.setup.env +++ b/infrastructure_files/base.setup.env @@ -29,6 +29,8 @@ LETSENCRYPT_VOLUMESUFFIX="letsencrypt" NETBIRD_AUTH_DEVICE_AUTH_PROVIDER="none" +NETBIRD_DISABLE_ANONYMOUS_METRICS=${NETBIRD_DISABLE_ANONYMOUS_METRICS:-false} + # exports export NETBIRD_DOMAIN export NETBIRD_AUTH_CLIENT_ID @@ -55,3 +57,4 @@ export VOLUME_PREFIX export MGMT_VOLUMESUFFIX export SIGNAL_VOLUMESUFFIX export LETSENCRYPT_VOLUMESUFFIX +export 
NETBIRD_DISABLE_ANONYMOUS_METRICS diff --git a/infrastructure_files/docker-compose.yml.tmpl b/infrastructure_files/docker-compose.yml.tmpl index 9ea8fd083..add470c16 100644 --- a/infrastructure_files/docker-compose.yml.tmpl +++ b/infrastructure_files/docker-compose.yml.tmpl @@ -48,7 +48,7 @@ services: # # port and command for Let's Encrypt validation without dashboard container # - 443:443 # command: ["--letsencrypt-domain", "$NETBIRD_DOMAIN", "--log-file", "console"] - command: ["--port", "443", "--log-file", "console"] + command: ["--port", "443", "--log-file", "console", "--disable-anonymous-metrics=$NETBIRD_DISABLE_ANONYMOUS_METRICS"] # Coturn coturn: image: coturn/coturn diff --git a/infrastructure_files/setup.env.example b/infrastructure_files/setup.env.example index de960e86d..0d84a14f8 100644 --- a/infrastructure_files/setup.env.example +++ b/infrastructure_files/setup.env.example @@ -16,4 +16,7 @@ NETBIRD_LETSENCRYPT_EMAIL="" # if your IDP provider doesn't support fragmented URIs, configure custom # redirect and silent redirect URIs, these will be concatenated into your NETBIRD_DOMAIN domain. 
# NETBIRD_AUTH_REDIRECT_URI="/peers" -# NETBIRD_AUTH_SILENT_REDIRECT_URI="/add-peers" \ No newline at end of file +# NETBIRD_AUTH_SILENT_REDIRECT_URI="/add-peers" + +# Disable anonymous metrics collection, see more information at https://netbird.io/docs/FAQ/metrics-collection +NETBIRD_DISABLE_ANONYMOUS_METRICS=false \ No newline at end of file diff --git a/management/cmd/management.go b/management/cmd/management.go index e8fb7c278..82cf01f34 100644 --- a/management/cmd/management.go +++ b/management/cmd/management.go @@ -1,12 +1,15 @@ package cmd import ( + "context" "crypto/tls" "encoding/json" "errors" "flag" "fmt" + "github.com/google/uuid" httpapi "github.com/netbirdio/netbird/management/server/http" + "github.com/netbirdio/netbird/management/server/metrics" "golang.org/x/crypto/acme/autocert" "golang.org/x/net/http2" "golang.org/x/net/http2/h2c" @@ -161,6 +164,21 @@ var ( } mgmtProto.RegisterManagementServiceServer(gRPCAPIHandler, srv) + installationID, err := getInstallationID(store) + if err != nil { + log.Errorf("cannot get installation ID: %v", err) + return err + } + + log.Debugf("anonymous metrics disabled: %v", disableMetrics) + + if !disableMetrics { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + metricsWorker := metrics.NewWorker(ctx, installationID, store, peersUpdateManager) + go metricsWorker.Run() + } + var compatListener net.Listener if mgmtPort != ManagementLegacyPort { // The Management gRPC server was running on port 33073 previously. 
Old agents that are already connected to it @@ -228,6 +246,20 @@ func notifyStop(msg string) { } } +func getInstallationID(store server.Store) (string, error) { + installationID := store.GetInstallationID() + if installationID != "" { + return installationID, nil + } + + installationID = strings.ToUpper(uuid.New().String()) + err := store.SaveInstallationID(installationID) + if err != nil { + return "", err + } + return installationID, nil +} + func serveGRPC(grpcServer *grpc.Server, port int) (net.Listener, error) { listener, err := net.Listen("tcp", fmt.Sprintf(":%d", port)) if err != nil { diff --git a/management/cmd/root.go b/management/cmd/root.go index a6cb951af..d9b6e7cb7 100644 --- a/management/cmd/root.go +++ b/management/cmd/root.go @@ -27,6 +27,7 @@ var ( mgmtConfig string logLevel string logFile string + disableMetrics bool rootCmd = &cobra.Command{ Use: "netbird-mgmt", @@ -66,6 +67,7 @@ func init() { mgmtCmd.Flags().StringVar(&mgmtLetsencryptDomain, "letsencrypt-domain", "", "a domain to issue Let's Encrypt certificate for. Enables TLS using Let's Encrypt. Will fetch and renew certificate, and run the server with TLS") mgmtCmd.Flags().StringVar(&certFile, "cert-file", "", "Location of your SSL certificate. Can be used when you have an existing certificate and don't want a new certificate be generated automatically. If letsencrypt-domain is specified this property has no effect") mgmtCmd.Flags().StringVar(&certKey, "cert-key", "", "Location of your SSL certificate private key. Can be used when you have an existing certificate and don't want a new certificate be generated automatically. 
If letsencrypt-domain is specified this property has no effect") + mgmtCmd.Flags().BoolVar(&disableMetrics, "disable-anonymous-metrics", false, "disables push of anonymous usage metrics to NetBird") rootCmd.MarkFlagRequired("config") //nolint rootCmd.PersistentFlags().StringVar(&logLevel, "log-level", "info", "") diff --git a/management/server/file_store.go b/management/server/file_store.go index 04dac6fac..d475a2ce9 100644 --- a/management/server/file_store.go +++ b/management/server/file_store.go @@ -29,6 +29,7 @@ type FileStore struct { PeerKeyId2DstRulesId map[string]map[string]struct{} `json:"-"` PeerKeyID2RouteIDs map[string]map[string]struct{} `json:"-"` AccountPrefix2RouteIDs map[string]map[string][]string `json:"-"` + InstallationID string // mutex to synchronise Store read/write operations mux sync.Mutex `json:"-"` @@ -415,8 +416,10 @@ func (s *FileStore) GetAccountPeers(accountId string) ([]*Peer, error) { // GetAllAccounts returns all accounts func (s *FileStore) GetAllAccounts() (all []*Account) { + s.mux.Lock() + defer s.mux.Unlock() for _, a := range s.Accounts { - all = append(all, a) + all = append(all, a.Copy()) } return all @@ -566,3 +569,18 @@ func (s *FileStore) GetRoutesByPrefix(accountID string, prefix netip.Prefix) ([] return routes, nil } + +// GetInstallationID returns the installation ID from the store +func (s *FileStore) GetInstallationID() string { + return s.InstallationID +} + +// SaveInstallationID saves the installation ID +func (s *FileStore) SaveInstallationID(id string) error { + s.mux.Lock() + defer s.mux.Unlock() + + s.InstallationID = id + + return s.persist(s.storeFile) +} diff --git a/management/server/metrics/metrics.go b/management/server/metrics/metrics.go new file mode 100644 index 000000000..d7287efd4 --- /dev/null +++ b/management/server/metrics/metrics.go @@ -0,0 +1,283 @@ +// Package metrics gather anonymous information about the usage of NetBird management +package metrics + +import ( + "context" + "encoding/json" 
+ "fmt" + "github.com/netbirdio/netbird/client/system" + "github.com/netbirdio/netbird/management/server" + log "github.com/sirupsen/logrus" + "io" + "net/http" + "strings" + "time" +) + +const ( + // PayloadEvent identifies an event type + PayloadEvent = "self-hosted stats" + // payloadEndpoint metrics endpoint to send anonymous data + payloadEndpoint = "https://metrics.netbird.io" + // defaultPushInterval default interval to push metrics + defaultPushInterval = 24 * time.Hour + // requestTimeout http request timeout + requestTimeout = 30 * time.Second +) + +type getTokenResponse struct { + PublicAPIToken string `json:"public_api_token"` +} + +type pushPayload struct { + APIKey string `json:"api_key"` + DistinctID string `json:"distinct_id"` + Event string `json:"event"` + Properties properties `json:"properties"` + Timestamp time.Time `json:"timestamp"` +} + +// properties metrics to push +type properties map[string]interface{} + +// DataSource metric data source +type DataSource interface { + GetAllAccounts() []*server.Account +} + +// ConnManager peer connection manager that holds state for current active connections +type ConnManager interface { + GetAllConnectedPeers() map[string]struct{} +} + +// Worker metrics collector and pusher +type Worker struct { + ctx context.Context + id string + dataSource DataSource + connManager ConnManager + startupTime time.Time + lastRun time.Time +} + +// NewWorker returns a metrics worker +func NewWorker(ctx context.Context, id string, dataSource DataSource, connManager ConnManager) *Worker { + currentTime := time.Now() + return &Worker{ + ctx: ctx, + id: id, + dataSource: dataSource, + connManager: connManager, + startupTime: currentTime, + lastRun: currentTime, + } +} + +// Run pushes metrics every defaultPushInterval until the worker context is cancelled, recording the time of each push attempt in lastRun +func (w *Worker) Run() { + pushTicker := time.NewTicker(defaultPushInterval) + for { + select { + case <-w.ctx.Done(): + return + case <-pushTicker.C: + if err := w.sendMetrics(); err != nil { + log.Error(err) + } + w.lastRun = time.Now() + } + } 
+} + +func (w *Worker) sendMetrics() error { + ctx, cancel := context.WithTimeout(w.ctx, requestTimeout) + defer cancel() + + apiKey, err := getAPIKey(ctx) + if err != nil { + return err + } + + payload := w.generatePayload(apiKey) + + payloadString, err := buildMetricsPayload(payload) + if err != nil { + return err + } + + httpClient := http.Client{} + + exportJobReq, err := createPostRequest(ctx, payloadEndpoint+"/capture/", payloadString) + if err != nil { + return fmt.Errorf("unable to create metrics post request %v", err) + } + + jobResp, err := httpClient.Do(exportJobReq) + if err != nil { + return fmt.Errorf("unable to push metrics %v", err) + } + + defer func() { + err = jobResp.Body.Close() + if err != nil { + log.Errorf("error while closing update metrics response body: %v", err) + } + }() + + if jobResp.StatusCode != 200 { + return fmt.Errorf("unable to push anonymous metrics, got statusCode %d", jobResp.StatusCode) + } + + log.Infof("sent anonymous metrics, next push will happen in %s. 
"+ + "You can disable these metrics by running with flag --disable-anonymous-metrics,"+ + " see more information at https://netbird.io/docs/FAQ/metrics-collection", defaultPushInterval) + + return nil +} + +func (w *Worker) generatePayload(apiKey string) pushPayload { + properties := w.generateProperties() + + return pushPayload{ + APIKey: apiKey, + DistinctID: w.id, + Event: PayloadEvent, + Properties: properties, + Timestamp: time.Now(), + } +} + +func (w *Worker) generateProperties() properties { + var ( + uptime float64 + accounts int + users int + peers int + setupKeysUsage int + activePeersLastDay int + osPeers map[string]int + userPeers int + rules int + groups int + routes int + nameservers int + version string + ) + start := time.Now() + metricsProperties := make(properties) + osPeers = make(map[string]int) + uptime = time.Since(w.startupTime).Seconds() + connections := w.connManager.GetAllConnectedPeers() + version = system.NetbirdVersion() + + for _, account := range w.dataSource.GetAllAccounts() { + accounts++ + users = users + len(account.Users) + rules = rules + len(account.Rules) + groups = groups + len(account.Groups) + routes = routes + len(account.Routes) + nameservers = nameservers + len(account.NameServerGroups) + + for _, key := range account.SetupKeys { + setupKeysUsage = setupKeysUsage + key.UsedTimes + } + + for _, peer := range account.Peers { + peers++ + if peer.SetupKey != "" { + userPeers++ + } + + _, connected := connections[peer.Key] + if connected || peer.Status.LastSeen.After(w.lastRun) { + activePeersLastDay++ + } + osKey := strings.ToLower(fmt.Sprintf("peer_os_%s", peer.Meta.GoOS)) + osCount := osPeers[osKey] + osPeers[osKey] = osCount + 1 + } + } + + metricsProperties["uptime"] = uptime + metricsProperties["accounts"] = accounts + metricsProperties["users"] = users + metricsProperties["peers"] = peers + metricsProperties["setup_keys_usage"] = setupKeysUsage + metricsProperties["active_peers_last_day"] = activePeersLastDay + 
metricsProperties["user_peers"] = userPeers + metricsProperties["rules"] = rules + metricsProperties["groups"] = groups + metricsProperties["routes"] = routes + metricsProperties["nameservers"] = nameservers + metricsProperties["version"] = version + + for os, count := range osPeers { + metricsProperties[os] = count + } + + metricsProperties["metric_generation_time"] = time.Since(start).Milliseconds() + + return metricsProperties +} + +func getAPIKey(ctx context.Context) (string, error) { + + httpClient := http.Client{} + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, payloadEndpoint+"/GetToken", nil) + if err != nil { + return "", fmt.Errorf("unable to create request for metrics public api token %w", err) + } + response, err := httpClient.Do(req) + if err != nil { + return "", fmt.Errorf("unable to request metrics public api token %w", err) + } + + defer func() { + err = response.Body.Close() + if err != nil { + log.Errorf("error while closing metrics token response body: %v", err) + } + }() + + if response.StatusCode != http.StatusOK { + return "", fmt.Errorf("unable to retrieve metrics token, statusCode %d", response.StatusCode) + } + + body, err := io.ReadAll(response.Body) + if err != nil { + return "", fmt.Errorf("couldn't get metrics token response; %w", err) + } + + var tokenResponse getTokenResponse + + err = json.Unmarshal(body, &tokenResponse) + if err != nil { + return "", fmt.Errorf("couldn't parse metrics public api token; %w", err) + } + + return tokenResponse.PublicAPIToken, nil +} + +func buildMetricsPayload(payload pushPayload) (string, error) { + str, err := json.Marshal(payload) + if err != nil { + return "", fmt.Errorf("unable to marshal metrics payload, got err: %w", err) + } + return string(str), nil +} + +func createPostRequest(ctx context.Context, endpoint string, payloadStr string) (*http.Request, error) { + reqURL := endpoint + + payload := strings.NewReader(payloadStr) + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, reqURL, 
payload) + if err != nil { + return nil, err + } + req.Header.Add("content-type", "application/json") + + return req, nil + +} diff --git a/management/server/store.go b/management/server/store.go index 9c0412304..2637b8ad9 100644 --- a/management/server/store.go +++ b/management/server/store.go @@ -21,4 +21,6 @@ type Store interface { SaveAccount(account *Account) error GetPeerRoutes(peerKey string) ([]*route.Route, error) GetRoutesByPrefix(accountID string, prefix netip.Prefix) ([]*route.Route, error) + GetInstallationID() string + SaveInstallationID(id string) error } diff --git a/management/server/updatechannel.go b/management/server/updatechannel.go index 22f5291f3..4943fe247 100644 --- a/management/server/updatechannel.go +++ b/management/server/updatechannel.go @@ -70,3 +70,14 @@ func (p *PeersUpdateManager) CloseChannel(peerKey string) { log.Debugf("closed updates channel of a peer %s", peerKey) } + +// GetAllConnectedPeers returns a copy of the connected peers map +func (p *PeersUpdateManager) GetAllConnectedPeers() map[string]struct{} { + p.channelsMux.Lock() + defer p.channelsMux.Unlock() + m := make(map[string]struct{}) + for key := range p.peerChannels { + m[key] = struct{}{} + } + return m +}