Add anonymous usage metrics collection (#508)

This will help us understand usage on self-hosted deployments

The collection may be disabled by using the flag --disable-anonymous-metrics or 
NETBIRD_DISABLE_ANONYMOUS_METRICS in setup.env
This commit is contained in:
Maycon Santos 2022-10-16 13:33:46 +02:00 committed by GitHub
parent 06055af361
commit 04e4407ea7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 358 additions and 4 deletions

View File

@ -41,7 +41,7 @@ builds:
- arm64
- arm
ldflags:
- -s -w -X main.version={{.Version}} -X main.commit={{.Commit}} -X main.date={{.CommitDate}} -X main.builtBy=goreleaser
- -s -w -X github.com/netbirdio/netbird/client/system.version={{.Version}} -X main.commit={{.Commit}} -X main.date={{.CommitDate}} -X main.builtBy=goreleaser
mod_timestamp: '{{ .CommitTimestamp }}'
- id: netbird-signal

View File

@ -29,6 +29,8 @@ LETSENCRYPT_VOLUMESUFFIX="letsencrypt"
NETBIRD_AUTH_DEVICE_AUTH_PROVIDER="none"
NETBIRD_DISABLE_ANONYMOUS_METRICS=${NETBIRD_DISABLE_ANONYMOUS_METRICS:-false}
# exports
export NETBIRD_DOMAIN
export NETBIRD_AUTH_CLIENT_ID
@ -55,3 +57,4 @@ export VOLUME_PREFIX
export MGMT_VOLUMESUFFIX
export SIGNAL_VOLUMESUFFIX
export LETSENCRYPT_VOLUMESUFFIX
export NETBIRD_DISABLE_ANONYMOUS_METRICS

View File

@ -48,7 +48,7 @@ services:
# # port and command for Let's Encrypt validation without dashboard container
# - 443:443
# command: ["--letsencrypt-domain", "$NETBIRD_DOMAIN", "--log-file", "console"]
command: ["--port", "443", "--log-file", "console"]
command: ["--port", "443", "--log-file", "console", "--disable-anonymous-metrics=$NETBIRD_DISABLE_ANONYMOUS_METRICS"]
# Coturn
coturn:
image: coturn/coturn

View File

@ -17,3 +17,6 @@ NETBIRD_LETSENCRYPT_EMAIL=""
# redirect and silent redirect URIs, these will be concatenated into your NETBIRD_DOMAIN domain.
# NETBIRD_AUTH_REDIRECT_URI="/peers"
# NETBIRD_AUTH_SILENT_REDIRECT_URI="/add-peers"
# Disable anonymous metrics collection, see more information at https://netbird.io/docs/FAQ/metrics-collection
NETBIRD_DISABLE_ANONYMOUS_METRICS=false

View File

@ -1,12 +1,15 @@
package cmd
import (
"context"
"crypto/tls"
"encoding/json"
"errors"
"flag"
"fmt"
"github.com/google/uuid"
httpapi "github.com/netbirdio/netbird/management/server/http"
"github.com/netbirdio/netbird/management/server/metrics"
"golang.org/x/crypto/acme/autocert"
"golang.org/x/net/http2"
"golang.org/x/net/http2/h2c"
@ -161,6 +164,21 @@ var (
}
mgmtProto.RegisterManagementServiceServer(gRPCAPIHandler, srv)
installationID, err := getInstallationID(store)
if err != nil {
log.Errorf("cannot load TLS credentials: %v", err)
return err
}
fmt.Println("metrics ", disableMetrics)
if !disableMetrics {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
metricsWorker := metrics.NewWorker(ctx, installationID, store, peersUpdateManager)
go metricsWorker.Run()
}
var compatListener net.Listener
if mgmtPort != ManagementLegacyPort {
// The Management gRPC server was running on port 33073 previously. Old agents that are already connected to it
@ -228,6 +246,20 @@ func notifyStop(msg string) {
}
}
func getInstallationID(store server.Store) (string, error) {
installationID := store.GetInstallationID()
if installationID != "" {
return installationID, nil
}
installationID = strings.ToUpper(uuid.New().String())
err := store.SaveInstallationID(installationID)
if err != nil {
return "", err
}
return installationID, nil
}
func serveGRPC(grpcServer *grpc.Server, port int) (net.Listener, error) {
listener, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
if err != nil {

View File

@ -27,6 +27,7 @@ var (
mgmtConfig string
logLevel string
logFile string
disableMetrics bool
rootCmd = &cobra.Command{
Use: "netbird-mgmt",
@ -66,6 +67,7 @@ func init() {
mgmtCmd.Flags().StringVar(&mgmtLetsencryptDomain, "letsencrypt-domain", "", "a domain to issue Let's Encrypt certificate for. Enables TLS using Let's Encrypt. Will fetch and renew certificate, and run the server with TLS")
mgmtCmd.Flags().StringVar(&certFile, "cert-file", "", "Location of your SSL certificate. Can be used when you have an existing certificate and don't want a new certificate be generated automatically. If letsencrypt-domain is specified this property has no effect")
mgmtCmd.Flags().StringVar(&certKey, "cert-key", "", "Location of your SSL certificate private key. Can be used when you have an existing certificate and don't want a new certificate be generated automatically. If letsencrypt-domain is specified this property has no effect")
mgmtCmd.Flags().BoolVar(&disableMetrics, "disable-anonymous-metrics", false, "disables push of anonymous usage metrics to NetBird")
rootCmd.MarkFlagRequired("config") //nolint
rootCmd.PersistentFlags().StringVar(&logLevel, "log-level", "info", "")

View File

@ -29,6 +29,7 @@ type FileStore struct {
PeerKeyId2DstRulesId map[string]map[string]struct{} `json:"-"`
PeerKeyID2RouteIDs map[string]map[string]struct{} `json:"-"`
AccountPrefix2RouteIDs map[string]map[string][]string `json:"-"`
InstallationID string
// mutex to synchronise Store read/write operations
mux sync.Mutex `json:"-"`
@ -415,8 +416,10 @@ func (s *FileStore) GetAccountPeers(accountId string) ([]*Peer, error) {
// GetAllAccounts returns all accounts
func (s *FileStore) GetAllAccounts() (all []*Account) {
s.mux.Lock()
defer s.mux.Unlock()
for _, a := range s.Accounts {
all = append(all, a)
all = append(all, a.Copy())
}
return all
@ -566,3 +569,18 @@ func (s *FileStore) GetRoutesByPrefix(accountID string, prefix netip.Prefix) ([]
return routes, nil
}
// GetInstallationID returns the installation ID from the store
func (s *FileStore) GetInstallationID() string {
return s.InstallationID
}
// SaveInstallationID saves the installation ID
func (s *FileStore) SaveInstallationID(id string) error {
s.mux.Lock()
defer s.mux.Unlock()
s.InstallationID = id
return s.persist(s.storeFile)
}

View File

@ -0,0 +1,283 @@
// Package metrics gather anonymous information about the usage of NetBird management
package metrics
import (
"context"
"encoding/json"
"fmt"
"github.com/netbirdio/netbird/client/system"
"github.com/netbirdio/netbird/management/server"
log "github.com/sirupsen/logrus"
"io"
"net/http"
"strings"
"time"
)
const (
// PayloadEvent identifies an event type
PayloadEvent = "self-hosted stats"
// payloadEndpoint metrics endpoint to send anonymous data
payloadEndpoint = "https://metrics.netbird.io"
// defaultPushInterval default interval to push metrics
defaultPushInterval = 24 * time.Hour
// requestTimeout http request timeout
requestTimeout = 30 * time.Second
)
type getTokenResponse struct {
PublicAPIToken string `json:"public_api_token"`
}
type pushPayload struct {
APIKey string `json:"api_key"`
DistinctID string `json:"distinct_id"`
Event string `json:"event"`
Properties properties `json:"properties"`
Timestamp time.Time `json:"timestamp"`
}
// properties metrics to push
type properties map[string]interface{}
// DataSource metric data source
type DataSource interface {
GetAllAccounts() []*server.Account
}
// ConnManager peer connection manager that holds state for current active connections
type ConnManager interface {
GetAllConnectedPeers() map[string]struct{}
}
// Worker metrics collector and pusher
type Worker struct {
ctx context.Context
id string
dataSource DataSource
connManager ConnManager
startupTime time.Time
lastRun time.Time
}
// NewWorker returns a metrics worker
func NewWorker(ctx context.Context, id string, dataSource DataSource, connManager ConnManager) *Worker {
currentTime := time.Now()
return &Worker{
ctx: ctx,
id: id,
dataSource: dataSource,
connManager: connManager,
startupTime: currentTime,
lastRun: currentTime,
}
}
// Run runs the metrics worker
func (w *Worker) Run() {
pushTicker := time.NewTicker(defaultPushInterval)
for {
select {
case <-w.ctx.Done():
return
case <-pushTicker.C:
err := w.sendMetrics()
if err != nil {
log.Error(err)
}
}
}
}
func (w *Worker) sendMetrics() error {
ctx, cancel := context.WithTimeout(w.ctx, requestTimeout)
defer cancel()
apiKey, err := getAPIKey(ctx)
if err != nil {
return err
}
payload := w.generatePayload(apiKey)
payloadString, err := buildMetricsPayload(payload)
if err != nil {
return err
}
httpClient := http.Client{}
exportJobReq, err := createPostRequest(ctx, payloadEndpoint+"/capture/", payloadString)
if err != nil {
return fmt.Errorf("unable to create metrics post request %v", err)
}
jobResp, err := httpClient.Do(exportJobReq)
if err != nil {
return fmt.Errorf("unable to push metrics %v", err)
}
defer func() {
err = jobResp.Body.Close()
if err != nil {
log.Errorf("error while closing update metrics response body: %v", err)
}
}()
if jobResp.StatusCode != 200 {
return fmt.Errorf("unable to push anonymous metrics, got statusCode %d", jobResp.StatusCode)
}
log.Infof("sent anonymous metrics, next push will happen in %s. "+
"You can disable these metrics by running with flag --disable-anonymous-metrics,"+
" see more information at https://netbird.io/docs/FAQ/metrics-collection", defaultPushInterval)
return nil
}
func (w *Worker) generatePayload(apiKey string) pushPayload {
properties := w.generateProperties()
return pushPayload{
APIKey: apiKey,
DistinctID: w.id,
Event: PayloadEvent,
Properties: properties,
Timestamp: time.Now(),
}
}
func (w *Worker) generateProperties() properties {
var (
uptime float64
accounts int
users int
peers int
setupKeysUsage int
activePeersLastDay int
osPeers map[string]int
userPeers int
rules int
groups int
routes int
nameservers int
version string
)
start := time.Now()
metricsProperties := make(properties)
osPeers = make(map[string]int)
uptime = time.Since(w.startupTime).Seconds()
connections := w.connManager.GetAllConnectedPeers()
version = system.NetbirdVersion()
for _, account := range w.dataSource.GetAllAccounts() {
accounts++
users = users + len(account.Users)
rules = rules + len(account.Rules)
groups = groups + len(account.Groups)
routes = routes + len(account.Routes)
nameservers = nameservers + len(account.NameServerGroups)
for _, key := range account.SetupKeys {
setupKeysUsage = setupKeysUsage + key.UsedTimes
}
for _, peer := range account.Peers {
peers++
if peer.SetupKey != "" {
userPeers++
}
_, connected := connections[peer.Key]
if connected || peer.Status.LastSeen.After(w.lastRun) {
activePeersLastDay++
}
osKey := strings.ToLower(fmt.Sprintf("peer_os_%s", peer.Meta.GoOS))
osCount := osPeers[osKey]
osPeers[osKey] = osCount + 1
}
}
metricsProperties["uptime"] = uptime
metricsProperties["accounts"] = accounts
metricsProperties["users"] = users
metricsProperties["peers"] = peers
metricsProperties["setup_keys_usage"] = setupKeysUsage
metricsProperties["active_peers_last_day"] = activePeersLastDay
metricsProperties["user_peers"] = userPeers
metricsProperties["rules"] = rules
metricsProperties["groups"] = groups
metricsProperties["routes"] = routes
metricsProperties["nameservers"] = nameservers
metricsProperties["version"] = version
for os, count := range osPeers {
metricsProperties[os] = count
}
metricsProperties["metric_generation_time"] = time.Since(start).Milliseconds()
return metricsProperties
}
func getAPIKey(ctx context.Context) (string, error) {
httpClient := http.Client{}
req, err := http.NewRequestWithContext(ctx, http.MethodGet, payloadEndpoint+"/GetToken", nil)
if err != nil {
return "", fmt.Errorf("unable to create request for metrics public api token %v", err)
}
response, err := httpClient.Do(req)
if err != nil {
return "", fmt.Errorf("unable to request metrics public api token %v", err)
}
defer func() {
err = response.Body.Close()
if err != nil {
log.Errorf("error while closing metrics token response body: %v", err)
}
}()
if response.StatusCode != 200 {
return "", fmt.Errorf("unable to retrieve metrics token, statusCode %d", response.StatusCode)
}
body, err := io.ReadAll(response.Body)
if err != nil {
return "", fmt.Errorf("coudln't get metrics token response; %v", err)
}
var tokenResponse getTokenResponse
err = json.Unmarshal(body, &tokenResponse)
if err != nil {
return "", fmt.Errorf("coudln't parse metrics public api token; %v", err)
}
return tokenResponse.PublicAPIToken, nil
}
func buildMetricsPayload(payload pushPayload) (string, error) {
str, err := json.Marshal(payload)
if err != nil {
return "", fmt.Errorf("unable to marshal metrics payload, got err: %v", err)
}
return string(str), nil
}
func createPostRequest(ctx context.Context, endpoint string, payloadStr string) (*http.Request, error) {
reqURL := endpoint
payload := strings.NewReader(payloadStr)
req, err := http.NewRequestWithContext(ctx, "POST", reqURL, payload)
if err != nil {
return nil, err
}
req.Header.Add("content-type", "application/json")
return req, nil
}

View File

@ -21,4 +21,6 @@ type Store interface {
SaveAccount(account *Account) error
GetPeerRoutes(peerKey string) ([]*route.Route, error)
GetRoutesByPrefix(accountID string, prefix netip.Prefix) ([]*route.Route, error)
GetInstallationID() string
SaveInstallationID(id string) error
}

View File

@ -70,3 +70,14 @@ func (p *PeersUpdateManager) CloseChannel(peerKey string) {
log.Debugf("closed updates channel of a peer %s", peerKey)
}
// GetAllConnectedPeers returns a copy of the connected peers map
func (p *PeersUpdateManager) GetAllConnectedPeers() map[string]struct{} {
p.channelsMux.Lock()
defer p.channelsMux.Unlock()
m := make(map[string]struct{})
for key := range p.peerChannels {
m[key] = struct{}{}
}
return m
}