mirror of
https://github.com/netbirdio/netbird.git
synced 2024-11-07 08:44:07 +01:00
Add telemetry to measure app durations (#878)
This commit is contained in:
parent
8b78209ae5
commit
03a42de5a0
@ -2,21 +2,23 @@ package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/netbirdio/netbird/management/server/activity"
|
||||
"net"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/netbirdio/netbird/management/server/activity"
|
||||
|
||||
"github.com/netbirdio/netbird/util"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
|
||||
clientProto "github.com/netbirdio/netbird/client/proto"
|
||||
client "github.com/netbirdio/netbird/client/server"
|
||||
mgmtProto "github.com/netbirdio/netbird/management/proto"
|
||||
mgmt "github.com/netbirdio/netbird/management/server"
|
||||
sigProto "github.com/netbirdio/netbird/signal/proto"
|
||||
sig "github.com/netbirdio/netbird/signal/server"
|
||||
"google.golang.org/grpc"
|
||||
)
|
||||
|
||||
func startTestingServices(t *testing.T) string {
|
||||
@ -63,7 +65,7 @@ func startManagement(t *testing.T, config *mgmt.Config) (*grpc.Server, net.Liste
|
||||
t.Fatal(err)
|
||||
}
|
||||
s := grpc.NewServer()
|
||||
store, err := mgmt.NewFileStore(config.Datadir)
|
||||
store, err := mgmt.NewFileStore(config.Datadir, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -3,8 +3,6 @@ package internal
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/netbirdio/netbird/iface/bind"
|
||||
"github.com/pion/transport/v2/stdnet"
|
||||
"net"
|
||||
"net/netip"
|
||||
"os"
|
||||
@ -15,6 +13,10 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/pion/transport/v2/stdnet"
|
||||
|
||||
"github.com/netbirdio/netbird/iface/bind"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@ -1039,7 +1041,7 @@ func startManagement(dataDir string) (*grpc.Server, string, error) {
|
||||
return nil, "", err
|
||||
}
|
||||
s := grpc.NewServer(grpc.KeepaliveEnforcementPolicy(kaep), grpc.KeepaliveParams(kasp))
|
||||
store, err := server.NewFileStore(config.Datadir)
|
||||
store, err := server.NewFileStore(config.Datadir, nil)
|
||||
if err != nil {
|
||||
log.Fatalf("failed creating a store: %s: %v", config.Datadir, err)
|
||||
}
|
||||
|
@ -2,28 +2,31 @@ package client
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/netbirdio/netbird/management/server/activity"
|
||||
"net"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/netbirdio/netbird/management/server/activity"
|
||||
|
||||
"github.com/netbirdio/netbird/client/system"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"github.com/netbirdio/netbird/encryption"
|
||||
"github.com/netbirdio/netbird/management/proto"
|
||||
mgmtProto "github.com/netbirdio/netbird/management/proto"
|
||||
mgmt "github.com/netbirdio/netbird/management/server"
|
||||
"github.com/netbirdio/netbird/management/server/mock_server"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"github.com/netbirdio/netbird/util"
|
||||
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
|
||||
"github.com/netbirdio/netbird/util"
|
||||
)
|
||||
|
||||
const ValidKey = "A2C8E62B-38F5-4553-B31E-DD66C696CEBB"
|
||||
@ -50,7 +53,7 @@ func startManagement(t *testing.T) (*grpc.Server, net.Listener) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
s := grpc.NewServer()
|
||||
store, err := mgmt.NewFileStore(config.Datadir)
|
||||
store, err := mgmt.NewFileStore(config.Datadir, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -121,13 +121,6 @@ var (
|
||||
return fmt.Errorf("failed creating datadir: %s: %v", config.Datadir, err)
|
||||
}
|
||||
}
|
||||
|
||||
store, err := server.NewFileStore(config.Datadir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed creating Store: %s: %v", config.Datadir, err)
|
||||
}
|
||||
peersUpdateManager := server.NewPeersUpdateManager()
|
||||
|
||||
appMetrics, err := telemetry.NewDefaultAppMetrics(cmd.Context())
|
||||
if err != nil {
|
||||
return err
|
||||
@ -136,6 +129,11 @@ var (
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
store, err := server.NewFileStore(config.Datadir, appMetrics)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed creating Store: %s: %v", config.Datadir, err)
|
||||
}
|
||||
peersUpdateManager := server.NewPeersUpdateManager()
|
||||
|
||||
var idpManager idp.Manager
|
||||
if config.IdpManagerConfig != nil {
|
||||
|
@ -1869,7 +1869,7 @@ func createManager(t *testing.T) (*DefaultAccountManager, error) {
|
||||
|
||||
func createStore(t *testing.T) (Store, error) {
|
||||
dataDir := t.TempDir()
|
||||
store, err := NewFileStore(dataDir)
|
||||
store, err := NewFileStore(dataDir, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -1,10 +1,12 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/netbirdio/netbird/management/server/activity"
|
||||
"github.com/netbirdio/netbird/management/server/status"
|
||||
"github.com/stretchr/testify/require"
|
||||
"testing"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -190,7 +192,7 @@ func createDNSManager(t *testing.T) (*DefaultAccountManager, error) {
|
||||
|
||||
func createDNSStore(t *testing.T) (Store, error) {
|
||||
dataDir := t.TempDir()
|
||||
store, err := NewFileStore(dataDir)
|
||||
store, err := NewFileStore(dataDir, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -11,6 +11,7 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/netbirdio/netbird/management/server/status"
|
||||
"github.com/netbirdio/netbird/management/server/telemetry"
|
||||
|
||||
"github.com/netbirdio/netbird/util"
|
||||
)
|
||||
@ -37,13 +38,20 @@ type FileStore struct {
|
||||
// sync.Mutex indexed by accountID
|
||||
accountLocks sync.Map `json:"-"`
|
||||
globalAccountLock sync.Mutex `json:"-"`
|
||||
|
||||
metrics telemetry.AppMetrics `json:"-"`
|
||||
}
|
||||
|
||||
type StoredAccount struct{}
|
||||
|
||||
// NewFileStore restores a store from the file located in the datadir
|
||||
func NewFileStore(dataDir string) (*FileStore, error) {
|
||||
return restore(filepath.Join(dataDir, storeFileName))
|
||||
func NewFileStore(dataDir string, metrics telemetry.AppMetrics) (*FileStore, error) {
|
||||
fs, err := restore(filepath.Join(dataDir, storeFileName))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fs.metrics = metrics
|
||||
return fs, nil
|
||||
}
|
||||
|
||||
// restore the state of the store from the file.
|
||||
@ -221,7 +229,17 @@ func restore(file string) (*FileStore, error) {
|
||||
// persist account data to a file
|
||||
// It is recommended to call it with locking FileStore.mux
|
||||
func (s *FileStore) persist(file string) error {
|
||||
return util.WriteJson(file, s)
|
||||
start := time.Now()
|
||||
err := util.WriteJson(file, s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
took := time.Since(start)
|
||||
if s.metrics != nil {
|
||||
s.metrics.StoreMetrics().CountPersistenceDuration(took)
|
||||
}
|
||||
log.Debugf("took %d ms to persist the FileStore", took.Milliseconds())
|
||||
return nil
|
||||
}
|
||||
|
||||
// AcquireGlobalLock acquires global lock across all the accounts and returns a function that releases the lock
|
||||
@ -235,6 +253,12 @@ func (s *FileStore) AcquireGlobalLock() (unlock func()) {
|
||||
log.Debugf("released global lock in %v", time.Since(start))
|
||||
}
|
||||
|
||||
took := time.Since(start)
|
||||
log.Debugf("took %v to acquire global lock", took)
|
||||
if s.metrics != nil {
|
||||
s.metrics.StoreMetrics().CountGlobalLockAcquisitionDuration(took)
|
||||
}
|
||||
|
||||
return unlock
|
||||
}
|
||||
|
||||
|
@ -25,7 +25,7 @@ func TestStalePeerIndices(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
store, err := NewFileStore(storeDir)
|
||||
store, err := NewFileStore(storeDir, nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@ -172,7 +172,7 @@ func TestStore(t *testing.T) {
|
||||
return
|
||||
}
|
||||
|
||||
restored, err := NewFileStore(store.storeFile)
|
||||
restored, err := NewFileStore(store.storeFile, nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@ -232,7 +232,7 @@ func TestRestore(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
store, err := NewFileStore(storeDir)
|
||||
store, err := NewFileStore(storeDir, nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@ -270,7 +270,7 @@ func TestRestorePolicies_Migration(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
store, err := NewFileStore(storeDir)
|
||||
store, err := NewFileStore(storeDir, nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@ -307,7 +307,7 @@ func TestGetAccountByPrivateDomain(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
store, err := NewFileStore(storeDir)
|
||||
store, err := NewFileStore(storeDir, nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@ -336,7 +336,7 @@ func TestFileStore_GetAccount(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
store, err := NewFileStore(storeDir)
|
||||
store, err := NewFileStore(storeDir, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -378,7 +378,7 @@ func TestFileStore_GetTokenIDByHashedToken(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
store, err := NewFileStore(storeDir)
|
||||
store, err := NewFileStore(storeDir, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -431,7 +431,7 @@ func TestFileStore_GetTokenIDByHashedToken_Failure(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
store, err := NewFileStore(storeDir)
|
||||
store, err := NewFileStore(storeDir, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -456,7 +456,7 @@ func TestFileStore_GetUserByTokenID(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
store, err := NewFileStore(storeDir)
|
||||
store, err := NewFileStore(storeDir, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -484,7 +484,7 @@ func TestFileStore_GetUserByTokenID_Failure(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
store, err := NewFileStore(storeDir)
|
||||
store, err := NewFileStore(storeDir, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -503,7 +503,7 @@ func TestFileStore_SavePeerStatus(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
store, err := NewFileStore(storeDir)
|
||||
store, err := NewFileStore(storeDir, nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@ -548,7 +548,7 @@ func TestFileStore_SavePeerStatus(t *testing.T) {
|
||||
}
|
||||
|
||||
func newStore(t *testing.T) *FileStore {
|
||||
store, err := NewFileStore(t.TempDir())
|
||||
store, err := NewFileStore(t.TempDir(), nil)
|
||||
if err != nil {
|
||||
t.Errorf("failed creating a new store")
|
||||
}
|
||||
|
@ -114,6 +114,7 @@ func (s *GRPCServer) GetServerKey(ctx context.Context, req *proto.Empty) (*proto
|
||||
// Sync validates the existence of a connecting peer, sends an initial state (all available for the connecting peers) and
|
||||
// notifies the connected peer of any updates (e.g. new peers under the same account)
|
||||
func (s *GRPCServer) Sync(req *proto.EncryptedMessage, srv proto.ManagementService_SyncServer) error {
|
||||
reqStart := time.Now()
|
||||
if s.appMetrics != nil {
|
||||
s.appMetrics.GRPCMetrics().CountSyncRequest()
|
||||
}
|
||||
@ -148,6 +149,11 @@ func (s *GRPCServer) Sync(req *proto.EncryptedMessage, srv proto.ManagementServi
|
||||
if s.config.TURNConfig.TimeBasedCredentials {
|
||||
s.turnCredentialsManager.SetupRefresh(peer.ID)
|
||||
}
|
||||
|
||||
if s.appMetrics != nil {
|
||||
s.appMetrics.GRPCMetrics().CountSyncRequestDuration(time.Since(reqStart))
|
||||
}
|
||||
|
||||
// keep a connection to the peer and send updates when available
|
||||
for {
|
||||
select {
|
||||
@ -262,6 +268,12 @@ func (s *GRPCServer) parseRequest(req *proto.EncryptedMessage, parsed pb.Message
|
||||
// In case it isn't, the endpoint checks whether setup key is provided within the request and tries to register a peer.
|
||||
// In case of the successful registration login is also successful
|
||||
func (s *GRPCServer) Login(ctx context.Context, req *proto.EncryptedMessage) (*proto.EncryptedMessage, error) {
|
||||
reqStart := time.Now()
|
||||
defer func() {
|
||||
if s.appMetrics != nil {
|
||||
s.appMetrics.GRPCMetrics().CountLoginRequestDuration(time.Since(reqStart))
|
||||
}
|
||||
}()
|
||||
if s.appMetrics != nil {
|
||||
s.appMetrics.GRPCMetrics().CountLoginRequest()
|
||||
}
|
||||
|
@ -2,7 +2,6 @@ package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/netbirdio/netbird/management/server/activity"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@ -10,14 +9,17 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/netbirdio/netbird/encryption"
|
||||
mgmtProto "github.com/netbirdio/netbird/management/proto"
|
||||
"github.com/netbirdio/netbird/util"
|
||||
"github.com/netbirdio/netbird/management/server/activity"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/credentials/insecure"
|
||||
"google.golang.org/grpc/keepalive"
|
||||
|
||||
"github.com/netbirdio/netbird/encryption"
|
||||
mgmtProto "github.com/netbirdio/netbird/management/proto"
|
||||
"github.com/netbirdio/netbird/util"
|
||||
)
|
||||
|
||||
var (
|
||||
@ -408,7 +410,7 @@ func startManagement(t *testing.T, config *Config) (*grpc.Server, string, error)
|
||||
return nil, "", err
|
||||
}
|
||||
s := grpc.NewServer(grpc.KeepaliveEnforcementPolicy(kaep), grpc.KeepaliveParams(kasp))
|
||||
store, err := NewFileStore(config.Datadir)
|
||||
store, err := NewFileStore(config.Datadir, nil)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
@ -496,7 +496,7 @@ func startServer(config *server.Config) (*grpc.Server, net.Listener) {
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
s := grpc.NewServer()
|
||||
|
||||
store, err := server.NewFileStore(config.Datadir)
|
||||
store, err := server.NewFileStore(config.Datadir, nil)
|
||||
if err != nil {
|
||||
log.Fatalf("failed creating a store: %s: %v", config.Datadir, err)
|
||||
}
|
||||
|
@ -1,11 +1,13 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
nbdns "github.com/netbirdio/netbird/dns"
|
||||
"github.com/netbirdio/netbird/management/server/activity"
|
||||
"github.com/stretchr/testify/require"
|
||||
"net/netip"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
nbdns "github.com/netbirdio/netbird/dns"
|
||||
"github.com/netbirdio/netbird/management/server/activity"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -1064,7 +1066,7 @@ func createNSManager(t *testing.T) (*DefaultAccountManager, error) {
|
||||
|
||||
func createNSStore(t *testing.T) (Store, error) {
|
||||
dataDir := t.TempDir()
|
||||
store, err := NewFileStore(dataDir)
|
||||
store, err := NewFileStore(dataDir, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -4,10 +4,11 @@ import (
|
||||
"net/netip"
|
||||
"testing"
|
||||
|
||||
"github.com/netbirdio/netbird/management/server/activity"
|
||||
"github.com/netbirdio/netbird/route"
|
||||
"github.com/rs/xid"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/netbirdio/netbird/management/server/activity"
|
||||
"github.com/netbirdio/netbird/route"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -946,7 +947,7 @@ func createRouterManager(t *testing.T) (*DefaultAccountManager, error) {
|
||||
|
||||
func createRouterStore(t *testing.T) (Store, error) {
|
||||
dataDir := t.TempDir()
|
||||
store, err := NewFileStore(dataDir)
|
||||
store, err := NewFileStore(dataDir, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -3,6 +3,10 @@ package telemetry
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"reflect"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
prometheus2 "github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
@ -10,9 +14,6 @@ import (
|
||||
"go.opentelemetry.io/otel/exporters/prometheus"
|
||||
metric2 "go.opentelemetry.io/otel/metric"
|
||||
"go.opentelemetry.io/otel/sdk/metric"
|
||||
"net"
|
||||
"net/http"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
const defaultEndpoint = "/metrics"
|
||||
@ -25,6 +26,7 @@ type MockAppMetrics struct {
|
||||
IDPMetricsFunc func() *IDPMetrics
|
||||
HTTPMiddlewareFunc func() *HTTPMiddleware
|
||||
GRPCMetricsFunc func() *GRPCMetrics
|
||||
StoreMetricsFunc func() *StoreMetrics
|
||||
}
|
||||
|
||||
// GetMeter mocks the GetMeter function of the AppMetrics interface
|
||||
@ -75,6 +77,14 @@ func (mock *MockAppMetrics) GRPCMetrics() *GRPCMetrics {
|
||||
return nil
|
||||
}
|
||||
|
||||
// StoreMetrics mocks the MockAppMetrics function of the StoreMetrics interface
|
||||
func (mock *MockAppMetrics) StoreMetrics() *StoreMetrics {
|
||||
if mock.StoreMetricsFunc != nil {
|
||||
return mock.StoreMetricsFunc()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// AppMetrics is metrics interface
|
||||
type AppMetrics interface {
|
||||
GetMeter() metric2.Meter
|
||||
@ -83,6 +93,7 @@ type AppMetrics interface {
|
||||
IDPMetrics() *IDPMetrics
|
||||
HTTPMiddleware() *HTTPMiddleware
|
||||
GRPCMetrics() *GRPCMetrics
|
||||
StoreMetrics() *StoreMetrics
|
||||
}
|
||||
|
||||
// defaultAppMetrics are core application metrics based on OpenTelemetry https://opentelemetry.io/
|
||||
@ -94,6 +105,7 @@ type defaultAppMetrics struct {
|
||||
idpMetrics *IDPMetrics
|
||||
httpMiddleware *HTTPMiddleware
|
||||
grpcMetrics *GRPCMetrics
|
||||
storeMetrics *StoreMetrics
|
||||
}
|
||||
|
||||
// IDPMetrics returns metrics for the idp package
|
||||
@ -111,6 +123,11 @@ func (appMetrics *defaultAppMetrics) GRPCMetrics() *GRPCMetrics {
|
||||
return appMetrics.grpcMetrics
|
||||
}
|
||||
|
||||
// StoreMetrics returns metrics for the store
|
||||
func (appMetrics *defaultAppMetrics) StoreMetrics() *StoreMetrics {
|
||||
return appMetrics.storeMetrics
|
||||
}
|
||||
|
||||
// Close stop application metrics HTTP handler and closes listener.
|
||||
func (appMetrics *defaultAppMetrics) Close() error {
|
||||
if appMetrics.listener == nil {
|
||||
@ -171,11 +188,17 @@ func NewDefaultAppMetrics(ctx context.Context) (AppMetrics, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
grpcMetrics, err := NewGRPCMetrics(ctx, meter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
storeMetrics, err := NewStoreMetrics(ctx, meter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &defaultAppMetrics{Meter: meter, ctx: ctx, idpMetrics: idpMetrics, httpMiddleware: middleware,
|
||||
grpcMetrics: grpcMetrics}, nil
|
||||
grpcMetrics: grpcMetrics, storeMetrics: storeMetrics}, nil
|
||||
}
|
||||
|
@ -2,6 +2,8 @@ package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
"go.opentelemetry.io/otel/metric/instrument"
|
||||
"go.opentelemetry.io/otel/metric/instrument/asyncint64"
|
||||
@ -15,6 +17,8 @@ type GRPCMetrics struct {
|
||||
loginRequestsCounter syncint64.Counter
|
||||
getKeyRequestsCounter syncint64.Counter
|
||||
activeStreamsGauge asyncint64.Gauge
|
||||
syncRequestDuration syncint64.Histogram
|
||||
loginRequestDuration syncint64.Histogram
|
||||
ctx context.Context
|
||||
}
|
||||
|
||||
@ -38,12 +42,24 @@ func NewGRPCMetrics(ctx context.Context, meter metric.Meter) (*GRPCMetrics, erro
|
||||
return nil, err
|
||||
}
|
||||
|
||||
syncRequestDuration, err := meter.SyncInt64().Histogram("management.grpc.sync.request.duration.ms", instrument.WithUnit("milliseconds"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
loginRequestDuration, err := meter.SyncInt64().Histogram("management.grpc.login.request.duration.ms", instrument.WithUnit("milliseconds"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &GRPCMetrics{
|
||||
meter: meter,
|
||||
syncRequestsCounter: syncRequestsCounter,
|
||||
loginRequestsCounter: loginRequestsCounter,
|
||||
getKeyRequestsCounter: getKeyRequestsCounter,
|
||||
activeStreamsGauge: activeStreamsGauge,
|
||||
syncRequestDuration: syncRequestDuration,
|
||||
loginRequestDuration: loginRequestDuration,
|
||||
ctx: ctx,
|
||||
}, err
|
||||
}
|
||||
@ -63,6 +79,16 @@ func (grpcMetrics *GRPCMetrics) CountLoginRequest() {
|
||||
grpcMetrics.loginRequestsCounter.Add(grpcMetrics.ctx, 1)
|
||||
}
|
||||
|
||||
// CountLoginRequestDuration counts the duration of the login gRPC requests
|
||||
func (grpcMetrics *GRPCMetrics) CountLoginRequestDuration(duration time.Duration) {
|
||||
grpcMetrics.loginRequestDuration.Record(grpcMetrics.ctx, duration.Milliseconds())
|
||||
}
|
||||
|
||||
// CountSyncRequestDuration counts the duration of the sync gRPC requests
|
||||
func (grpcMetrics *GRPCMetrics) CountSyncRequestDuration(duration time.Duration) {
|
||||
grpcMetrics.syncRequestDuration.Record(grpcMetrics.ctx, duration.Milliseconds())
|
||||
}
|
||||
|
||||
// RegisterConnectedStreams registers a function that collects number of active streams and feeds it to the metrics gauge.
|
||||
func (grpcMetrics *GRPCMetrics) RegisterConnectedStreams(producer func() int64) error {
|
||||
return grpcMetrics.meter.RegisterCallback(
|
||||
|
@ -3,18 +3,21 @@ package telemetry
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"net/http"
|
||||
"strings"
|
||||
time "time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
"go.opentelemetry.io/otel/metric/instrument"
|
||||
"go.opentelemetry.io/otel/metric/instrument/syncint64"
|
||||
"hash/fnv"
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
httpRequestCounterPrefix = "management.http.request.counter"
|
||||
httpResponseCounterPrefix = "management.http.response.counter"
|
||||
httpRequestDurationPrefix = "management.http.request.duration.ms"
|
||||
)
|
||||
|
||||
// WrappedResponseWriter is a wrapper for http.ResponseWriter that allows the
|
||||
@ -51,9 +54,9 @@ func (rw *WrappedResponseWriter) WriteHeader(code int) {
|
||||
type HTTPMiddleware struct {
|
||||
meter metric.Meter
|
||||
ctx context.Context
|
||||
// defaultEndpoint & method
|
||||
// all HTTP requests by endpoint & method
|
||||
httpRequestCounters map[string]syncint64.Counter
|
||||
// defaultEndpoint & method & status code
|
||||
// all HTTP responses by endpoint & method & status code
|
||||
httpResponseCounters map[string]syncint64.Counter
|
||||
// all HTTP requests
|
||||
totalHTTPRequestsCounter syncint64.Counter
|
||||
@ -61,6 +64,48 @@ type HTTPMiddleware struct {
|
||||
totalHTTPResponseCounter syncint64.Counter
|
||||
// all HTTP responses by status code
|
||||
totalHTTPResponseCodeCounters map[int]syncint64.Counter
|
||||
// all HTTP requests durations by endpoint and method
|
||||
httpRequestDurations map[string]syncint64.Histogram
|
||||
// all HTTP requests durations
|
||||
totalHTTPRequestDuration syncint64.Histogram
|
||||
}
|
||||
|
||||
// NewMetricsMiddleware creates a new HTTPMiddleware
|
||||
func NewMetricsMiddleware(ctx context.Context, meter metric.Meter) (*HTTPMiddleware, error) {
|
||||
|
||||
totalHTTPRequestsCounter, err := meter.SyncInt64().Counter(
|
||||
fmt.Sprintf("%s_total", httpRequestCounterPrefix),
|
||||
instrument.WithUnit("1"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
totalHTTPResponseCounter, err := meter.SyncInt64().Counter(
|
||||
fmt.Sprintf("%s_total", httpResponseCounterPrefix),
|
||||
instrument.WithUnit("1"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
totalHTTPRequestDuration, err := meter.SyncInt64().Histogram(
|
||||
fmt.Sprintf("%s_total", httpRequestDurationPrefix),
|
||||
instrument.WithUnit("milliseconds"))
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &HTTPMiddleware{
|
||||
ctx: ctx,
|
||||
httpRequestCounters: map[string]syncint64.Counter{},
|
||||
httpResponseCounters: map[string]syncint64.Counter{},
|
||||
httpRequestDurations: map[string]syncint64.Histogram{},
|
||||
totalHTTPResponseCodeCounters: map[int]syncint64.Counter{},
|
||||
meter: meter,
|
||||
totalHTTPRequestsCounter: totalHTTPRequestsCounter,
|
||||
totalHTTPResponseCounter: totalHTTPResponseCounter,
|
||||
totalHTTPRequestDuration: totalHTTPRequestDuration,
|
||||
},
|
||||
nil
|
||||
}
|
||||
|
||||
// AddHTTPRequestResponseCounter adds a new meter for an HTTP defaultEndpoint and Method (GET, POST, etc)
|
||||
@ -72,6 +117,12 @@ func (m *HTTPMiddleware) AddHTTPRequestResponseCounter(endpoint string, method s
|
||||
return err
|
||||
}
|
||||
m.httpRequestCounters[meterKey] = httpReqCounter
|
||||
durationKey := getRequestDurationKey(endpoint, method)
|
||||
requestDuration, err := m.meter.SyncInt64().Histogram(durationKey, instrument.WithUnit("milliseconds"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.httpRequestDurations[durationKey] = requestDuration
|
||||
respCodes := []int{200, 204, 400, 401, 403, 404, 500, 502, 503}
|
||||
for _, code := range respCodes {
|
||||
meterKey = getResponseCounterKey(endpoint, method, code)
|
||||
@ -92,38 +143,16 @@ func (m *HTTPMiddleware) AddHTTPRequestResponseCounter(endpoint string, method s
|
||||
return nil
|
||||
}
|
||||
|
||||
// NewMetricsMiddleware creates a new HTTPMiddleware
|
||||
func NewMetricsMiddleware(ctx context.Context, meter metric.Meter) (*HTTPMiddleware, error) {
|
||||
|
||||
totalHTTPRequestsCounter, err := meter.SyncInt64().Counter(
|
||||
fmt.Sprintf("%s_total", httpRequestCounterPrefix),
|
||||
instrument.WithUnit("1"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
totalHTTPResponseCounter, err := meter.SyncInt64().Counter(
|
||||
fmt.Sprintf("%s_total", httpResponseCounterPrefix),
|
||||
instrument.WithUnit("1"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &HTTPMiddleware{
|
||||
ctx: ctx,
|
||||
httpRequestCounters: map[string]syncint64.Counter{},
|
||||
httpResponseCounters: map[string]syncint64.Counter{},
|
||||
totalHTTPResponseCodeCounters: map[int]syncint64.Counter{},
|
||||
meter: meter,
|
||||
totalHTTPRequestsCounter: totalHTTPRequestsCounter,
|
||||
totalHTTPResponseCounter: totalHTTPResponseCounter,
|
||||
},
|
||||
nil
|
||||
}
|
||||
|
||||
func getRequestCounterKey(endpoint, method string) string {
|
||||
return fmt.Sprintf("%s%s_%s", httpRequestCounterPrefix,
|
||||
strings.ReplaceAll(endpoint, "/", "_"), method)
|
||||
}
|
||||
|
||||
func getRequestDurationKey(endpoint, method string) string {
|
||||
return fmt.Sprintf("%s%s_%s", httpRequestDurationPrefix,
|
||||
strings.ReplaceAll(endpoint, "/", "_"), method)
|
||||
}
|
||||
|
||||
func getResponseCounterKey(endpoint, method string, status int) string {
|
||||
return fmt.Sprintf("%s%s_%s_%d", httpResponseCounterPrefix,
|
||||
strings.ReplaceAll(endpoint, "/", "_"), method, status)
|
||||
@ -132,6 +161,10 @@ func getResponseCounterKey(endpoint, method string, status int) string {
|
||||
// Handler logs every request and response and adds the, to metrics.
|
||||
func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler {
|
||||
fn := func(rw http.ResponseWriter, r *http.Request) {
|
||||
reqStart := time.Now()
|
||||
defer func() {
|
||||
m.totalHTTPRequestDuration.Record(m.ctx, time.Since(reqStart).Milliseconds())
|
||||
}()
|
||||
traceID := hash(fmt.Sprintf("%v", r))
|
||||
log.Tracef("HTTP request %v: %v %v", traceID, r.Method, r.URL)
|
||||
|
||||
@ -161,6 +194,14 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler {
|
||||
if c, ok := m.totalHTTPResponseCodeCounters[w.Status()]; ok {
|
||||
c.Add(m.ctx, 1)
|
||||
}
|
||||
|
||||
durationKey := getRequestDurationKey(r.URL.Path, r.Method)
|
||||
reqTook := time.Since(reqStart)
|
||||
if c, ok := m.httpRequestDurations[durationKey]; ok {
|
||||
c.Record(m.ctx, reqTook.Milliseconds())
|
||||
}
|
||||
log.Debugf("request %s %s took %d ms", r.Method, r.URL.Path, reqTook.Milliseconds())
|
||||
|
||||
}
|
||||
|
||||
return http.HandlerFunc(fn)
|
||||
|
47
management/server/telemetry/store_metrics.go
Normal file
47
management/server/telemetry/store_metrics.go
Normal file
@ -0,0 +1,47 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
"go.opentelemetry.io/otel/metric/instrument"
|
||||
"go.opentelemetry.io/otel/metric/instrument/syncint64"
|
||||
)
|
||||
|
||||
// StoreMetrics represents all metrics related to the FileStore
|
||||
type StoreMetrics struct {
|
||||
globalLockAcquisitionDuration syncint64.Histogram
|
||||
persistenceDuration syncint64.Histogram
|
||||
ctx context.Context
|
||||
}
|
||||
|
||||
// NewStoreMetrics creates an instance of StoreMetrics
|
||||
func NewStoreMetrics(ctx context.Context, meter metric.Meter) (*StoreMetrics, error) {
|
||||
globalLockAcquisitionDuration, err := meter.SyncInt64().Histogram("management.store.global.lock.acquisition.duration.micro",
|
||||
instrument.WithUnit("microseconds"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
persistenceDuration, err := meter.SyncInt64().Histogram("management.store.persistence.duration.micro",
|
||||
instrument.WithUnit("microseconds"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &StoreMetrics{
|
||||
globalLockAcquisitionDuration: globalLockAcquisitionDuration,
|
||||
persistenceDuration: persistenceDuration,
|
||||
ctx: ctx,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// CountGlobalLockAcquisitionDuration counts the duration of the global lock acquisition
|
||||
func (metrics *StoreMetrics) CountGlobalLockAcquisitionDuration(duration time.Duration) {
|
||||
metrics.globalLockAcquisitionDuration.Record(metrics.ctx, duration.Microseconds())
|
||||
}
|
||||
|
||||
// CountPersistenceDuration counts the duration of a store persistence operation
|
||||
func (metrics *StoreMetrics) CountPersistenceDuration(duration time.Duration) {
|
||||
metrics.persistenceDuration.Record(metrics.ctx, duration.Microseconds())
|
||||
}
|
Loading…
Reference in New Issue
Block a user