mirror of
https://github.com/netbirdio/netbird.git
synced 2025-04-27 04:48:29 +02:00
[management] Refactor HTTP metrics (#2476)
* Add logging for slow SQL queries in SaveAccount and GetAccount * Add resource count log for large accounts * Refactor metrics middleware to simplify counters and histograms * Update log levels and remove redundant resource count check
This commit is contained in:
parent
33b264e598
commit
d97b03656f
@ -476,6 +476,12 @@ func (a *Account) GetPeerNetworkMap(
|
|||||||
objectCount := int64(len(peersToConnect) + len(expiredPeers) + len(routesUpdate) + len(firewallRules))
|
objectCount := int64(len(peersToConnect) + len(expiredPeers) + len(routesUpdate) + len(firewallRules))
|
||||||
metrics.CountNetworkMapObjects(objectCount)
|
metrics.CountNetworkMapObjects(objectCount)
|
||||||
metrics.CountGetPeerNetworkMapDuration(time.Since(start))
|
metrics.CountGetPeerNetworkMapDuration(time.Since(start))
|
||||||
|
|
||||||
|
if objectCount > 5000 {
|
||||||
|
log.WithContext(ctx).Tracef("account: %s has a total resource count of %d objects, "+
|
||||||
|
"peers to connect: %d, expired peers: %d, routes: %d, firewall rules: %d",
|
||||||
|
a.Id, objectCount, len(peersToConnect), len(expiredPeers), len(routesUpdate), len(firewallRules))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nm
|
return nm
|
||||||
|
@ -100,27 +100,6 @@ func APIHandler(ctx context.Context, accountManager s.AccountManager, LocationMa
|
|||||||
api.addPostureCheckEndpoint()
|
api.addPostureCheckEndpoint()
|
||||||
api.addLocationsEndpoint()
|
api.addLocationsEndpoint()
|
||||||
|
|
||||||
err := api.Router.Walk(func(route *mux.Route, _ *mux.Router, _ []*mux.Route) error {
|
|
||||||
methods, err := route.GetMethods()
|
|
||||||
if err != nil { // we may have wildcard routes from integrations without methods, skip them for now
|
|
||||||
methods = []string{}
|
|
||||||
}
|
|
||||||
for _, method := range methods {
|
|
||||||
template, err := route.GetPathTemplate()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
err = metricsMiddleware.AddHTTPRequestResponseCounter(template, method)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return rootRouter, nil
|
return rootRouter, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -134,6 +134,12 @@ func (s *SqlStore) AcquireReadLockByUID(ctx context.Context, uniqueID string) (u
|
|||||||
|
|
||||||
func (s *SqlStore) SaveAccount(ctx context.Context, account *Account) error {
|
func (s *SqlStore) SaveAccount(ctx context.Context, account *Account) error {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
defer func() {
|
||||||
|
elapsed := time.Since(start)
|
||||||
|
if elapsed > 1*time.Second {
|
||||||
|
log.WithContext(ctx).Tracef("SaveAccount for account %s exceeded 1s, took: %v", account.Id, elapsed)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
// todo: remove this check after the issue is resolved
|
// todo: remove this check after the issue is resolved
|
||||||
s.checkAccountDomainBeforeSave(ctx, account.Id, account.Domain)
|
s.checkAccountDomainBeforeSave(ctx, account.Id, account.Domain)
|
||||||
@ -513,6 +519,13 @@ func (s *SqlStore) GetAllAccounts(ctx context.Context) (all []*Account) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *SqlStore) GetAccount(ctx context.Context, accountID string) (*Account, error) {
|
func (s *SqlStore) GetAccount(ctx context.Context, accountID string) (*Account, error) {
|
||||||
|
start := time.Now()
|
||||||
|
defer func() {
|
||||||
|
elapsed := time.Since(start)
|
||||||
|
if elapsed > 1*time.Second {
|
||||||
|
log.WithContext(ctx).Tracef("GetAccount for account %s exceeded 1s, took: %v", accountID, elapsed)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
var account Account
|
var account Account
|
||||||
result := s.db.Model(&account).
|
result := s.db.Model(&account).
|
||||||
|
@ -8,6 +8,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
|
"github.com/gorilla/mux"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
"go.opentelemetry.io/otel/attribute"
|
"go.opentelemetry.io/otel/attribute"
|
||||||
"go.opentelemetry.io/otel/metric"
|
"go.opentelemetry.io/otel/metric"
|
||||||
@ -54,48 +55,66 @@ func (rw *WrappedResponseWriter) WriteHeader(code int) {
|
|||||||
// HTTPMiddleware handler used to collect metrics of every request/response coming to the API.
|
// HTTPMiddleware handler used to collect metrics of every request/response coming to the API.
|
||||||
// Also adds request tracing (logging).
|
// Also adds request tracing (logging).
|
||||||
type HTTPMiddleware struct {
|
type HTTPMiddleware struct {
|
||||||
meter metric.Meter
|
|
||||||
ctx context.Context
|
ctx context.Context
|
||||||
// all HTTP requests by endpoint & method
|
// all HTTP requests by endpoint & method
|
||||||
httpRequestCounters map[string]metric.Int64Counter
|
httpRequestCounter metric.Int64Counter
|
||||||
// all HTTP responses by endpoint & method & status code
|
// all HTTP responses by endpoint & method & status code
|
||||||
httpResponseCounters map[string]metric.Int64Counter
|
httpResponseCounter metric.Int64Counter
|
||||||
// all HTTP requests
|
// all HTTP requests
|
||||||
totalHTTPRequestsCounter metric.Int64Counter
|
totalHTTPRequestsCounter metric.Int64Counter
|
||||||
// all HTTP responses
|
// all HTTP responses
|
||||||
totalHTTPResponseCounter metric.Int64Counter
|
totalHTTPResponseCounter metric.Int64Counter
|
||||||
// all HTTP responses by status code
|
// all HTTP responses by status code
|
||||||
totalHTTPResponseCodeCounters map[int]metric.Int64Counter
|
totalHTTPResponseCodeCounter metric.Int64Counter
|
||||||
// all HTTP requests durations by endpoint and method
|
// all HTTP requests durations by endpoint and method
|
||||||
httpRequestDurations map[string]metric.Int64Histogram
|
httpRequestDuration metric.Int64Histogram
|
||||||
// all HTTP requests durations
|
// all HTTP requests durations
|
||||||
totalHTTPRequestDuration metric.Int64Histogram
|
totalHTTPRequestDuration metric.Int64Histogram
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewMetricsMiddleware creates a new HTTPMiddleware
|
// NewMetricsMiddleware creates a new HTTPMiddleware
|
||||||
func NewMetricsMiddleware(ctx context.Context, meter metric.Meter) (*HTTPMiddleware, error) {
|
func NewMetricsMiddleware(ctx context.Context, meter metric.Meter) (*HTTPMiddleware, error) {
|
||||||
totalHTTPRequestsCounter, err := meter.Int64Counter(fmt.Sprintf("%s_total", httpRequestCounterPrefix), metric.WithUnit("1"))
|
httpRequestCounter, err := meter.Int64Counter(httpRequestCounterPrefix, metric.WithUnit("1"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
totalHTTPResponseCounter, err := meter.Int64Counter(fmt.Sprintf("%s_total", httpResponseCounterPrefix), metric.WithUnit("1"))
|
httpResponseCounter, err := meter.Int64Counter(httpResponseCounterPrefix, metric.WithUnit("1"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
totalHTTPRequestDuration, err := meter.Int64Histogram(fmt.Sprintf("%s_total", httpRequestDurationPrefix), metric.WithUnit("milliseconds"))
|
totalHTTPRequestsCounter, err := meter.Int64Counter(fmt.Sprintf("%s.total", httpRequestCounterPrefix), metric.WithUnit("1"))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
totalHTTPResponseCounter, err := meter.Int64Counter(fmt.Sprintf("%s.total", httpResponseCounterPrefix), metric.WithUnit("1"))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
totalHTTPResponseCodeCounter, err := meter.Int64Counter(fmt.Sprintf("%s.code.total", httpResponseCounterPrefix), metric.WithUnit("1"))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
httpRequestDuration, err := meter.Int64Histogram(httpRequestDurationPrefix, metric.WithUnit("milliseconds"))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
totalHTTPRequestDuration, err := meter.Int64Histogram(fmt.Sprintf("%s.total", httpRequestDurationPrefix), metric.WithUnit("milliseconds"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return &HTTPMiddleware{
|
return &HTTPMiddleware{
|
||||||
ctx: ctx,
|
ctx: ctx,
|
||||||
httpRequestCounters: map[string]metric.Int64Counter{},
|
httpRequestCounter: httpRequestCounter,
|
||||||
httpResponseCounters: map[string]metric.Int64Counter{},
|
httpResponseCounter: httpResponseCounter,
|
||||||
httpRequestDurations: map[string]metric.Int64Histogram{},
|
httpRequestDuration: httpRequestDuration,
|
||||||
totalHTTPResponseCodeCounters: map[int]metric.Int64Counter{},
|
totalHTTPResponseCodeCounter: totalHTTPResponseCodeCounter,
|
||||||
meter: meter,
|
|
||||||
totalHTTPRequestsCounter: totalHTTPRequestsCounter,
|
totalHTTPRequestsCounter: totalHTTPRequestsCounter,
|
||||||
totalHTTPResponseCounter: totalHTTPResponseCounter,
|
totalHTTPResponseCounter: totalHTTPResponseCounter,
|
||||||
totalHTTPRequestDuration: totalHTTPRequestDuration,
|
totalHTTPRequestDuration: totalHTTPRequestDuration,
|
||||||
@ -103,63 +122,22 @@ func NewMetricsMiddleware(ctx context.Context, meter metric.Meter) (*HTTPMiddlew
|
|||||||
nil
|
nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// AddHTTPRequestResponseCounter adds a new meter for an HTTP defaultEndpoint and Method (GET, POST, etc)
|
|
||||||
// Creates one request counter and multiple response counters (one per http response status code).
|
|
||||||
func (m *HTTPMiddleware) AddHTTPRequestResponseCounter(endpoint string, method string) error {
|
|
||||||
meterKey := getRequestCounterKey(endpoint, method)
|
|
||||||
httpReqCounter, err := m.meter.Int64Counter(meterKey, metric.WithUnit("1"))
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.httpRequestCounters[meterKey] = httpReqCounter
|
|
||||||
|
|
||||||
durationKey := getRequestDurationKey(endpoint, method)
|
|
||||||
requestDuration, err := m.meter.Int64Histogram(durationKey, metric.WithUnit("milliseconds"))
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.httpRequestDurations[durationKey] = requestDuration
|
|
||||||
|
|
||||||
respCodes := []int{200, 204, 400, 401, 403, 404, 500, 502, 503}
|
|
||||||
for _, code := range respCodes {
|
|
||||||
meterKey = getResponseCounterKey(endpoint, method, code)
|
|
||||||
httpRespCounter, err := m.meter.Int64Counter(meterKey, metric.WithUnit("1"))
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.httpResponseCounters[meterKey] = httpRespCounter
|
|
||||||
|
|
||||||
meterKey = fmt.Sprintf("%s_%d_total", httpResponseCounterPrefix, code)
|
|
||||||
totalHTTPResponseCodeCounter, err := m.meter.Int64Counter(meterKey, metric.WithUnit("1"))
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.totalHTTPResponseCodeCounters[code] = totalHTTPResponseCodeCounter
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func replaceEndpointChars(endpoint string) string {
|
func replaceEndpointChars(endpoint string) string {
|
||||||
endpoint = strings.ReplaceAll(endpoint, "/", "_")
|
|
||||||
endpoint = strings.ReplaceAll(endpoint, "{", "")
|
endpoint = strings.ReplaceAll(endpoint, "{", "")
|
||||||
endpoint = strings.ReplaceAll(endpoint, "}", "")
|
endpoint = strings.ReplaceAll(endpoint, "}", "")
|
||||||
return endpoint
|
return endpoint
|
||||||
}
|
}
|
||||||
|
|
||||||
func getRequestCounterKey(endpoint, method string) string {
|
func getEndpointMetricAttr(r *http.Request) string {
|
||||||
endpoint = replaceEndpointChars(endpoint)
|
var endpoint string
|
||||||
return fmt.Sprintf("%s%s_%s", httpRequestCounterPrefix, endpoint, method)
|
route := mux.CurrentRoute(r)
|
||||||
|
if route != nil {
|
||||||
|
pathTmpl, err := route.GetPathTemplate()
|
||||||
|
if err == nil {
|
||||||
|
endpoint = replaceEndpointChars(pathTmpl)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getRequestDurationKey(endpoint, method string) string {
|
|
||||||
endpoint = replaceEndpointChars(endpoint)
|
|
||||||
return fmt.Sprintf("%s%s_%s", httpRequestDurationPrefix, endpoint, method)
|
|
||||||
}
|
}
|
||||||
|
return endpoint
|
||||||
func getResponseCounterKey(endpoint, method string, status int) string {
|
|
||||||
endpoint = replaceEndpointChars(endpoint)
|
|
||||||
return fmt.Sprintf("%s%s_%s_%d", httpResponseCounterPrefix, endpoint, method, status)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handler logs every request and response and adds the, to metrics.
|
// Handler logs every request and response and adds the, to metrics.
|
||||||
@ -176,11 +154,10 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler {
|
|||||||
|
|
||||||
log.WithContext(ctx).Tracef("HTTP request %v: %v %v", reqID, r.Method, r.URL)
|
log.WithContext(ctx).Tracef("HTTP request %v: %v %v", reqID, r.Method, r.URL)
|
||||||
|
|
||||||
metricKey := getRequestCounterKey(r.URL.Path, r.Method)
|
endpointAttr := attribute.String("endpoint", getEndpointMetricAttr(r))
|
||||||
|
methodAttr := attribute.String("method", r.Method)
|
||||||
|
|
||||||
if c, ok := m.httpRequestCounters[metricKey]; ok {
|
m.httpRequestCounter.Add(m.ctx, 1, metric.WithAttributes(endpointAttr, methodAttr))
|
||||||
c.Add(m.ctx, 1)
|
|
||||||
}
|
|
||||||
m.totalHTTPRequestsCounter.Add(m.ctx, 1)
|
m.totalHTTPRequestsCounter.Add(m.ctx, 1)
|
||||||
|
|
||||||
w := WrapResponseWriter(rw)
|
w := WrapResponseWriter(rw)
|
||||||
@ -193,21 +170,14 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler {
|
|||||||
log.WithContext(ctx).Tracef("HTTP response %v: %v %v status %v", reqID, r.Method, r.URL, w.Status())
|
log.WithContext(ctx).Tracef("HTTP response %v: %v %v status %v", reqID, r.Method, r.URL, w.Status())
|
||||||
}
|
}
|
||||||
|
|
||||||
metricKey = getResponseCounterKey(r.URL.Path, r.Method, w.Status())
|
statusCodeAttr := attribute.Int("code", w.Status())
|
||||||
if c, ok := m.httpResponseCounters[metricKey]; ok {
|
|
||||||
c.Add(m.ctx, 1)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
m.httpResponseCounter.Add(m.ctx, 1, metric.WithAttributes(endpointAttr, methodAttr, statusCodeAttr))
|
||||||
m.totalHTTPResponseCounter.Add(m.ctx, 1)
|
m.totalHTTPResponseCounter.Add(m.ctx, 1)
|
||||||
if c, ok := m.totalHTTPResponseCodeCounters[w.Status()]; ok {
|
m.totalHTTPResponseCodeCounter.Add(m.ctx, 1, metric.WithAttributes(statusCodeAttr))
|
||||||
c.Add(m.ctx, 1)
|
|
||||||
}
|
|
||||||
|
|
||||||
durationKey := getRequestDurationKey(r.URL.Path, r.Method)
|
|
||||||
reqTook := time.Since(reqStart)
|
reqTook := time.Since(reqStart)
|
||||||
if c, ok := m.httpRequestDurations[durationKey]; ok {
|
m.httpRequestDuration.Record(m.ctx, reqTook.Milliseconds(), metric.WithAttributes(endpointAttr, methodAttr))
|
||||||
c.Record(m.ctx, reqTook.Milliseconds())
|
|
||||||
}
|
|
||||||
log.WithContext(ctx).Debugf("request %s %s took %d ms and finished with status %d", r.Method, r.URL.Path, reqTook.Milliseconds(), w.Status())
|
log.WithContext(ctx).Debugf("request %s %s took %d ms and finished with status %d", r.Method, r.URL.Path, reqTook.Milliseconds(), w.Status())
|
||||||
|
|
||||||
if w.Status() == 200 && (r.Method == http.MethodPut || r.Method == http.MethodPost || r.Method == http.MethodDelete) {
|
if w.Status() == 200 && (r.Method == http.MethodPut || r.Method == http.MethodPost || r.Method == http.MethodDelete) {
|
||||||
|
Loading…
Reference in New Issue
Block a user