mirror of
https://github.com/netbirdio/netbird.git
synced 2025-02-02 11:29:46 +01:00
[management] Refactor HTTP metrics (#2476)
* Add logging for slow SQL queries in SaveAccount and GetAccount * Add resource count log for large accounts * Refactor metrics middleware to simplify counters and histograms * Update log levels and remove redundant resource count check
This commit is contained in:
parent
33b264e598
commit
d97b03656f
@ -476,6 +476,12 @@ func (a *Account) GetPeerNetworkMap(
|
||||
objectCount := int64(len(peersToConnect) + len(expiredPeers) + len(routesUpdate) + len(firewallRules))
|
||||
metrics.CountNetworkMapObjects(objectCount)
|
||||
metrics.CountGetPeerNetworkMapDuration(time.Since(start))
|
||||
|
||||
if objectCount > 5000 {
|
||||
log.WithContext(ctx).Tracef("account: %s has a total resource count of %d objects, "+
|
||||
"peers to connect: %d, expired peers: %d, routes: %d, firewall rules: %d",
|
||||
a.Id, objectCount, len(peersToConnect), len(expiredPeers), len(routesUpdate), len(firewallRules))
|
||||
}
|
||||
}
|
||||
|
||||
return nm
|
||||
|
@ -100,27 +100,6 @@ func APIHandler(ctx context.Context, accountManager s.AccountManager, LocationMa
|
||||
api.addPostureCheckEndpoint()
|
||||
api.addLocationsEndpoint()
|
||||
|
||||
err := api.Router.Walk(func(route *mux.Route, _ *mux.Router, _ []*mux.Route) error {
|
||||
methods, err := route.GetMethods()
|
||||
if err != nil { // we may have wildcard routes from integrations without methods, skip them for now
|
||||
methods = []string{}
|
||||
}
|
||||
for _, method := range methods {
|
||||
template, err := route.GetPathTemplate()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = metricsMiddleware.AddHTTPRequestResponseCounter(template, method)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return rootRouter, nil
|
||||
}
|
||||
|
||||
|
@ -134,6 +134,12 @@ func (s *SqlStore) AcquireReadLockByUID(ctx context.Context, uniqueID string) (u
|
||||
|
||||
func (s *SqlStore) SaveAccount(ctx context.Context, account *Account) error {
|
||||
start := time.Now()
|
||||
defer func() {
|
||||
elapsed := time.Since(start)
|
||||
if elapsed > 1*time.Second {
|
||||
log.WithContext(ctx).Tracef("SaveAccount for account %s exceeded 1s, took: %v", account.Id, elapsed)
|
||||
}
|
||||
}()
|
||||
|
||||
// todo: remove this check after the issue is resolved
|
||||
s.checkAccountDomainBeforeSave(ctx, account.Id, account.Domain)
|
||||
@ -513,6 +519,13 @@ func (s *SqlStore) GetAllAccounts(ctx context.Context) (all []*Account) {
|
||||
}
|
||||
|
||||
func (s *SqlStore) GetAccount(ctx context.Context, accountID string) (*Account, error) {
|
||||
start := time.Now()
|
||||
defer func() {
|
||||
elapsed := time.Since(start)
|
||||
if elapsed > 1*time.Second {
|
||||
log.WithContext(ctx).Tracef("GetAccount for account %s exceeded 1s, took: %v", accountID, elapsed)
|
||||
}
|
||||
}()
|
||||
|
||||
var account Account
|
||||
result := s.db.Model(&account).
|
||||
|
@ -8,6 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/gorilla/mux"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
@ -54,112 +55,89 @@ func (rw *WrappedResponseWriter) WriteHeader(code int) {
|
||||
// HTTPMiddleware handler used to collect metrics of every request/response coming to the API.
|
||||
// Also adds request tracing (logging).
|
||||
type HTTPMiddleware struct {
|
||||
meter metric.Meter
|
||||
ctx context.Context
|
||||
ctx context.Context
|
||||
// all HTTP requests by endpoint & method
|
||||
httpRequestCounters map[string]metric.Int64Counter
|
||||
httpRequestCounter metric.Int64Counter
|
||||
// all HTTP responses by endpoint & method & status code
|
||||
httpResponseCounters map[string]metric.Int64Counter
|
||||
httpResponseCounter metric.Int64Counter
|
||||
// all HTTP requests
|
||||
totalHTTPRequestsCounter metric.Int64Counter
|
||||
// all HTTP responses
|
||||
totalHTTPResponseCounter metric.Int64Counter
|
||||
// all HTTP responses by status code
|
||||
totalHTTPResponseCodeCounters map[int]metric.Int64Counter
|
||||
totalHTTPResponseCodeCounter metric.Int64Counter
|
||||
// all HTTP requests durations by endpoint and method
|
||||
httpRequestDurations map[string]metric.Int64Histogram
|
||||
httpRequestDuration metric.Int64Histogram
|
||||
// all HTTP requests durations
|
||||
totalHTTPRequestDuration metric.Int64Histogram
|
||||
}
|
||||
|
||||
// NewMetricsMiddleware creates a new HTTPMiddleware
|
||||
func NewMetricsMiddleware(ctx context.Context, meter metric.Meter) (*HTTPMiddleware, error) {
|
||||
totalHTTPRequestsCounter, err := meter.Int64Counter(fmt.Sprintf("%s_total", httpRequestCounterPrefix), metric.WithUnit("1"))
|
||||
httpRequestCounter, err := meter.Int64Counter(httpRequestCounterPrefix, metric.WithUnit("1"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
totalHTTPResponseCounter, err := meter.Int64Counter(fmt.Sprintf("%s_total", httpResponseCounterPrefix), metric.WithUnit("1"))
|
||||
httpResponseCounter, err := meter.Int64Counter(httpResponseCounterPrefix, metric.WithUnit("1"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
totalHTTPRequestDuration, err := meter.Int64Histogram(fmt.Sprintf("%s_total", httpRequestDurationPrefix), metric.WithUnit("milliseconds"))
|
||||
totalHTTPRequestsCounter, err := meter.Int64Counter(fmt.Sprintf("%s.total", httpRequestCounterPrefix), metric.WithUnit("1"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
totalHTTPResponseCounter, err := meter.Int64Counter(fmt.Sprintf("%s.total", httpResponseCounterPrefix), metric.WithUnit("1"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
totalHTTPResponseCodeCounter, err := meter.Int64Counter(fmt.Sprintf("%s.code.total", httpResponseCounterPrefix), metric.WithUnit("1"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
httpRequestDuration, err := meter.Int64Histogram(httpRequestDurationPrefix, metric.WithUnit("milliseconds"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
totalHTTPRequestDuration, err := meter.Int64Histogram(fmt.Sprintf("%s.total", httpRequestDurationPrefix), metric.WithUnit("milliseconds"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &HTTPMiddleware{
|
||||
ctx: ctx,
|
||||
httpRequestCounters: map[string]metric.Int64Counter{},
|
||||
httpResponseCounters: map[string]metric.Int64Counter{},
|
||||
httpRequestDurations: map[string]metric.Int64Histogram{},
|
||||
totalHTTPResponseCodeCounters: map[int]metric.Int64Counter{},
|
||||
meter: meter,
|
||||
totalHTTPRequestsCounter: totalHTTPRequestsCounter,
|
||||
totalHTTPResponseCounter: totalHTTPResponseCounter,
|
||||
totalHTTPRequestDuration: totalHTTPRequestDuration,
|
||||
ctx: ctx,
|
||||
httpRequestCounter: httpRequestCounter,
|
||||
httpResponseCounter: httpResponseCounter,
|
||||
httpRequestDuration: httpRequestDuration,
|
||||
totalHTTPResponseCodeCounter: totalHTTPResponseCodeCounter,
|
||||
totalHTTPRequestsCounter: totalHTTPRequestsCounter,
|
||||
totalHTTPResponseCounter: totalHTTPResponseCounter,
|
||||
totalHTTPRequestDuration: totalHTTPRequestDuration,
|
||||
},
|
||||
nil
|
||||
}
|
||||
|
||||
// AddHTTPRequestResponseCounter adds a new meter for an HTTP defaultEndpoint and Method (GET, POST, etc)
|
||||
// Creates one request counter and multiple response counters (one per http response status code).
|
||||
func (m *HTTPMiddleware) AddHTTPRequestResponseCounter(endpoint string, method string) error {
|
||||
meterKey := getRequestCounterKey(endpoint, method)
|
||||
httpReqCounter, err := m.meter.Int64Counter(meterKey, metric.WithUnit("1"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.httpRequestCounters[meterKey] = httpReqCounter
|
||||
|
||||
durationKey := getRequestDurationKey(endpoint, method)
|
||||
requestDuration, err := m.meter.Int64Histogram(durationKey, metric.WithUnit("milliseconds"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.httpRequestDurations[durationKey] = requestDuration
|
||||
|
||||
respCodes := []int{200, 204, 400, 401, 403, 404, 500, 502, 503}
|
||||
for _, code := range respCodes {
|
||||
meterKey = getResponseCounterKey(endpoint, method, code)
|
||||
httpRespCounter, err := m.meter.Int64Counter(meterKey, metric.WithUnit("1"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.httpResponseCounters[meterKey] = httpRespCounter
|
||||
|
||||
meterKey = fmt.Sprintf("%s_%d_total", httpResponseCounterPrefix, code)
|
||||
totalHTTPResponseCodeCounter, err := m.meter.Int64Counter(meterKey, metric.WithUnit("1"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.totalHTTPResponseCodeCounters[code] = totalHTTPResponseCodeCounter
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func replaceEndpointChars(endpoint string) string {
|
||||
endpoint = strings.ReplaceAll(endpoint, "/", "_")
|
||||
endpoint = strings.ReplaceAll(endpoint, "{", "")
|
||||
endpoint = strings.ReplaceAll(endpoint, "}", "")
|
||||
return endpoint
|
||||
}
|
||||
|
||||
func getRequestCounterKey(endpoint, method string) string {
|
||||
endpoint = replaceEndpointChars(endpoint)
|
||||
return fmt.Sprintf("%s%s_%s", httpRequestCounterPrefix, endpoint, method)
|
||||
}
|
||||
|
||||
func getRequestDurationKey(endpoint, method string) string {
|
||||
endpoint = replaceEndpointChars(endpoint)
|
||||
return fmt.Sprintf("%s%s_%s", httpRequestDurationPrefix, endpoint, method)
|
||||
}
|
||||
|
||||
func getResponseCounterKey(endpoint, method string, status int) string {
|
||||
endpoint = replaceEndpointChars(endpoint)
|
||||
return fmt.Sprintf("%s%s_%s_%d", httpResponseCounterPrefix, endpoint, method, status)
|
||||
func getEndpointMetricAttr(r *http.Request) string {
|
||||
var endpoint string
|
||||
route := mux.CurrentRoute(r)
|
||||
if route != nil {
|
||||
pathTmpl, err := route.GetPathTemplate()
|
||||
if err == nil {
|
||||
endpoint = replaceEndpointChars(pathTmpl)
|
||||
}
|
||||
}
|
||||
return endpoint
|
||||
}
|
||||
|
||||
// Handler logs every request and response and adds the, to metrics.
|
||||
@ -176,11 +154,10 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler {
|
||||
|
||||
log.WithContext(ctx).Tracef("HTTP request %v: %v %v", reqID, r.Method, r.URL)
|
||||
|
||||
metricKey := getRequestCounterKey(r.URL.Path, r.Method)
|
||||
endpointAttr := attribute.String("endpoint", getEndpointMetricAttr(r))
|
||||
methodAttr := attribute.String("method", r.Method)
|
||||
|
||||
if c, ok := m.httpRequestCounters[metricKey]; ok {
|
||||
c.Add(m.ctx, 1)
|
||||
}
|
||||
m.httpRequestCounter.Add(m.ctx, 1, metric.WithAttributes(endpointAttr, methodAttr))
|
||||
m.totalHTTPRequestsCounter.Add(m.ctx, 1)
|
||||
|
||||
w := WrapResponseWriter(rw)
|
||||
@ -193,21 +170,14 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler {
|
||||
log.WithContext(ctx).Tracef("HTTP response %v: %v %v status %v", reqID, r.Method, r.URL, w.Status())
|
||||
}
|
||||
|
||||
metricKey = getResponseCounterKey(r.URL.Path, r.Method, w.Status())
|
||||
if c, ok := m.httpResponseCounters[metricKey]; ok {
|
||||
c.Add(m.ctx, 1)
|
||||
}
|
||||
statusCodeAttr := attribute.Int("code", w.Status())
|
||||
|
||||
m.httpResponseCounter.Add(m.ctx, 1, metric.WithAttributes(endpointAttr, methodAttr, statusCodeAttr))
|
||||
m.totalHTTPResponseCounter.Add(m.ctx, 1)
|
||||
if c, ok := m.totalHTTPResponseCodeCounters[w.Status()]; ok {
|
||||
c.Add(m.ctx, 1)
|
||||
}
|
||||
m.totalHTTPResponseCodeCounter.Add(m.ctx, 1, metric.WithAttributes(statusCodeAttr))
|
||||
|
||||
durationKey := getRequestDurationKey(r.URL.Path, r.Method)
|
||||
reqTook := time.Since(reqStart)
|
||||
if c, ok := m.httpRequestDurations[durationKey]; ok {
|
||||
c.Record(m.ctx, reqTook.Milliseconds())
|
||||
}
|
||||
m.httpRequestDuration.Record(m.ctx, reqTook.Milliseconds(), metric.WithAttributes(endpointAttr, methodAttr))
|
||||
log.WithContext(ctx).Debugf("request %s %s took %d ms and finished with status %d", r.Method, r.URL.Path, reqTook.Milliseconds(), w.Status())
|
||||
|
||||
if w.Status() == 200 && (r.Method == http.MethodPut || r.Method == http.MethodPost || r.Method == http.MethodDelete) {
|
||||
|
Loading…
Reference in New Issue
Block a user