From 8060a77b1f192c6b18dc11a1bb2237c09fb45090 Mon Sep 17 00:00:00 2001
From: Gary Hodgson
Date: Thu, 14 Nov 2024 01:02:53 +0100
Subject: [PATCH] feat(logging): Allow configuring logging verbosity level
 (#872)

* introduces TwiN/logr library

* use new features of logr library

* minor tweaks and formatting

* Apply suggestions from code review

---------

Co-authored-by: TwiN
---
 README.md            |  1 +
 config/config.go     |  4 ++++
 go.mod               |  3 ++-
 go.sum               |  2 ++
 main.go              |  7 +++++++
 watchdog/watchdog.go | 35 ++++++++++++++++-------------------
 6 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index 4dda87fc..e65d7a0d 100644
--- a/README.md
+++ b/README.md
@@ -216,6 +216,7 @@ If you want to test it locally, see [Docker](#docker).
 | Parameter                    | Description                                                                                                                              | Default                     |
 |:-----------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------|:----------------------------|
 | `debug`                      | Whether to enable debug logs.                                                                                                            | `false`                     |
+| `log-level`                  | Log level: DEBUG, INFO, WARN, ERROR.                                                                                                     | `INFO`                      |
 | `metrics`                    | Whether to expose metrics at `/metrics`.                                                                                                 | `false`                     |
 | `storage`                    | [Storage configuration](#storage).                                                                                                       | `{}`                        |
 | `alerting`                   | [Alerting configuration](#alerting).                                                                                                     | `{}`                        |
diff --git a/config/config.go b/config/config.go
index 19fb2ab1..58242b1b 100644
--- a/config/config.go
+++ b/config/config.go
@@ -22,6 +22,7 @@ import (
 	"github.com/TwiN/gatus/v5/config/web"
 	"github.com/TwiN/gatus/v5/security"
 	"github.com/TwiN/gatus/v5/storage"
+	"github.com/TwiN/logr"
 	"gopkg.in/yaml.v3"
 )
 
@@ -54,6 +55,9 @@ type Config struct {
 	// Debug Whether to enable debug logs
 	Debug bool `yaml:"debug,omitempty"`
 
+	// LogLevel is one of DEBUG, INFO, WARN and ERROR. Defaults to INFO
+	LogLevel logr.Level `yaml:"log-level,omitempty"`
+
 	// Metrics Whether to expose metrics at /metrics
 	Metrics bool `yaml:"metrics,omitempty"`
 
diff --git a/go.mod b/go.mod
index 7792d251..25b0dea8 100644
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
 module github.com/TwiN/gatus/v5
 
-go 1.22.2
+go 1.22.4
 
 require (
 	code.gitea.io/sdk/gitea v0.19.0
@@ -8,6 +8,7 @@
 	github.com/TwiN/g8/v2 v2.0.0
 	github.com/TwiN/gocache/v2 v2.2.2
 	github.com/TwiN/health v1.6.0
+	github.com/TwiN/logr v0.2.1
 	github.com/TwiN/whois v1.1.9
 	github.com/aws/aws-sdk-go v1.54.10
 	github.com/coreos/go-oidc/v3 v3.11.0
diff --git a/go.sum b/go.sum
index c32ef1a9..7c0a9b1a 100644
--- a/go.sum
+++ b/go.sum
@@ -16,6 +16,8 @@ github.com/TwiN/gocache/v2 v2.2.2 h1:4HToPfDV8FSbaYO5kkbhLpEllUYse5rAf+hVU/mSsuI
 github.com/TwiN/gocache/v2 v2.2.2/go.mod h1:WfIuwd7GR82/7EfQqEtmLFC3a2vqaKbs4Pe6neB7Gyc=
 github.com/TwiN/health v1.6.0 h1:L2ks575JhRgQqWWOfKjw9B0ec172hx7GdToqkYUycQM=
 github.com/TwiN/health v1.6.0/go.mod h1:Z6TszwQPMvtSiVx1QMidVRgvVr4KZGfiwqcD7/Z+3iw=
+github.com/TwiN/logr v0.2.1 h1:kMhUmBBVlFxzqTvyHuNoYQ/uwqg8BW4y0AyZxI5JB3Q=
+github.com/TwiN/logr v0.2.1/go.mod h1:oldDOkRjFXjZqiMP0+ca5NAQHXTiJ02zHirsuBJJH6k=
 github.com/TwiN/whois v1.1.9 h1:m20+m1CXnrstie+tW2ZmAJkfcT9zgwpVRUFsKeMw+ng=
 github.com/TwiN/whois v1.1.9/go.mod h1:TjipCMpJRAJYKmtz/rXQBU6UGxMh6bk8SHazu7OMnQE=
 github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
diff --git a/main.go b/main.go
index 31de744c..54576463 100644
--- a/main.go
+++ b/main.go
@@ -12,6 +12,7 @@ import (
 	"github.com/TwiN/gatus/v5/controller"
 	"github.com/TwiN/gatus/v5/storage/store"
 	"github.com/TwiN/gatus/v5/watchdog"
+	"github.com/TwiN/logr"
 )
 
 func main() {
@@ -23,6 +24,7 @@ func main() {
 	if err != nil {
 		panic(err)
 	}
+	configureLogging(cfg)
 	initializeStorage(cfg)
 	start(cfg)
 	// Wait for termination signal
@@ -57,6 +59,11 @@ func save() {
 	}
 }
 
+func configureLogging(cfg *config.Config) {
+	logr.SetThreshold(cfg.LogLevel)
+	logr.Infof("[main.configureLogging] Log Level is %s", logr.GetThreshold())
+}
+
 func loadConfiguration() (*config.Config, error) {
 	configPath := os.Getenv("GATUS_CONFIG_PATH")
 	// Backwards compatibility
diff --git a/watchdog/watchdog.go b/watchdog/watchdog.go
index 3fcb977a..c185da50 100644
--- a/watchdog/watchdog.go
+++ b/watchdog/watchdog.go
@@ -13,6 +13,7 @@ import (
 	"github.com/TwiN/gatus/v5/config/maintenance"
 	"github.com/TwiN/gatus/v5/metrics"
 	"github.com/TwiN/gatus/v5/storage/store"
+	"github.com/TwiN/logr"
 )
 
 var (
@@ -31,15 +32,15 @@ func Monitor(cfg *config.Config) {
 		if endpoint.IsEnabled() {
 			// To prevent multiple requests from running at the same time, we'll wait for a little before each iteration
 			time.Sleep(777 * time.Millisecond)
-			go monitor(endpoint, cfg.Alerting, cfg.Maintenance, cfg.Connectivity, cfg.DisableMonitoringLock, cfg.Metrics, cfg.Debug, ctx)
+			go monitor(endpoint, cfg.Alerting, cfg.Maintenance, cfg.Connectivity, cfg.DisableMonitoringLock, cfg.Metrics, ctx)
 		}
 	}
 }
 
 // monitor a single endpoint in a loop
-func monitor(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock, enabledMetrics, debug bool, ctx context.Context) {
+func monitor(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool, ctx context.Context) {
 	// Run it immediately on start
-	execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics, debug)
+	execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics)
 	// Loop for the next executions
 	for {
 		select {
@@ -47,15 +48,15 @@ func monitor(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
 			log.Printf("[watchdog.monitor] Canceling current execution of group=%s; endpoint=%s", ep.Group, ep.Name)
 			return
 		case <-time.After(ep.Interval):
-			execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics, debug)
+			execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics)
 		}
 	}
 	// Just in case somebody wandered all the way to here and wonders, "what about ExternalEndpoints?"
 	// Alerting is checked every time an external endpoint is pushed to Gatus, so they're not monitored
 	// periodically like they are for normal endpoints.
 }
 
-func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock, enabledMetrics, debug bool) {
+func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool) {
 	if !disableMonitoringLock {
 		// By placing the lock here, we prevent multiple endpoints from being monitored at the exact same time, which
 		// could cause performance issues and return inaccurate results
@@ -64,37 +65,33 @@ func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
 	}
 	// If there's a connectivity checker configured, check if Gatus has internet connectivity
 	if connectivityConfig != nil && connectivityConfig.Checker != nil && !connectivityConfig.Checker.IsConnected() {
-		log.Println("[watchdog.execute] No connectivity; skipping execution")
+		logr.Infof("[watchdog.execute] No connectivity; skipping execution")
 		return
 	}
-	if debug {
-		log.Printf("[watchdog.execute] Monitoring group=%s; endpoint=%s", ep.Group, ep.Name)
-	}
+	logr.Debugf("[watchdog.execute] Monitoring group=%s; endpoint=%s", ep.Group, ep.Name)
 	result := ep.EvaluateHealth()
 	if enabledMetrics {
 		metrics.PublishMetricsForEndpoint(ep, result)
 	}
 	UpdateEndpointStatuses(ep, result)
-	if debug && !result.Success {
-		log.Printf("[watchdog.execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s; body=%s", ep.Group, ep.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond), result.Body)
+	if logr.GetThreshold() == logr.LevelDebug && !result.Success {
+		logr.Debugf("[watchdog.execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s; body=%s", ep.Group, ep.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond), result.Body)
 	} else {
-		log.Printf("[watchdog.execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s", ep.Group, ep.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond))
+		logr.Infof("[watchdog.execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s", ep.Group, ep.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond))
 	}
 	if !maintenanceConfig.IsUnderMaintenance() {
 		// TODO: Consider moving this after the monitoring lock is unlocked? I mean, how much noise can a single alerting provider cause...
-		HandleAlerting(ep, result, alertingConfig, debug)
-	} else if debug {
-		log.Println("[watchdog.execute] Not handling alerting because currently in the maintenance window")
-	}
-	if debug {
-		log.Printf("[watchdog.execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", ep.Interval, ep.Group, ep.Name)
+		HandleAlerting(ep, result, alertingConfig, logr.GetThreshold() == logr.LevelDebug)
+	} else {
+		logr.Debugf("[watchdog.execute] Not handling alerting because currently in the maintenance window")
 	}
+	logr.Debugf("[watchdog.execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", ep.Interval, ep.Group, ep.Name)
 }
 
 // UpdateEndpointStatuses updates the slice of endpoint statuses
 func UpdateEndpointStatuses(ep *endpoint.Endpoint, result *endpoint.Result) {
 	if err := store.Get().Insert(ep, result); err != nil {
-		log.Println("[watchdog.UpdateEndpointStatuses] Failed to insert result in storage:", err.Error())
+		logr.Errorf("[watchdog.UpdateEndpointStatuses] Failed to insert result in storage: %s", err.Error())
 	}
 }
 
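Note on the mechanism: configureLogging feeds cfg.LogLevel into logr.SetThreshold, and any logr call below that threshold is dropped, which is what replaces the old per-call `if debug {}` guards in watchdog.go. Below is a minimal standalone sketch of that gating, using only the logr identifiers that appear in this patch (SetThreshold, GetThreshold, Debugf, Infof, LevelDebug); the claim that the threshold starts at INFO is an assumption taken from the "Defaults to INFO" comment in config.go, not from the library's documentation.

    package main

    import "github.com/TwiN/logr"

    func main() {
        // Assumed default threshold: INFO, so debug output is filtered out.
        logr.Debugf("suppressed: DEBUG is below the INFO threshold")
        logr.Infof("current threshold is %s", logr.GetThreshold())
        // Raising the threshold to DEBUG is what configureLogging(cfg) does with cfg.LogLevel.
        logr.SetThreshold(logr.LevelDebug)
        logr.Debugf("printed: the threshold now allows debug logs")
    }

Users opt in through the new `log-level` parameter documented in README.md (e.g. `log-level: DEBUG` in the Gatus configuration file); the existing `debug` parameter is left untouched by this patch.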