diff --git a/.examples/docker-compose-multiple-config-files/config/global.yaml b/.examples/docker-compose-multiple-config-files/config/global.yaml index 44e54fa2..c6c73995 100644 --- a/.examples/docker-compose-multiple-config-files/config/global.yaml +++ b/.examples/docker-compose-multiple-config-files/config/global.yaml @@ -1,5 +1,4 @@ metrics: true -debug: false ui: header: Example Company link: https://example.org diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index d1a30256..b9a463d2 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -22,7 +22,7 @@ jobs: steps: - uses: actions/setup-go@v5 with: - go-version: 1.22.2 + go-version: 1.23.3 repository: "${{ github.event.inputs.repository || 'TwiN/gatus' }}" ref: "${{ github.event.inputs.ref || 'master' }}" - uses: actions/checkout@v4 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e53da2e2..f1551f0d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,7 +18,7 @@ jobs: steps: - uses: actions/setup-go@v5 with: - go-version: 1.22.2 + go-version: 1.23.3 - uses: actions/checkout@v4 - name: Build binary to make sure it works run: go build diff --git a/Dockerfile b/Dockerfile index d2bedac1..6f6a540b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,6 +15,8 @@ FROM scratch COPY --from=builder /app/gatus . COPY --from=builder /app/config.yaml ./config/config.yaml COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt -ENV PORT=8080 +ENV GATUS_CONFIG_PATH="" +ENV GATUS_LOG_LEVEL="INFO" +ENV PORT="8080" EXPOSE ${PORT} ENTRYPOINT ["/gatus"] diff --git a/README.md b/README.md index e65d7a0d..a6b02fad 100644 --- a/README.md +++ b/README.md @@ -202,7 +202,7 @@ If `GATUS_CONFIG_PATH` points to a directory, all `*.yaml` and `*.yml` files ins subdirectories are merged like so: - All maps/objects are deep merged (i.e. you could define `alerting.slack` in one file and `alerting.pagerduty` in another file) - All slices/arrays are appended (i.e. you can define `endpoints` in multiple files and each endpoint will be added to the final list of endpoints) -- Parameters with a primitive value (e.g. `debug`, `metrics`, `alerting.slack.webhook-url`, etc.) may only be defined once to forcefully avoid any ambiguity +- Parameters with a primitive value (e.g. `metrics`, `alerting.slack.webhook-url`, etc.) may only be defined once to forcefully avoid any ambiguity - To clarify, this also means that you could not define `alerting.slack.webhook-url` in two files with different values. All files are merged into one before they are processed. This is by design. > 💡 You can also use environment variables in the configuration file (e.g. `$DOMAIN`, `${DOMAIN}`) @@ -215,8 +215,6 @@ If you want to test it locally, see [Docker](#docker). ## Configuration | Parameter | Description | Default | |:-----------------------------|:-------------------------------------------------------------------------------------------------------------------------------------|:---------------------------| -| `debug` | Whether to enable debug logs. | `false` | -| `log-level` | Log level: DEBUG, INFO, WARN, ERROR. | `INFO` | | `metrics` | Whether to expose metrics at `/metrics`. | `false` | | `storage` | [Storage configuration](#storage). | `{}` | | `alerting` | [Alerting configuration](#alerting). | `{}` | @@ -242,6 +240,9 @@ If you want to test it locally, see [Docker](#docker). | `ui.buttons[].link` | Link to open when the button is clicked. | Required `""` | | `maintenance` | [Maintenance configuration](#maintenance). | `{}` | +If you want more verbose logging, you may set the `GATUS_LOG_LEVEL` environment variable to `DEBUG`. +Conversely, if you want less verbose logging, you can set the aforementioned environment variable to `WARN`, `ERROR` or `FATAL`. +The default value for `GATUS_LOG_LEVEL` is `INFO`. ### Endpoints Endpoints are URLs, applications, or services that you want to monitor. Each endpoint has a list of conditions that are diff --git a/alerting/config.go b/alerting/config.go index 9148670f..7a41e8b6 100644 --- a/alerting/config.go +++ b/alerting/config.go @@ -1,7 +1,6 @@ package alerting import ( - "log" "reflect" "strings" @@ -30,6 +29,7 @@ import ( "github.com/TwiN/gatus/v5/alerting/provider/telegram" "github.com/TwiN/gatus/v5/alerting/provider/twilio" "github.com/TwiN/gatus/v5/alerting/provider/zulip" + "github.com/TwiN/logr" ) // Config is the configuration for alerting providers @@ -118,7 +118,7 @@ func (config *Config) GetAlertingProviderByAlertType(alertType alert.Type) provi return fieldValue.Interface().(provider.AlertProvider) } } - log.Printf("[alerting.GetAlertingProviderByAlertType] No alerting provider found for alert type %s", alertType) + logr.Infof("[alerting.GetAlertingProviderByAlertType] No alerting provider found for alert type %s", alertType) return nil } diff --git a/alerting/provider/pagerduty/pagerduty.go b/alerting/provider/pagerduty/pagerduty.go index cc76751f..8eacf860 100644 --- a/alerting/provider/pagerduty/pagerduty.go +++ b/alerting/provider/pagerduty/pagerduty.go @@ -5,12 +5,12 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "github.com/TwiN/gatus/v5/alerting/alert" "github.com/TwiN/gatus/v5/client" "github.com/TwiN/gatus/v5/config/endpoint" + "github.com/TwiN/logr" ) const ( @@ -74,11 +74,10 @@ func (provider *AlertProvider) Send(ep *endpoint.Endpoint, alert *alert.Alert, r alert.ResolveKey = "" } else { // We need to retrieve the resolve key from the response - body, err := io.ReadAll(response.Body) var payload pagerDutyResponsePayload - if err = json.Unmarshal(body, &payload); err != nil { + if err = json.NewDecoder(response.Body).Decode(&payload); err != nil { // Silently fail. We don't want to create tons of alerts just because we failed to parse the body. - log.Printf("[pagerduty.Send] Ran into error unmarshaling pagerduty response: %s", err.Error()) + logr.Errorf("[pagerduty.Send] Ran into error decoding pagerduty response: %s", err.Error()) } else { alert.ResolveKey = payload.DedupKey } diff --git a/api/api.go b/api/api.go index 4635708b..acaccbeb 100644 --- a/api/api.go +++ b/api/api.go @@ -2,7 +2,6 @@ package api import ( "io/fs" - "log" "net/http" "os" @@ -10,6 +9,7 @@ import ( "github.com/TwiN/gatus/v5/config/web" static "github.com/TwiN/gatus/v5/web" "github.com/TwiN/health" + "github.com/TwiN/logr" fiber "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/adaptor" "github.com/gofiber/fiber/v2/middleware/compress" @@ -28,7 +28,7 @@ type API struct { func New(cfg *config.Config) *API { api := &API{} if cfg.Web == nil { - log.Println("[api.New] nil web config passed as parameter. This should only happen in tests. Using default web configuration") + logr.Warnf("[api.New] nil web config passed as parameter. This should only happen in tests. Using default web configuration") cfg.Web = web.GetDefaultConfig() } api.router = api.createRouter(cfg) @@ -42,7 +42,7 @@ func (a *API) Router() *fiber.App { func (a *API) createRouter(cfg *config.Config) *fiber.App { app := fiber.New(fiber.Config{ ErrorHandler: func(c *fiber.Ctx, err error) error { - log.Printf("[api.ErrorHandler] %s", err.Error()) + logr.Errorf("[api.ErrorHandler] %s", err.Error()) return fiber.DefaultErrorHandler(c, err) }, ReadBufferSize: cfg.Web.ReadBufferSize, diff --git a/api/chart.go b/api/chart.go index ff118e1d..81a4d11d 100644 --- a/api/chart.go +++ b/api/chart.go @@ -2,7 +2,6 @@ package api import ( "errors" - "log" "math" "net/http" "sort" @@ -10,6 +9,7 @@ import ( "github.com/TwiN/gatus/v5/storage/store" "github.com/TwiN/gatus/v5/storage/store/common" + "github.com/TwiN/logr" "github.com/gofiber/fiber/v2" "github.com/wcharczuk/go-chart/v2" "github.com/wcharczuk/go-chart/v2/drawing" @@ -116,7 +116,7 @@ func ResponseTimeChart(c *fiber.Ctx) error { c.Set("Expires", "0") c.Status(http.StatusOK) if err := graph.Render(chart.SVG, c); err != nil { - log.Println("[api.ResponseTimeChart] Failed to render response time chart:", err.Error()) + logr.Errorf("[api.ResponseTimeChart] Failed to render response time chart: %s", err.Error()) return c.Status(500).SendString(err.Error()) } return nil diff --git a/api/endpoint_status.go b/api/endpoint_status.go index b97eee11..64c5df72 100644 --- a/api/endpoint_status.go +++ b/api/endpoint_status.go @@ -4,7 +4,6 @@ import ( "encoding/json" "errors" "fmt" - "log" "github.com/TwiN/gatus/v5/client" "github.com/TwiN/gatus/v5/config" @@ -13,6 +12,7 @@ import ( "github.com/TwiN/gatus/v5/storage/store" "github.com/TwiN/gatus/v5/storage/store/common" "github.com/TwiN/gatus/v5/storage/store/common/paging" + "github.com/TwiN/logr" "github.com/gofiber/fiber/v2" ) @@ -26,19 +26,19 @@ func EndpointStatuses(cfg *config.Config) fiber.Handler { if !exists { endpointStatuses, err := store.Get().GetAllEndpointStatuses(paging.NewEndpointStatusParams().WithResults(page, pageSize)) if err != nil { - log.Printf("[api.EndpointStatuses] Failed to retrieve endpoint statuses: %s", err.Error()) + logr.Errorf("[api.EndpointStatuses] Failed to retrieve endpoint statuses: %s", err.Error()) return c.Status(500).SendString(err.Error()) } // ALPHA: Retrieve endpoint statuses from remote instances if endpointStatusesFromRemote, err := getEndpointStatusesFromRemoteInstances(cfg.Remote); err != nil { - log.Printf("[handler.EndpointStatuses] Silently failed to retrieve endpoint statuses from remote: %s", err.Error()) + logr.Errorf("[handler.EndpointStatuses] Silently failed to retrieve endpoint statuses from remote: %s", err.Error()) } else if endpointStatusesFromRemote != nil { endpointStatuses = append(endpointStatuses, endpointStatusesFromRemote...) } // Marshal endpoint statuses to JSON data, err = json.Marshal(endpointStatuses) if err != nil { - log.Printf("[api.EndpointStatuses] Unable to marshal object to JSON: %s", err.Error()) + logr.Errorf("[api.EndpointStatuses] Unable to marshal object to JSON: %s", err.Error()) return c.Status(500).SendString("unable to marshal object to JSON") } cache.SetWithTTL(fmt.Sprintf("endpoint-status-%d-%d", page, pageSize), data, cacheTTL) @@ -64,7 +64,7 @@ func getEndpointStatusesFromRemoteInstances(remoteConfig *remote.Config) ([]*end var endpointStatuses []*endpoint.Status if err = json.NewDecoder(response.Body).Decode(&endpointStatuses); err != nil { _ = response.Body.Close() - log.Printf("[api.getEndpointStatusesFromRemoteInstances] Silently failed to retrieve endpoint statuses from %s: %s", instance.URL, err.Error()) + logr.Errorf("[api.getEndpointStatusesFromRemoteInstances] Silently failed to retrieve endpoint statuses from %s: %s", instance.URL, err.Error()) continue } _ = response.Body.Close() @@ -84,16 +84,16 @@ func EndpointStatus(c *fiber.Ctx) error { if errors.Is(err, common.ErrEndpointNotFound) { return c.Status(404).SendString(err.Error()) } - log.Printf("[api.EndpointStatus] Failed to retrieve endpoint status: %s", err.Error()) + logr.Errorf("[api.EndpointStatus] Failed to retrieve endpoint status: %s", err.Error()) return c.Status(500).SendString(err.Error()) } if endpointStatus == nil { // XXX: is this check necessary? - log.Printf("[api.EndpointStatus] Endpoint with key=%s not found", c.Params("key")) + logr.Errorf("[api.EndpointStatus] Endpoint with key=%s not found", c.Params("key")) return c.Status(404).SendString("not found") } output, err := json.Marshal(endpointStatus) if err != nil { - log.Printf("[api.EndpointStatus] Unable to marshal object to JSON: %s", err.Error()) + logr.Errorf("[api.EndpointStatus] Unable to marshal object to JSON: %s", err.Error()) return c.Status(500).SendString("unable to marshal object to JSON") } c.Set("Content-Type", "application/json") diff --git a/api/external_endpoint.go b/api/external_endpoint.go index 6cbe6db3..c6492933 100644 --- a/api/external_endpoint.go +++ b/api/external_endpoint.go @@ -2,7 +2,6 @@ package api import ( "errors" - "log" "strings" "time" @@ -11,6 +10,7 @@ import ( "github.com/TwiN/gatus/v5/storage/store" "github.com/TwiN/gatus/v5/storage/store/common" "github.com/TwiN/gatus/v5/watchdog" + "github.com/TwiN/logr" "github.com/gofiber/fiber/v2" ) @@ -33,11 +33,11 @@ func CreateExternalEndpointResult(cfg *config.Config) fiber.Handler { key := c.Params("key") externalEndpoint := cfg.GetExternalEndpointByKey(key) if externalEndpoint == nil { - log.Printf("[api.CreateExternalEndpointResult] External endpoint with key=%s not found", key) + logr.Errorf("[api.CreateExternalEndpointResult] External endpoint with key=%s not found", key) return c.Status(404).SendString("not found") } if externalEndpoint.Token != token { - log.Printf("[api.CreateExternalEndpointResult] Invalid token for external endpoint with key=%s", key) + logr.Errorf("[api.CreateExternalEndpointResult] Invalid token for external endpoint with key=%s", key) return c.Status(401).SendString("invalid token") } // Persist the result in the storage @@ -54,13 +54,13 @@ func CreateExternalEndpointResult(cfg *config.Config) fiber.Handler { if errors.Is(err, common.ErrEndpointNotFound) { return c.Status(404).SendString(err.Error()) } - log.Printf("[api.CreateExternalEndpointResult] Failed to insert result in storage: %s", err.Error()) + logr.Errorf("[api.CreateExternalEndpointResult] Failed to insert result in storage: %s", err.Error()) return c.Status(500).SendString(err.Error()) } - log.Printf("[api.CreateExternalEndpointResult] Successfully inserted result for external endpoint with key=%s and success=%s", c.Params("key"), success) + logr.Infof("[api.CreateExternalEndpointResult] Successfully inserted result for external endpoint with key=%s and success=%s", c.Params("key"), success) // Check if an alert should be triggered or resolved if !cfg.Maintenance.IsUnderMaintenance() { - watchdog.HandleAlerting(convertedEndpoint, result, cfg.Alerting, cfg.Debug) + watchdog.HandleAlerting(convertedEndpoint, result, cfg.Alerting) externalEndpoint.NumberOfSuccessesInARow = convertedEndpoint.NumberOfSuccessesInARow externalEndpoint.NumberOfFailuresInARow = convertedEndpoint.NumberOfFailuresInARow } diff --git a/api/spa.go b/api/spa.go index 587eaf89..7f2a2c6a 100644 --- a/api/spa.go +++ b/api/spa.go @@ -3,10 +3,10 @@ package api import ( _ "embed" "html/template" - "log" "github.com/TwiN/gatus/v5/config/ui" static "github.com/TwiN/gatus/v5/web" + "github.com/TwiN/logr" "github.com/gofiber/fiber/v2" ) @@ -15,14 +15,14 @@ func SinglePageApplication(ui *ui.Config) fiber.Handler { t, err := template.ParseFS(static.FileSystem, static.IndexPath) if err != nil { // This should never happen, because ui.ValidateAndSetDefaults validates that the template works. - log.Println("[api.SinglePageApplication] Failed to parse template. This should never happen, because the template is validated on start. Error:", err.Error()) + logr.Errorf("[api.SinglePageApplication] Failed to parse template. This should never happen, because the template is validated on start. Error: %s", err.Error()) return c.Status(500).SendString("Failed to parse template. This should never happen, because the template is validated on start.") } c.Set("Content-Type", "text/html") err = t.Execute(c, ui) if err != nil { // This should never happen, because ui.ValidateAndSetDefaults validates that the template works. - log.Println("[api.SinglePageApplication] Failed to execute template. This should never happen, because the template is validated on start. Error:", err.Error()) + logr.Errorf("[api.SinglePageApplication] Failed to execute template. This should never happen, because the template is validated on start. Error: %s", err.Error()) return c.Status(500).SendString("Failed to parse template. This should never happen, because the template is validated on start.") } return c.SendStatus(200) diff --git a/client/client.go b/client/client.go index b63fb93b..e5c1503f 100644 --- a/client/client.go +++ b/client/client.go @@ -110,7 +110,6 @@ func CanCreateSCTPConnection(address string, config *Config) bool { _ = conn.Close() res <- true })(ch) - select { case result := <-ch: return result @@ -182,7 +181,6 @@ func CanCreateSSHConnection(address, username, password string, config *Config) } else { port = "22" } - cli, err := ssh.Dial("tcp", strings.Join([]string{address, port}, ":"), &ssh.ClientConfig{ HostKeyCallback: ssh.InsecureIgnoreHostKey(), User: username, @@ -194,7 +192,6 @@ func CanCreateSSHConnection(address, username, password string, config *Config) if err != nil { return false, nil, err } - return true, cli, nil } @@ -203,37 +200,29 @@ func ExecuteSSHCommand(sshClient *ssh.Client, body string, config *Config) (bool type Body struct { Command string `json:"command"` } - defer sshClient.Close() - var b Body if err := json.Unmarshal([]byte(body), &b); err != nil { return false, 0, err } - sess, err := sshClient.NewSession() if err != nil { return false, 0, err } - err = sess.Start(b.Command) if err != nil { return false, 0, err } - defer sess.Close() - err = sess.Wait() if err == nil { return true, 0, nil } - - e, ok := err.(*ssh.ExitError) - if !ok { + var exitErr *ssh.ExitError + if ok := errors.As(err, &exitErr); !ok { return false, 0, err } - - return true, e.ExitStatus(), nil + return true, exitErr.ExitStatus(), nil } // Ping checks if an address can be pinged and returns the round-trip time if the address can be pinged diff --git a/client/config.go b/client/config.go index 79effcee..370c8511 100644 --- a/client/config.go +++ b/client/config.go @@ -4,7 +4,6 @@ import ( "context" "crypto/tls" "errors" - "log" "net" "net/http" "net/url" @@ -12,6 +11,7 @@ import ( "strconv" "time" + "github.com/TwiN/logr" "golang.org/x/oauth2" "golang.org/x/oauth2/clientcredentials" "google.golang.org/api/idtoken" @@ -232,7 +232,7 @@ func (c *Config) getHTTPClient() *http.Client { if c.ProxyURL != "" { proxyURL, err := url.Parse(c.ProxyURL) if err != nil { - log.Println("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring custom proxy due to error:", err.Error()) + logr.Errorf("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring custom proxy due to error: %s", err.Error()) } else { c.httpClient.Transport.(*http.Transport).Proxy = http.ProxyURL(proxyURL) } @@ -242,7 +242,7 @@ func (c *Config) getHTTPClient() *http.Client { if err != nil { // We're ignoring the error, because it should have been validated on startup ValidateAndSetDefaults. // It shouldn't happen, but if it does, we'll log it... Better safe than sorry ;) - log.Println("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring invalid DNS resolver due to error:", err.Error()) + logr.Errorf("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring invalid DNS resolver due to error: %s", err.Error()) } else { dialer := &net.Dialer{ Resolver: &net.Resolver{ @@ -259,7 +259,7 @@ func (c *Config) getHTTPClient() *http.Client { } } if c.HasOAuth2Config() && c.HasIAPConfig() { - log.Println("[client.getHTTPClient] Error: Both Identity-Aware-Proxy and Oauth2 configuration are present.") + logr.Errorf("[client.getHTTPClient] Error: Both Identity-Aware-Proxy and Oauth2 configuration are present.") } else if c.HasOAuth2Config() { c.httpClient = configureOAuth2(c.httpClient, *c.OAuth2Config) } else if c.HasIAPConfig() { @@ -269,23 +269,22 @@ func (c *Config) getHTTPClient() *http.Client { return c.httpClient } -// validateIAPToken returns a boolean that will define if the google identity-aware-proxy token can be fetch +// validateIAPToken returns a boolean that will define if the Google identity-aware-proxy token can be fetched // and if is it valid. func validateIAPToken(ctx context.Context, c IAPConfig) bool { ts, err := idtoken.NewTokenSource(ctx, c.Audience) if err != nil { - log.Println("[client.ValidateIAPToken] Claiming Identity token failed. error:", err.Error()) + logr.Errorf("[client.ValidateIAPToken] Claiming Identity token failed: %s", err.Error()) return false } tok, err := ts.Token() if err != nil { - log.Println("[client.ValidateIAPToken] Get Identity-Aware-Proxy token failed. error:", err.Error()) + logr.Errorf("[client.ValidateIAPToken] Get Identity-Aware-Proxy token failed: %s", err.Error()) return false } - payload, err := idtoken.Validate(ctx, tok.AccessToken, c.Audience) - _ = payload + _, err = idtoken.Validate(ctx, tok.AccessToken, c.Audience) if err != nil { - log.Println("[client.ValidateIAPToken] Token Validation failed. error:", err.Error()) + logr.Errorf("[client.ValidateIAPToken] Token Validation failed: %s", err.Error()) return false } return true @@ -298,7 +297,7 @@ func configureIAP(httpClient *http.Client, c IAPConfig) *http.Client { if validateIAPToken(ctx, c) { ts, err := idtoken.NewTokenSource(ctx, c.Audience) if err != nil { - log.Println("[client.ConfigureIAP] Claiming Token Source failed. error:", err.Error()) + logr.Errorf("[client.configureIAP] Claiming Token Source failed: %s", err.Error()) return httpClient } client := oauth2.NewClient(ctx, ts) @@ -327,17 +326,17 @@ func configureOAuth2(httpClient *http.Client, c OAuth2Config) *http.Client { func configureTLS(tlsConfig *tls.Config, c TLSConfig) *tls.Config { clientTLSCert, err := tls.LoadX509KeyPair(c.CertificateFile, c.PrivateKeyFile) if err != nil { + logr.Errorf("[client.configureTLS] Failed to load certificate: %s", err.Error()) return nil } tlsConfig.Certificates = []tls.Certificate{clientTLSCert} tlsConfig.Renegotiation = tls.RenegotiateNever - - renegotionSupport := map[string]tls.RenegotiationSupport{ + renegotiationSupport := map[string]tls.RenegotiationSupport{ "once": tls.RenegotiateOnceAsClient, "freely": tls.RenegotiateFreelyAsClient, "never": tls.RenegotiateNever, } - if val, ok := renegotionSupport[c.RenegotiationSupport]; ok { + if val, ok := renegotiationSupport[c.RenegotiationSupport]; ok { tlsConfig.Renegotiation = val } return tlsConfig diff --git a/config/config.go b/config/config.go index 58242b1b..210752f4 100644 --- a/config/config.go +++ b/config/config.go @@ -4,7 +4,6 @@ import ( "errors" "fmt" "io/fs" - "log" "os" "path/filepath" "strings" @@ -53,11 +52,9 @@ var ( // Config is the main configuration structure type Config struct { // Debug Whether to enable debug logs + // Deprecated: Use the GATUS_LOG_LEVEL environment variable instead Debug bool `yaml:"debug,omitempty"` - // LogLevel is one of DEBUG, INFO, WARN and ERROR. Defaults to INFO - LogLevel logr.Level `yaml:"log-level,omitempty"` - // Metrics Whether to expose metrics at /metrics Metrics bool `yaml:"metrics,omitempty"` @@ -176,13 +173,11 @@ func LoadConfiguration(configPath string) (*Config, error) { if fileInfo.IsDir() { err := walkConfigDir(configPath, func(path string, d fs.DirEntry, err error) error { if err != nil { - log.Printf("[config.LoadConfiguration] Error walking path=%s: %s", path, err) - return err + return fmt.Errorf("error walking path %s: %w", path, err) } - log.Printf("[config.LoadConfiguration] Reading configuration from %s", path) + logr.Infof("[config.LoadConfiguration] Reading configuration from %s", path) data, err := os.ReadFile(path) if err != nil { - log.Printf("[config.LoadConfiguration] Error reading configuration from %s: %s", path, err) return fmt.Errorf("error reading configuration from file %s: %w", path, err) } configBytes, err = deepmerge.YAML(configBytes, data) @@ -192,9 +187,9 @@ func LoadConfiguration(configPath string) (*Config, error) { return nil, fmt.Errorf("error reading configuration from directory %s: %w", usedConfigPath, err) } } else { - log.Printf("[config.LoadConfiguration] Reading configuration from configFile=%s", usedConfigPath) + logr.Infof("[config.LoadConfiguration] Reading configuration from configFile=%s", usedConfigPath) if data, err := os.ReadFile(usedConfigPath); err != nil { - return nil, err + return nil, fmt.Errorf("error reading configuration from directory %s: %w", usedConfigPath, err) } else { configBytes = data } @@ -204,11 +199,11 @@ func LoadConfiguration(configPath string) (*Config, error) { } config, err := parseAndValidateConfigBytes(configBytes) if err != nil { - return nil, err + return nil, fmt.Errorf("error parsing config: %w", err) } config.configPath = usedConfigPath config.UpdateLastFileModTime() - return config, err + return config, nil } // walkConfigDir is a wrapper for filepath.WalkDir that strips directories and non-config files @@ -249,7 +244,13 @@ func parseAndValidateConfigBytes(yamlBytes []byte) (config *Config, err error) { if config == nil || config.Endpoints == nil || len(config.Endpoints) == 0 { err = ErrNoEndpointInConfig } else { - validateAlertingConfig(config.Alerting, config.Endpoints, config.ExternalEndpoints, config.Debug) + // XXX: Remove this in v6.0.0 + if config.Debug { + logr.Warn("WARNING: The 'debug' configuration has been deprecated and will be removed in v6.0.0") + logr.Warn("WARNING: Please use the GATUS_LOG_LEVEL environment variable instead") + } + // XXX: End of v6.0.0 removals + validateAlertingConfig(config.Alerting, config.Endpoints, config.ExternalEndpoints) if err := validateSecurityConfig(config); err != nil { return nil, err } @@ -342,9 +343,7 @@ func validateEndpointsConfig(config *Config) error { duplicateValidationMap := make(map[string]bool) // Validate endpoints for _, ep := range config.Endpoints { - if config.Debug { - log.Printf("[config.validateEndpointsConfig] Validating endpoint '%s'", ep.Name) - } + logr.Debugf("[config.validateEndpointsConfig] Validating endpoint with key %s", ep.Key()) if endpointKey := ep.Key(); duplicateValidationMap[endpointKey] { return fmt.Errorf("invalid endpoint %s: name and group combination must be unique", ep.Key()) } else { @@ -354,12 +353,10 @@ func validateEndpointsConfig(config *Config) error { return fmt.Errorf("invalid endpoint %s: %w", ep.Key(), err) } } - log.Printf("[config.validateEndpointsConfig] Validated %d endpoints", len(config.Endpoints)) + logr.Infof("[config.validateEndpointsConfig] Validated %d endpoints", len(config.Endpoints)) // Validate external endpoints for _, ee := range config.ExternalEndpoints { - if config.Debug { - log.Printf("[config.validateEndpointsConfig] Validating external endpoint '%s'", ee.Name) - } + logr.Debugf("[config.validateEndpointsConfig] Validating external endpoint '%s'", ee.Name) if endpointKey := ee.Key(); duplicateValidationMap[endpointKey] { return fmt.Errorf("invalid external endpoint %s: name and group combination must be unique", ee.Key()) } else { @@ -369,16 +366,14 @@ func validateEndpointsConfig(config *Config) error { return fmt.Errorf("invalid external endpoint %s: %w", ee.Key(), err) } } - log.Printf("[config.validateEndpointsConfig] Validated %d external endpoints", len(config.ExternalEndpoints)) + logr.Infof("[config.validateEndpointsConfig] Validated %d external endpoints", len(config.ExternalEndpoints)) return nil } func validateSecurityConfig(config *Config) error { if config.Security != nil { if config.Security.IsValid() { - if config.Debug { - log.Printf("[config.validateSecurityConfig] Basic security configuration has been validated") - } + logr.Debug("[config.validateSecurityConfig] Basic security configuration has been validated") } else { // If there was an attempt to configure security, then it must mean that some confidential or private // data are exposed. As a result, we'll force a panic because it's better to be safe than sorry. @@ -392,9 +387,9 @@ func validateSecurityConfig(config *Config) error { // Note that the alerting configuration has to be validated before the endpoint configuration, because the default alert // returned by provider.AlertProvider.GetDefaultAlert() must be parsed before endpoint.Endpoint.ValidateAndSetDefaults() // sets the default alert values when none are set. -func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoint.Endpoint, externalEndpoints []*endpoint.ExternalEndpoint, debug bool) { +func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoint.Endpoint, externalEndpoints []*endpoint.ExternalEndpoint) { if alertingConfig == nil { - log.Printf("[config.validateAlertingConfig] Alerting is not configured") + logr.Info("[config.validateAlertingConfig] Alerting is not configured") return } alertTypes := []alert.Type{ @@ -432,9 +427,7 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi for _, ep := range endpoints { for alertIndex, endpointAlert := range ep.Alerts { if alertType == endpointAlert.Type { - if debug { - log.Printf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ep.Key()) - } + logr.Debugf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ep.Key()) provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), endpointAlert) } } @@ -442,9 +435,7 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi for _, ee := range externalEndpoints { for alertIndex, endpointAlert := range ee.Alerts { if alertType == endpointAlert.Type { - if debug { - log.Printf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ee.Key()) - } + logr.Debugf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ee.Key()) provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), endpointAlert) } } @@ -452,7 +443,7 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi } validProviders = append(validProviders, alertType) } else { - log.Printf("[config.validateAlertingConfig] Ignoring provider=%s because configuration is invalid", alertType) + logr.Warnf("[config.validateAlertingConfig] Ignoring provider=%s because configuration is invalid", alertType) invalidProviders = append(invalidProviders, alertType) alertingConfig.SetAlertingProviderToNil(alertProvider) } @@ -460,5 +451,5 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi invalidProviders = append(invalidProviders, alertType) } } - log.Printf("[config.validateAlertingConfig] configuredProviders=%s; ignoredProviders=%s", validProviders, invalidProviders) + logr.Infof("[config.validateAlertingConfig] configuredProviders=%s; ignoredProviders=%s", validProviders, invalidProviders) } diff --git a/config/config_test.go b/config/config_test.go index e6f6c4bf..4650626b 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -27,6 +27,7 @@ import ( "github.com/TwiN/gatus/v5/alerting/provider/pushover" "github.com/TwiN/gatus/v5/alerting/provider/slack" "github.com/TwiN/gatus/v5/alerting/provider/teams" + "github.com/TwiN/gatus/v5/alerting/provider/teamsworkflows" "github.com/TwiN/gatus/v5/alerting/provider/telegram" "github.com/TwiN/gatus/v5/alerting/provider/twilio" "github.com/TwiN/gatus/v5/client" @@ -177,7 +178,6 @@ endpoints: conditions: - "[STATUS] == 200"`, "b.yaml": ` -debug: true alerting: discord: @@ -196,7 +196,6 @@ endpoints: - "[STATUS] == 200"`, }, expectedConfig: &Config{ - Debug: true, Metrics: true, Alerting: &alerting.Config{ Discord: &discord.AlertProvider{WebhookURL: "https://discord.com/api/webhooks/xxx/yyy"}, @@ -719,7 +718,6 @@ badconfig: func TestParseAndValidateConfigBytesWithAlerting(t *testing.T) { config, err := parseAndValidateConfigBytes([]byte(` -debug: true alerting: slack: webhook-url: "http://example.com" @@ -919,8 +917,6 @@ endpoints: func TestParseAndValidateConfigBytesWithAlertingAndDefaultAlert(t *testing.T) { config, err := parseAndValidateConfigBytes([]byte(` -debug: true - alerting: slack: webhook-url: "http://example.com" @@ -1493,6 +1489,7 @@ endpoints: t.Fatal("PagerDuty alerting config should've been set to nil, because its IsValid() method returned false and therefore alerting.Config.SetAlertingProviderToNil() should've been called") } } + func TestParseAndValidateConfigBytesWithInvalidPushoverAlertingConfig(t *testing.T) { config, err := parseAndValidateConfigBytes([]byte(` alerting: @@ -1801,7 +1798,7 @@ endpoints: func TestParseAndValidateConfigBytesWithValidSecurityConfig(t *testing.T) { const expectedUsername = "admin" const expectedPasswordHash = "JDJhJDEwJHRiMnRFakxWazZLdXBzRERQazB1TE8vckRLY05Yb1hSdnoxWU0yQ1FaYXZRSW1McmladDYu" - config, err := parseAndValidateConfigBytes([]byte(fmt.Sprintf(`debug: true + config, err := parseAndValidateConfigBytes([]byte(fmt.Sprintf(` security: basic: username: "%s" @@ -1889,6 +1886,7 @@ func TestGetAlertingProviderByAlertType(t *testing.T) { Telegram: &telegram.AlertProvider{}, Twilio: &twilio.AlertProvider{}, Teams: &teams.AlertProvider{}, + TeamsWorkflows: &teamsworkflows.AlertProvider{}, } scenarios := []struct { alertType alert.Type @@ -1912,6 +1910,7 @@ func TestGetAlertingProviderByAlertType(t *testing.T) { {alertType: alert.TypeTelegram, expected: alertingConfig.Telegram}, {alertType: alert.TypeTwilio, expected: alertingConfig.Twilio}, {alertType: alert.TypeTeams, expected: alertingConfig.Teams}, + {alertType: alert.TypeTeamsWorkflows, expected: alertingConfig.TeamsWorkflows}, } for _, scenario := range scenarios { t.Run(string(scenario.alertType), func(t *testing.T) { diff --git a/config/endpoint/condition.go b/config/endpoint/condition.go index 6d36babd..678ad5e2 100644 --- a/config/endpoint/condition.go +++ b/config/endpoint/condition.go @@ -150,7 +150,7 @@ func (c Condition) evaluate(result *Result, dontResolveFailedConditions bool) bo return false } if !success { - //log.Printf("[Condition.evaluate] Condition '%s' did not succeed because '%s' is false", condition, condition) + //logr.Debugf("[Condition.evaluate] Condition '%s' did not succeed because '%s' is false", condition, condition) } result.ConditionResults = append(result.ConditionResults, &ConditionResult{Condition: conditionToDisplay, Success: success}) return success diff --git a/config/remote/remote.go b/config/remote/remote.go index 1e65639d..d5bdbc47 100644 --- a/config/remote/remote.go +++ b/config/remote/remote.go @@ -1,9 +1,8 @@ package remote import ( - "log" - "github.com/TwiN/gatus/v5/client" + "github.com/TwiN/logr" ) // NOTICE: This is an experimental alpha feature and may be updated/removed in future versions. @@ -31,9 +30,8 @@ func (c *Config) ValidateAndSetDefaults() error { } } if len(c.Instances) > 0 { - log.Println("WARNING: Your configuration is using 'remote', which is in alpha and may be updated/removed in future versions.") - log.Println("WARNING: See https://github.com/TwiN/gatus/issues/64 for more information") - log.Println("WARNING: This feature is a candidate for removal in future versions. Please comment on the issue above if you need this feature.") + logr.Warn("WARNING: Your configuration is using 'remote', which is in alpha and may be updated/removed in future versions.") + logr.Warn("WARNING: See https://github.com/TwiN/gatus/issues/64 for more information") } return nil } diff --git a/controller/controller.go b/controller/controller.go index f0254336..47645a4c 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -1,12 +1,12 @@ package controller import ( - "log" "os" "time" "github.com/TwiN/gatus/v5/api" "github.com/TwiN/gatus/v5/config" + "github.com/TwiN/logr" "github.com/gofiber/fiber/v2" ) @@ -25,19 +25,19 @@ func Handle(cfg *config.Config) { if os.Getenv("ROUTER_TEST") == "true" { return } - log.Println("[controller.Handle] Listening on " + cfg.Web.SocketAddress()) + logr.Info("[controller.Handle] Listening on " + cfg.Web.SocketAddress()) if cfg.Web.HasTLS() { err := app.ListenTLS(cfg.Web.SocketAddress(), cfg.Web.TLS.CertificateFile, cfg.Web.TLS.PrivateKeyFile) if err != nil { - log.Fatal("[controller.Handle]", err) + logr.Fatalf("[controller.Handle] %s", err.Error()) } } else { err := app.Listen(cfg.Web.SocketAddress()) if err != nil { - log.Fatal("[controller.Handle]", err) + logr.Fatalf("[controller.Handle] %s", err.Error()) } } - log.Println("[controller.Handle] Server has shut down successfully") + logr.Info("[controller.Handle] Server has shut down successfully") } // Shutdown stops the server diff --git a/go.mod b/go.mod index 25b0dea8..664ab7f0 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/TwiN/gatus/v5 -go 1.22.4 +go 1.23.3 require ( code.gitea.io/sdk/gitea v0.19.0 @@ -8,7 +8,7 @@ require ( github.com/TwiN/g8/v2 v2.0.0 github.com/TwiN/gocache/v2 v2.2.2 github.com/TwiN/health v1.6.0 - github.com/TwiN/logr v0.2.1 + github.com/TwiN/logr v0.3.1 github.com/TwiN/whois v1.1.9 github.com/aws/aws-sdk-go v1.54.10 github.com/coreos/go-oidc/v3 v3.11.0 diff --git a/go.sum b/go.sum index 7c0a9b1a..ac9cab24 100644 --- a/go.sum +++ b/go.sum @@ -16,8 +16,8 @@ github.com/TwiN/gocache/v2 v2.2.2 h1:4HToPfDV8FSbaYO5kkbhLpEllUYse5rAf+hVU/mSsuI github.com/TwiN/gocache/v2 v2.2.2/go.mod h1:WfIuwd7GR82/7EfQqEtmLFC3a2vqaKbs4Pe6neB7Gyc= github.com/TwiN/health v1.6.0 h1:L2ks575JhRgQqWWOfKjw9B0ec172hx7GdToqkYUycQM= github.com/TwiN/health v1.6.0/go.mod h1:Z6TszwQPMvtSiVx1QMidVRgvVr4KZGfiwqcD7/Z+3iw= -github.com/TwiN/logr v0.2.1 h1:kMhUmBBVlFxzqTvyHuNoYQ/uwqg8BW4y0AyZxI5JB3Q= -github.com/TwiN/logr v0.2.1/go.mod h1:oldDOkRjFXjZqiMP0+ca5NAQHXTiJ02zHirsuBJJH6k= +github.com/TwiN/logr v0.3.1 h1:CfTKA83jUmsAoxqrr3p4JxEkqXOBnEE9/f35L5MODy4= +github.com/TwiN/logr v0.3.1/go.mod h1:BZgZFYq6fQdU3KtR8qYato3zUEw53yQDaIuujHb55Jw= github.com/TwiN/whois v1.1.9 h1:m20+m1CXnrstie+tW2ZmAJkfcT9zgwpVRUFsKeMw+ng= github.com/TwiN/whois v1.1.9/go.mod h1:TjipCMpJRAJYKmtz/rXQBU6UGxMh6bk8SHazu7OMnQE= github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= diff --git a/main.go b/main.go index 54576463..5e0dc273 100644 --- a/main.go +++ b/main.go @@ -1,7 +1,6 @@ package main import ( - "log" "os" "os/signal" "strconv" @@ -15,16 +14,22 @@ import ( "github.com/TwiN/logr" ) +const ( + GatusConfigPathEnvVar = "GATUS_CONFIG_PATH" + GatusConfigFileEnvVar = "GATUS_CONFIG_FILE" // Deprecated in favor of GatusConfigPathEnvVar + GatusLogLevelEnvVar = "GATUS_LOG_LEVEL" +) + func main() { if delayInSeconds, _ := strconv.Atoi(os.Getenv("GATUS_DELAY_START_SECONDS")); delayInSeconds > 0 { - log.Printf("Delaying start by %d seconds", delayInSeconds) + logr.Infof("Delaying start by %d seconds", delayInSeconds) time.Sleep(time.Duration(delayInSeconds) * time.Second) } + configureLogging() cfg, err := loadConfiguration() if err != nil { panic(err) } - configureLogging(cfg) initializeStorage(cfg) start(cfg) // Wait for termination signal @@ -33,13 +38,13 @@ func main() { signal.Notify(signalChannel, os.Interrupt, syscall.SIGTERM) go func() { <-signalChannel - log.Println("Received termination signal, attempting to gracefully shut down") + logr.Info("Received termination signal, attempting to gracefully shut down") stop(cfg) save() done <- true }() <-done - log.Println("Shutting down") + logr.Info("Shutting down") } func start(cfg *config.Config) { @@ -55,21 +60,32 @@ func stop(cfg *config.Config) { func save() { if err := store.Get().Save(); err != nil { - log.Println("Failed to save storage provider:", err.Error()) + logr.Errorf("Failed to save storage provider: %s", err.Error()) } } -func configureLogging(cfg *config.Config) { - logr.SetThreshold(cfg.LogLevel) - logr.Infof("[main.configureLogging] Log Level is %s", logr.GetThreshold()) +func configureLogging() { + logLevelAsString := os.Getenv(GatusLogLevelEnvVar) + if logLevel, err := logr.LevelFromString(logLevelAsString); err != nil { + logr.SetThreshold(logr.LevelInfo) + if len(logLevelAsString) == 0 { + logr.Infof("[main.configureLogging] Defaulting log level to %s", logr.LevelInfo) + } else { + logr.Warnf("[main.configureLogging] Invalid log level '%s', defaulting to %s", logLevelAsString, logr.LevelInfo) + } + } else { + logr.SetThreshold(logLevel) + logr.Infof("[main.configureLogging] Log Level is set to %s", logr.GetThreshold()) + } + } func loadConfiguration() (*config.Config, error) { - configPath := os.Getenv("GATUS_CONFIG_PATH") + configPath := os.Getenv(GatusConfigPathEnvVar) // Backwards compatibility if len(configPath) == 0 { - if configPath = os.Getenv("GATUS_CONFIG_FILE"); len(configPath) > 0 { - log.Println("WARNING: GATUS_CONFIG_FILE is deprecated. Please use GATUS_CONFIG_PATH instead.") + if configPath = os.Getenv(GatusConfigFileEnvVar); len(configPath) > 0 { + logr.Warnf("WARNING: %s is deprecated. Please use %s instead.", GatusConfigFileEnvVar, GatusConfigPathEnvVar) } } return config.LoadConfiguration(configPath) @@ -95,7 +111,7 @@ func initializeStorage(cfg *config.Config) { } numberOfEndpointStatusesDeleted := store.Get().DeleteAllEndpointStatusesNotInKeys(keys) if numberOfEndpointStatusesDeleted > 0 { - log.Printf("[main.initializeStorage] Deleted %d endpoint statuses because their matching endpoints no longer existed", numberOfEndpointStatusesDeleted) + logr.Infof("[main.initializeStorage] Deleted %d endpoint statuses because their matching endpoints no longer existed", numberOfEndpointStatusesDeleted) } // Clean up the triggered alerts from the storage provider and load valid triggered endpoint alerts numberOfPersistedTriggeredAlertsLoaded := 0 @@ -107,13 +123,13 @@ func initializeStorage(cfg *config.Config) { } } numberOfTriggeredAlertsDeleted := store.Get().DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep, checksums) - if cfg.Debug && numberOfTriggeredAlertsDeleted > 0 { - log.Printf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ep.Key()) + if numberOfTriggeredAlertsDeleted > 0 { + logr.Debugf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ep.Key()) } for _, alert := range ep.Alerts { exists, resolveKey, numberOfSuccessesInARow, err := store.Get().GetTriggeredEndpointAlert(ep, alert) if err != nil { - log.Printf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error()) continue } if exists { @@ -132,13 +148,13 @@ func initializeStorage(cfg *config.Config) { } convertedEndpoint := ee.ToEndpoint() numberOfTriggeredAlertsDeleted := store.Get().DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(convertedEndpoint, checksums) - if cfg.Debug && numberOfTriggeredAlertsDeleted > 0 { - log.Printf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ee.Key()) + if numberOfTriggeredAlertsDeleted > 0 { + logr.Debugf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ee.Key()) } for _, alert := range ee.Alerts { exists, resolveKey, numberOfSuccessesInARow, err := store.Get().GetTriggeredEndpointAlert(convertedEndpoint, alert) if err != nil { - log.Printf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ee.Key(), err.Error()) + logr.Errorf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ee.Key(), err.Error()) continue } if exists { @@ -149,7 +165,7 @@ func initializeStorage(cfg *config.Config) { } } if numberOfPersistedTriggeredAlertsLoaded > 0 { - log.Printf("[main.initializeStorage] Loaded %d persisted triggered alerts", numberOfPersistedTriggeredAlertsLoaded) + logr.Infof("[main.initializeStorage] Loaded %d persisted triggered alerts", numberOfPersistedTriggeredAlertsLoaded) } } @@ -157,15 +173,15 @@ func listenToConfigurationFileChanges(cfg *config.Config) { for { time.Sleep(30 * time.Second) if cfg.HasLoadedConfigurationBeenModified() { - log.Println("[main.listenToConfigurationFileChanges] Configuration file has been modified") + logr.Info("[main.listenToConfigurationFileChanges] Configuration file has been modified") stop(cfg) time.Sleep(time.Second) // Wait a bit to make sure everything is done. save() updatedConfig, err := loadConfiguration() if err != nil { if cfg.SkipInvalidConfigUpdate { - log.Println("[main.listenToConfigurationFileChanges] Failed to load new configuration:", err.Error()) - log.Println("[main.listenToConfigurationFileChanges] The configuration file was updated, but it is not valid. The old configuration will continue being used.") + logr.Errorf("[main.listenToConfigurationFileChanges] Failed to load new configuration: %s", err.Error()) + logr.Error("[main.listenToConfigurationFileChanges] The configuration file was updated, but it is not valid. The old configuration will continue being used.") // Update the last file modification time to avoid trying to process the same invalid configuration again cfg.UpdateLastFileModTime() continue diff --git a/security/config.go b/security/config.go index 8f1febec..994fc0c2 100644 --- a/security/config.go +++ b/security/config.go @@ -2,10 +2,10 @@ package security import ( "encoding/base64" - "log" "net/http" g8 "github.com/TwiN/g8/v2" + "github.com/TwiN/logr" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/adaptor" "github.com/gofiber/fiber/v2/middleware/basicauth" @@ -99,7 +99,7 @@ func (c *Config) IsAuthenticated(ctx *fiber.Ctx) bool { // TODO: Update g8 to support fasthttp natively? (see g8's fasthttp branch) request, err := adaptor.ConvertRequest(ctx, false) if err != nil { - log.Printf("[security.IsAuthenticated] Unexpected error converting request: %v", err) + logr.Errorf("[security.IsAuthenticated] Unexpected error converting request: %v", err) return false } token := c.gate.ExtractTokenFromRequest(request) diff --git a/security/oidc.go b/security/oidc.go index f1cc2c47..6505b8ef 100644 --- a/security/oidc.go +++ b/security/oidc.go @@ -2,11 +2,11 @@ package security import ( "context" - "log" "net/http" "strings" "time" + "github.com/TwiN/logr" "github.com/coreos/go-oidc/v3/oidc" "github.com/gofiber/fiber/v2" "github.com/google/uuid" @@ -124,7 +124,7 @@ func (c *OIDCConfig) callbackHandler(w http.ResponseWriter, r *http.Request) { / return } } - log.Printf("[security.callbackHandler] Subject %s is not in the list of allowed subjects", idToken.Subject) + logr.Debugf("[security.callbackHandler] Subject %s is not in the list of allowed subjects", idToken.Subject) http.Redirect(w, r, "/?error=access_denied", http.StatusFound) } diff --git a/storage/store/sql/sql.go b/storage/store/sql/sql.go index a2a6791d..30b7906f 100644 --- a/storage/store/sql/sql.go +++ b/storage/store/sql/sql.go @@ -4,7 +4,6 @@ import ( "database/sql" "errors" "fmt" - "log" "strconv" "strings" "time" @@ -14,6 +13,7 @@ import ( "github.com/TwiN/gatus/v5/storage/store/common" "github.com/TwiN/gatus/v5/storage/store/common/paging" "github.com/TwiN/gocache/v2" + "github.com/TwiN/logr" _ "github.com/lib/pq" _ "modernc.org/sqlite" ) @@ -237,12 +237,12 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error { // Endpoint doesn't exist in the database, insert it if endpointID, err = s.insertEndpoint(tx, ep); err != nil { _ = tx.Rollback() - log.Printf("[sql.Insert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error()) return err } } else { _ = tx.Rollback() - log.Printf("[sql.Insert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error()) return err } } @@ -253,12 +253,12 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error { // of type EventStart, in which case we will have to create a new event of type EventHealthy or EventUnhealthy // based on result.Success. // 2. The lastResult.Success != result.Success. This implies that the endpoint went from healthy to unhealthy or - // vice-versa, in which case we will have to create a new event of type EventHealthy or EventUnhealthy + // vice versa, in which case we will have to create a new event of type EventHealthy or EventUnhealthy // based on result.Success. numberOfEvents, err := s.getNumberOfEventsByEndpointID(tx, endpointID) if err != nil { // Silently fail - log.Printf("[sql.Insert] Failed to retrieve total number of events for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to retrieve total number of events for endpoint with key=%s: %s", ep.Key(), err.Error()) } if numberOfEvents == 0 { // There's no events yet, which means we need to add the EventStart and the first healthy/unhealthy event @@ -268,18 +268,18 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error { }) if err != nil { // Silently fail - log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", endpoint.EventStart, ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", endpoint.EventStart, ep.Key(), err.Error()) } event := endpoint.NewEventFromResult(result) if err = s.insertEndpointEvent(tx, endpointID, event); err != nil { // Silently fail - log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error()) } } else { // Get the success value of the previous result var lastResultSuccess bool if lastResultSuccess, err = s.getLastEndpointResultSuccessValue(tx, endpointID); err != nil { - log.Printf("[sql.Insert] Failed to retrieve outcome of previous result for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to retrieve outcome of previous result for endpoint with key=%s: %s", ep.Key(), err.Error()) } else { // If we managed to retrieve the outcome of the previous result, we'll compare it with the new result. // If the final outcome (success or failure) of the previous and the new result aren't the same, it means @@ -289,7 +289,7 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error { event := endpoint.NewEventFromResult(result) if err = s.insertEndpointEvent(tx, endpointID, event); err != nil { // Silently fail - log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error()) } } } @@ -298,42 +298,42 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error { // (since we're only deleting MaximumNumberOfEvents at a time instead of 1) if numberOfEvents > eventsCleanUpThreshold { if err = s.deleteOldEndpointEvents(tx, endpointID); err != nil { - log.Printf("[sql.Insert] Failed to delete old events for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to delete old events for endpoint with key=%s: %s", ep.Key(), err.Error()) } } } // Second, we need to insert the result. if err = s.insertEndpointResult(tx, endpointID, result); err != nil { - log.Printf("[sql.Insert] Failed to insert result for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to insert result for endpoint with key=%s: %s", ep.Key(), err.Error()) _ = tx.Rollback() // If we can't insert the result, we'll rollback now since there's no point continuing return err } // Clean up old results numberOfResults, err := s.getNumberOfResultsByEndpointID(tx, endpointID) if err != nil { - log.Printf("[sql.Insert] Failed to retrieve total number of results for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to retrieve total number of results for endpoint with key=%s: %s", ep.Key(), err.Error()) } else { if numberOfResults > resultsCleanUpThreshold { if err = s.deleteOldEndpointResults(tx, endpointID); err != nil { - log.Printf("[sql.Insert] Failed to delete old results for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to delete old results for endpoint with key=%s: %s", ep.Key(), err.Error()) } } } // Finally, we need to insert the uptime data. // Because the uptime data significantly outlives the results, we can't rely on the results for determining the uptime if err = s.updateEndpointUptime(tx, endpointID, result); err != nil { - log.Printf("[sql.Insert] Failed to update uptime for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to update uptime for endpoint with key=%s: %s", ep.Key(), err.Error()) } // Merge hourly uptime entries that can be merged into daily entries and clean up old uptime entries numberOfUptimeEntries, err := s.getNumberOfUptimeEntriesByEndpointID(tx, endpointID) if err != nil { - log.Printf("[sql.Insert] Failed to retrieve total number of uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to retrieve total number of uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error()) } else { // Merge older hourly uptime entries into daily uptime entries if we have more than uptimeTotalEntriesMergeThreshold if numberOfUptimeEntries >= uptimeTotalEntriesMergeThreshold { - log.Printf("[sql.Insert] Merging hourly uptime entries for endpoint with key=%s; This is a lot of work, it shouldn't happen too often", ep.Key()) + logr.Infof("[sql.Insert] Merging hourly uptime entries for endpoint with key=%s; This is a lot of work, it shouldn't happen too often", ep.Key()) if err = s.mergeHourlyUptimeEntriesOlderThanMergeThresholdIntoDailyUptimeEntries(tx, endpointID); err != nil { - log.Printf("[sql.Insert] Failed to merge hourly uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to merge hourly uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error()) } } } @@ -342,11 +342,11 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error { // but if Gatus was temporarily shut down, we might have some old entries that need to be cleaned up ageOfOldestUptimeEntry, err := s.getAgeOfOldestEndpointUptimeEntry(tx, endpointID) if err != nil { - log.Printf("[sql.Insert] Failed to retrieve oldest endpoint uptime entry for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to retrieve oldest endpoint uptime entry for endpoint with key=%s: %s", ep.Key(), err.Error()) } else { if ageOfOldestUptimeEntry > uptimeAgeCleanUpThreshold { if err = s.deleteOldUptimeEntries(tx, endpointID, time.Now().Add(-(uptimeRetention + time.Hour))); err != nil { - log.Printf("[sql.Insert] Failed to delete old uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.Insert] Failed to delete old uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error()) } } } @@ -356,7 +356,7 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error { s.writeThroughCache.Delete(cacheKey) endpointKey, params, err := extractKeyAndParamsFromCacheKey(cacheKey) if err != nil { - log.Printf("[sql.Insert] Silently deleting cache key %s instead of refreshing due to error: %s", cacheKey, err.Error()) + logr.Errorf("[sql.Insert] Silently deleting cache key %s instead of refreshing due to error: %s", cacheKey, err.Error()) continue } // Retrieve the endpoint status by key, which will in turn refresh the cache @@ -387,7 +387,7 @@ func (s *Store) DeleteAllEndpointStatusesNotInKeys(keys []string) int { result, err = s.db.Exec(query, args...) } if err != nil { - log.Printf("[sql.DeleteAllEndpointStatusesNotInKeys] Failed to delete rows that do not belong to any of keys=%v: %s", keys, err.Error()) + logr.Errorf("[sql.DeleteAllEndpointStatusesNotInKeys] Failed to delete rows that do not belong to any of keys=%v: %s", keys, err.Error()) return 0 } if s.writeThroughCache != nil { @@ -403,7 +403,7 @@ func (s *Store) DeleteAllEndpointStatusesNotInKeys(keys []string) int { // GetTriggeredEndpointAlert returns whether the triggered alert for the specified endpoint as well as the necessary information to resolve it func (s *Store) GetTriggeredEndpointAlert(ep *endpoint.Endpoint, alert *alert.Alert) (exists bool, resolveKey string, numberOfSuccessesInARow int, err error) { - //log.Printf("[sql.GetTriggeredEndpointAlert] Getting triggered alert with checksum=%s for endpoint with key=%s", alert.Checksum(), ep.Key()) + //logr.Debugf("[sql.GetTriggeredEndpointAlert] Getting triggered alert with checksum=%s for endpoint with key=%s", alert.Checksum(), ep.Key()) err = s.db.QueryRow( "SELECT resolve_key, number_of_successes_in_a_row FROM endpoint_alerts_triggered WHERE endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $1 LIMIT 1) AND configuration_checksum = $2", ep.Key(), @@ -421,7 +421,7 @@ func (s *Store) GetTriggeredEndpointAlert(ep *endpoint.Endpoint, alert *alert.Al // UpsertTriggeredEndpointAlert inserts/updates a triggered alert for an endpoint // Used for persistence of triggered alerts across application restarts func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error { - //log.Printf("[sql.UpsertTriggeredEndpointAlert] Upserting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key()) + //logr.Debugf("[sql.UpsertTriggeredEndpointAlert] Upserting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key()) tx, err := s.db.Begin() if err != nil { return err @@ -433,12 +433,12 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle // This shouldn't happen, but we'll handle it anyway if endpointID, err = s.insertEndpoint(tx, ep); err != nil { _ = tx.Rollback() - log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error()) return err } } else { _ = tx.Rollback() - log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error()) return err } } @@ -457,7 +457,7 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle ) if err != nil { _ = tx.Rollback() - log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to persist triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to persist triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error()) return err } if err = tx.Commit(); err != nil { @@ -468,7 +468,7 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle // DeleteTriggeredEndpointAlert deletes a triggered alert for an endpoint func (s *Store) DeleteTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error { - //log.Printf("[sql.DeleteTriggeredEndpointAlert] Deleting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key()) + //logr.Debugf("[sql.DeleteTriggeredEndpointAlert] Deleting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key()) _, err := s.db.Exec("DELETE FROM endpoint_alerts_triggered WHERE configuration_checksum = $1 AND endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $2 LIMIT 1)", triggeredAlert.Checksum(), ep.Key()) return err } @@ -477,7 +477,7 @@ func (s *Store) DeleteTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle // configurations are not provided in the checksums list. // This prevents triggered alerts that have been removed or modified from lingering in the database. func (s *Store) DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep *endpoint.Endpoint, checksums []string) int { - //log.Printf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Deleting triggered alerts for endpoint with key=%s that do not belong to any of checksums=%v", ep.Key(), checksums) + //logr.Debugf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Deleting triggered alerts for endpoint with key=%s that do not belong to any of checksums=%v", ep.Key(), checksums) var err error var result sql.Result if len(checksums) == 0 { @@ -498,7 +498,7 @@ func (s *Store) DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep *endpoint.En result, err = s.db.Exec(query, args...) } if err != nil { - log.Printf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Failed to delete rows for endpoint with key=%s that do not belong to any of checksums=%v: %s", ep.Key(), checksums, err.Error()) + logr.Errorf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Failed to delete rows for endpoint with key=%s that do not belong to any of checksums=%v: %s", ep.Key(), checksums, err.Error()) return 0 } // Return number of rows deleted @@ -530,7 +530,7 @@ func (s *Store) Close() { // insertEndpoint inserts an endpoint in the store and returns the generated id of said endpoint func (s *Store) insertEndpoint(tx *sql.Tx, ep *endpoint.Endpoint) (int64, error) { - //log.Printf("[sql.insertEndpoint] Inserting endpoint with group=%s and name=%s", ep.Group, ep.Name) + //logr.Debugf("[sql.insertEndpoint] Inserting endpoint with group=%s and name=%s", ep.Group, ep.Name) var id int64 err := tx.QueryRow( "INSERT INTO endpoints (endpoint_key, endpoint_name, endpoint_group) VALUES ($1, $2, $3) RETURNING endpoint_id", @@ -655,12 +655,12 @@ func (s *Store) getEndpointStatusByKey(tx *sql.Tx, key string, parameters *pagin endpointStatus := endpoint.NewStatus(group, endpointName) if parameters.EventsPageSize > 0 { if endpointStatus.Events, err = s.getEndpointEventsByEndpointID(tx, endpointID, parameters.EventsPage, parameters.EventsPageSize); err != nil { - log.Printf("[sql.getEndpointStatusByKey] Failed to retrieve events for key=%s: %s", key, err.Error()) + logr.Errorf("[sql.getEndpointStatusByKey] Failed to retrieve events for key=%s: %s", key, err.Error()) } } if parameters.ResultsPageSize > 0 { if endpointStatus.Results, err = s.getEndpointResultsByEndpointID(tx, endpointID, parameters.ResultsPage, parameters.ResultsPageSize); err != nil { - log.Printf("[sql.getEndpointStatusByKey] Failed to retrieve results for key=%s: %s", key, err.Error()) + logr.Errorf("[sql.getEndpointStatusByKey] Failed to retrieve results for key=%s: %s", key, err.Error()) } } if s.writeThroughCache != nil { @@ -735,7 +735,7 @@ func (s *Store) getEndpointResultsByEndpointID(tx *sql.Tx, endpointID int64, pag var joinedErrors string err = rows.Scan(&id, &result.Success, &joinedErrors, &result.Connected, &result.HTTPStatus, &result.DNSRCode, &result.CertificateExpiration, &result.DomainExpiration, &result.Hostname, &result.IP, &result.Duration, &result.Timestamp) if err != nil { - log.Printf("[sql.getEndpointResultsByEndpointID] Silently failed to retrieve endpoint result for endpointID=%d: %s", endpointID, err.Error()) + logr.Errorf("[sql.getEndpointResultsByEndpointID] Silently failed to retrieve endpoint result for endpointID=%d: %s", endpointID, err.Error()) err = nil } if len(joinedErrors) != 0 { diff --git a/storage/store/store.go b/storage/store/store.go index 5cb20e36..772c3ce0 100644 --- a/storage/store/store.go +++ b/storage/store/store.go @@ -2,7 +2,6 @@ package store import ( "context" - "log" "time" "github.com/TwiN/gatus/v5/alerting/alert" @@ -11,6 +10,7 @@ import ( "github.com/TwiN/gatus/v5/storage/store/common/paging" "github.com/TwiN/gatus/v5/storage/store/memory" "github.com/TwiN/gatus/v5/storage/store/sql" + "github.com/TwiN/logr" ) // Store is the interface that each store should implement @@ -91,7 +91,7 @@ var ( func Get() Store { if !initialized { // This only happens in tests - log.Println("[store.Get] Provider requested before it was initialized, automatically initializing") + logr.Info("[store.Get] Provider requested before it was initialized, automatically initializing") err := Initialize(nil) if err != nil { panic("failed to automatically initialize store: " + err.Error()) @@ -110,11 +110,11 @@ func Initialize(cfg *storage.Config) error { } if cfg == nil { // This only happens in tests - log.Println("[store.Initialize] nil storage config passed as parameter. This should only happen in tests. Defaulting to an empty config.") + logr.Warn("[store.Initialize] nil storage config passed as parameter. This should only happen in tests. Defaulting to an empty config.") cfg = &storage.Config{} } if len(cfg.Path) == 0 && cfg.Type != storage.TypePostgres { - log.Printf("[store.Initialize] Creating storage provider of type=%s", cfg.Type) + logr.Infof("[store.Initialize] Creating storage provider of type=%s", cfg.Type) } ctx, cancelFunc = context.WithCancel(context.Background()) switch cfg.Type { @@ -136,13 +136,12 @@ func autoSave(ctx context.Context, store Store, interval time.Duration) { for { select { case <-ctx.Done(): - log.Printf("[store.autoSave] Stopping active job") + logr.Info("[store.autoSave] Stopping active job") return case <-time.After(interval): - log.Printf("[store.autoSave] Saving") - err := store.Save() - if err != nil { - log.Println("[store.autoSave] Save failed:", err.Error()) + logr.Info("[store.autoSave] Saving") + if err := store.Save(); err != nil { + logr.Errorf("[store.autoSave] Save failed: %s", err.Error()) } } } diff --git a/watchdog/alerting.go b/watchdog/alerting.go index 6fd7a246..5bd4cfca 100644 --- a/watchdog/alerting.go +++ b/watchdog/alerting.go @@ -2,27 +2,27 @@ package watchdog import ( "errors" - "log" "os" "github.com/TwiN/gatus/v5/alerting" "github.com/TwiN/gatus/v5/config/endpoint" "github.com/TwiN/gatus/v5/storage/store" + "github.com/TwiN/logr" ) // HandleAlerting takes care of alerts to resolve and alerts to trigger based on result success or failure -func HandleAlerting(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) { +func HandleAlerting(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) { if alertingConfig == nil { return } if result.Success { - handleAlertsToResolve(ep, result, alertingConfig, debug) + handleAlertsToResolve(ep, result, alertingConfig) } else { - handleAlertsToTrigger(ep, result, alertingConfig, debug) + handleAlertsToTrigger(ep, result, alertingConfig) } } -func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) { +func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) { ep.NumberOfSuccessesInARow = 0 ep.NumberOfFailuresInARow++ for _, endpointAlert := range ep.Alerts { @@ -31,14 +31,12 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert continue } if endpointAlert.Triggered { - if debug { - log.Printf("[watchdog.handleAlertsToTrigger] Alert for endpoint=%s with description='%s' has already been TRIGGERED, skipping", ep.Name, endpointAlert.GetDescription()) - } + logr.Debugf("[watchdog.handleAlertsToTrigger] Alert for endpoint with key=%s with description='%s' has already been TRIGGERED, skipping", ep.Key(), endpointAlert.GetDescription()) continue } alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type) if alertProvider != nil { - log.Printf("[watchdog.handleAlertsToTrigger] Sending %s alert because alert for endpoint=%s with description='%s' has been TRIGGERED", endpointAlert.Type, ep.Name, endpointAlert.GetDescription()) + logr.Infof("[watchdog.handleAlertsToTrigger] Sending %s alert because alert for endpoint with key=%s with description='%s' has been TRIGGERED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription()) var err error if os.Getenv("MOCK_ALERT_PROVIDER") == "true" { if os.Getenv("MOCK_ALERT_PROVIDER_ERROR") == "true" { @@ -48,27 +46,27 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert err = alertProvider.Send(ep, endpointAlert, result, false) } if err != nil { - log.Printf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint=%s: %s", ep.Name, err.Error()) + logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error()) } else { endpointAlert.Triggered = true if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil { - log.Printf("[watchdog.handleAlertsToTrigger] Failed to persist triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to persist triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error()) } } } else { - log.Printf("[watchdog.handleAlertsToTrigger] Not sending alert of type=%s despite being TRIGGERED, because the provider wasn't configured properly", endpointAlert.Type) + logr.Warnf("[watchdog.handleAlertsToTrigger] Not sending alert of type=%s endpoint with key=%s despite being TRIGGERED, because the provider wasn't configured properly", endpointAlert.Type, ep.Key()) } } } -func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) { +func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) { ep.NumberOfSuccessesInARow++ for _, endpointAlert := range ep.Alerts { isStillBelowSuccessThreshold := endpointAlert.SuccessThreshold > ep.NumberOfSuccessesInARow if isStillBelowSuccessThreshold && endpointAlert.IsEnabled() && endpointAlert.Triggered { // Persist NumberOfSuccessesInARow if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil { - log.Printf("[watchdog.handleAlertsToResolve] Failed to update triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[watchdog.handleAlertsToResolve] Failed to update triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error()) } } if !endpointAlert.IsEnabled() || !endpointAlert.Triggered || isStillBelowSuccessThreshold { @@ -78,20 +76,20 @@ func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alert // Further explanation can be found on Alert's Triggered field. endpointAlert.Triggered = false if err := store.Get().DeleteTriggeredEndpointAlert(ep, endpointAlert); err != nil { - log.Printf("[watchdog.handleAlertsToResolve] Failed to delete persisted triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[watchdog.handleAlertsToResolve] Failed to delete persisted triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error()) } if !endpointAlert.IsSendingOnResolved() { continue } alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type) if alertProvider != nil { - log.Printf("[watchdog.handleAlertsToResolve] Sending %s alert because alert for endpoint with key=%s with description='%s' has been RESOLVED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription()) + logr.Infof("[watchdog.handleAlertsToResolve] Sending %s alert because alert for endpoint with key=%s with description='%s' has been RESOLVED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription()) err := alertProvider.Send(ep, endpointAlert, result, true) if err != nil { - log.Printf("[watchdog.handleAlertsToResolve] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error()) + logr.Errorf("[watchdog.handleAlertsToResolve] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error()) } } else { - log.Printf("[watchdog.handleAlertsToResolve] Not sending alert of type=%s despite being RESOLVED, because the provider wasn't configured properly", endpointAlert.Type) + logr.Warnf("[watchdog.handleAlertsToResolve] Not sending alert of type=%s for endpoint with key=%s despite being RESOLVED, because the provider wasn't configured properly", endpointAlert.Type, ep.Key()) } } ep.NumberOfFailuresInARow = 0 diff --git a/watchdog/alerting_test.go b/watchdog/alerting_test.go index 914355e4..001db55b 100644 --- a/watchdog/alerting_test.go +++ b/watchdog/alerting_test.go @@ -28,7 +28,6 @@ func TestHandleAlerting(t *testing.T) { defer os.Clearenv() cfg := &config.Config{ - Debug: true, Alerting: &alerting.Config{ Custom: &custom.AlertProvider{ URL: "https://twin.sh/health", @@ -52,28 +51,28 @@ func TestHandleAlerting(t *testing.T) { } verify(t, ep, 0, 0, false, "The alert shouldn't start triggered") - HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting) verify(t, ep, 1, 0, false, "The alert shouldn't have triggered") - HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting) verify(t, ep, 2, 0, true, "The alert should've triggered") - HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting) verify(t, ep, 3, 0, true, "The alert should still be triggered") - HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting) verify(t, ep, 4, 0, true, "The alert should still be triggered") - HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting) verify(t, ep, 0, 1, true, "The alert should still be triggered (because endpoint.Alerts[0].SuccessThreshold is 3)") - HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting) verify(t, ep, 0, 2, true, "The alert should still be triggered (because endpoint.Alerts[0].SuccessThreshold is 3)") - HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting) verify(t, ep, 0, 3, false, "The alert should've been resolved") - HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting) verify(t, ep, 0, 4, false, "The alert should no longer be triggered") } func TestHandleAlertingWhenAlertingConfigIsNil(t *testing.T) { _ = os.Setenv("MOCK_ALERT_PROVIDER", "true") defer os.Clearenv() - HandleAlerting(nil, nil, nil, true) + HandleAlerting(nil, nil, nil) } func TestHandleAlertingWithBadAlertProvider(t *testing.T) { @@ -96,9 +95,9 @@ func TestHandleAlertingWithBadAlertProvider(t *testing.T) { } verify(t, ep, 0, 0, false, "The alert shouldn't start triggered") - HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{}, false) + HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{}) verify(t, ep, 1, 0, false, "The alert shouldn't have triggered") - HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{}, false) + HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{}) verify(t, ep, 2, 0, false, "The alert shouldn't have triggered, because the provider wasn't configured properly") } @@ -107,7 +106,6 @@ func TestHandleAlertingWhenTriggeredAlertIsAlmostResolvedButendpointStartFailing defer os.Clearenv() cfg := &config.Config{ - Debug: true, Alerting: &alerting.Config{ Custom: &custom.AlertProvider{ URL: "https://twin.sh/health", @@ -132,7 +130,7 @@ func TestHandleAlertingWhenTriggeredAlertIsAlmostResolvedButendpointStartFailing } // This test simulate an alert that was already triggered - HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting) verify(t, ep, 2, 0, true, "The alert was already triggered at the beginning of this test") } @@ -141,7 +139,6 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedButSendOnResolvedIsFalse(t *t defer os.Clearenv() cfg := &config.Config{ - Debug: true, Alerting: &alerting.Config{ Custom: &custom.AlertProvider{ URL: "https://twin.sh/health", @@ -166,7 +163,7 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedButSendOnResolvedIsFalse(t *t NumberOfFailuresInARow: 1, } - HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting) verify(t, ep, 0, 1, false, "The alert should've been resolved") } @@ -175,7 +172,6 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPagerDuty(t *testing.T) { defer os.Clearenv() cfg := &config.Config{ - Debug: true, Alerting: &alerting.Config{ PagerDuty: &pagerduty.AlertProvider{ IntegrationKey: "00000000000000000000000000000000", @@ -198,10 +194,10 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPagerDuty(t *testing.T) { NumberOfFailuresInARow: 0, } - HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting) verify(t, ep, 1, 0, true, "") - HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting) verify(t, ep, 0, 1, false, "The alert should've been resolved") } @@ -210,7 +206,6 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPushover(t *testing.T) { defer os.Clearenv() cfg := &config.Config{ - Debug: true, Alerting: &alerting.Config{ Pushover: &pushover.AlertProvider{ ApplicationToken: "000000000000000000000000000000", @@ -234,10 +229,10 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPushover(t *testing.T) { NumberOfFailuresInARow: 0, } - HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting) verify(t, ep, 1, 0, true, "") - HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting) verify(t, ep, 0, 1, false, "The alert should've been resolved") } @@ -403,32 +398,32 @@ func TestHandleAlertingWithProviderThatReturnsAnError(t *testing.T) { }, } _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true") - HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true) + HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig) verify(t, ep, 1, 0, false, "") - HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true) + HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig) verify(t, ep, 2, 0, false, "The alert should have failed to trigger, because the alert provider is returning an error") - HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true) + HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig) verify(t, ep, 3, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error") - HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true) + HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig) verify(t, ep, 4, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false") - HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true) + HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig) verify(t, ep, 5, 0, true, "The alert should've been triggered because the alert provider is no longer returning an error") - HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true) + HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig) verify(t, ep, 0, 1, true, "The alert should've still been triggered") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true") - HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true) + HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig) verify(t, ep, 0, 2, false, "The alert should've been resolved DESPITE THE ALERT PROVIDER RETURNING AN ERROR. See Alert.Triggered for further explanation.") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false") // Make sure that everything's working as expected after a rough patch - HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true) + HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig) verify(t, ep, 1, 0, false, "") - HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true) + HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig) verify(t, ep, 2, 0, true, "The alert should have triggered") - HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true) + HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig) verify(t, ep, 0, 1, true, "The alert should still be triggered") - HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true) + HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig) verify(t, ep, 0, 2, false, "The alert should have been resolved") }) } @@ -440,7 +435,6 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) { defer os.Clearenv() cfg := &config.Config{ - Debug: true, Alerting: &alerting.Config{ Custom: &custom.AlertProvider{ URL: "https://twin.sh/health", @@ -463,27 +457,27 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) { }, } - HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting) verify(t, ep, 1, 0, true, "") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true") - HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting) verify(t, ep, 0, 1, false, "") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false") - HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting) verify(t, ep, 1, 0, true, "") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true") - HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting) verify(t, ep, 0, 1, false, "") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false") // Make sure that everything's working as expected after a rough patch - HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting) verify(t, ep, 1, 0, true, "") - HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting) verify(t, ep, 2, 0, true, "") - HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting) verify(t, ep, 0, 1, false, "") - HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) + HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting) verify(t, ep, 0, 2, false, "") } diff --git a/watchdog/watchdog.go b/watchdog/watchdog.go index c185da50..36b0149c 100644 --- a/watchdog/watchdog.go +++ b/watchdog/watchdog.go @@ -2,7 +2,6 @@ package watchdog import ( "context" - "log" "sync" "time" @@ -45,7 +44,7 @@ func monitor(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance for { select { case <-ctx.Done(): - log.Printf("[watchdog.monitor] Canceling current execution of group=%s; endpoint=%s", ep.Group, ep.Name) + logr.Warnf("[watchdog.monitor] Canceling current execution of group=%s; endpoint=%s; key=%s", ep.Group, ep.Name, ep.Key()) return case <-time.After(ep.Interval): execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics) @@ -68,30 +67,30 @@ func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance logr.Infof("[watchdog.execute] No connectivity; skipping execution") return } - logr.Debugf("[watchdog.execute] Monitoring group=%s; endpoint=%s", ep.Group, ep.Name) + logr.Debugf("[watchdog.execute] Monitoring group=%s; endpoint=%s; key=%s", ep.Group, ep.Name, ep.Key()) result := ep.EvaluateHealth() if enabledMetrics { metrics.PublishMetricsForEndpoint(ep, result) } UpdateEndpointStatuses(ep, result) if logr.GetThreshold() == logr.LevelDebug && !result.Success { - logr.Debugf("[watchdog.execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s; body=%s", ep.Group, ep.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond), result.Body) + logr.Debugf("[watchdog.execute] Monitored group=%s; endpoint=%s; key=%s; success=%v; errors=%d; duration=%s; body=%s", ep.Group, ep.Name, ep.Key(), result.Success, len(result.Errors), result.Duration.Round(time.Millisecond), result.Body) } else { - logr.Infof("[watchdog.execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s", ep.Group, ep.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond)) + logr.Infof("[watchdog.execute] Monitored group=%s; endpoint=%s; key=%s; success=%v; errors=%d; duration=%s", ep.Group, ep.Name, ep.Key(), result.Success, len(result.Errors), result.Duration.Round(time.Millisecond)) } if !maintenanceConfig.IsUnderMaintenance() { // TODO: Consider moving this after the monitoring lock is unlocked? I mean, how much noise can a single alerting provider cause... - HandleAlerting(ep, result, alertingConfig, logr.GetThreshold() == logr.LevelDebug) + HandleAlerting(ep, result, alertingConfig) } else { - logr.Debugf("[watchdog.execute] Not handling alerting because currently in the maintenance window") + logr.Debug("[watchdog.execute] Not handling alerting because currently in the maintenance window") } - logr.Debugf("[watchdog.execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", ep.Interval, ep.Group, ep.Name) + logr.Debugf("[watchdog.execute] Waiting for interval=%s before monitoring group=%s endpoint=%s (key=%s) again", ep.Interval, ep.Group, ep.Name, ep.Key()) } // UpdateEndpointStatuses updates the slice of endpoint statuses func UpdateEndpointStatuses(ep *endpoint.Endpoint, result *endpoint.Result) { if err := store.Get().Insert(ep, result); err != nil { - logr.Errorf("[watchdog.UpdateEndpointStatuses] Failed to insert result in storage:", err.Error()) + logr.Errorf("[watchdog.UpdateEndpointStatuses] Failed to insert result in storage: %s", err.Error()) } }