fix(logging): Replace log-level parameter by GATUS_LOG_LEVEL env var (#895)

* fix(logging): Replace log-level parameter by GATUS_LOG_LEVEL env var

* Improve log message if GATUS_LOG_LEVEL isn't set
This commit is contained in:
TwiN 2024-11-13 23:54:00 -05:00 committed by GitHub
parent 8060a77b1f
commit 01131755bc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
29 changed files with 239 additions and 256 deletions

View File

@ -1,5 +1,4 @@
metrics: true metrics: true
debug: false
ui: ui:
header: Example Company header: Example Company
link: https://example.org link: https://example.org

View File

@ -22,7 +22,7 @@ jobs:
steps: steps:
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version: 1.22.2 go-version: 1.23.3
repository: "${{ github.event.inputs.repository || 'TwiN/gatus' }}" repository: "${{ github.event.inputs.repository || 'TwiN/gatus' }}"
ref: "${{ github.event.inputs.ref || 'master' }}" ref: "${{ github.event.inputs.ref || 'master' }}"
- uses: actions/checkout@v4 - uses: actions/checkout@v4

View File

@ -18,7 +18,7 @@ jobs:
steps: steps:
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version: 1.22.2 go-version: 1.23.3
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Build binary to make sure it works - name: Build binary to make sure it works
run: go build run: go build

View File

@ -15,6 +15,8 @@ FROM scratch
COPY --from=builder /app/gatus . COPY --from=builder /app/gatus .
COPY --from=builder /app/config.yaml ./config/config.yaml COPY --from=builder /app/config.yaml ./config/config.yaml
COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
ENV PORT=8080 ENV GATUS_CONFIG_PATH=""
ENV GATUS_LOG_LEVEL="INFO"
ENV PORT="8080"
EXPOSE ${PORT} EXPOSE ${PORT}
ENTRYPOINT ["/gatus"] ENTRYPOINT ["/gatus"]

View File

@ -202,7 +202,7 @@ If `GATUS_CONFIG_PATH` points to a directory, all `*.yaml` and `*.yml` files ins
subdirectories are merged like so: subdirectories are merged like so:
- All maps/objects are deep merged (i.e. you could define `alerting.slack` in one file and `alerting.pagerduty` in another file) - All maps/objects are deep merged (i.e. you could define `alerting.slack` in one file and `alerting.pagerduty` in another file)
- All slices/arrays are appended (i.e. you can define `endpoints` in multiple files and each endpoint will be added to the final list of endpoints) - All slices/arrays are appended (i.e. you can define `endpoints` in multiple files and each endpoint will be added to the final list of endpoints)
- Parameters with a primitive value (e.g. `debug`, `metrics`, `alerting.slack.webhook-url`, etc.) may only be defined once to forcefully avoid any ambiguity - Parameters with a primitive value (e.g. `metrics`, `alerting.slack.webhook-url`, etc.) may only be defined once to forcefully avoid any ambiguity
- To clarify, this also means that you could not define `alerting.slack.webhook-url` in two files with different values. All files are merged into one before they are processed. This is by design. - To clarify, this also means that you could not define `alerting.slack.webhook-url` in two files with different values. All files are merged into one before they are processed. This is by design.
> 💡 You can also use environment variables in the configuration file (e.g. `$DOMAIN`, `${DOMAIN}`) > 💡 You can also use environment variables in the configuration file (e.g. `$DOMAIN`, `${DOMAIN}`)
@ -215,8 +215,6 @@ If you want to test it locally, see [Docker](#docker).
## Configuration ## Configuration
| Parameter | Description | Default | | Parameter | Description | Default |
|:-----------------------------|:-------------------------------------------------------------------------------------------------------------------------------------|:---------------------------| |:-----------------------------|:-------------------------------------------------------------------------------------------------------------------------------------|:---------------------------|
| `debug` | Whether to enable debug logs. | `false` |
| `log-level` | Log level: DEBUG, INFO, WARN, ERROR. | `INFO` |
| `metrics` | Whether to expose metrics at `/metrics`. | `false` | | `metrics` | Whether to expose metrics at `/metrics`. | `false` |
| `storage` | [Storage configuration](#storage). | `{}` | | `storage` | [Storage configuration](#storage). | `{}` |
| `alerting` | [Alerting configuration](#alerting). | `{}` | | `alerting` | [Alerting configuration](#alerting). | `{}` |
@ -242,6 +240,9 @@ If you want to test it locally, see [Docker](#docker).
| `ui.buttons[].link` | Link to open when the button is clicked. | Required `""` | | `ui.buttons[].link` | Link to open when the button is clicked. | Required `""` |
| `maintenance` | [Maintenance configuration](#maintenance). | `{}` | | `maintenance` | [Maintenance configuration](#maintenance). | `{}` |
If you want more verbose logging, you may set the `GATUS_LOG_LEVEL` environment variable to `DEBUG`.
Conversely, if you want less verbose logging, you can set the aforementioned environment variable to `WARN`, `ERROR` or `FATAL`.
The default value for `GATUS_LOG_LEVEL` is `INFO`.
### Endpoints ### Endpoints
Endpoints are URLs, applications, or services that you want to monitor. Each endpoint has a list of conditions that are Endpoints are URLs, applications, or services that you want to monitor. Each endpoint has a list of conditions that are

View File

@ -1,7 +1,6 @@
package alerting package alerting
import ( import (
"log"
"reflect" "reflect"
"strings" "strings"
@ -30,6 +29,7 @@ import (
"github.com/TwiN/gatus/v5/alerting/provider/telegram" "github.com/TwiN/gatus/v5/alerting/provider/telegram"
"github.com/TwiN/gatus/v5/alerting/provider/twilio" "github.com/TwiN/gatus/v5/alerting/provider/twilio"
"github.com/TwiN/gatus/v5/alerting/provider/zulip" "github.com/TwiN/gatus/v5/alerting/provider/zulip"
"github.com/TwiN/logr"
) )
// Config is the configuration for alerting providers // Config is the configuration for alerting providers
@ -118,7 +118,7 @@ func (config *Config) GetAlertingProviderByAlertType(alertType alert.Type) provi
return fieldValue.Interface().(provider.AlertProvider) return fieldValue.Interface().(provider.AlertProvider)
} }
} }
log.Printf("[alerting.GetAlertingProviderByAlertType] No alerting provider found for alert type %s", alertType) logr.Infof("[alerting.GetAlertingProviderByAlertType] No alerting provider found for alert type %s", alertType)
return nil return nil
} }

View File

@ -5,12 +5,12 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
"log"
"net/http" "net/http"
"github.com/TwiN/gatus/v5/alerting/alert" "github.com/TwiN/gatus/v5/alerting/alert"
"github.com/TwiN/gatus/v5/client" "github.com/TwiN/gatus/v5/client"
"github.com/TwiN/gatus/v5/config/endpoint" "github.com/TwiN/gatus/v5/config/endpoint"
"github.com/TwiN/logr"
) )
const ( const (
@ -74,11 +74,10 @@ func (provider *AlertProvider) Send(ep *endpoint.Endpoint, alert *alert.Alert, r
alert.ResolveKey = "" alert.ResolveKey = ""
} else { } else {
// We need to retrieve the resolve key from the response // We need to retrieve the resolve key from the response
body, err := io.ReadAll(response.Body)
var payload pagerDutyResponsePayload var payload pagerDutyResponsePayload
if err = json.Unmarshal(body, &payload); err != nil { if err = json.NewDecoder(response.Body).Decode(&payload); err != nil {
// Silently fail. We don't want to create tons of alerts just because we failed to parse the body. // Silently fail. We don't want to create tons of alerts just because we failed to parse the body.
log.Printf("[pagerduty.Send] Ran into error unmarshaling pagerduty response: %s", err.Error()) logr.Errorf("[pagerduty.Send] Ran into error decoding pagerduty response: %s", err.Error())
} else { } else {
alert.ResolveKey = payload.DedupKey alert.ResolveKey = payload.DedupKey
} }

View File

@ -2,7 +2,6 @@ package api
import ( import (
"io/fs" "io/fs"
"log"
"net/http" "net/http"
"os" "os"
@ -10,6 +9,7 @@ import (
"github.com/TwiN/gatus/v5/config/web" "github.com/TwiN/gatus/v5/config/web"
static "github.com/TwiN/gatus/v5/web" static "github.com/TwiN/gatus/v5/web"
"github.com/TwiN/health" "github.com/TwiN/health"
"github.com/TwiN/logr"
fiber "github.com/gofiber/fiber/v2" fiber "github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/adaptor" "github.com/gofiber/fiber/v2/middleware/adaptor"
"github.com/gofiber/fiber/v2/middleware/compress" "github.com/gofiber/fiber/v2/middleware/compress"
@ -28,7 +28,7 @@ type API struct {
func New(cfg *config.Config) *API { func New(cfg *config.Config) *API {
api := &API{} api := &API{}
if cfg.Web == nil { if cfg.Web == nil {
log.Println("[api.New] nil web config passed as parameter. This should only happen in tests. Using default web configuration") logr.Warnf("[api.New] nil web config passed as parameter. This should only happen in tests. Using default web configuration")
cfg.Web = web.GetDefaultConfig() cfg.Web = web.GetDefaultConfig()
} }
api.router = api.createRouter(cfg) api.router = api.createRouter(cfg)
@ -42,7 +42,7 @@ func (a *API) Router() *fiber.App {
func (a *API) createRouter(cfg *config.Config) *fiber.App { func (a *API) createRouter(cfg *config.Config) *fiber.App {
app := fiber.New(fiber.Config{ app := fiber.New(fiber.Config{
ErrorHandler: func(c *fiber.Ctx, err error) error { ErrorHandler: func(c *fiber.Ctx, err error) error {
log.Printf("[api.ErrorHandler] %s", err.Error()) logr.Errorf("[api.ErrorHandler] %s", err.Error())
return fiber.DefaultErrorHandler(c, err) return fiber.DefaultErrorHandler(c, err)
}, },
ReadBufferSize: cfg.Web.ReadBufferSize, ReadBufferSize: cfg.Web.ReadBufferSize,

View File

@ -2,7 +2,6 @@ package api
import ( import (
"errors" "errors"
"log"
"math" "math"
"net/http" "net/http"
"sort" "sort"
@ -10,6 +9,7 @@ import (
"github.com/TwiN/gatus/v5/storage/store" "github.com/TwiN/gatus/v5/storage/store"
"github.com/TwiN/gatus/v5/storage/store/common" "github.com/TwiN/gatus/v5/storage/store/common"
"github.com/TwiN/logr"
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
"github.com/wcharczuk/go-chart/v2" "github.com/wcharczuk/go-chart/v2"
"github.com/wcharczuk/go-chart/v2/drawing" "github.com/wcharczuk/go-chart/v2/drawing"
@ -116,7 +116,7 @@ func ResponseTimeChart(c *fiber.Ctx) error {
c.Set("Expires", "0") c.Set("Expires", "0")
c.Status(http.StatusOK) c.Status(http.StatusOK)
if err := graph.Render(chart.SVG, c); err != nil { if err := graph.Render(chart.SVG, c); err != nil {
log.Println("[api.ResponseTimeChart] Failed to render response time chart:", err.Error()) logr.Errorf("[api.ResponseTimeChart] Failed to render response time chart: %s", err.Error())
return c.Status(500).SendString(err.Error()) return c.Status(500).SendString(err.Error())
} }
return nil return nil

View File

@ -4,7 +4,6 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"log"
"github.com/TwiN/gatus/v5/client" "github.com/TwiN/gatus/v5/client"
"github.com/TwiN/gatus/v5/config" "github.com/TwiN/gatus/v5/config"
@ -13,6 +12,7 @@ import (
"github.com/TwiN/gatus/v5/storage/store" "github.com/TwiN/gatus/v5/storage/store"
"github.com/TwiN/gatus/v5/storage/store/common" "github.com/TwiN/gatus/v5/storage/store/common"
"github.com/TwiN/gatus/v5/storage/store/common/paging" "github.com/TwiN/gatus/v5/storage/store/common/paging"
"github.com/TwiN/logr"
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
) )
@ -26,19 +26,19 @@ func EndpointStatuses(cfg *config.Config) fiber.Handler {
if !exists { if !exists {
endpointStatuses, err := store.Get().GetAllEndpointStatuses(paging.NewEndpointStatusParams().WithResults(page, pageSize)) endpointStatuses, err := store.Get().GetAllEndpointStatuses(paging.NewEndpointStatusParams().WithResults(page, pageSize))
if err != nil { if err != nil {
log.Printf("[api.EndpointStatuses] Failed to retrieve endpoint statuses: %s", err.Error()) logr.Errorf("[api.EndpointStatuses] Failed to retrieve endpoint statuses: %s", err.Error())
return c.Status(500).SendString(err.Error()) return c.Status(500).SendString(err.Error())
} }
// ALPHA: Retrieve endpoint statuses from remote instances // ALPHA: Retrieve endpoint statuses from remote instances
if endpointStatusesFromRemote, err := getEndpointStatusesFromRemoteInstances(cfg.Remote); err != nil { if endpointStatusesFromRemote, err := getEndpointStatusesFromRemoteInstances(cfg.Remote); err != nil {
log.Printf("[handler.EndpointStatuses] Silently failed to retrieve endpoint statuses from remote: %s", err.Error()) logr.Errorf("[handler.EndpointStatuses] Silently failed to retrieve endpoint statuses from remote: %s", err.Error())
} else if endpointStatusesFromRemote != nil { } else if endpointStatusesFromRemote != nil {
endpointStatuses = append(endpointStatuses, endpointStatusesFromRemote...) endpointStatuses = append(endpointStatuses, endpointStatusesFromRemote...)
} }
// Marshal endpoint statuses to JSON // Marshal endpoint statuses to JSON
data, err = json.Marshal(endpointStatuses) data, err = json.Marshal(endpointStatuses)
if err != nil { if err != nil {
log.Printf("[api.EndpointStatuses] Unable to marshal object to JSON: %s", err.Error()) logr.Errorf("[api.EndpointStatuses] Unable to marshal object to JSON: %s", err.Error())
return c.Status(500).SendString("unable to marshal object to JSON") return c.Status(500).SendString("unable to marshal object to JSON")
} }
cache.SetWithTTL(fmt.Sprintf("endpoint-status-%d-%d", page, pageSize), data, cacheTTL) cache.SetWithTTL(fmt.Sprintf("endpoint-status-%d-%d", page, pageSize), data, cacheTTL)
@ -64,7 +64,7 @@ func getEndpointStatusesFromRemoteInstances(remoteConfig *remote.Config) ([]*end
var endpointStatuses []*endpoint.Status var endpointStatuses []*endpoint.Status
if err = json.NewDecoder(response.Body).Decode(&endpointStatuses); err != nil { if err = json.NewDecoder(response.Body).Decode(&endpointStatuses); err != nil {
_ = response.Body.Close() _ = response.Body.Close()
log.Printf("[api.getEndpointStatusesFromRemoteInstances] Silently failed to retrieve endpoint statuses from %s: %s", instance.URL, err.Error()) logr.Errorf("[api.getEndpointStatusesFromRemoteInstances] Silently failed to retrieve endpoint statuses from %s: %s", instance.URL, err.Error())
continue continue
} }
_ = response.Body.Close() _ = response.Body.Close()
@ -84,16 +84,16 @@ func EndpointStatus(c *fiber.Ctx) error {
if errors.Is(err, common.ErrEndpointNotFound) { if errors.Is(err, common.ErrEndpointNotFound) {
return c.Status(404).SendString(err.Error()) return c.Status(404).SendString(err.Error())
} }
log.Printf("[api.EndpointStatus] Failed to retrieve endpoint status: %s", err.Error()) logr.Errorf("[api.EndpointStatus] Failed to retrieve endpoint status: %s", err.Error())
return c.Status(500).SendString(err.Error()) return c.Status(500).SendString(err.Error())
} }
if endpointStatus == nil { // XXX: is this check necessary? if endpointStatus == nil { // XXX: is this check necessary?
log.Printf("[api.EndpointStatus] Endpoint with key=%s not found", c.Params("key")) logr.Errorf("[api.EndpointStatus] Endpoint with key=%s not found", c.Params("key"))
return c.Status(404).SendString("not found") return c.Status(404).SendString("not found")
} }
output, err := json.Marshal(endpointStatus) output, err := json.Marshal(endpointStatus)
if err != nil { if err != nil {
log.Printf("[api.EndpointStatus] Unable to marshal object to JSON: %s", err.Error()) logr.Errorf("[api.EndpointStatus] Unable to marshal object to JSON: %s", err.Error())
return c.Status(500).SendString("unable to marshal object to JSON") return c.Status(500).SendString("unable to marshal object to JSON")
} }
c.Set("Content-Type", "application/json") c.Set("Content-Type", "application/json")

View File

@ -2,7 +2,6 @@ package api
import ( import (
"errors" "errors"
"log"
"strings" "strings"
"time" "time"
@ -11,6 +10,7 @@ import (
"github.com/TwiN/gatus/v5/storage/store" "github.com/TwiN/gatus/v5/storage/store"
"github.com/TwiN/gatus/v5/storage/store/common" "github.com/TwiN/gatus/v5/storage/store/common"
"github.com/TwiN/gatus/v5/watchdog" "github.com/TwiN/gatus/v5/watchdog"
"github.com/TwiN/logr"
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
) )
@ -33,11 +33,11 @@ func CreateExternalEndpointResult(cfg *config.Config) fiber.Handler {
key := c.Params("key") key := c.Params("key")
externalEndpoint := cfg.GetExternalEndpointByKey(key) externalEndpoint := cfg.GetExternalEndpointByKey(key)
if externalEndpoint == nil { if externalEndpoint == nil {
log.Printf("[api.CreateExternalEndpointResult] External endpoint with key=%s not found", key) logr.Errorf("[api.CreateExternalEndpointResult] External endpoint with key=%s not found", key)
return c.Status(404).SendString("not found") return c.Status(404).SendString("not found")
} }
if externalEndpoint.Token != token { if externalEndpoint.Token != token {
log.Printf("[api.CreateExternalEndpointResult] Invalid token for external endpoint with key=%s", key) logr.Errorf("[api.CreateExternalEndpointResult] Invalid token for external endpoint with key=%s", key)
return c.Status(401).SendString("invalid token") return c.Status(401).SendString("invalid token")
} }
// Persist the result in the storage // Persist the result in the storage
@ -54,13 +54,13 @@ func CreateExternalEndpointResult(cfg *config.Config) fiber.Handler {
if errors.Is(err, common.ErrEndpointNotFound) { if errors.Is(err, common.ErrEndpointNotFound) {
return c.Status(404).SendString(err.Error()) return c.Status(404).SendString(err.Error())
} }
log.Printf("[api.CreateExternalEndpointResult] Failed to insert result in storage: %s", err.Error()) logr.Errorf("[api.CreateExternalEndpointResult] Failed to insert result in storage: %s", err.Error())
return c.Status(500).SendString(err.Error()) return c.Status(500).SendString(err.Error())
} }
log.Printf("[api.CreateExternalEndpointResult] Successfully inserted result for external endpoint with key=%s and success=%s", c.Params("key"), success) logr.Infof("[api.CreateExternalEndpointResult] Successfully inserted result for external endpoint with key=%s and success=%s", c.Params("key"), success)
// Check if an alert should be triggered or resolved // Check if an alert should be triggered or resolved
if !cfg.Maintenance.IsUnderMaintenance() { if !cfg.Maintenance.IsUnderMaintenance() {
watchdog.HandleAlerting(convertedEndpoint, result, cfg.Alerting, cfg.Debug) watchdog.HandleAlerting(convertedEndpoint, result, cfg.Alerting)
externalEndpoint.NumberOfSuccessesInARow = convertedEndpoint.NumberOfSuccessesInARow externalEndpoint.NumberOfSuccessesInARow = convertedEndpoint.NumberOfSuccessesInARow
externalEndpoint.NumberOfFailuresInARow = convertedEndpoint.NumberOfFailuresInARow externalEndpoint.NumberOfFailuresInARow = convertedEndpoint.NumberOfFailuresInARow
} }

View File

@ -3,10 +3,10 @@ package api
import ( import (
_ "embed" _ "embed"
"html/template" "html/template"
"log"
"github.com/TwiN/gatus/v5/config/ui" "github.com/TwiN/gatus/v5/config/ui"
static "github.com/TwiN/gatus/v5/web" static "github.com/TwiN/gatus/v5/web"
"github.com/TwiN/logr"
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
) )
@ -15,14 +15,14 @@ func SinglePageApplication(ui *ui.Config) fiber.Handler {
t, err := template.ParseFS(static.FileSystem, static.IndexPath) t, err := template.ParseFS(static.FileSystem, static.IndexPath)
if err != nil { if err != nil {
// This should never happen, because ui.ValidateAndSetDefaults validates that the template works. // This should never happen, because ui.ValidateAndSetDefaults validates that the template works.
log.Println("[api.SinglePageApplication] Failed to parse template. This should never happen, because the template is validated on start. Error:", err.Error()) logr.Errorf("[api.SinglePageApplication] Failed to parse template. This should never happen, because the template is validated on start. Error: %s", err.Error())
return c.Status(500).SendString("Failed to parse template. This should never happen, because the template is validated on start.") return c.Status(500).SendString("Failed to parse template. This should never happen, because the template is validated on start.")
} }
c.Set("Content-Type", "text/html") c.Set("Content-Type", "text/html")
err = t.Execute(c, ui) err = t.Execute(c, ui)
if err != nil { if err != nil {
// This should never happen, because ui.ValidateAndSetDefaults validates that the template works. // This should never happen, because ui.ValidateAndSetDefaults validates that the template works.
log.Println("[api.SinglePageApplication] Failed to execute template. This should never happen, because the template is validated on start. Error:", err.Error()) logr.Errorf("[api.SinglePageApplication] Failed to execute template. This should never happen, because the template is validated on start. Error: %s", err.Error())
return c.Status(500).SendString("Failed to parse template. This should never happen, because the template is validated on start.") return c.Status(500).SendString("Failed to parse template. This should never happen, because the template is validated on start.")
} }
return c.SendStatus(200) return c.SendStatus(200)

View File

@ -110,7 +110,6 @@ func CanCreateSCTPConnection(address string, config *Config) bool {
_ = conn.Close() _ = conn.Close()
res <- true res <- true
})(ch) })(ch)
select { select {
case result := <-ch: case result := <-ch:
return result return result
@ -182,7 +181,6 @@ func CanCreateSSHConnection(address, username, password string, config *Config)
} else { } else {
port = "22" port = "22"
} }
cli, err := ssh.Dial("tcp", strings.Join([]string{address, port}, ":"), &ssh.ClientConfig{ cli, err := ssh.Dial("tcp", strings.Join([]string{address, port}, ":"), &ssh.ClientConfig{
HostKeyCallback: ssh.InsecureIgnoreHostKey(), HostKeyCallback: ssh.InsecureIgnoreHostKey(),
User: username, User: username,
@ -194,7 +192,6 @@ func CanCreateSSHConnection(address, username, password string, config *Config)
if err != nil { if err != nil {
return false, nil, err return false, nil, err
} }
return true, cli, nil return true, cli, nil
} }
@ -203,37 +200,29 @@ func ExecuteSSHCommand(sshClient *ssh.Client, body string, config *Config) (bool
type Body struct { type Body struct {
Command string `json:"command"` Command string `json:"command"`
} }
defer sshClient.Close() defer sshClient.Close()
var b Body var b Body
if err := json.Unmarshal([]byte(body), &b); err != nil { if err := json.Unmarshal([]byte(body), &b); err != nil {
return false, 0, err return false, 0, err
} }
sess, err := sshClient.NewSession() sess, err := sshClient.NewSession()
if err != nil { if err != nil {
return false, 0, err return false, 0, err
} }
err = sess.Start(b.Command) err = sess.Start(b.Command)
if err != nil { if err != nil {
return false, 0, err return false, 0, err
} }
defer sess.Close() defer sess.Close()
err = sess.Wait() err = sess.Wait()
if err == nil { if err == nil {
return true, 0, nil return true, 0, nil
} }
var exitErr *ssh.ExitError
e, ok := err.(*ssh.ExitError) if ok := errors.As(err, &exitErr); !ok {
if !ok {
return false, 0, err return false, 0, err
} }
return true, exitErr.ExitStatus(), nil
return true, e.ExitStatus(), nil
} }
// Ping checks if an address can be pinged and returns the round-trip time if the address can be pinged // Ping checks if an address can be pinged and returns the round-trip time if the address can be pinged

View File

@ -4,7 +4,6 @@ import (
"context" "context"
"crypto/tls" "crypto/tls"
"errors" "errors"
"log"
"net" "net"
"net/http" "net/http"
"net/url" "net/url"
@ -12,6 +11,7 @@ import (
"strconv" "strconv"
"time" "time"
"github.com/TwiN/logr"
"golang.org/x/oauth2" "golang.org/x/oauth2"
"golang.org/x/oauth2/clientcredentials" "golang.org/x/oauth2/clientcredentials"
"google.golang.org/api/idtoken" "google.golang.org/api/idtoken"
@ -232,7 +232,7 @@ func (c *Config) getHTTPClient() *http.Client {
if c.ProxyURL != "" { if c.ProxyURL != "" {
proxyURL, err := url.Parse(c.ProxyURL) proxyURL, err := url.Parse(c.ProxyURL)
if err != nil { if err != nil {
log.Println("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring custom proxy due to error:", err.Error()) logr.Errorf("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring custom proxy due to error: %s", err.Error())
} else { } else {
c.httpClient.Transport.(*http.Transport).Proxy = http.ProxyURL(proxyURL) c.httpClient.Transport.(*http.Transport).Proxy = http.ProxyURL(proxyURL)
} }
@ -242,7 +242,7 @@ func (c *Config) getHTTPClient() *http.Client {
if err != nil { if err != nil {
// We're ignoring the error, because it should have been validated on startup ValidateAndSetDefaults. // We're ignoring the error, because it should have been validated on startup ValidateAndSetDefaults.
// It shouldn't happen, but if it does, we'll log it... Better safe than sorry ;) // It shouldn't happen, but if it does, we'll log it... Better safe than sorry ;)
log.Println("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring invalid DNS resolver due to error:", err.Error()) logr.Errorf("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring invalid DNS resolver due to error: %s", err.Error())
} else { } else {
dialer := &net.Dialer{ dialer := &net.Dialer{
Resolver: &net.Resolver{ Resolver: &net.Resolver{
@ -259,7 +259,7 @@ func (c *Config) getHTTPClient() *http.Client {
} }
} }
if c.HasOAuth2Config() && c.HasIAPConfig() { if c.HasOAuth2Config() && c.HasIAPConfig() {
log.Println("[client.getHTTPClient] Error: Both Identity-Aware-Proxy and Oauth2 configuration are present.") logr.Errorf("[client.getHTTPClient] Error: Both Identity-Aware-Proxy and Oauth2 configuration are present.")
} else if c.HasOAuth2Config() { } else if c.HasOAuth2Config() {
c.httpClient = configureOAuth2(c.httpClient, *c.OAuth2Config) c.httpClient = configureOAuth2(c.httpClient, *c.OAuth2Config)
} else if c.HasIAPConfig() { } else if c.HasIAPConfig() {
@ -269,23 +269,22 @@ func (c *Config) getHTTPClient() *http.Client {
return c.httpClient return c.httpClient
} }
// validateIAPToken returns a boolean that will define if the google identity-aware-proxy token can be fetch // validateIAPToken returns a boolean that will define if the Google identity-aware-proxy token can be fetched
// and if is it valid. // and if is it valid.
func validateIAPToken(ctx context.Context, c IAPConfig) bool { func validateIAPToken(ctx context.Context, c IAPConfig) bool {
ts, err := idtoken.NewTokenSource(ctx, c.Audience) ts, err := idtoken.NewTokenSource(ctx, c.Audience)
if err != nil { if err != nil {
log.Println("[client.ValidateIAPToken] Claiming Identity token failed. error:", err.Error()) logr.Errorf("[client.ValidateIAPToken] Claiming Identity token failed: %s", err.Error())
return false return false
} }
tok, err := ts.Token() tok, err := ts.Token()
if err != nil { if err != nil {
log.Println("[client.ValidateIAPToken] Get Identity-Aware-Proxy token failed. error:", err.Error()) logr.Errorf("[client.ValidateIAPToken] Get Identity-Aware-Proxy token failed: %s", err.Error())
return false return false
} }
payload, err := idtoken.Validate(ctx, tok.AccessToken, c.Audience) _, err = idtoken.Validate(ctx, tok.AccessToken, c.Audience)
_ = payload
if err != nil { if err != nil {
log.Println("[client.ValidateIAPToken] Token Validation failed. error:", err.Error()) logr.Errorf("[client.ValidateIAPToken] Token Validation failed: %s", err.Error())
return false return false
} }
return true return true
@ -298,7 +297,7 @@ func configureIAP(httpClient *http.Client, c IAPConfig) *http.Client {
if validateIAPToken(ctx, c) { if validateIAPToken(ctx, c) {
ts, err := idtoken.NewTokenSource(ctx, c.Audience) ts, err := idtoken.NewTokenSource(ctx, c.Audience)
if err != nil { if err != nil {
log.Println("[client.ConfigureIAP] Claiming Token Source failed. error:", err.Error()) logr.Errorf("[client.configureIAP] Claiming Token Source failed: %s", err.Error())
return httpClient return httpClient
} }
client := oauth2.NewClient(ctx, ts) client := oauth2.NewClient(ctx, ts)
@ -327,17 +326,17 @@ func configureOAuth2(httpClient *http.Client, c OAuth2Config) *http.Client {
func configureTLS(tlsConfig *tls.Config, c TLSConfig) *tls.Config { func configureTLS(tlsConfig *tls.Config, c TLSConfig) *tls.Config {
clientTLSCert, err := tls.LoadX509KeyPair(c.CertificateFile, c.PrivateKeyFile) clientTLSCert, err := tls.LoadX509KeyPair(c.CertificateFile, c.PrivateKeyFile)
if err != nil { if err != nil {
logr.Errorf("[client.configureTLS] Failed to load certificate: %s", err.Error())
return nil return nil
} }
tlsConfig.Certificates = []tls.Certificate{clientTLSCert} tlsConfig.Certificates = []tls.Certificate{clientTLSCert}
tlsConfig.Renegotiation = tls.RenegotiateNever tlsConfig.Renegotiation = tls.RenegotiateNever
renegotiationSupport := map[string]tls.RenegotiationSupport{
renegotionSupport := map[string]tls.RenegotiationSupport{
"once": tls.RenegotiateOnceAsClient, "once": tls.RenegotiateOnceAsClient,
"freely": tls.RenegotiateFreelyAsClient, "freely": tls.RenegotiateFreelyAsClient,
"never": tls.RenegotiateNever, "never": tls.RenegotiateNever,
} }
if val, ok := renegotionSupport[c.RenegotiationSupport]; ok { if val, ok := renegotiationSupport[c.RenegotiationSupport]; ok {
tlsConfig.Renegotiation = val tlsConfig.Renegotiation = val
} }
return tlsConfig return tlsConfig

View File

@ -4,7 +4,6 @@ import (
"errors" "errors"
"fmt" "fmt"
"io/fs" "io/fs"
"log"
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
@ -53,11 +52,9 @@ var (
// Config is the main configuration structure // Config is the main configuration structure
type Config struct { type Config struct {
// Debug Whether to enable debug logs // Debug Whether to enable debug logs
// Deprecated: Use the GATUS_LOG_LEVEL environment variable instead
Debug bool `yaml:"debug,omitempty"` Debug bool `yaml:"debug,omitempty"`
// LogLevel is one of DEBUG, INFO, WARN and ERROR. Defaults to INFO
LogLevel logr.Level `yaml:"log-level,omitempty"`
// Metrics Whether to expose metrics at /metrics // Metrics Whether to expose metrics at /metrics
Metrics bool `yaml:"metrics,omitempty"` Metrics bool `yaml:"metrics,omitempty"`
@ -176,13 +173,11 @@ func LoadConfiguration(configPath string) (*Config, error) {
if fileInfo.IsDir() { if fileInfo.IsDir() {
err := walkConfigDir(configPath, func(path string, d fs.DirEntry, err error) error { err := walkConfigDir(configPath, func(path string, d fs.DirEntry, err error) error {
if err != nil { if err != nil {
log.Printf("[config.LoadConfiguration] Error walking path=%s: %s", path, err) return fmt.Errorf("error walking path %s: %w", path, err)
return err
} }
log.Printf("[config.LoadConfiguration] Reading configuration from %s", path) logr.Infof("[config.LoadConfiguration] Reading configuration from %s", path)
data, err := os.ReadFile(path) data, err := os.ReadFile(path)
if err != nil { if err != nil {
log.Printf("[config.LoadConfiguration] Error reading configuration from %s: %s", path, err)
return fmt.Errorf("error reading configuration from file %s: %w", path, err) return fmt.Errorf("error reading configuration from file %s: %w", path, err)
} }
configBytes, err = deepmerge.YAML(configBytes, data) configBytes, err = deepmerge.YAML(configBytes, data)
@ -192,9 +187,9 @@ func LoadConfiguration(configPath string) (*Config, error) {
return nil, fmt.Errorf("error reading configuration from directory %s: %w", usedConfigPath, err) return nil, fmt.Errorf("error reading configuration from directory %s: %w", usedConfigPath, err)
} }
} else { } else {
log.Printf("[config.LoadConfiguration] Reading configuration from configFile=%s", usedConfigPath) logr.Infof("[config.LoadConfiguration] Reading configuration from configFile=%s", usedConfigPath)
if data, err := os.ReadFile(usedConfigPath); err != nil { if data, err := os.ReadFile(usedConfigPath); err != nil {
return nil, err return nil, fmt.Errorf("error reading configuration from directory %s: %w", usedConfigPath, err)
} else { } else {
configBytes = data configBytes = data
} }
@ -204,11 +199,11 @@ func LoadConfiguration(configPath string) (*Config, error) {
} }
config, err := parseAndValidateConfigBytes(configBytes) config, err := parseAndValidateConfigBytes(configBytes)
if err != nil { if err != nil {
return nil, err return nil, fmt.Errorf("error parsing config: %w", err)
} }
config.configPath = usedConfigPath config.configPath = usedConfigPath
config.UpdateLastFileModTime() config.UpdateLastFileModTime()
return config, err return config, nil
} }
// walkConfigDir is a wrapper for filepath.WalkDir that strips directories and non-config files // walkConfigDir is a wrapper for filepath.WalkDir that strips directories and non-config files
@ -249,7 +244,13 @@ func parseAndValidateConfigBytes(yamlBytes []byte) (config *Config, err error) {
if config == nil || config.Endpoints == nil || len(config.Endpoints) == 0 { if config == nil || config.Endpoints == nil || len(config.Endpoints) == 0 {
err = ErrNoEndpointInConfig err = ErrNoEndpointInConfig
} else { } else {
validateAlertingConfig(config.Alerting, config.Endpoints, config.ExternalEndpoints, config.Debug) // XXX: Remove this in v6.0.0
if config.Debug {
logr.Warn("WARNING: The 'debug' configuration has been deprecated and will be removed in v6.0.0")
logr.Warn("WARNING: Please use the GATUS_LOG_LEVEL environment variable instead")
}
// XXX: End of v6.0.0 removals
validateAlertingConfig(config.Alerting, config.Endpoints, config.ExternalEndpoints)
if err := validateSecurityConfig(config); err != nil { if err := validateSecurityConfig(config); err != nil {
return nil, err return nil, err
} }
@ -342,9 +343,7 @@ func validateEndpointsConfig(config *Config) error {
duplicateValidationMap := make(map[string]bool) duplicateValidationMap := make(map[string]bool)
// Validate endpoints // Validate endpoints
for _, ep := range config.Endpoints { for _, ep := range config.Endpoints {
if config.Debug { logr.Debugf("[config.validateEndpointsConfig] Validating endpoint with key %s", ep.Key())
log.Printf("[config.validateEndpointsConfig] Validating endpoint '%s'", ep.Name)
}
if endpointKey := ep.Key(); duplicateValidationMap[endpointKey] { if endpointKey := ep.Key(); duplicateValidationMap[endpointKey] {
return fmt.Errorf("invalid endpoint %s: name and group combination must be unique", ep.Key()) return fmt.Errorf("invalid endpoint %s: name and group combination must be unique", ep.Key())
} else { } else {
@ -354,12 +353,10 @@ func validateEndpointsConfig(config *Config) error {
return fmt.Errorf("invalid endpoint %s: %w", ep.Key(), err) return fmt.Errorf("invalid endpoint %s: %w", ep.Key(), err)
} }
} }
log.Printf("[config.validateEndpointsConfig] Validated %d endpoints", len(config.Endpoints)) logr.Infof("[config.validateEndpointsConfig] Validated %d endpoints", len(config.Endpoints))
// Validate external endpoints // Validate external endpoints
for _, ee := range config.ExternalEndpoints { for _, ee := range config.ExternalEndpoints {
if config.Debug { logr.Debugf("[config.validateEndpointsConfig] Validating external endpoint '%s'", ee.Name)
log.Printf("[config.validateEndpointsConfig] Validating external endpoint '%s'", ee.Name)
}
if endpointKey := ee.Key(); duplicateValidationMap[endpointKey] { if endpointKey := ee.Key(); duplicateValidationMap[endpointKey] {
return fmt.Errorf("invalid external endpoint %s: name and group combination must be unique", ee.Key()) return fmt.Errorf("invalid external endpoint %s: name and group combination must be unique", ee.Key())
} else { } else {
@ -369,16 +366,14 @@ func validateEndpointsConfig(config *Config) error {
return fmt.Errorf("invalid external endpoint %s: %w", ee.Key(), err) return fmt.Errorf("invalid external endpoint %s: %w", ee.Key(), err)
} }
} }
log.Printf("[config.validateEndpointsConfig] Validated %d external endpoints", len(config.ExternalEndpoints)) logr.Infof("[config.validateEndpointsConfig] Validated %d external endpoints", len(config.ExternalEndpoints))
return nil return nil
} }
func validateSecurityConfig(config *Config) error { func validateSecurityConfig(config *Config) error {
if config.Security != nil { if config.Security != nil {
if config.Security.IsValid() { if config.Security.IsValid() {
if config.Debug { logr.Debug("[config.validateSecurityConfig] Basic security configuration has been validated")
log.Printf("[config.validateSecurityConfig] Basic security configuration has been validated")
}
} else { } else {
// If there was an attempt to configure security, then it must mean that some confidential or private // If there was an attempt to configure security, then it must mean that some confidential or private
// data are exposed. As a result, we'll force a panic because it's better to be safe than sorry. // data are exposed. As a result, we'll force a panic because it's better to be safe than sorry.
@ -392,9 +387,9 @@ func validateSecurityConfig(config *Config) error {
// Note that the alerting configuration has to be validated before the endpoint configuration, because the default alert // Note that the alerting configuration has to be validated before the endpoint configuration, because the default alert
// returned by provider.AlertProvider.GetDefaultAlert() must be parsed before endpoint.Endpoint.ValidateAndSetDefaults() // returned by provider.AlertProvider.GetDefaultAlert() must be parsed before endpoint.Endpoint.ValidateAndSetDefaults()
// sets the default alert values when none are set. // sets the default alert values when none are set.
func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoint.Endpoint, externalEndpoints []*endpoint.ExternalEndpoint, debug bool) { func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoint.Endpoint, externalEndpoints []*endpoint.ExternalEndpoint) {
if alertingConfig == nil { if alertingConfig == nil {
log.Printf("[config.validateAlertingConfig] Alerting is not configured") logr.Info("[config.validateAlertingConfig] Alerting is not configured")
return return
} }
alertTypes := []alert.Type{ alertTypes := []alert.Type{
@ -432,9 +427,7 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
for _, ep := range endpoints { for _, ep := range endpoints {
for alertIndex, endpointAlert := range ep.Alerts { for alertIndex, endpointAlert := range ep.Alerts {
if alertType == endpointAlert.Type { if alertType == endpointAlert.Type {
if debug { logr.Debugf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ep.Key())
log.Printf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ep.Key())
}
provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), endpointAlert) provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), endpointAlert)
} }
} }
@ -442,9 +435,7 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
for _, ee := range externalEndpoints { for _, ee := range externalEndpoints {
for alertIndex, endpointAlert := range ee.Alerts { for alertIndex, endpointAlert := range ee.Alerts {
if alertType == endpointAlert.Type { if alertType == endpointAlert.Type {
if debug { logr.Debugf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ee.Key())
log.Printf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ee.Key())
}
provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), endpointAlert) provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), endpointAlert)
} }
} }
@ -452,7 +443,7 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
} }
validProviders = append(validProviders, alertType) validProviders = append(validProviders, alertType)
} else { } else {
log.Printf("[config.validateAlertingConfig] Ignoring provider=%s because configuration is invalid", alertType) logr.Warnf("[config.validateAlertingConfig] Ignoring provider=%s because configuration is invalid", alertType)
invalidProviders = append(invalidProviders, alertType) invalidProviders = append(invalidProviders, alertType)
alertingConfig.SetAlertingProviderToNil(alertProvider) alertingConfig.SetAlertingProviderToNil(alertProvider)
} }
@ -460,5 +451,5 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
invalidProviders = append(invalidProviders, alertType) invalidProviders = append(invalidProviders, alertType)
} }
} }
log.Printf("[config.validateAlertingConfig] configuredProviders=%s; ignoredProviders=%s", validProviders, invalidProviders) logr.Infof("[config.validateAlertingConfig] configuredProviders=%s; ignoredProviders=%s", validProviders, invalidProviders)
} }

View File

@ -27,6 +27,7 @@ import (
"github.com/TwiN/gatus/v5/alerting/provider/pushover" "github.com/TwiN/gatus/v5/alerting/provider/pushover"
"github.com/TwiN/gatus/v5/alerting/provider/slack" "github.com/TwiN/gatus/v5/alerting/provider/slack"
"github.com/TwiN/gatus/v5/alerting/provider/teams" "github.com/TwiN/gatus/v5/alerting/provider/teams"
"github.com/TwiN/gatus/v5/alerting/provider/teamsworkflows"
"github.com/TwiN/gatus/v5/alerting/provider/telegram" "github.com/TwiN/gatus/v5/alerting/provider/telegram"
"github.com/TwiN/gatus/v5/alerting/provider/twilio" "github.com/TwiN/gatus/v5/alerting/provider/twilio"
"github.com/TwiN/gatus/v5/client" "github.com/TwiN/gatus/v5/client"
@ -177,7 +178,6 @@ endpoints:
conditions: conditions:
- "[STATUS] == 200"`, - "[STATUS] == 200"`,
"b.yaml": ` "b.yaml": `
debug: true
alerting: alerting:
discord: discord:
@ -196,7 +196,6 @@ endpoints:
- "[STATUS] == 200"`, - "[STATUS] == 200"`,
}, },
expectedConfig: &Config{ expectedConfig: &Config{
Debug: true,
Metrics: true, Metrics: true,
Alerting: &alerting.Config{ Alerting: &alerting.Config{
Discord: &discord.AlertProvider{WebhookURL: "https://discord.com/api/webhooks/xxx/yyy"}, Discord: &discord.AlertProvider{WebhookURL: "https://discord.com/api/webhooks/xxx/yyy"},
@ -719,7 +718,6 @@ badconfig:
func TestParseAndValidateConfigBytesWithAlerting(t *testing.T) { func TestParseAndValidateConfigBytesWithAlerting(t *testing.T) {
config, err := parseAndValidateConfigBytes([]byte(` config, err := parseAndValidateConfigBytes([]byte(`
debug: true
alerting: alerting:
slack: slack:
webhook-url: "http://example.com" webhook-url: "http://example.com"
@ -919,8 +917,6 @@ endpoints:
func TestParseAndValidateConfigBytesWithAlertingAndDefaultAlert(t *testing.T) { func TestParseAndValidateConfigBytesWithAlertingAndDefaultAlert(t *testing.T) {
config, err := parseAndValidateConfigBytes([]byte(` config, err := parseAndValidateConfigBytes([]byte(`
debug: true
alerting: alerting:
slack: slack:
webhook-url: "http://example.com" webhook-url: "http://example.com"
@ -1493,6 +1489,7 @@ endpoints:
t.Fatal("PagerDuty alerting config should've been set to nil, because its IsValid() method returned false and therefore alerting.Config.SetAlertingProviderToNil() should've been called") t.Fatal("PagerDuty alerting config should've been set to nil, because its IsValid() method returned false and therefore alerting.Config.SetAlertingProviderToNil() should've been called")
} }
} }
func TestParseAndValidateConfigBytesWithInvalidPushoverAlertingConfig(t *testing.T) { func TestParseAndValidateConfigBytesWithInvalidPushoverAlertingConfig(t *testing.T) {
config, err := parseAndValidateConfigBytes([]byte(` config, err := parseAndValidateConfigBytes([]byte(`
alerting: alerting:
@ -1801,7 +1798,7 @@ endpoints:
func TestParseAndValidateConfigBytesWithValidSecurityConfig(t *testing.T) { func TestParseAndValidateConfigBytesWithValidSecurityConfig(t *testing.T) {
const expectedUsername = "admin" const expectedUsername = "admin"
const expectedPasswordHash = "JDJhJDEwJHRiMnRFakxWazZLdXBzRERQazB1TE8vckRLY05Yb1hSdnoxWU0yQ1FaYXZRSW1McmladDYu" const expectedPasswordHash = "JDJhJDEwJHRiMnRFakxWazZLdXBzRERQazB1TE8vckRLY05Yb1hSdnoxWU0yQ1FaYXZRSW1McmladDYu"
config, err := parseAndValidateConfigBytes([]byte(fmt.Sprintf(`debug: true config, err := parseAndValidateConfigBytes([]byte(fmt.Sprintf(`
security: security:
basic: basic:
username: "%s" username: "%s"
@ -1889,6 +1886,7 @@ func TestGetAlertingProviderByAlertType(t *testing.T) {
Telegram: &telegram.AlertProvider{}, Telegram: &telegram.AlertProvider{},
Twilio: &twilio.AlertProvider{}, Twilio: &twilio.AlertProvider{},
Teams: &teams.AlertProvider{}, Teams: &teams.AlertProvider{},
TeamsWorkflows: &teamsworkflows.AlertProvider{},
} }
scenarios := []struct { scenarios := []struct {
alertType alert.Type alertType alert.Type
@ -1912,6 +1910,7 @@ func TestGetAlertingProviderByAlertType(t *testing.T) {
{alertType: alert.TypeTelegram, expected: alertingConfig.Telegram}, {alertType: alert.TypeTelegram, expected: alertingConfig.Telegram},
{alertType: alert.TypeTwilio, expected: alertingConfig.Twilio}, {alertType: alert.TypeTwilio, expected: alertingConfig.Twilio},
{alertType: alert.TypeTeams, expected: alertingConfig.Teams}, {alertType: alert.TypeTeams, expected: alertingConfig.Teams},
{alertType: alert.TypeTeamsWorkflows, expected: alertingConfig.TeamsWorkflows},
} }
for _, scenario := range scenarios { for _, scenario := range scenarios {
t.Run(string(scenario.alertType), func(t *testing.T) { t.Run(string(scenario.alertType), func(t *testing.T) {

View File

@ -150,7 +150,7 @@ func (c Condition) evaluate(result *Result, dontResolveFailedConditions bool) bo
return false return false
} }
if !success { if !success {
//log.Printf("[Condition.evaluate] Condition '%s' did not succeed because '%s' is false", condition, condition) //logr.Debugf("[Condition.evaluate] Condition '%s' did not succeed because '%s' is false", condition, condition)
} }
result.ConditionResults = append(result.ConditionResults, &ConditionResult{Condition: conditionToDisplay, Success: success}) result.ConditionResults = append(result.ConditionResults, &ConditionResult{Condition: conditionToDisplay, Success: success})
return success return success

View File

@ -1,9 +1,8 @@
package remote package remote
import ( import (
"log"
"github.com/TwiN/gatus/v5/client" "github.com/TwiN/gatus/v5/client"
"github.com/TwiN/logr"
) )
// NOTICE: This is an experimental alpha feature and may be updated/removed in future versions. // NOTICE: This is an experimental alpha feature and may be updated/removed in future versions.
@ -31,9 +30,8 @@ func (c *Config) ValidateAndSetDefaults() error {
} }
} }
if len(c.Instances) > 0 { if len(c.Instances) > 0 {
log.Println("WARNING: Your configuration is using 'remote', which is in alpha and may be updated/removed in future versions.") logr.Warn("WARNING: Your configuration is using 'remote', which is in alpha and may be updated/removed in future versions.")
log.Println("WARNING: See https://github.com/TwiN/gatus/issues/64 for more information") logr.Warn("WARNING: See https://github.com/TwiN/gatus/issues/64 for more information")
log.Println("WARNING: This feature is a candidate for removal in future versions. Please comment on the issue above if you need this feature.")
} }
return nil return nil
} }

View File

@ -1,12 +1,12 @@
package controller package controller
import ( import (
"log"
"os" "os"
"time" "time"
"github.com/TwiN/gatus/v5/api" "github.com/TwiN/gatus/v5/api"
"github.com/TwiN/gatus/v5/config" "github.com/TwiN/gatus/v5/config"
"github.com/TwiN/logr"
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
) )
@ -25,19 +25,19 @@ func Handle(cfg *config.Config) {
if os.Getenv("ROUTER_TEST") == "true" { if os.Getenv("ROUTER_TEST") == "true" {
return return
} }
log.Println("[controller.Handle] Listening on " + cfg.Web.SocketAddress()) logr.Info("[controller.Handle] Listening on " + cfg.Web.SocketAddress())
if cfg.Web.HasTLS() { if cfg.Web.HasTLS() {
err := app.ListenTLS(cfg.Web.SocketAddress(), cfg.Web.TLS.CertificateFile, cfg.Web.TLS.PrivateKeyFile) err := app.ListenTLS(cfg.Web.SocketAddress(), cfg.Web.TLS.CertificateFile, cfg.Web.TLS.PrivateKeyFile)
if err != nil { if err != nil {
log.Fatal("[controller.Handle]", err) logr.Fatalf("[controller.Handle] %s", err.Error())
} }
} else { } else {
err := app.Listen(cfg.Web.SocketAddress()) err := app.Listen(cfg.Web.SocketAddress())
if err != nil { if err != nil {
log.Fatal("[controller.Handle]", err) logr.Fatalf("[controller.Handle] %s", err.Error())
} }
} }
log.Println("[controller.Handle] Server has shut down successfully") logr.Info("[controller.Handle] Server has shut down successfully")
} }
// Shutdown stops the server // Shutdown stops the server

4
go.mod
View File

@ -1,6 +1,6 @@
module github.com/TwiN/gatus/v5 module github.com/TwiN/gatus/v5
go 1.22.4 go 1.23.3
require ( require (
code.gitea.io/sdk/gitea v0.19.0 code.gitea.io/sdk/gitea v0.19.0
@ -8,7 +8,7 @@ require (
github.com/TwiN/g8/v2 v2.0.0 github.com/TwiN/g8/v2 v2.0.0
github.com/TwiN/gocache/v2 v2.2.2 github.com/TwiN/gocache/v2 v2.2.2
github.com/TwiN/health v1.6.0 github.com/TwiN/health v1.6.0
github.com/TwiN/logr v0.2.1 github.com/TwiN/logr v0.3.1
github.com/TwiN/whois v1.1.9 github.com/TwiN/whois v1.1.9
github.com/aws/aws-sdk-go v1.54.10 github.com/aws/aws-sdk-go v1.54.10
github.com/coreos/go-oidc/v3 v3.11.0 github.com/coreos/go-oidc/v3 v3.11.0

4
go.sum
View File

@ -16,8 +16,8 @@ github.com/TwiN/gocache/v2 v2.2.2 h1:4HToPfDV8FSbaYO5kkbhLpEllUYse5rAf+hVU/mSsuI
github.com/TwiN/gocache/v2 v2.2.2/go.mod h1:WfIuwd7GR82/7EfQqEtmLFC3a2vqaKbs4Pe6neB7Gyc= github.com/TwiN/gocache/v2 v2.2.2/go.mod h1:WfIuwd7GR82/7EfQqEtmLFC3a2vqaKbs4Pe6neB7Gyc=
github.com/TwiN/health v1.6.0 h1:L2ks575JhRgQqWWOfKjw9B0ec172hx7GdToqkYUycQM= github.com/TwiN/health v1.6.0 h1:L2ks575JhRgQqWWOfKjw9B0ec172hx7GdToqkYUycQM=
github.com/TwiN/health v1.6.0/go.mod h1:Z6TszwQPMvtSiVx1QMidVRgvVr4KZGfiwqcD7/Z+3iw= github.com/TwiN/health v1.6.0/go.mod h1:Z6TszwQPMvtSiVx1QMidVRgvVr4KZGfiwqcD7/Z+3iw=
github.com/TwiN/logr v0.2.1 h1:kMhUmBBVlFxzqTvyHuNoYQ/uwqg8BW4y0AyZxI5JB3Q= github.com/TwiN/logr v0.3.1 h1:CfTKA83jUmsAoxqrr3p4JxEkqXOBnEE9/f35L5MODy4=
github.com/TwiN/logr v0.2.1/go.mod h1:oldDOkRjFXjZqiMP0+ca5NAQHXTiJ02zHirsuBJJH6k= github.com/TwiN/logr v0.3.1/go.mod h1:BZgZFYq6fQdU3KtR8qYato3zUEw53yQDaIuujHb55Jw=
github.com/TwiN/whois v1.1.9 h1:m20+m1CXnrstie+tW2ZmAJkfcT9zgwpVRUFsKeMw+ng= github.com/TwiN/whois v1.1.9 h1:m20+m1CXnrstie+tW2ZmAJkfcT9zgwpVRUFsKeMw+ng=
github.com/TwiN/whois v1.1.9/go.mod h1:TjipCMpJRAJYKmtz/rXQBU6UGxMh6bk8SHazu7OMnQE= github.com/TwiN/whois v1.1.9/go.mod h1:TjipCMpJRAJYKmtz/rXQBU6UGxMh6bk8SHazu7OMnQE=
github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=

62
main.go
View File

@ -1,7 +1,6 @@
package main package main
import ( import (
"log"
"os" "os"
"os/signal" "os/signal"
"strconv" "strconv"
@ -15,16 +14,22 @@ import (
"github.com/TwiN/logr" "github.com/TwiN/logr"
) )
const (
GatusConfigPathEnvVar = "GATUS_CONFIG_PATH"
GatusConfigFileEnvVar = "GATUS_CONFIG_FILE" // Deprecated in favor of GatusConfigPathEnvVar
GatusLogLevelEnvVar = "GATUS_LOG_LEVEL"
)
func main() { func main() {
if delayInSeconds, _ := strconv.Atoi(os.Getenv("GATUS_DELAY_START_SECONDS")); delayInSeconds > 0 { if delayInSeconds, _ := strconv.Atoi(os.Getenv("GATUS_DELAY_START_SECONDS")); delayInSeconds > 0 {
log.Printf("Delaying start by %d seconds", delayInSeconds) logr.Infof("Delaying start by %d seconds", delayInSeconds)
time.Sleep(time.Duration(delayInSeconds) * time.Second) time.Sleep(time.Duration(delayInSeconds) * time.Second)
} }
configureLogging()
cfg, err := loadConfiguration() cfg, err := loadConfiguration()
if err != nil { if err != nil {
panic(err) panic(err)
} }
configureLogging(cfg)
initializeStorage(cfg) initializeStorage(cfg)
start(cfg) start(cfg)
// Wait for termination signal // Wait for termination signal
@ -33,13 +38,13 @@ func main() {
signal.Notify(signalChannel, os.Interrupt, syscall.SIGTERM) signal.Notify(signalChannel, os.Interrupt, syscall.SIGTERM)
go func() { go func() {
<-signalChannel <-signalChannel
log.Println("Received termination signal, attempting to gracefully shut down") logr.Info("Received termination signal, attempting to gracefully shut down")
stop(cfg) stop(cfg)
save() save()
done <- true done <- true
}() }()
<-done <-done
log.Println("Shutting down") logr.Info("Shutting down")
} }
func start(cfg *config.Config) { func start(cfg *config.Config) {
@ -55,21 +60,32 @@ func stop(cfg *config.Config) {
func save() { func save() {
if err := store.Get().Save(); err != nil { if err := store.Get().Save(); err != nil {
log.Println("Failed to save storage provider:", err.Error()) logr.Errorf("Failed to save storage provider: %s", err.Error())
} }
} }
func configureLogging(cfg *config.Config) { func configureLogging() {
logr.SetThreshold(cfg.LogLevel) logLevelAsString := os.Getenv(GatusLogLevelEnvVar)
logr.Infof("[main.configureLogging] Log Level is %s", logr.GetThreshold()) if logLevel, err := logr.LevelFromString(logLevelAsString); err != nil {
logr.SetThreshold(logr.LevelInfo)
if len(logLevelAsString) == 0 {
logr.Infof("[main.configureLogging] Defaulting log level to %s", logr.LevelInfo)
} else {
logr.Warnf("[main.configureLogging] Invalid log level '%s', defaulting to %s", logLevelAsString, logr.LevelInfo)
}
} else {
logr.SetThreshold(logLevel)
logr.Infof("[main.configureLogging] Log Level is set to %s", logr.GetThreshold())
}
} }
func loadConfiguration() (*config.Config, error) { func loadConfiguration() (*config.Config, error) {
configPath := os.Getenv("GATUS_CONFIG_PATH") configPath := os.Getenv(GatusConfigPathEnvVar)
// Backwards compatibility // Backwards compatibility
if len(configPath) == 0 { if len(configPath) == 0 {
if configPath = os.Getenv("GATUS_CONFIG_FILE"); len(configPath) > 0 { if configPath = os.Getenv(GatusConfigFileEnvVar); len(configPath) > 0 {
log.Println("WARNING: GATUS_CONFIG_FILE is deprecated. Please use GATUS_CONFIG_PATH instead.") logr.Warnf("WARNING: %s is deprecated. Please use %s instead.", GatusConfigFileEnvVar, GatusConfigPathEnvVar)
} }
} }
return config.LoadConfiguration(configPath) return config.LoadConfiguration(configPath)
@ -95,7 +111,7 @@ func initializeStorage(cfg *config.Config) {
} }
numberOfEndpointStatusesDeleted := store.Get().DeleteAllEndpointStatusesNotInKeys(keys) numberOfEndpointStatusesDeleted := store.Get().DeleteAllEndpointStatusesNotInKeys(keys)
if numberOfEndpointStatusesDeleted > 0 { if numberOfEndpointStatusesDeleted > 0 {
log.Printf("[main.initializeStorage] Deleted %d endpoint statuses because their matching endpoints no longer existed", numberOfEndpointStatusesDeleted) logr.Infof("[main.initializeStorage] Deleted %d endpoint statuses because their matching endpoints no longer existed", numberOfEndpointStatusesDeleted)
} }
// Clean up the triggered alerts from the storage provider and load valid triggered endpoint alerts // Clean up the triggered alerts from the storage provider and load valid triggered endpoint alerts
numberOfPersistedTriggeredAlertsLoaded := 0 numberOfPersistedTriggeredAlertsLoaded := 0
@ -107,13 +123,13 @@ func initializeStorage(cfg *config.Config) {
} }
} }
numberOfTriggeredAlertsDeleted := store.Get().DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep, checksums) numberOfTriggeredAlertsDeleted := store.Get().DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep, checksums)
if cfg.Debug && numberOfTriggeredAlertsDeleted > 0 { if numberOfTriggeredAlertsDeleted > 0 {
log.Printf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ep.Key()) logr.Debugf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ep.Key())
} }
for _, alert := range ep.Alerts { for _, alert := range ep.Alerts {
exists, resolveKey, numberOfSuccessesInARow, err := store.Get().GetTriggeredEndpointAlert(ep, alert) exists, resolveKey, numberOfSuccessesInARow, err := store.Get().GetTriggeredEndpointAlert(ep, alert)
if err != nil { if err != nil {
log.Printf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error())
continue continue
} }
if exists { if exists {
@ -132,13 +148,13 @@ func initializeStorage(cfg *config.Config) {
} }
convertedEndpoint := ee.ToEndpoint() convertedEndpoint := ee.ToEndpoint()
numberOfTriggeredAlertsDeleted := store.Get().DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(convertedEndpoint, checksums) numberOfTriggeredAlertsDeleted := store.Get().DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(convertedEndpoint, checksums)
if cfg.Debug && numberOfTriggeredAlertsDeleted > 0 { if numberOfTriggeredAlertsDeleted > 0 {
log.Printf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ee.Key()) logr.Debugf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ee.Key())
} }
for _, alert := range ee.Alerts { for _, alert := range ee.Alerts {
exists, resolveKey, numberOfSuccessesInARow, err := store.Get().GetTriggeredEndpointAlert(convertedEndpoint, alert) exists, resolveKey, numberOfSuccessesInARow, err := store.Get().GetTriggeredEndpointAlert(convertedEndpoint, alert)
if err != nil { if err != nil {
log.Printf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ee.Key(), err.Error()) logr.Errorf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ee.Key(), err.Error())
continue continue
} }
if exists { if exists {
@ -149,7 +165,7 @@ func initializeStorage(cfg *config.Config) {
} }
} }
if numberOfPersistedTriggeredAlertsLoaded > 0 { if numberOfPersistedTriggeredAlertsLoaded > 0 {
log.Printf("[main.initializeStorage] Loaded %d persisted triggered alerts", numberOfPersistedTriggeredAlertsLoaded) logr.Infof("[main.initializeStorage] Loaded %d persisted triggered alerts", numberOfPersistedTriggeredAlertsLoaded)
} }
} }
@ -157,15 +173,15 @@ func listenToConfigurationFileChanges(cfg *config.Config) {
for { for {
time.Sleep(30 * time.Second) time.Sleep(30 * time.Second)
if cfg.HasLoadedConfigurationBeenModified() { if cfg.HasLoadedConfigurationBeenModified() {
log.Println("[main.listenToConfigurationFileChanges] Configuration file has been modified") logr.Info("[main.listenToConfigurationFileChanges] Configuration file has been modified")
stop(cfg) stop(cfg)
time.Sleep(time.Second) // Wait a bit to make sure everything is done. time.Sleep(time.Second) // Wait a bit to make sure everything is done.
save() save()
updatedConfig, err := loadConfiguration() updatedConfig, err := loadConfiguration()
if err != nil { if err != nil {
if cfg.SkipInvalidConfigUpdate { if cfg.SkipInvalidConfigUpdate {
log.Println("[main.listenToConfigurationFileChanges] Failed to load new configuration:", err.Error()) logr.Errorf("[main.listenToConfigurationFileChanges] Failed to load new configuration: %s", err.Error())
log.Println("[main.listenToConfigurationFileChanges] The configuration file was updated, but it is not valid. The old configuration will continue being used.") logr.Error("[main.listenToConfigurationFileChanges] The configuration file was updated, but it is not valid. The old configuration will continue being used.")
// Update the last file modification time to avoid trying to process the same invalid configuration again // Update the last file modification time to avoid trying to process the same invalid configuration again
cfg.UpdateLastFileModTime() cfg.UpdateLastFileModTime()
continue continue

View File

@ -2,10 +2,10 @@ package security
import ( import (
"encoding/base64" "encoding/base64"
"log"
"net/http" "net/http"
g8 "github.com/TwiN/g8/v2" g8 "github.com/TwiN/g8/v2"
"github.com/TwiN/logr"
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/adaptor" "github.com/gofiber/fiber/v2/middleware/adaptor"
"github.com/gofiber/fiber/v2/middleware/basicauth" "github.com/gofiber/fiber/v2/middleware/basicauth"
@ -99,7 +99,7 @@ func (c *Config) IsAuthenticated(ctx *fiber.Ctx) bool {
// TODO: Update g8 to support fasthttp natively? (see g8's fasthttp branch) // TODO: Update g8 to support fasthttp natively? (see g8's fasthttp branch)
request, err := adaptor.ConvertRequest(ctx, false) request, err := adaptor.ConvertRequest(ctx, false)
if err != nil { if err != nil {
log.Printf("[security.IsAuthenticated] Unexpected error converting request: %v", err) logr.Errorf("[security.IsAuthenticated] Unexpected error converting request: %v", err)
return false return false
} }
token := c.gate.ExtractTokenFromRequest(request) token := c.gate.ExtractTokenFromRequest(request)

View File

@ -2,11 +2,11 @@ package security
import ( import (
"context" "context"
"log"
"net/http" "net/http"
"strings" "strings"
"time" "time"
"github.com/TwiN/logr"
"github.com/coreos/go-oidc/v3/oidc" "github.com/coreos/go-oidc/v3/oidc"
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
"github.com/google/uuid" "github.com/google/uuid"
@ -124,7 +124,7 @@ func (c *OIDCConfig) callbackHandler(w http.ResponseWriter, r *http.Request) { /
return return
} }
} }
log.Printf("[security.callbackHandler] Subject %s is not in the list of allowed subjects", idToken.Subject) logr.Debugf("[security.callbackHandler] Subject %s is not in the list of allowed subjects", idToken.Subject)
http.Redirect(w, r, "/?error=access_denied", http.StatusFound) http.Redirect(w, r, "/?error=access_denied", http.StatusFound)
} }

View File

@ -4,7 +4,6 @@ import (
"database/sql" "database/sql"
"errors" "errors"
"fmt" "fmt"
"log"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@ -14,6 +13,7 @@ import (
"github.com/TwiN/gatus/v5/storage/store/common" "github.com/TwiN/gatus/v5/storage/store/common"
"github.com/TwiN/gatus/v5/storage/store/common/paging" "github.com/TwiN/gatus/v5/storage/store/common/paging"
"github.com/TwiN/gocache/v2" "github.com/TwiN/gocache/v2"
"github.com/TwiN/logr"
_ "github.com/lib/pq" _ "github.com/lib/pq"
_ "modernc.org/sqlite" _ "modernc.org/sqlite"
) )
@ -237,12 +237,12 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
// Endpoint doesn't exist in the database, insert it // Endpoint doesn't exist in the database, insert it
if endpointID, err = s.insertEndpoint(tx, ep); err != nil { if endpointID, err = s.insertEndpoint(tx, ep); err != nil {
_ = tx.Rollback() _ = tx.Rollback()
log.Printf("[sql.Insert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
return err return err
} }
} else { } else {
_ = tx.Rollback() _ = tx.Rollback()
log.Printf("[sql.Insert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
return err return err
} }
} }
@ -253,12 +253,12 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
// of type EventStart, in which case we will have to create a new event of type EventHealthy or EventUnhealthy // of type EventStart, in which case we will have to create a new event of type EventHealthy or EventUnhealthy
// based on result.Success. // based on result.Success.
// 2. The lastResult.Success != result.Success. This implies that the endpoint went from healthy to unhealthy or // 2. The lastResult.Success != result.Success. This implies that the endpoint went from healthy to unhealthy or
// vice-versa, in which case we will have to create a new event of type EventHealthy or EventUnhealthy // vice versa, in which case we will have to create a new event of type EventHealthy or EventUnhealthy
// based on result.Success. // based on result.Success.
numberOfEvents, err := s.getNumberOfEventsByEndpointID(tx, endpointID) numberOfEvents, err := s.getNumberOfEventsByEndpointID(tx, endpointID)
if err != nil { if err != nil {
// Silently fail // Silently fail
log.Printf("[sql.Insert] Failed to retrieve total number of events for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to retrieve total number of events for endpoint with key=%s: %s", ep.Key(), err.Error())
} }
if numberOfEvents == 0 { if numberOfEvents == 0 {
// There's no events yet, which means we need to add the EventStart and the first healthy/unhealthy event // There's no events yet, which means we need to add the EventStart and the first healthy/unhealthy event
@ -268,18 +268,18 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
}) })
if err != nil { if err != nil {
// Silently fail // Silently fail
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", endpoint.EventStart, ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", endpoint.EventStart, ep.Key(), err.Error())
} }
event := endpoint.NewEventFromResult(result) event := endpoint.NewEventFromResult(result)
if err = s.insertEndpointEvent(tx, endpointID, event); err != nil { if err = s.insertEndpointEvent(tx, endpointID, event); err != nil {
// Silently fail // Silently fail
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
} }
} else { } else {
// Get the success value of the previous result // Get the success value of the previous result
var lastResultSuccess bool var lastResultSuccess bool
if lastResultSuccess, err = s.getLastEndpointResultSuccessValue(tx, endpointID); err != nil { if lastResultSuccess, err = s.getLastEndpointResultSuccessValue(tx, endpointID); err != nil {
log.Printf("[sql.Insert] Failed to retrieve outcome of previous result for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to retrieve outcome of previous result for endpoint with key=%s: %s", ep.Key(), err.Error())
} else { } else {
// If we managed to retrieve the outcome of the previous result, we'll compare it with the new result. // If we managed to retrieve the outcome of the previous result, we'll compare it with the new result.
// If the final outcome (success or failure) of the previous and the new result aren't the same, it means // If the final outcome (success or failure) of the previous and the new result aren't the same, it means
@ -289,7 +289,7 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
event := endpoint.NewEventFromResult(result) event := endpoint.NewEventFromResult(result)
if err = s.insertEndpointEvent(tx, endpointID, event); err != nil { if err = s.insertEndpointEvent(tx, endpointID, event); err != nil {
// Silently fail // Silently fail
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
} }
} }
} }
@ -298,42 +298,42 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
// (since we're only deleting MaximumNumberOfEvents at a time instead of 1) // (since we're only deleting MaximumNumberOfEvents at a time instead of 1)
if numberOfEvents > eventsCleanUpThreshold { if numberOfEvents > eventsCleanUpThreshold {
if err = s.deleteOldEndpointEvents(tx, endpointID); err != nil { if err = s.deleteOldEndpointEvents(tx, endpointID); err != nil {
log.Printf("[sql.Insert] Failed to delete old events for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to delete old events for endpoint with key=%s: %s", ep.Key(), err.Error())
} }
} }
} }
// Second, we need to insert the result. // Second, we need to insert the result.
if err = s.insertEndpointResult(tx, endpointID, result); err != nil { if err = s.insertEndpointResult(tx, endpointID, result); err != nil {
log.Printf("[sql.Insert] Failed to insert result for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to insert result for endpoint with key=%s: %s", ep.Key(), err.Error())
_ = tx.Rollback() // If we can't insert the result, we'll rollback now since there's no point continuing _ = tx.Rollback() // If we can't insert the result, we'll rollback now since there's no point continuing
return err return err
} }
// Clean up old results // Clean up old results
numberOfResults, err := s.getNumberOfResultsByEndpointID(tx, endpointID) numberOfResults, err := s.getNumberOfResultsByEndpointID(tx, endpointID)
if err != nil { if err != nil {
log.Printf("[sql.Insert] Failed to retrieve total number of results for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to retrieve total number of results for endpoint with key=%s: %s", ep.Key(), err.Error())
} else { } else {
if numberOfResults > resultsCleanUpThreshold { if numberOfResults > resultsCleanUpThreshold {
if err = s.deleteOldEndpointResults(tx, endpointID); err != nil { if err = s.deleteOldEndpointResults(tx, endpointID); err != nil {
log.Printf("[sql.Insert] Failed to delete old results for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to delete old results for endpoint with key=%s: %s", ep.Key(), err.Error())
} }
} }
} }
// Finally, we need to insert the uptime data. // Finally, we need to insert the uptime data.
// Because the uptime data significantly outlives the results, we can't rely on the results for determining the uptime // Because the uptime data significantly outlives the results, we can't rely on the results for determining the uptime
if err = s.updateEndpointUptime(tx, endpointID, result); err != nil { if err = s.updateEndpointUptime(tx, endpointID, result); err != nil {
log.Printf("[sql.Insert] Failed to update uptime for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to update uptime for endpoint with key=%s: %s", ep.Key(), err.Error())
} }
// Merge hourly uptime entries that can be merged into daily entries and clean up old uptime entries // Merge hourly uptime entries that can be merged into daily entries and clean up old uptime entries
numberOfUptimeEntries, err := s.getNumberOfUptimeEntriesByEndpointID(tx, endpointID) numberOfUptimeEntries, err := s.getNumberOfUptimeEntriesByEndpointID(tx, endpointID)
if err != nil { if err != nil {
log.Printf("[sql.Insert] Failed to retrieve total number of uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to retrieve total number of uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
} else { } else {
// Merge older hourly uptime entries into daily uptime entries if we have more than uptimeTotalEntriesMergeThreshold // Merge older hourly uptime entries into daily uptime entries if we have more than uptimeTotalEntriesMergeThreshold
if numberOfUptimeEntries >= uptimeTotalEntriesMergeThreshold { if numberOfUptimeEntries >= uptimeTotalEntriesMergeThreshold {
log.Printf("[sql.Insert] Merging hourly uptime entries for endpoint with key=%s; This is a lot of work, it shouldn't happen too often", ep.Key()) logr.Infof("[sql.Insert] Merging hourly uptime entries for endpoint with key=%s; This is a lot of work, it shouldn't happen too often", ep.Key())
if err = s.mergeHourlyUptimeEntriesOlderThanMergeThresholdIntoDailyUptimeEntries(tx, endpointID); err != nil { if err = s.mergeHourlyUptimeEntriesOlderThanMergeThresholdIntoDailyUptimeEntries(tx, endpointID); err != nil {
log.Printf("[sql.Insert] Failed to merge hourly uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to merge hourly uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
} }
} }
} }
@ -342,11 +342,11 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
// but if Gatus was temporarily shut down, we might have some old entries that need to be cleaned up // but if Gatus was temporarily shut down, we might have some old entries that need to be cleaned up
ageOfOldestUptimeEntry, err := s.getAgeOfOldestEndpointUptimeEntry(tx, endpointID) ageOfOldestUptimeEntry, err := s.getAgeOfOldestEndpointUptimeEntry(tx, endpointID)
if err != nil { if err != nil {
log.Printf("[sql.Insert] Failed to retrieve oldest endpoint uptime entry for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to retrieve oldest endpoint uptime entry for endpoint with key=%s: %s", ep.Key(), err.Error())
} else { } else {
if ageOfOldestUptimeEntry > uptimeAgeCleanUpThreshold { if ageOfOldestUptimeEntry > uptimeAgeCleanUpThreshold {
if err = s.deleteOldUptimeEntries(tx, endpointID, time.Now().Add(-(uptimeRetention + time.Hour))); err != nil { if err = s.deleteOldUptimeEntries(tx, endpointID, time.Now().Add(-(uptimeRetention + time.Hour))); err != nil {
log.Printf("[sql.Insert] Failed to delete old uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.Insert] Failed to delete old uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
} }
} }
} }
@ -356,7 +356,7 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
s.writeThroughCache.Delete(cacheKey) s.writeThroughCache.Delete(cacheKey)
endpointKey, params, err := extractKeyAndParamsFromCacheKey(cacheKey) endpointKey, params, err := extractKeyAndParamsFromCacheKey(cacheKey)
if err != nil { if err != nil {
log.Printf("[sql.Insert] Silently deleting cache key %s instead of refreshing due to error: %s", cacheKey, err.Error()) logr.Errorf("[sql.Insert] Silently deleting cache key %s instead of refreshing due to error: %s", cacheKey, err.Error())
continue continue
} }
// Retrieve the endpoint status by key, which will in turn refresh the cache // Retrieve the endpoint status by key, which will in turn refresh the cache
@ -387,7 +387,7 @@ func (s *Store) DeleteAllEndpointStatusesNotInKeys(keys []string) int {
result, err = s.db.Exec(query, args...) result, err = s.db.Exec(query, args...)
} }
if err != nil { if err != nil {
log.Printf("[sql.DeleteAllEndpointStatusesNotInKeys] Failed to delete rows that do not belong to any of keys=%v: %s", keys, err.Error()) logr.Errorf("[sql.DeleteAllEndpointStatusesNotInKeys] Failed to delete rows that do not belong to any of keys=%v: %s", keys, err.Error())
return 0 return 0
} }
if s.writeThroughCache != nil { if s.writeThroughCache != nil {
@ -403,7 +403,7 @@ func (s *Store) DeleteAllEndpointStatusesNotInKeys(keys []string) int {
// GetTriggeredEndpointAlert returns whether the triggered alert for the specified endpoint as well as the necessary information to resolve it // GetTriggeredEndpointAlert returns whether the triggered alert for the specified endpoint as well as the necessary information to resolve it
func (s *Store) GetTriggeredEndpointAlert(ep *endpoint.Endpoint, alert *alert.Alert) (exists bool, resolveKey string, numberOfSuccessesInARow int, err error) { func (s *Store) GetTriggeredEndpointAlert(ep *endpoint.Endpoint, alert *alert.Alert) (exists bool, resolveKey string, numberOfSuccessesInARow int, err error) {
//log.Printf("[sql.GetTriggeredEndpointAlert] Getting triggered alert with checksum=%s for endpoint with key=%s", alert.Checksum(), ep.Key()) //logr.Debugf("[sql.GetTriggeredEndpointAlert] Getting triggered alert with checksum=%s for endpoint with key=%s", alert.Checksum(), ep.Key())
err = s.db.QueryRow( err = s.db.QueryRow(
"SELECT resolve_key, number_of_successes_in_a_row FROM endpoint_alerts_triggered WHERE endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $1 LIMIT 1) AND configuration_checksum = $2", "SELECT resolve_key, number_of_successes_in_a_row FROM endpoint_alerts_triggered WHERE endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $1 LIMIT 1) AND configuration_checksum = $2",
ep.Key(), ep.Key(),
@ -421,7 +421,7 @@ func (s *Store) GetTriggeredEndpointAlert(ep *endpoint.Endpoint, alert *alert.Al
// UpsertTriggeredEndpointAlert inserts/updates a triggered alert for an endpoint // UpsertTriggeredEndpointAlert inserts/updates a triggered alert for an endpoint
// Used for persistence of triggered alerts across application restarts // Used for persistence of triggered alerts across application restarts
func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error { func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error {
//log.Printf("[sql.UpsertTriggeredEndpointAlert] Upserting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key()) //logr.Debugf("[sql.UpsertTriggeredEndpointAlert] Upserting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
tx, err := s.db.Begin() tx, err := s.db.Begin()
if err != nil { if err != nil {
return err return err
@ -433,12 +433,12 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
// This shouldn't happen, but we'll handle it anyway // This shouldn't happen, but we'll handle it anyway
if endpointID, err = s.insertEndpoint(tx, ep); err != nil { if endpointID, err = s.insertEndpoint(tx, ep); err != nil {
_ = tx.Rollback() _ = tx.Rollback()
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
return err return err
} }
} else { } else {
_ = tx.Rollback() _ = tx.Rollback()
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
return err return err
} }
} }
@ -457,7 +457,7 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
) )
if err != nil { if err != nil {
_ = tx.Rollback() _ = tx.Rollback()
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to persist triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to persist triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error())
return err return err
} }
if err = tx.Commit(); err != nil { if err = tx.Commit(); err != nil {
@ -468,7 +468,7 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
// DeleteTriggeredEndpointAlert deletes a triggered alert for an endpoint // DeleteTriggeredEndpointAlert deletes a triggered alert for an endpoint
func (s *Store) DeleteTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error { func (s *Store) DeleteTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error {
//log.Printf("[sql.DeleteTriggeredEndpointAlert] Deleting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key()) //logr.Debugf("[sql.DeleteTriggeredEndpointAlert] Deleting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
_, err := s.db.Exec("DELETE FROM endpoint_alerts_triggered WHERE configuration_checksum = $1 AND endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $2 LIMIT 1)", triggeredAlert.Checksum(), ep.Key()) _, err := s.db.Exec("DELETE FROM endpoint_alerts_triggered WHERE configuration_checksum = $1 AND endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $2 LIMIT 1)", triggeredAlert.Checksum(), ep.Key())
return err return err
} }
@ -477,7 +477,7 @@ func (s *Store) DeleteTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
// configurations are not provided in the checksums list. // configurations are not provided in the checksums list.
// This prevents triggered alerts that have been removed or modified from lingering in the database. // This prevents triggered alerts that have been removed or modified from lingering in the database.
func (s *Store) DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep *endpoint.Endpoint, checksums []string) int { func (s *Store) DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep *endpoint.Endpoint, checksums []string) int {
//log.Printf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Deleting triggered alerts for endpoint with key=%s that do not belong to any of checksums=%v", ep.Key(), checksums) //logr.Debugf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Deleting triggered alerts for endpoint with key=%s that do not belong to any of checksums=%v", ep.Key(), checksums)
var err error var err error
var result sql.Result var result sql.Result
if len(checksums) == 0 { if len(checksums) == 0 {
@ -498,7 +498,7 @@ func (s *Store) DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep *endpoint.En
result, err = s.db.Exec(query, args...) result, err = s.db.Exec(query, args...)
} }
if err != nil { if err != nil {
log.Printf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Failed to delete rows for endpoint with key=%s that do not belong to any of checksums=%v: %s", ep.Key(), checksums, err.Error()) logr.Errorf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Failed to delete rows for endpoint with key=%s that do not belong to any of checksums=%v: %s", ep.Key(), checksums, err.Error())
return 0 return 0
} }
// Return number of rows deleted // Return number of rows deleted
@ -530,7 +530,7 @@ func (s *Store) Close() {
// insertEndpoint inserts an endpoint in the store and returns the generated id of said endpoint // insertEndpoint inserts an endpoint in the store and returns the generated id of said endpoint
func (s *Store) insertEndpoint(tx *sql.Tx, ep *endpoint.Endpoint) (int64, error) { func (s *Store) insertEndpoint(tx *sql.Tx, ep *endpoint.Endpoint) (int64, error) {
//log.Printf("[sql.insertEndpoint] Inserting endpoint with group=%s and name=%s", ep.Group, ep.Name) //logr.Debugf("[sql.insertEndpoint] Inserting endpoint with group=%s and name=%s", ep.Group, ep.Name)
var id int64 var id int64
err := tx.QueryRow( err := tx.QueryRow(
"INSERT INTO endpoints (endpoint_key, endpoint_name, endpoint_group) VALUES ($1, $2, $3) RETURNING endpoint_id", "INSERT INTO endpoints (endpoint_key, endpoint_name, endpoint_group) VALUES ($1, $2, $3) RETURNING endpoint_id",
@ -655,12 +655,12 @@ func (s *Store) getEndpointStatusByKey(tx *sql.Tx, key string, parameters *pagin
endpointStatus := endpoint.NewStatus(group, endpointName) endpointStatus := endpoint.NewStatus(group, endpointName)
if parameters.EventsPageSize > 0 { if parameters.EventsPageSize > 0 {
if endpointStatus.Events, err = s.getEndpointEventsByEndpointID(tx, endpointID, parameters.EventsPage, parameters.EventsPageSize); err != nil { if endpointStatus.Events, err = s.getEndpointEventsByEndpointID(tx, endpointID, parameters.EventsPage, parameters.EventsPageSize); err != nil {
log.Printf("[sql.getEndpointStatusByKey] Failed to retrieve events for key=%s: %s", key, err.Error()) logr.Errorf("[sql.getEndpointStatusByKey] Failed to retrieve events for key=%s: %s", key, err.Error())
} }
} }
if parameters.ResultsPageSize > 0 { if parameters.ResultsPageSize > 0 {
if endpointStatus.Results, err = s.getEndpointResultsByEndpointID(tx, endpointID, parameters.ResultsPage, parameters.ResultsPageSize); err != nil { if endpointStatus.Results, err = s.getEndpointResultsByEndpointID(tx, endpointID, parameters.ResultsPage, parameters.ResultsPageSize); err != nil {
log.Printf("[sql.getEndpointStatusByKey] Failed to retrieve results for key=%s: %s", key, err.Error()) logr.Errorf("[sql.getEndpointStatusByKey] Failed to retrieve results for key=%s: %s", key, err.Error())
} }
} }
if s.writeThroughCache != nil { if s.writeThroughCache != nil {
@ -735,7 +735,7 @@ func (s *Store) getEndpointResultsByEndpointID(tx *sql.Tx, endpointID int64, pag
var joinedErrors string var joinedErrors string
err = rows.Scan(&id, &result.Success, &joinedErrors, &result.Connected, &result.HTTPStatus, &result.DNSRCode, &result.CertificateExpiration, &result.DomainExpiration, &result.Hostname, &result.IP, &result.Duration, &result.Timestamp) err = rows.Scan(&id, &result.Success, &joinedErrors, &result.Connected, &result.HTTPStatus, &result.DNSRCode, &result.CertificateExpiration, &result.DomainExpiration, &result.Hostname, &result.IP, &result.Duration, &result.Timestamp)
if err != nil { if err != nil {
log.Printf("[sql.getEndpointResultsByEndpointID] Silently failed to retrieve endpoint result for endpointID=%d: %s", endpointID, err.Error()) logr.Errorf("[sql.getEndpointResultsByEndpointID] Silently failed to retrieve endpoint result for endpointID=%d: %s", endpointID, err.Error())
err = nil err = nil
} }
if len(joinedErrors) != 0 { if len(joinedErrors) != 0 {

View File

@ -2,7 +2,6 @@ package store
import ( import (
"context" "context"
"log"
"time" "time"
"github.com/TwiN/gatus/v5/alerting/alert" "github.com/TwiN/gatus/v5/alerting/alert"
@ -11,6 +10,7 @@ import (
"github.com/TwiN/gatus/v5/storage/store/common/paging" "github.com/TwiN/gatus/v5/storage/store/common/paging"
"github.com/TwiN/gatus/v5/storage/store/memory" "github.com/TwiN/gatus/v5/storage/store/memory"
"github.com/TwiN/gatus/v5/storage/store/sql" "github.com/TwiN/gatus/v5/storage/store/sql"
"github.com/TwiN/logr"
) )
// Store is the interface that each store should implement // Store is the interface that each store should implement
@ -91,7 +91,7 @@ var (
func Get() Store { func Get() Store {
if !initialized { if !initialized {
// This only happens in tests // This only happens in tests
log.Println("[store.Get] Provider requested before it was initialized, automatically initializing") logr.Info("[store.Get] Provider requested before it was initialized, automatically initializing")
err := Initialize(nil) err := Initialize(nil)
if err != nil { if err != nil {
panic("failed to automatically initialize store: " + err.Error()) panic("failed to automatically initialize store: " + err.Error())
@ -110,11 +110,11 @@ func Initialize(cfg *storage.Config) error {
} }
if cfg == nil { if cfg == nil {
// This only happens in tests // This only happens in tests
log.Println("[store.Initialize] nil storage config passed as parameter. This should only happen in tests. Defaulting to an empty config.") logr.Warn("[store.Initialize] nil storage config passed as parameter. This should only happen in tests. Defaulting to an empty config.")
cfg = &storage.Config{} cfg = &storage.Config{}
} }
if len(cfg.Path) == 0 && cfg.Type != storage.TypePostgres { if len(cfg.Path) == 0 && cfg.Type != storage.TypePostgres {
log.Printf("[store.Initialize] Creating storage provider of type=%s", cfg.Type) logr.Infof("[store.Initialize] Creating storage provider of type=%s", cfg.Type)
} }
ctx, cancelFunc = context.WithCancel(context.Background()) ctx, cancelFunc = context.WithCancel(context.Background())
switch cfg.Type { switch cfg.Type {
@ -136,13 +136,12 @@ func autoSave(ctx context.Context, store Store, interval time.Duration) {
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():
log.Printf("[store.autoSave] Stopping active job") logr.Info("[store.autoSave] Stopping active job")
return return
case <-time.After(interval): case <-time.After(interval):
log.Printf("[store.autoSave] Saving") logr.Info("[store.autoSave] Saving")
err := store.Save() if err := store.Save(); err != nil {
if err != nil { logr.Errorf("[store.autoSave] Save failed: %s", err.Error())
log.Println("[store.autoSave] Save failed:", err.Error())
} }
} }
} }

View File

@ -2,27 +2,27 @@ package watchdog
import ( import (
"errors" "errors"
"log"
"os" "os"
"github.com/TwiN/gatus/v5/alerting" "github.com/TwiN/gatus/v5/alerting"
"github.com/TwiN/gatus/v5/config/endpoint" "github.com/TwiN/gatus/v5/config/endpoint"
"github.com/TwiN/gatus/v5/storage/store" "github.com/TwiN/gatus/v5/storage/store"
"github.com/TwiN/logr"
) )
// HandleAlerting takes care of alerts to resolve and alerts to trigger based on result success or failure // HandleAlerting takes care of alerts to resolve and alerts to trigger based on result success or failure
func HandleAlerting(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) { func HandleAlerting(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) {
if alertingConfig == nil { if alertingConfig == nil {
return return
} }
if result.Success { if result.Success {
handleAlertsToResolve(ep, result, alertingConfig, debug) handleAlertsToResolve(ep, result, alertingConfig)
} else { } else {
handleAlertsToTrigger(ep, result, alertingConfig, debug) handleAlertsToTrigger(ep, result, alertingConfig)
} }
} }
func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) { func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) {
ep.NumberOfSuccessesInARow = 0 ep.NumberOfSuccessesInARow = 0
ep.NumberOfFailuresInARow++ ep.NumberOfFailuresInARow++
for _, endpointAlert := range ep.Alerts { for _, endpointAlert := range ep.Alerts {
@ -31,14 +31,12 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert
continue continue
} }
if endpointAlert.Triggered { if endpointAlert.Triggered {
if debug { logr.Debugf("[watchdog.handleAlertsToTrigger] Alert for endpoint with key=%s with description='%s' has already been TRIGGERED, skipping", ep.Key(), endpointAlert.GetDescription())
log.Printf("[watchdog.handleAlertsToTrigger] Alert for endpoint=%s with description='%s' has already been TRIGGERED, skipping", ep.Name, endpointAlert.GetDescription())
}
continue continue
} }
alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type) alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
if alertProvider != nil { if alertProvider != nil {
log.Printf("[watchdog.handleAlertsToTrigger] Sending %s alert because alert for endpoint=%s with description='%s' has been TRIGGERED", endpointAlert.Type, ep.Name, endpointAlert.GetDescription()) logr.Infof("[watchdog.handleAlertsToTrigger] Sending %s alert because alert for endpoint with key=%s with description='%s' has been TRIGGERED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
var err error var err error
if os.Getenv("MOCK_ALERT_PROVIDER") == "true" { if os.Getenv("MOCK_ALERT_PROVIDER") == "true" {
if os.Getenv("MOCK_ALERT_PROVIDER_ERROR") == "true" { if os.Getenv("MOCK_ALERT_PROVIDER_ERROR") == "true" {
@ -48,27 +46,27 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert
err = alertProvider.Send(ep, endpointAlert, result, false) err = alertProvider.Send(ep, endpointAlert, result, false)
} }
if err != nil { if err != nil {
log.Printf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint=%s: %s", ep.Name, err.Error()) logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
} else { } else {
endpointAlert.Triggered = true endpointAlert.Triggered = true
if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil { if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
log.Printf("[watchdog.handleAlertsToTrigger] Failed to persist triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to persist triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
} }
} }
} else { } else {
log.Printf("[watchdog.handleAlertsToTrigger] Not sending alert of type=%s despite being TRIGGERED, because the provider wasn't configured properly", endpointAlert.Type) logr.Warnf("[watchdog.handleAlertsToTrigger] Not sending alert of type=%s endpoint with key=%s despite being TRIGGERED, because the provider wasn't configured properly", endpointAlert.Type, ep.Key())
} }
} }
} }
func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) { func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) {
ep.NumberOfSuccessesInARow++ ep.NumberOfSuccessesInARow++
for _, endpointAlert := range ep.Alerts { for _, endpointAlert := range ep.Alerts {
isStillBelowSuccessThreshold := endpointAlert.SuccessThreshold > ep.NumberOfSuccessesInARow isStillBelowSuccessThreshold := endpointAlert.SuccessThreshold > ep.NumberOfSuccessesInARow
if isStillBelowSuccessThreshold && endpointAlert.IsEnabled() && endpointAlert.Triggered { if isStillBelowSuccessThreshold && endpointAlert.IsEnabled() && endpointAlert.Triggered {
// Persist NumberOfSuccessesInARow // Persist NumberOfSuccessesInARow
if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil { if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
log.Printf("[watchdog.handleAlertsToResolve] Failed to update triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[watchdog.handleAlertsToResolve] Failed to update triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
} }
} }
if !endpointAlert.IsEnabled() || !endpointAlert.Triggered || isStillBelowSuccessThreshold { if !endpointAlert.IsEnabled() || !endpointAlert.Triggered || isStillBelowSuccessThreshold {
@ -78,20 +76,20 @@ func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alert
// Further explanation can be found on Alert's Triggered field. // Further explanation can be found on Alert's Triggered field.
endpointAlert.Triggered = false endpointAlert.Triggered = false
if err := store.Get().DeleteTriggeredEndpointAlert(ep, endpointAlert); err != nil { if err := store.Get().DeleteTriggeredEndpointAlert(ep, endpointAlert); err != nil {
log.Printf("[watchdog.handleAlertsToResolve] Failed to delete persisted triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[watchdog.handleAlertsToResolve] Failed to delete persisted triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
} }
if !endpointAlert.IsSendingOnResolved() { if !endpointAlert.IsSendingOnResolved() {
continue continue
} }
alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type) alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
if alertProvider != nil { if alertProvider != nil {
log.Printf("[watchdog.handleAlertsToResolve] Sending %s alert because alert for endpoint with key=%s with description='%s' has been RESOLVED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription()) logr.Infof("[watchdog.handleAlertsToResolve] Sending %s alert because alert for endpoint with key=%s with description='%s' has been RESOLVED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
err := alertProvider.Send(ep, endpointAlert, result, true) err := alertProvider.Send(ep, endpointAlert, result, true)
if err != nil { if err != nil {
log.Printf("[watchdog.handleAlertsToResolve] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error()) logr.Errorf("[watchdog.handleAlertsToResolve] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
} }
} else { } else {
log.Printf("[watchdog.handleAlertsToResolve] Not sending alert of type=%s despite being RESOLVED, because the provider wasn't configured properly", endpointAlert.Type) logr.Warnf("[watchdog.handleAlertsToResolve] Not sending alert of type=%s for endpoint with key=%s despite being RESOLVED, because the provider wasn't configured properly", endpointAlert.Type, ep.Key())
} }
} }
ep.NumberOfFailuresInARow = 0 ep.NumberOfFailuresInARow = 0

View File

@ -28,7 +28,6 @@ func TestHandleAlerting(t *testing.T) {
defer os.Clearenv() defer os.Clearenv()
cfg := &config.Config{ cfg := &config.Config{
Debug: true,
Alerting: &alerting.Config{ Alerting: &alerting.Config{
Custom: &custom.AlertProvider{ Custom: &custom.AlertProvider{
URL: "https://twin.sh/health", URL: "https://twin.sh/health",
@ -52,28 +51,28 @@ func TestHandleAlerting(t *testing.T) {
} }
verify(t, ep, 0, 0, false, "The alert shouldn't start triggered") verify(t, ep, 0, 0, false, "The alert shouldn't start triggered")
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 1, 0, false, "The alert shouldn't have triggered") verify(t, ep, 1, 0, false, "The alert shouldn't have triggered")
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 2, 0, true, "The alert should've triggered") verify(t, ep, 2, 0, true, "The alert should've triggered")
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 3, 0, true, "The alert should still be triggered") verify(t, ep, 3, 0, true, "The alert should still be triggered")
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 4, 0, true, "The alert should still be triggered") verify(t, ep, 4, 0, true, "The alert should still be triggered")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 1, true, "The alert should still be triggered (because endpoint.Alerts[0].SuccessThreshold is 3)") verify(t, ep, 0, 1, true, "The alert should still be triggered (because endpoint.Alerts[0].SuccessThreshold is 3)")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 2, true, "The alert should still be triggered (because endpoint.Alerts[0].SuccessThreshold is 3)") verify(t, ep, 0, 2, true, "The alert should still be triggered (because endpoint.Alerts[0].SuccessThreshold is 3)")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 3, false, "The alert should've been resolved") verify(t, ep, 0, 3, false, "The alert should've been resolved")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 4, false, "The alert should no longer be triggered") verify(t, ep, 0, 4, false, "The alert should no longer be triggered")
} }
func TestHandleAlertingWhenAlertingConfigIsNil(t *testing.T) { func TestHandleAlertingWhenAlertingConfigIsNil(t *testing.T) {
_ = os.Setenv("MOCK_ALERT_PROVIDER", "true") _ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
defer os.Clearenv() defer os.Clearenv()
HandleAlerting(nil, nil, nil, true) HandleAlerting(nil, nil, nil)
} }
func TestHandleAlertingWithBadAlertProvider(t *testing.T) { func TestHandleAlertingWithBadAlertProvider(t *testing.T) {
@ -96,9 +95,9 @@ func TestHandleAlertingWithBadAlertProvider(t *testing.T) {
} }
verify(t, ep, 0, 0, false, "The alert shouldn't start triggered") verify(t, ep, 0, 0, false, "The alert shouldn't start triggered")
HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{}, false) HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{})
verify(t, ep, 1, 0, false, "The alert shouldn't have triggered") verify(t, ep, 1, 0, false, "The alert shouldn't have triggered")
HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{}, false) HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{})
verify(t, ep, 2, 0, false, "The alert shouldn't have triggered, because the provider wasn't configured properly") verify(t, ep, 2, 0, false, "The alert shouldn't have triggered, because the provider wasn't configured properly")
} }
@ -107,7 +106,6 @@ func TestHandleAlertingWhenTriggeredAlertIsAlmostResolvedButendpointStartFailing
defer os.Clearenv() defer os.Clearenv()
cfg := &config.Config{ cfg := &config.Config{
Debug: true,
Alerting: &alerting.Config{ Alerting: &alerting.Config{
Custom: &custom.AlertProvider{ Custom: &custom.AlertProvider{
URL: "https://twin.sh/health", URL: "https://twin.sh/health",
@ -132,7 +130,7 @@ func TestHandleAlertingWhenTriggeredAlertIsAlmostResolvedButendpointStartFailing
} }
// This test simulate an alert that was already triggered // This test simulate an alert that was already triggered
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 2, 0, true, "The alert was already triggered at the beginning of this test") verify(t, ep, 2, 0, true, "The alert was already triggered at the beginning of this test")
} }
@ -141,7 +139,6 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedButSendOnResolvedIsFalse(t *t
defer os.Clearenv() defer os.Clearenv()
cfg := &config.Config{ cfg := &config.Config{
Debug: true,
Alerting: &alerting.Config{ Alerting: &alerting.Config{
Custom: &custom.AlertProvider{ Custom: &custom.AlertProvider{
URL: "https://twin.sh/health", URL: "https://twin.sh/health",
@ -166,7 +163,7 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedButSendOnResolvedIsFalse(t *t
NumberOfFailuresInARow: 1, NumberOfFailuresInARow: 1,
} }
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 1, false, "The alert should've been resolved") verify(t, ep, 0, 1, false, "The alert should've been resolved")
} }
@ -175,7 +172,6 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPagerDuty(t *testing.T) {
defer os.Clearenv() defer os.Clearenv()
cfg := &config.Config{ cfg := &config.Config{
Debug: true,
Alerting: &alerting.Config{ Alerting: &alerting.Config{
PagerDuty: &pagerduty.AlertProvider{ PagerDuty: &pagerduty.AlertProvider{
IntegrationKey: "00000000000000000000000000000000", IntegrationKey: "00000000000000000000000000000000",
@ -198,10 +194,10 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPagerDuty(t *testing.T) {
NumberOfFailuresInARow: 0, NumberOfFailuresInARow: 0,
} }
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 1, 0, true, "") verify(t, ep, 1, 0, true, "")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 1, false, "The alert should've been resolved") verify(t, ep, 0, 1, false, "The alert should've been resolved")
} }
@ -210,7 +206,6 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPushover(t *testing.T) {
defer os.Clearenv() defer os.Clearenv()
cfg := &config.Config{ cfg := &config.Config{
Debug: true,
Alerting: &alerting.Config{ Alerting: &alerting.Config{
Pushover: &pushover.AlertProvider{ Pushover: &pushover.AlertProvider{
ApplicationToken: "000000000000000000000000000000", ApplicationToken: "000000000000000000000000000000",
@ -234,10 +229,10 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPushover(t *testing.T) {
NumberOfFailuresInARow: 0, NumberOfFailuresInARow: 0,
} }
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 1, 0, true, "") verify(t, ep, 1, 0, true, "")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 1, false, "The alert should've been resolved") verify(t, ep, 0, 1, false, "The alert should've been resolved")
} }
@ -403,32 +398,32 @@ func TestHandleAlertingWithProviderThatReturnsAnError(t *testing.T) {
}, },
} }
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true) HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
verify(t, ep, 1, 0, false, "") verify(t, ep, 1, 0, false, "")
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true) HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
verify(t, ep, 2, 0, false, "The alert should have failed to trigger, because the alert provider is returning an error") verify(t, ep, 2, 0, false, "The alert should have failed to trigger, because the alert provider is returning an error")
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true) HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
verify(t, ep, 3, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error") verify(t, ep, 3, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error")
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true) HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
verify(t, ep, 4, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error") verify(t, ep, 4, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true) HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
verify(t, ep, 5, 0, true, "The alert should've been triggered because the alert provider is no longer returning an error") verify(t, ep, 5, 0, true, "The alert should've been triggered because the alert provider is no longer returning an error")
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true) HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
verify(t, ep, 0, 1, true, "The alert should've still been triggered") verify(t, ep, 0, 1, true, "The alert should've still been triggered")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true) HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
verify(t, ep, 0, 2, false, "The alert should've been resolved DESPITE THE ALERT PROVIDER RETURNING AN ERROR. See Alert.Triggered for further explanation.") verify(t, ep, 0, 2, false, "The alert should've been resolved DESPITE THE ALERT PROVIDER RETURNING AN ERROR. See Alert.Triggered for further explanation.")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
// Make sure that everything's working as expected after a rough patch // Make sure that everything's working as expected after a rough patch
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true) HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
verify(t, ep, 1, 0, false, "") verify(t, ep, 1, 0, false, "")
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true) HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
verify(t, ep, 2, 0, true, "The alert should have triggered") verify(t, ep, 2, 0, true, "The alert should have triggered")
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true) HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
verify(t, ep, 0, 1, true, "The alert should still be triggered") verify(t, ep, 0, 1, true, "The alert should still be triggered")
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true) HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
verify(t, ep, 0, 2, false, "The alert should have been resolved") verify(t, ep, 0, 2, false, "The alert should have been resolved")
}) })
} }
@ -440,7 +435,6 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) {
defer os.Clearenv() defer os.Clearenv()
cfg := &config.Config{ cfg := &config.Config{
Debug: true,
Alerting: &alerting.Config{ Alerting: &alerting.Config{
Custom: &custom.AlertProvider{ Custom: &custom.AlertProvider{
URL: "https://twin.sh/health", URL: "https://twin.sh/health",
@ -463,27 +457,27 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) {
}, },
} }
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 1, 0, true, "") verify(t, ep, 1, 0, true, "")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 1, false, "") verify(t, ep, 0, 1, false, "")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 1, 0, true, "") verify(t, ep, 1, 0, true, "")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 1, false, "") verify(t, ep, 0, 1, false, "")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
// Make sure that everything's working as expected after a rough patch // Make sure that everything's working as expected after a rough patch
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 1, 0, true, "") verify(t, ep, 1, 0, true, "")
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 2, 0, true, "") verify(t, ep, 2, 0, true, "")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 1, false, "") verify(t, ep, 0, 1, false, "")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug) HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 2, false, "") verify(t, ep, 0, 2, false, "")
} }

View File

@ -2,7 +2,6 @@ package watchdog
import ( import (
"context" "context"
"log"
"sync" "sync"
"time" "time"
@ -45,7 +44,7 @@ func monitor(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():
log.Printf("[watchdog.monitor] Canceling current execution of group=%s; endpoint=%s", ep.Group, ep.Name) logr.Warnf("[watchdog.monitor] Canceling current execution of group=%s; endpoint=%s; key=%s", ep.Group, ep.Name, ep.Key())
return return
case <-time.After(ep.Interval): case <-time.After(ep.Interval):
execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics) execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics)
@ -68,30 +67,30 @@ func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
logr.Infof("[watchdog.execute] No connectivity; skipping execution") logr.Infof("[watchdog.execute] No connectivity; skipping execution")
return return
} }
logr.Debugf("[watchdog.execute] Monitoring group=%s; endpoint=%s", ep.Group, ep.Name) logr.Debugf("[watchdog.execute] Monitoring group=%s; endpoint=%s; key=%s", ep.Group, ep.Name, ep.Key())
result := ep.EvaluateHealth() result := ep.EvaluateHealth()
if enabledMetrics { if enabledMetrics {
metrics.PublishMetricsForEndpoint(ep, result) metrics.PublishMetricsForEndpoint(ep, result)
} }
UpdateEndpointStatuses(ep, result) UpdateEndpointStatuses(ep, result)
if logr.GetThreshold() == logr.LevelDebug && !result.Success { if logr.GetThreshold() == logr.LevelDebug && !result.Success {
logr.Debugf("[watchdog.execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s; body=%s", ep.Group, ep.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond), result.Body) logr.Debugf("[watchdog.execute] Monitored group=%s; endpoint=%s; key=%s; success=%v; errors=%d; duration=%s; body=%s", ep.Group, ep.Name, ep.Key(), result.Success, len(result.Errors), result.Duration.Round(time.Millisecond), result.Body)
} else { } else {
logr.Infof("[watchdog.execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s", ep.Group, ep.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond)) logr.Infof("[watchdog.execute] Monitored group=%s; endpoint=%s; key=%s; success=%v; errors=%d; duration=%s", ep.Group, ep.Name, ep.Key(), result.Success, len(result.Errors), result.Duration.Round(time.Millisecond))
} }
if !maintenanceConfig.IsUnderMaintenance() { if !maintenanceConfig.IsUnderMaintenance() {
// TODO: Consider moving this after the monitoring lock is unlocked? I mean, how much noise can a single alerting provider cause... // TODO: Consider moving this after the monitoring lock is unlocked? I mean, how much noise can a single alerting provider cause...
HandleAlerting(ep, result, alertingConfig, logr.GetThreshold() == logr.LevelDebug) HandleAlerting(ep, result, alertingConfig)
} else { } else {
logr.Debugf("[watchdog.execute] Not handling alerting because currently in the maintenance window") logr.Debug("[watchdog.execute] Not handling alerting because currently in the maintenance window")
} }
logr.Debugf("[watchdog.execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", ep.Interval, ep.Group, ep.Name) logr.Debugf("[watchdog.execute] Waiting for interval=%s before monitoring group=%s endpoint=%s (key=%s) again", ep.Interval, ep.Group, ep.Name, ep.Key())
} }
// UpdateEndpointStatuses updates the slice of endpoint statuses // UpdateEndpointStatuses updates the slice of endpoint statuses
func UpdateEndpointStatuses(ep *endpoint.Endpoint, result *endpoint.Result) { func UpdateEndpointStatuses(ep *endpoint.Endpoint, result *endpoint.Result) {
if err := store.Get().Insert(ep, result); err != nil { if err := store.Get().Insert(ep, result); err != nil {
logr.Errorf("[watchdog.UpdateEndpointStatuses] Failed to insert result in storage:", err.Error()) logr.Errorf("[watchdog.UpdateEndpointStatuses] Failed to insert result in storage: %s", err.Error())
} }
} }