fix(logging): Replace log-level parameter by GATUS_LOG_LEVEL env var (#895)

* fix(logging): Replace log-level parameter by GATUS_LOG_LEVEL env var

* Improve log message if GATUS_LOG_LEVEL isn't set
This commit is contained in:
TwiN 2024-11-13 23:54:00 -05:00 committed by GitHub
parent 8060a77b1f
commit 01131755bc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
29 changed files with 239 additions and 256 deletions

View File

@ -1,5 +1,4 @@
metrics: true
debug: false
ui:
header: Example Company
link: https://example.org

View File

@ -22,7 +22,7 @@ jobs:
steps:
- uses: actions/setup-go@v5
with:
go-version: 1.22.2
go-version: 1.23.3
repository: "${{ github.event.inputs.repository || 'TwiN/gatus' }}"
ref: "${{ github.event.inputs.ref || 'master' }}"
- uses: actions/checkout@v4

View File

@ -18,7 +18,7 @@ jobs:
steps:
- uses: actions/setup-go@v5
with:
go-version: 1.22.2
go-version: 1.23.3
- uses: actions/checkout@v4
- name: Build binary to make sure it works
run: go build

View File

@ -15,6 +15,8 @@ FROM scratch
COPY --from=builder /app/gatus .
COPY --from=builder /app/config.yaml ./config/config.yaml
COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
ENV PORT=8080
ENV GATUS_CONFIG_PATH=""
ENV GATUS_LOG_LEVEL="INFO"
ENV PORT="8080"
EXPOSE ${PORT}
ENTRYPOINT ["/gatus"]

View File

@ -202,7 +202,7 @@ If `GATUS_CONFIG_PATH` points to a directory, all `*.yaml` and `*.yml` files ins
subdirectories are merged like so:
- All maps/objects are deep merged (i.e. you could define `alerting.slack` in one file and `alerting.pagerduty` in another file)
- All slices/arrays are appended (i.e. you can define `endpoints` in multiple files and each endpoint will be added to the final list of endpoints)
- Parameters with a primitive value (e.g. `debug`, `metrics`, `alerting.slack.webhook-url`, etc.) may only be defined once to forcefully avoid any ambiguity
- Parameters with a primitive value (e.g. `metrics`, `alerting.slack.webhook-url`, etc.) may only be defined once to forcefully avoid any ambiguity
- To clarify, this also means that you could not define `alerting.slack.webhook-url` in two files with different values. All files are merged into one before they are processed. This is by design.
> 💡 You can also use environment variables in the configuration file (e.g. `$DOMAIN`, `${DOMAIN}`)
@ -215,8 +215,6 @@ If you want to test it locally, see [Docker](#docker).
## Configuration
| Parameter | Description | Default |
|:-----------------------------|:-------------------------------------------------------------------------------------------------------------------------------------|:---------------------------|
| `debug` | Whether to enable debug logs. | `false` |
| `log-level` | Log level: DEBUG, INFO, WARN, ERROR. | `INFO` |
| `metrics` | Whether to expose metrics at `/metrics`. | `false` |
| `storage` | [Storage configuration](#storage). | `{}` |
| `alerting` | [Alerting configuration](#alerting). | `{}` |
@ -242,6 +240,9 @@ If you want to test it locally, see [Docker](#docker).
| `ui.buttons[].link` | Link to open when the button is clicked. | Required `""` |
| `maintenance` | [Maintenance configuration](#maintenance). | `{}` |
If you want more verbose logging, you may set the `GATUS_LOG_LEVEL` environment variable to `DEBUG`.
Conversely, if you want less verbose logging, you can set the aforementioned environment variable to `WARN`, `ERROR` or `FATAL`.
The default value for `GATUS_LOG_LEVEL` is `INFO`.
### Endpoints
Endpoints are URLs, applications, or services that you want to monitor. Each endpoint has a list of conditions that are

View File

@ -1,7 +1,6 @@
package alerting
import (
"log"
"reflect"
"strings"
@ -30,6 +29,7 @@ import (
"github.com/TwiN/gatus/v5/alerting/provider/telegram"
"github.com/TwiN/gatus/v5/alerting/provider/twilio"
"github.com/TwiN/gatus/v5/alerting/provider/zulip"
"github.com/TwiN/logr"
)
// Config is the configuration for alerting providers
@ -118,7 +118,7 @@ func (config *Config) GetAlertingProviderByAlertType(alertType alert.Type) provi
return fieldValue.Interface().(provider.AlertProvider)
}
}
log.Printf("[alerting.GetAlertingProviderByAlertType] No alerting provider found for alert type %s", alertType)
logr.Infof("[alerting.GetAlertingProviderByAlertType] No alerting provider found for alert type %s", alertType)
return nil
}

View File

@ -5,12 +5,12 @@ import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"github.com/TwiN/gatus/v5/alerting/alert"
"github.com/TwiN/gatus/v5/client"
"github.com/TwiN/gatus/v5/config/endpoint"
"github.com/TwiN/logr"
)
const (
@ -74,11 +74,10 @@ func (provider *AlertProvider) Send(ep *endpoint.Endpoint, alert *alert.Alert, r
alert.ResolveKey = ""
} else {
// We need to retrieve the resolve key from the response
body, err := io.ReadAll(response.Body)
var payload pagerDutyResponsePayload
if err = json.Unmarshal(body, &payload); err != nil {
if err = json.NewDecoder(response.Body).Decode(&payload); err != nil {
// Silently fail. We don't want to create tons of alerts just because we failed to parse the body.
log.Printf("[pagerduty.Send] Ran into error unmarshaling pagerduty response: %s", err.Error())
logr.Errorf("[pagerduty.Send] Ran into error decoding pagerduty response: %s", err.Error())
} else {
alert.ResolveKey = payload.DedupKey
}

View File

@ -2,7 +2,6 @@ package api
import (
"io/fs"
"log"
"net/http"
"os"
@ -10,6 +9,7 @@ import (
"github.com/TwiN/gatus/v5/config/web"
static "github.com/TwiN/gatus/v5/web"
"github.com/TwiN/health"
"github.com/TwiN/logr"
fiber "github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/adaptor"
"github.com/gofiber/fiber/v2/middleware/compress"
@ -28,7 +28,7 @@ type API struct {
func New(cfg *config.Config) *API {
api := &API{}
if cfg.Web == nil {
log.Println("[api.New] nil web config passed as parameter. This should only happen in tests. Using default web configuration")
logr.Warnf("[api.New] nil web config passed as parameter. This should only happen in tests. Using default web configuration")
cfg.Web = web.GetDefaultConfig()
}
api.router = api.createRouter(cfg)
@ -42,7 +42,7 @@ func (a *API) Router() *fiber.App {
func (a *API) createRouter(cfg *config.Config) *fiber.App {
app := fiber.New(fiber.Config{
ErrorHandler: func(c *fiber.Ctx, err error) error {
log.Printf("[api.ErrorHandler] %s", err.Error())
logr.Errorf("[api.ErrorHandler] %s", err.Error())
return fiber.DefaultErrorHandler(c, err)
},
ReadBufferSize: cfg.Web.ReadBufferSize,

View File

@ -2,7 +2,6 @@ package api
import (
"errors"
"log"
"math"
"net/http"
"sort"
@ -10,6 +9,7 @@ import (
"github.com/TwiN/gatus/v5/storage/store"
"github.com/TwiN/gatus/v5/storage/store/common"
"github.com/TwiN/logr"
"github.com/gofiber/fiber/v2"
"github.com/wcharczuk/go-chart/v2"
"github.com/wcharczuk/go-chart/v2/drawing"
@ -116,7 +116,7 @@ func ResponseTimeChart(c *fiber.Ctx) error {
c.Set("Expires", "0")
c.Status(http.StatusOK)
if err := graph.Render(chart.SVG, c); err != nil {
log.Println("[api.ResponseTimeChart] Failed to render response time chart:", err.Error())
logr.Errorf("[api.ResponseTimeChart] Failed to render response time chart: %s", err.Error())
return c.Status(500).SendString(err.Error())
}
return nil

View File

@ -4,7 +4,6 @@ import (
"encoding/json"
"errors"
"fmt"
"log"
"github.com/TwiN/gatus/v5/client"
"github.com/TwiN/gatus/v5/config"
@ -13,6 +12,7 @@ import (
"github.com/TwiN/gatus/v5/storage/store"
"github.com/TwiN/gatus/v5/storage/store/common"
"github.com/TwiN/gatus/v5/storage/store/common/paging"
"github.com/TwiN/logr"
"github.com/gofiber/fiber/v2"
)
@ -26,19 +26,19 @@ func EndpointStatuses(cfg *config.Config) fiber.Handler {
if !exists {
endpointStatuses, err := store.Get().GetAllEndpointStatuses(paging.NewEndpointStatusParams().WithResults(page, pageSize))
if err != nil {
log.Printf("[api.EndpointStatuses] Failed to retrieve endpoint statuses: %s", err.Error())
logr.Errorf("[api.EndpointStatuses] Failed to retrieve endpoint statuses: %s", err.Error())
return c.Status(500).SendString(err.Error())
}
// ALPHA: Retrieve endpoint statuses from remote instances
if endpointStatusesFromRemote, err := getEndpointStatusesFromRemoteInstances(cfg.Remote); err != nil {
log.Printf("[handler.EndpointStatuses] Silently failed to retrieve endpoint statuses from remote: %s", err.Error())
logr.Errorf("[handler.EndpointStatuses] Silently failed to retrieve endpoint statuses from remote: %s", err.Error())
} else if endpointStatusesFromRemote != nil {
endpointStatuses = append(endpointStatuses, endpointStatusesFromRemote...)
}
// Marshal endpoint statuses to JSON
data, err = json.Marshal(endpointStatuses)
if err != nil {
log.Printf("[api.EndpointStatuses] Unable to marshal object to JSON: %s", err.Error())
logr.Errorf("[api.EndpointStatuses] Unable to marshal object to JSON: %s", err.Error())
return c.Status(500).SendString("unable to marshal object to JSON")
}
cache.SetWithTTL(fmt.Sprintf("endpoint-status-%d-%d", page, pageSize), data, cacheTTL)
@ -64,7 +64,7 @@ func getEndpointStatusesFromRemoteInstances(remoteConfig *remote.Config) ([]*end
var endpointStatuses []*endpoint.Status
if err = json.NewDecoder(response.Body).Decode(&endpointStatuses); err != nil {
_ = response.Body.Close()
log.Printf("[api.getEndpointStatusesFromRemoteInstances] Silently failed to retrieve endpoint statuses from %s: %s", instance.URL, err.Error())
logr.Errorf("[api.getEndpointStatusesFromRemoteInstances] Silently failed to retrieve endpoint statuses from %s: %s", instance.URL, err.Error())
continue
}
_ = response.Body.Close()
@ -84,16 +84,16 @@ func EndpointStatus(c *fiber.Ctx) error {
if errors.Is(err, common.ErrEndpointNotFound) {
return c.Status(404).SendString(err.Error())
}
log.Printf("[api.EndpointStatus] Failed to retrieve endpoint status: %s", err.Error())
logr.Errorf("[api.EndpointStatus] Failed to retrieve endpoint status: %s", err.Error())
return c.Status(500).SendString(err.Error())
}
if endpointStatus == nil { // XXX: is this check necessary?
log.Printf("[api.EndpointStatus] Endpoint with key=%s not found", c.Params("key"))
logr.Errorf("[api.EndpointStatus] Endpoint with key=%s not found", c.Params("key"))
return c.Status(404).SendString("not found")
}
output, err := json.Marshal(endpointStatus)
if err != nil {
log.Printf("[api.EndpointStatus] Unable to marshal object to JSON: %s", err.Error())
logr.Errorf("[api.EndpointStatus] Unable to marshal object to JSON: %s", err.Error())
return c.Status(500).SendString("unable to marshal object to JSON")
}
c.Set("Content-Type", "application/json")

View File

@ -2,7 +2,6 @@ package api
import (
"errors"
"log"
"strings"
"time"
@ -11,6 +10,7 @@ import (
"github.com/TwiN/gatus/v5/storage/store"
"github.com/TwiN/gatus/v5/storage/store/common"
"github.com/TwiN/gatus/v5/watchdog"
"github.com/TwiN/logr"
"github.com/gofiber/fiber/v2"
)
@ -33,11 +33,11 @@ func CreateExternalEndpointResult(cfg *config.Config) fiber.Handler {
key := c.Params("key")
externalEndpoint := cfg.GetExternalEndpointByKey(key)
if externalEndpoint == nil {
log.Printf("[api.CreateExternalEndpointResult] External endpoint with key=%s not found", key)
logr.Errorf("[api.CreateExternalEndpointResult] External endpoint with key=%s not found", key)
return c.Status(404).SendString("not found")
}
if externalEndpoint.Token != token {
log.Printf("[api.CreateExternalEndpointResult] Invalid token for external endpoint with key=%s", key)
logr.Errorf("[api.CreateExternalEndpointResult] Invalid token for external endpoint with key=%s", key)
return c.Status(401).SendString("invalid token")
}
// Persist the result in the storage
@ -54,13 +54,13 @@ func CreateExternalEndpointResult(cfg *config.Config) fiber.Handler {
if errors.Is(err, common.ErrEndpointNotFound) {
return c.Status(404).SendString(err.Error())
}
log.Printf("[api.CreateExternalEndpointResult] Failed to insert result in storage: %s", err.Error())
logr.Errorf("[api.CreateExternalEndpointResult] Failed to insert result in storage: %s", err.Error())
return c.Status(500).SendString(err.Error())
}
log.Printf("[api.CreateExternalEndpointResult] Successfully inserted result for external endpoint with key=%s and success=%s", c.Params("key"), success)
logr.Infof("[api.CreateExternalEndpointResult] Successfully inserted result for external endpoint with key=%s and success=%s", c.Params("key"), success)
// Check if an alert should be triggered or resolved
if !cfg.Maintenance.IsUnderMaintenance() {
watchdog.HandleAlerting(convertedEndpoint, result, cfg.Alerting, cfg.Debug)
watchdog.HandleAlerting(convertedEndpoint, result, cfg.Alerting)
externalEndpoint.NumberOfSuccessesInARow = convertedEndpoint.NumberOfSuccessesInARow
externalEndpoint.NumberOfFailuresInARow = convertedEndpoint.NumberOfFailuresInARow
}

View File

@ -3,10 +3,10 @@ package api
import (
_ "embed"
"html/template"
"log"
"github.com/TwiN/gatus/v5/config/ui"
static "github.com/TwiN/gatus/v5/web"
"github.com/TwiN/logr"
"github.com/gofiber/fiber/v2"
)
@ -15,14 +15,14 @@ func SinglePageApplication(ui *ui.Config) fiber.Handler {
t, err := template.ParseFS(static.FileSystem, static.IndexPath)
if err != nil {
// This should never happen, because ui.ValidateAndSetDefaults validates that the template works.
log.Println("[api.SinglePageApplication] Failed to parse template. This should never happen, because the template is validated on start. Error:", err.Error())
logr.Errorf("[api.SinglePageApplication] Failed to parse template. This should never happen, because the template is validated on start. Error: %s", err.Error())
return c.Status(500).SendString("Failed to parse template. This should never happen, because the template is validated on start.")
}
c.Set("Content-Type", "text/html")
err = t.Execute(c, ui)
if err != nil {
// This should never happen, because ui.ValidateAndSetDefaults validates that the template works.
log.Println("[api.SinglePageApplication] Failed to execute template. This should never happen, because the template is validated on start. Error:", err.Error())
logr.Errorf("[api.SinglePageApplication] Failed to execute template. This should never happen, because the template is validated on start. Error: %s", err.Error())
return c.Status(500).SendString("Failed to parse template. This should never happen, because the template is validated on start.")
}
return c.SendStatus(200)

View File

@ -110,7 +110,6 @@ func CanCreateSCTPConnection(address string, config *Config) bool {
_ = conn.Close()
res <- true
})(ch)
select {
case result := <-ch:
return result
@ -182,7 +181,6 @@ func CanCreateSSHConnection(address, username, password string, config *Config)
} else {
port = "22"
}
cli, err := ssh.Dial("tcp", strings.Join([]string{address, port}, ":"), &ssh.ClientConfig{
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
User: username,
@ -194,7 +192,6 @@ func CanCreateSSHConnection(address, username, password string, config *Config)
if err != nil {
return false, nil, err
}
return true, cli, nil
}
@ -203,37 +200,29 @@ func ExecuteSSHCommand(sshClient *ssh.Client, body string, config *Config) (bool
type Body struct {
Command string `json:"command"`
}
defer sshClient.Close()
var b Body
if err := json.Unmarshal([]byte(body), &b); err != nil {
return false, 0, err
}
sess, err := sshClient.NewSession()
if err != nil {
return false, 0, err
}
err = sess.Start(b.Command)
if err != nil {
return false, 0, err
}
defer sess.Close()
err = sess.Wait()
if err == nil {
return true, 0, nil
}
e, ok := err.(*ssh.ExitError)
if !ok {
var exitErr *ssh.ExitError
if ok := errors.As(err, &exitErr); !ok {
return false, 0, err
}
return true, e.ExitStatus(), nil
return true, exitErr.ExitStatus(), nil
}
// Ping checks if an address can be pinged and returns the round-trip time if the address can be pinged

View File

@ -4,7 +4,6 @@ import (
"context"
"crypto/tls"
"errors"
"log"
"net"
"net/http"
"net/url"
@ -12,6 +11,7 @@ import (
"strconv"
"time"
"github.com/TwiN/logr"
"golang.org/x/oauth2"
"golang.org/x/oauth2/clientcredentials"
"google.golang.org/api/idtoken"
@ -232,7 +232,7 @@ func (c *Config) getHTTPClient() *http.Client {
if c.ProxyURL != "" {
proxyURL, err := url.Parse(c.ProxyURL)
if err != nil {
log.Println("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring custom proxy due to error:", err.Error())
logr.Errorf("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring custom proxy due to error: %s", err.Error())
} else {
c.httpClient.Transport.(*http.Transport).Proxy = http.ProxyURL(proxyURL)
}
@ -242,7 +242,7 @@ func (c *Config) getHTTPClient() *http.Client {
if err != nil {
// We're ignoring the error, because it should have been validated on startup ValidateAndSetDefaults.
// It shouldn't happen, but if it does, we'll log it... Better safe than sorry ;)
log.Println("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring invalid DNS resolver due to error:", err.Error())
logr.Errorf("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring invalid DNS resolver due to error: %s", err.Error())
} else {
dialer := &net.Dialer{
Resolver: &net.Resolver{
@ -259,7 +259,7 @@ func (c *Config) getHTTPClient() *http.Client {
}
}
if c.HasOAuth2Config() && c.HasIAPConfig() {
log.Println("[client.getHTTPClient] Error: Both Identity-Aware-Proxy and Oauth2 configuration are present.")
logr.Errorf("[client.getHTTPClient] Error: Both Identity-Aware-Proxy and Oauth2 configuration are present.")
} else if c.HasOAuth2Config() {
c.httpClient = configureOAuth2(c.httpClient, *c.OAuth2Config)
} else if c.HasIAPConfig() {
@ -269,23 +269,22 @@ func (c *Config) getHTTPClient() *http.Client {
return c.httpClient
}
// validateIAPToken returns a boolean that will define if the google identity-aware-proxy token can be fetch
// validateIAPToken returns a boolean that will define if the Google identity-aware-proxy token can be fetched
// and if is it valid.
func validateIAPToken(ctx context.Context, c IAPConfig) bool {
ts, err := idtoken.NewTokenSource(ctx, c.Audience)
if err != nil {
log.Println("[client.ValidateIAPToken] Claiming Identity token failed. error:", err.Error())
logr.Errorf("[client.ValidateIAPToken] Claiming Identity token failed: %s", err.Error())
return false
}
tok, err := ts.Token()
if err != nil {
log.Println("[client.ValidateIAPToken] Get Identity-Aware-Proxy token failed. error:", err.Error())
logr.Errorf("[client.ValidateIAPToken] Get Identity-Aware-Proxy token failed: %s", err.Error())
return false
}
payload, err := idtoken.Validate(ctx, tok.AccessToken, c.Audience)
_ = payload
_, err = idtoken.Validate(ctx, tok.AccessToken, c.Audience)
if err != nil {
log.Println("[client.ValidateIAPToken] Token Validation failed. error:", err.Error())
logr.Errorf("[client.ValidateIAPToken] Token Validation failed: %s", err.Error())
return false
}
return true
@ -298,7 +297,7 @@ func configureIAP(httpClient *http.Client, c IAPConfig) *http.Client {
if validateIAPToken(ctx, c) {
ts, err := idtoken.NewTokenSource(ctx, c.Audience)
if err != nil {
log.Println("[client.ConfigureIAP] Claiming Token Source failed. error:", err.Error())
logr.Errorf("[client.configureIAP] Claiming Token Source failed: %s", err.Error())
return httpClient
}
client := oauth2.NewClient(ctx, ts)
@ -327,17 +326,17 @@ func configureOAuth2(httpClient *http.Client, c OAuth2Config) *http.Client {
func configureTLS(tlsConfig *tls.Config, c TLSConfig) *tls.Config {
clientTLSCert, err := tls.LoadX509KeyPair(c.CertificateFile, c.PrivateKeyFile)
if err != nil {
logr.Errorf("[client.configureTLS] Failed to load certificate: %s", err.Error())
return nil
}
tlsConfig.Certificates = []tls.Certificate{clientTLSCert}
tlsConfig.Renegotiation = tls.RenegotiateNever
renegotionSupport := map[string]tls.RenegotiationSupport{
renegotiationSupport := map[string]tls.RenegotiationSupport{
"once": tls.RenegotiateOnceAsClient,
"freely": tls.RenegotiateFreelyAsClient,
"never": tls.RenegotiateNever,
}
if val, ok := renegotionSupport[c.RenegotiationSupport]; ok {
if val, ok := renegotiationSupport[c.RenegotiationSupport]; ok {
tlsConfig.Renegotiation = val
}
return tlsConfig

View File

@ -4,7 +4,6 @@ import (
"errors"
"fmt"
"io/fs"
"log"
"os"
"path/filepath"
"strings"
@ -53,11 +52,9 @@ var (
// Config is the main configuration structure
type Config struct {
// Debug Whether to enable debug logs
// Deprecated: Use the GATUS_LOG_LEVEL environment variable instead
Debug bool `yaml:"debug,omitempty"`
// LogLevel is one of DEBUG, INFO, WARN and ERROR. Defaults to INFO
LogLevel logr.Level `yaml:"log-level,omitempty"`
// Metrics Whether to expose metrics at /metrics
Metrics bool `yaml:"metrics,omitempty"`
@ -176,13 +173,11 @@ func LoadConfiguration(configPath string) (*Config, error) {
if fileInfo.IsDir() {
err := walkConfigDir(configPath, func(path string, d fs.DirEntry, err error) error {
if err != nil {
log.Printf("[config.LoadConfiguration] Error walking path=%s: %s", path, err)
return err
return fmt.Errorf("error walking path %s: %w", path, err)
}
log.Printf("[config.LoadConfiguration] Reading configuration from %s", path)
logr.Infof("[config.LoadConfiguration] Reading configuration from %s", path)
data, err := os.ReadFile(path)
if err != nil {
log.Printf("[config.LoadConfiguration] Error reading configuration from %s: %s", path, err)
return fmt.Errorf("error reading configuration from file %s: %w", path, err)
}
configBytes, err = deepmerge.YAML(configBytes, data)
@ -192,9 +187,9 @@ func LoadConfiguration(configPath string) (*Config, error) {
return nil, fmt.Errorf("error reading configuration from directory %s: %w", usedConfigPath, err)
}
} else {
log.Printf("[config.LoadConfiguration] Reading configuration from configFile=%s", usedConfigPath)
logr.Infof("[config.LoadConfiguration] Reading configuration from configFile=%s", usedConfigPath)
if data, err := os.ReadFile(usedConfigPath); err != nil {
return nil, err
return nil, fmt.Errorf("error reading configuration from directory %s: %w", usedConfigPath, err)
} else {
configBytes = data
}
@ -204,11 +199,11 @@ func LoadConfiguration(configPath string) (*Config, error) {
}
config, err := parseAndValidateConfigBytes(configBytes)
if err != nil {
return nil, err
return nil, fmt.Errorf("error parsing config: %w", err)
}
config.configPath = usedConfigPath
config.UpdateLastFileModTime()
return config, err
return config, nil
}
// walkConfigDir is a wrapper for filepath.WalkDir that strips directories and non-config files
@ -249,7 +244,13 @@ func parseAndValidateConfigBytes(yamlBytes []byte) (config *Config, err error) {
if config == nil || config.Endpoints == nil || len(config.Endpoints) == 0 {
err = ErrNoEndpointInConfig
} else {
validateAlertingConfig(config.Alerting, config.Endpoints, config.ExternalEndpoints, config.Debug)
// XXX: Remove this in v6.0.0
if config.Debug {
logr.Warn("WARNING: The 'debug' configuration has been deprecated and will be removed in v6.0.0")
logr.Warn("WARNING: Please use the GATUS_LOG_LEVEL environment variable instead")
}
// XXX: End of v6.0.0 removals
validateAlertingConfig(config.Alerting, config.Endpoints, config.ExternalEndpoints)
if err := validateSecurityConfig(config); err != nil {
return nil, err
}
@ -342,9 +343,7 @@ func validateEndpointsConfig(config *Config) error {
duplicateValidationMap := make(map[string]bool)
// Validate endpoints
for _, ep := range config.Endpoints {
if config.Debug {
log.Printf("[config.validateEndpointsConfig] Validating endpoint '%s'", ep.Name)
}
logr.Debugf("[config.validateEndpointsConfig] Validating endpoint with key %s", ep.Key())
if endpointKey := ep.Key(); duplicateValidationMap[endpointKey] {
return fmt.Errorf("invalid endpoint %s: name and group combination must be unique", ep.Key())
} else {
@ -354,12 +353,10 @@ func validateEndpointsConfig(config *Config) error {
return fmt.Errorf("invalid endpoint %s: %w", ep.Key(), err)
}
}
log.Printf("[config.validateEndpointsConfig] Validated %d endpoints", len(config.Endpoints))
logr.Infof("[config.validateEndpointsConfig] Validated %d endpoints", len(config.Endpoints))
// Validate external endpoints
for _, ee := range config.ExternalEndpoints {
if config.Debug {
log.Printf("[config.validateEndpointsConfig] Validating external endpoint '%s'", ee.Name)
}
logr.Debugf("[config.validateEndpointsConfig] Validating external endpoint '%s'", ee.Name)
if endpointKey := ee.Key(); duplicateValidationMap[endpointKey] {
return fmt.Errorf("invalid external endpoint %s: name and group combination must be unique", ee.Key())
} else {
@ -369,16 +366,14 @@ func validateEndpointsConfig(config *Config) error {
return fmt.Errorf("invalid external endpoint %s: %w", ee.Key(), err)
}
}
log.Printf("[config.validateEndpointsConfig] Validated %d external endpoints", len(config.ExternalEndpoints))
logr.Infof("[config.validateEndpointsConfig] Validated %d external endpoints", len(config.ExternalEndpoints))
return nil
}
func validateSecurityConfig(config *Config) error {
if config.Security != nil {
if config.Security.IsValid() {
if config.Debug {
log.Printf("[config.validateSecurityConfig] Basic security configuration has been validated")
}
logr.Debug("[config.validateSecurityConfig] Basic security configuration has been validated")
} else {
// If there was an attempt to configure security, then it must mean that some confidential or private
// data are exposed. As a result, we'll force a panic because it's better to be safe than sorry.
@ -392,9 +387,9 @@ func validateSecurityConfig(config *Config) error {
// Note that the alerting configuration has to be validated before the endpoint configuration, because the default alert
// returned by provider.AlertProvider.GetDefaultAlert() must be parsed before endpoint.Endpoint.ValidateAndSetDefaults()
// sets the default alert values when none are set.
func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoint.Endpoint, externalEndpoints []*endpoint.ExternalEndpoint, debug bool) {
func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoint.Endpoint, externalEndpoints []*endpoint.ExternalEndpoint) {
if alertingConfig == nil {
log.Printf("[config.validateAlertingConfig] Alerting is not configured")
logr.Info("[config.validateAlertingConfig] Alerting is not configured")
return
}
alertTypes := []alert.Type{
@ -432,9 +427,7 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
for _, ep := range endpoints {
for alertIndex, endpointAlert := range ep.Alerts {
if alertType == endpointAlert.Type {
if debug {
log.Printf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ep.Key())
}
logr.Debugf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ep.Key())
provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), endpointAlert)
}
}
@ -442,9 +435,7 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
for _, ee := range externalEndpoints {
for alertIndex, endpointAlert := range ee.Alerts {
if alertType == endpointAlert.Type {
if debug {
log.Printf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ee.Key())
}
logr.Debugf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ee.Key())
provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), endpointAlert)
}
}
@ -452,7 +443,7 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
}
validProviders = append(validProviders, alertType)
} else {
log.Printf("[config.validateAlertingConfig] Ignoring provider=%s because configuration is invalid", alertType)
logr.Warnf("[config.validateAlertingConfig] Ignoring provider=%s because configuration is invalid", alertType)
invalidProviders = append(invalidProviders, alertType)
alertingConfig.SetAlertingProviderToNil(alertProvider)
}
@ -460,5 +451,5 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
invalidProviders = append(invalidProviders, alertType)
}
}
log.Printf("[config.validateAlertingConfig] configuredProviders=%s; ignoredProviders=%s", validProviders, invalidProviders)
logr.Infof("[config.validateAlertingConfig] configuredProviders=%s; ignoredProviders=%s", validProviders, invalidProviders)
}

View File

@ -27,6 +27,7 @@ import (
"github.com/TwiN/gatus/v5/alerting/provider/pushover"
"github.com/TwiN/gatus/v5/alerting/provider/slack"
"github.com/TwiN/gatus/v5/alerting/provider/teams"
"github.com/TwiN/gatus/v5/alerting/provider/teamsworkflows"
"github.com/TwiN/gatus/v5/alerting/provider/telegram"
"github.com/TwiN/gatus/v5/alerting/provider/twilio"
"github.com/TwiN/gatus/v5/client"
@ -177,7 +178,6 @@ endpoints:
conditions:
- "[STATUS] == 200"`,
"b.yaml": `
debug: true
alerting:
discord:
@ -196,7 +196,6 @@ endpoints:
- "[STATUS] == 200"`,
},
expectedConfig: &Config{
Debug: true,
Metrics: true,
Alerting: &alerting.Config{
Discord: &discord.AlertProvider{WebhookURL: "https://discord.com/api/webhooks/xxx/yyy"},
@ -719,7 +718,6 @@ badconfig:
func TestParseAndValidateConfigBytesWithAlerting(t *testing.T) {
config, err := parseAndValidateConfigBytes([]byte(`
debug: true
alerting:
slack:
webhook-url: "http://example.com"
@ -919,8 +917,6 @@ endpoints:
func TestParseAndValidateConfigBytesWithAlertingAndDefaultAlert(t *testing.T) {
config, err := parseAndValidateConfigBytes([]byte(`
debug: true
alerting:
slack:
webhook-url: "http://example.com"
@ -1493,6 +1489,7 @@ endpoints:
t.Fatal("PagerDuty alerting config should've been set to nil, because its IsValid() method returned false and therefore alerting.Config.SetAlertingProviderToNil() should've been called")
}
}
func TestParseAndValidateConfigBytesWithInvalidPushoverAlertingConfig(t *testing.T) {
config, err := parseAndValidateConfigBytes([]byte(`
alerting:
@ -1801,7 +1798,7 @@ endpoints:
func TestParseAndValidateConfigBytesWithValidSecurityConfig(t *testing.T) {
const expectedUsername = "admin"
const expectedPasswordHash = "JDJhJDEwJHRiMnRFakxWazZLdXBzRERQazB1TE8vckRLY05Yb1hSdnoxWU0yQ1FaYXZRSW1McmladDYu"
config, err := parseAndValidateConfigBytes([]byte(fmt.Sprintf(`debug: true
config, err := parseAndValidateConfigBytes([]byte(fmt.Sprintf(`
security:
basic:
username: "%s"
@ -1889,6 +1886,7 @@ func TestGetAlertingProviderByAlertType(t *testing.T) {
Telegram: &telegram.AlertProvider{},
Twilio: &twilio.AlertProvider{},
Teams: &teams.AlertProvider{},
TeamsWorkflows: &teamsworkflows.AlertProvider{},
}
scenarios := []struct {
alertType alert.Type
@ -1912,6 +1910,7 @@ func TestGetAlertingProviderByAlertType(t *testing.T) {
{alertType: alert.TypeTelegram, expected: alertingConfig.Telegram},
{alertType: alert.TypeTwilio, expected: alertingConfig.Twilio},
{alertType: alert.TypeTeams, expected: alertingConfig.Teams},
{alertType: alert.TypeTeamsWorkflows, expected: alertingConfig.TeamsWorkflows},
}
for _, scenario := range scenarios {
t.Run(string(scenario.alertType), func(t *testing.T) {

View File

@ -150,7 +150,7 @@ func (c Condition) evaluate(result *Result, dontResolveFailedConditions bool) bo
return false
}
if !success {
//log.Printf("[Condition.evaluate] Condition '%s' did not succeed because '%s' is false", condition, condition)
//logr.Debugf("[Condition.evaluate] Condition '%s' did not succeed because '%s' is false", condition, condition)
}
result.ConditionResults = append(result.ConditionResults, &ConditionResult{Condition: conditionToDisplay, Success: success})
return success

View File

@ -1,9 +1,8 @@
package remote
import (
"log"
"github.com/TwiN/gatus/v5/client"
"github.com/TwiN/logr"
)
// NOTICE: This is an experimental alpha feature and may be updated/removed in future versions.
@ -31,9 +30,8 @@ func (c *Config) ValidateAndSetDefaults() error {
}
}
if len(c.Instances) > 0 {
log.Println("WARNING: Your configuration is using 'remote', which is in alpha and may be updated/removed in future versions.")
log.Println("WARNING: See https://github.com/TwiN/gatus/issues/64 for more information")
log.Println("WARNING: This feature is a candidate for removal in future versions. Please comment on the issue above if you need this feature.")
logr.Warn("WARNING: Your configuration is using 'remote', which is in alpha and may be updated/removed in future versions.")
logr.Warn("WARNING: See https://github.com/TwiN/gatus/issues/64 for more information")
}
return nil
}

View File

@ -1,12 +1,12 @@
package controller
import (
"log"
"os"
"time"
"github.com/TwiN/gatus/v5/api"
"github.com/TwiN/gatus/v5/config"
"github.com/TwiN/logr"
"github.com/gofiber/fiber/v2"
)
@ -25,19 +25,19 @@ func Handle(cfg *config.Config) {
if os.Getenv("ROUTER_TEST") == "true" {
return
}
log.Println("[controller.Handle] Listening on " + cfg.Web.SocketAddress())
logr.Info("[controller.Handle] Listening on " + cfg.Web.SocketAddress())
if cfg.Web.HasTLS() {
err := app.ListenTLS(cfg.Web.SocketAddress(), cfg.Web.TLS.CertificateFile, cfg.Web.TLS.PrivateKeyFile)
if err != nil {
log.Fatal("[controller.Handle]", err)
logr.Fatalf("[controller.Handle] %s", err.Error())
}
} else {
err := app.Listen(cfg.Web.SocketAddress())
if err != nil {
log.Fatal("[controller.Handle]", err)
logr.Fatalf("[controller.Handle] %s", err.Error())
}
}
log.Println("[controller.Handle] Server has shut down successfully")
logr.Info("[controller.Handle] Server has shut down successfully")
}
// Shutdown stops the server

4
go.mod
View File

@ -1,6 +1,6 @@
module github.com/TwiN/gatus/v5
go 1.22.4
go 1.23.3
require (
code.gitea.io/sdk/gitea v0.19.0
@ -8,7 +8,7 @@ require (
github.com/TwiN/g8/v2 v2.0.0
github.com/TwiN/gocache/v2 v2.2.2
github.com/TwiN/health v1.6.0
github.com/TwiN/logr v0.2.1
github.com/TwiN/logr v0.3.1
github.com/TwiN/whois v1.1.9
github.com/aws/aws-sdk-go v1.54.10
github.com/coreos/go-oidc/v3 v3.11.0

4
go.sum
View File

@ -16,8 +16,8 @@ github.com/TwiN/gocache/v2 v2.2.2 h1:4HToPfDV8FSbaYO5kkbhLpEllUYse5rAf+hVU/mSsuI
github.com/TwiN/gocache/v2 v2.2.2/go.mod h1:WfIuwd7GR82/7EfQqEtmLFC3a2vqaKbs4Pe6neB7Gyc=
github.com/TwiN/health v1.6.0 h1:L2ks575JhRgQqWWOfKjw9B0ec172hx7GdToqkYUycQM=
github.com/TwiN/health v1.6.0/go.mod h1:Z6TszwQPMvtSiVx1QMidVRgvVr4KZGfiwqcD7/Z+3iw=
github.com/TwiN/logr v0.2.1 h1:kMhUmBBVlFxzqTvyHuNoYQ/uwqg8BW4y0AyZxI5JB3Q=
github.com/TwiN/logr v0.2.1/go.mod h1:oldDOkRjFXjZqiMP0+ca5NAQHXTiJ02zHirsuBJJH6k=
github.com/TwiN/logr v0.3.1 h1:CfTKA83jUmsAoxqrr3p4JxEkqXOBnEE9/f35L5MODy4=
github.com/TwiN/logr v0.3.1/go.mod h1:BZgZFYq6fQdU3KtR8qYato3zUEw53yQDaIuujHb55Jw=
github.com/TwiN/whois v1.1.9 h1:m20+m1CXnrstie+tW2ZmAJkfcT9zgwpVRUFsKeMw+ng=
github.com/TwiN/whois v1.1.9/go.mod h1:TjipCMpJRAJYKmtz/rXQBU6UGxMh6bk8SHazu7OMnQE=
github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=

62
main.go
View File

@ -1,7 +1,6 @@
package main
import (
"log"
"os"
"os/signal"
"strconv"
@ -15,16 +14,22 @@ import (
"github.com/TwiN/logr"
)
const (
GatusConfigPathEnvVar = "GATUS_CONFIG_PATH"
GatusConfigFileEnvVar = "GATUS_CONFIG_FILE" // Deprecated in favor of GatusConfigPathEnvVar
GatusLogLevelEnvVar = "GATUS_LOG_LEVEL"
)
func main() {
if delayInSeconds, _ := strconv.Atoi(os.Getenv("GATUS_DELAY_START_SECONDS")); delayInSeconds > 0 {
log.Printf("Delaying start by %d seconds", delayInSeconds)
logr.Infof("Delaying start by %d seconds", delayInSeconds)
time.Sleep(time.Duration(delayInSeconds) * time.Second)
}
configureLogging()
cfg, err := loadConfiguration()
if err != nil {
panic(err)
}
configureLogging(cfg)
initializeStorage(cfg)
start(cfg)
// Wait for termination signal
@ -33,13 +38,13 @@ func main() {
signal.Notify(signalChannel, os.Interrupt, syscall.SIGTERM)
go func() {
<-signalChannel
log.Println("Received termination signal, attempting to gracefully shut down")
logr.Info("Received termination signal, attempting to gracefully shut down")
stop(cfg)
save()
done <- true
}()
<-done
log.Println("Shutting down")
logr.Info("Shutting down")
}
func start(cfg *config.Config) {
@ -55,21 +60,32 @@ func stop(cfg *config.Config) {
func save() {
if err := store.Get().Save(); err != nil {
log.Println("Failed to save storage provider:", err.Error())
logr.Errorf("Failed to save storage provider: %s", err.Error())
}
}
func configureLogging(cfg *config.Config) {
logr.SetThreshold(cfg.LogLevel)
logr.Infof("[main.configureLogging] Log Level is %s", logr.GetThreshold())
func configureLogging() {
logLevelAsString := os.Getenv(GatusLogLevelEnvVar)
if logLevel, err := logr.LevelFromString(logLevelAsString); err != nil {
logr.SetThreshold(logr.LevelInfo)
if len(logLevelAsString) == 0 {
logr.Infof("[main.configureLogging] Defaulting log level to %s", logr.LevelInfo)
} else {
logr.Warnf("[main.configureLogging] Invalid log level '%s', defaulting to %s", logLevelAsString, logr.LevelInfo)
}
} else {
logr.SetThreshold(logLevel)
logr.Infof("[main.configureLogging] Log Level is set to %s", logr.GetThreshold())
}
}
func loadConfiguration() (*config.Config, error) {
configPath := os.Getenv("GATUS_CONFIG_PATH")
configPath := os.Getenv(GatusConfigPathEnvVar)
// Backwards compatibility
if len(configPath) == 0 {
if configPath = os.Getenv("GATUS_CONFIG_FILE"); len(configPath) > 0 {
log.Println("WARNING: GATUS_CONFIG_FILE is deprecated. Please use GATUS_CONFIG_PATH instead.")
if configPath = os.Getenv(GatusConfigFileEnvVar); len(configPath) > 0 {
logr.Warnf("WARNING: %s is deprecated. Please use %s instead.", GatusConfigFileEnvVar, GatusConfigPathEnvVar)
}
}
return config.LoadConfiguration(configPath)
@ -95,7 +111,7 @@ func initializeStorage(cfg *config.Config) {
}
numberOfEndpointStatusesDeleted := store.Get().DeleteAllEndpointStatusesNotInKeys(keys)
if numberOfEndpointStatusesDeleted > 0 {
log.Printf("[main.initializeStorage] Deleted %d endpoint statuses because their matching endpoints no longer existed", numberOfEndpointStatusesDeleted)
logr.Infof("[main.initializeStorage] Deleted %d endpoint statuses because their matching endpoints no longer existed", numberOfEndpointStatusesDeleted)
}
// Clean up the triggered alerts from the storage provider and load valid triggered endpoint alerts
numberOfPersistedTriggeredAlertsLoaded := 0
@ -107,13 +123,13 @@ func initializeStorage(cfg *config.Config) {
}
}
numberOfTriggeredAlertsDeleted := store.Get().DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep, checksums)
if cfg.Debug && numberOfTriggeredAlertsDeleted > 0 {
log.Printf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ep.Key())
if numberOfTriggeredAlertsDeleted > 0 {
logr.Debugf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ep.Key())
}
for _, alert := range ep.Alerts {
exists, resolveKey, numberOfSuccessesInARow, err := store.Get().GetTriggeredEndpointAlert(ep, alert)
if err != nil {
log.Printf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error())
continue
}
if exists {
@ -132,13 +148,13 @@ func initializeStorage(cfg *config.Config) {
}
convertedEndpoint := ee.ToEndpoint()
numberOfTriggeredAlertsDeleted := store.Get().DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(convertedEndpoint, checksums)
if cfg.Debug && numberOfTriggeredAlertsDeleted > 0 {
log.Printf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ee.Key())
if numberOfTriggeredAlertsDeleted > 0 {
logr.Debugf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ee.Key())
}
for _, alert := range ee.Alerts {
exists, resolveKey, numberOfSuccessesInARow, err := store.Get().GetTriggeredEndpointAlert(convertedEndpoint, alert)
if err != nil {
log.Printf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ee.Key(), err.Error())
logr.Errorf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ee.Key(), err.Error())
continue
}
if exists {
@ -149,7 +165,7 @@ func initializeStorage(cfg *config.Config) {
}
}
if numberOfPersistedTriggeredAlertsLoaded > 0 {
log.Printf("[main.initializeStorage] Loaded %d persisted triggered alerts", numberOfPersistedTriggeredAlertsLoaded)
logr.Infof("[main.initializeStorage] Loaded %d persisted triggered alerts", numberOfPersistedTriggeredAlertsLoaded)
}
}
@ -157,15 +173,15 @@ func listenToConfigurationFileChanges(cfg *config.Config) {
for {
time.Sleep(30 * time.Second)
if cfg.HasLoadedConfigurationBeenModified() {
log.Println("[main.listenToConfigurationFileChanges] Configuration file has been modified")
logr.Info("[main.listenToConfigurationFileChanges] Configuration file has been modified")
stop(cfg)
time.Sleep(time.Second) // Wait a bit to make sure everything is done.
save()
updatedConfig, err := loadConfiguration()
if err != nil {
if cfg.SkipInvalidConfigUpdate {
log.Println("[main.listenToConfigurationFileChanges] Failed to load new configuration:", err.Error())
log.Println("[main.listenToConfigurationFileChanges] The configuration file was updated, but it is not valid. The old configuration will continue being used.")
logr.Errorf("[main.listenToConfigurationFileChanges] Failed to load new configuration: %s", err.Error())
logr.Error("[main.listenToConfigurationFileChanges] The configuration file was updated, but it is not valid. The old configuration will continue being used.")
// Update the last file modification time to avoid trying to process the same invalid configuration again
cfg.UpdateLastFileModTime()
continue

View File

@ -2,10 +2,10 @@ package security
import (
"encoding/base64"
"log"
"net/http"
g8 "github.com/TwiN/g8/v2"
"github.com/TwiN/logr"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/adaptor"
"github.com/gofiber/fiber/v2/middleware/basicauth"
@ -99,7 +99,7 @@ func (c *Config) IsAuthenticated(ctx *fiber.Ctx) bool {
// TODO: Update g8 to support fasthttp natively? (see g8's fasthttp branch)
request, err := adaptor.ConvertRequest(ctx, false)
if err != nil {
log.Printf("[security.IsAuthenticated] Unexpected error converting request: %v", err)
logr.Errorf("[security.IsAuthenticated] Unexpected error converting request: %v", err)
return false
}
token := c.gate.ExtractTokenFromRequest(request)

View File

@ -2,11 +2,11 @@ package security
import (
"context"
"log"
"net/http"
"strings"
"time"
"github.com/TwiN/logr"
"github.com/coreos/go-oidc/v3/oidc"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
@ -124,7 +124,7 @@ func (c *OIDCConfig) callbackHandler(w http.ResponseWriter, r *http.Request) { /
return
}
}
log.Printf("[security.callbackHandler] Subject %s is not in the list of allowed subjects", idToken.Subject)
logr.Debugf("[security.callbackHandler] Subject %s is not in the list of allowed subjects", idToken.Subject)
http.Redirect(w, r, "/?error=access_denied", http.StatusFound)
}

View File

@ -4,7 +4,6 @@ import (
"database/sql"
"errors"
"fmt"
"log"
"strconv"
"strings"
"time"
@ -14,6 +13,7 @@ import (
"github.com/TwiN/gatus/v5/storage/store/common"
"github.com/TwiN/gatus/v5/storage/store/common/paging"
"github.com/TwiN/gocache/v2"
"github.com/TwiN/logr"
_ "github.com/lib/pq"
_ "modernc.org/sqlite"
)
@ -237,12 +237,12 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
// Endpoint doesn't exist in the database, insert it
if endpointID, err = s.insertEndpoint(tx, ep); err != nil {
_ = tx.Rollback()
log.Printf("[sql.Insert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
return err
}
} else {
_ = tx.Rollback()
log.Printf("[sql.Insert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
return err
}
}
@ -253,12 +253,12 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
// of type EventStart, in which case we will have to create a new event of type EventHealthy or EventUnhealthy
// based on result.Success.
// 2. The lastResult.Success != result.Success. This implies that the endpoint went from healthy to unhealthy or
// vice-versa, in which case we will have to create a new event of type EventHealthy or EventUnhealthy
// vice versa, in which case we will have to create a new event of type EventHealthy or EventUnhealthy
// based on result.Success.
numberOfEvents, err := s.getNumberOfEventsByEndpointID(tx, endpointID)
if err != nil {
// Silently fail
log.Printf("[sql.Insert] Failed to retrieve total number of events for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to retrieve total number of events for endpoint with key=%s: %s", ep.Key(), err.Error())
}
if numberOfEvents == 0 {
// There's no events yet, which means we need to add the EventStart and the first healthy/unhealthy event
@ -268,18 +268,18 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
})
if err != nil {
// Silently fail
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", endpoint.EventStart, ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", endpoint.EventStart, ep.Key(), err.Error())
}
event := endpoint.NewEventFromResult(result)
if err = s.insertEndpointEvent(tx, endpointID, event); err != nil {
// Silently fail
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
}
} else {
// Get the success value of the previous result
var lastResultSuccess bool
if lastResultSuccess, err = s.getLastEndpointResultSuccessValue(tx, endpointID); err != nil {
log.Printf("[sql.Insert] Failed to retrieve outcome of previous result for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to retrieve outcome of previous result for endpoint with key=%s: %s", ep.Key(), err.Error())
} else {
// If we managed to retrieve the outcome of the previous result, we'll compare it with the new result.
// If the final outcome (success or failure) of the previous and the new result aren't the same, it means
@ -289,7 +289,7 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
event := endpoint.NewEventFromResult(result)
if err = s.insertEndpointEvent(tx, endpointID, event); err != nil {
// Silently fail
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
}
}
}
@ -298,42 +298,42 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
// (since we're only deleting MaximumNumberOfEvents at a time instead of 1)
if numberOfEvents > eventsCleanUpThreshold {
if err = s.deleteOldEndpointEvents(tx, endpointID); err != nil {
log.Printf("[sql.Insert] Failed to delete old events for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to delete old events for endpoint with key=%s: %s", ep.Key(), err.Error())
}
}
}
// Second, we need to insert the result.
if err = s.insertEndpointResult(tx, endpointID, result); err != nil {
log.Printf("[sql.Insert] Failed to insert result for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to insert result for endpoint with key=%s: %s", ep.Key(), err.Error())
_ = tx.Rollback() // If we can't insert the result, we'll rollback now since there's no point continuing
return err
}
// Clean up old results
numberOfResults, err := s.getNumberOfResultsByEndpointID(tx, endpointID)
if err != nil {
log.Printf("[sql.Insert] Failed to retrieve total number of results for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to retrieve total number of results for endpoint with key=%s: %s", ep.Key(), err.Error())
} else {
if numberOfResults > resultsCleanUpThreshold {
if err = s.deleteOldEndpointResults(tx, endpointID); err != nil {
log.Printf("[sql.Insert] Failed to delete old results for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to delete old results for endpoint with key=%s: %s", ep.Key(), err.Error())
}
}
}
// Finally, we need to insert the uptime data.
// Because the uptime data significantly outlives the results, we can't rely on the results for determining the uptime
if err = s.updateEndpointUptime(tx, endpointID, result); err != nil {
log.Printf("[sql.Insert] Failed to update uptime for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to update uptime for endpoint with key=%s: %s", ep.Key(), err.Error())
}
// Merge hourly uptime entries that can be merged into daily entries and clean up old uptime entries
numberOfUptimeEntries, err := s.getNumberOfUptimeEntriesByEndpointID(tx, endpointID)
if err != nil {
log.Printf("[sql.Insert] Failed to retrieve total number of uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to retrieve total number of uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
} else {
// Merge older hourly uptime entries into daily uptime entries if we have more than uptimeTotalEntriesMergeThreshold
if numberOfUptimeEntries >= uptimeTotalEntriesMergeThreshold {
log.Printf("[sql.Insert] Merging hourly uptime entries for endpoint with key=%s; This is a lot of work, it shouldn't happen too often", ep.Key())
logr.Infof("[sql.Insert] Merging hourly uptime entries for endpoint with key=%s; This is a lot of work, it shouldn't happen too often", ep.Key())
if err = s.mergeHourlyUptimeEntriesOlderThanMergeThresholdIntoDailyUptimeEntries(tx, endpointID); err != nil {
log.Printf("[sql.Insert] Failed to merge hourly uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to merge hourly uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
}
}
}
@ -342,11 +342,11 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
// but if Gatus was temporarily shut down, we might have some old entries that need to be cleaned up
ageOfOldestUptimeEntry, err := s.getAgeOfOldestEndpointUptimeEntry(tx, endpointID)
if err != nil {
log.Printf("[sql.Insert] Failed to retrieve oldest endpoint uptime entry for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to retrieve oldest endpoint uptime entry for endpoint with key=%s: %s", ep.Key(), err.Error())
} else {
if ageOfOldestUptimeEntry > uptimeAgeCleanUpThreshold {
if err = s.deleteOldUptimeEntries(tx, endpointID, time.Now().Add(-(uptimeRetention + time.Hour))); err != nil {
log.Printf("[sql.Insert] Failed to delete old uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to delete old uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
}
}
}
@ -356,7 +356,7 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
s.writeThroughCache.Delete(cacheKey)
endpointKey, params, err := extractKeyAndParamsFromCacheKey(cacheKey)
if err != nil {
log.Printf("[sql.Insert] Silently deleting cache key %s instead of refreshing due to error: %s", cacheKey, err.Error())
logr.Errorf("[sql.Insert] Silently deleting cache key %s instead of refreshing due to error: %s", cacheKey, err.Error())
continue
}
// Retrieve the endpoint status by key, which will in turn refresh the cache
@ -387,7 +387,7 @@ func (s *Store) DeleteAllEndpointStatusesNotInKeys(keys []string) int {
result, err = s.db.Exec(query, args...)
}
if err != nil {
log.Printf("[sql.DeleteAllEndpointStatusesNotInKeys] Failed to delete rows that do not belong to any of keys=%v: %s", keys, err.Error())
logr.Errorf("[sql.DeleteAllEndpointStatusesNotInKeys] Failed to delete rows that do not belong to any of keys=%v: %s", keys, err.Error())
return 0
}
if s.writeThroughCache != nil {
@ -403,7 +403,7 @@ func (s *Store) DeleteAllEndpointStatusesNotInKeys(keys []string) int {
// GetTriggeredEndpointAlert returns whether the triggered alert for the specified endpoint as well as the necessary information to resolve it
func (s *Store) GetTriggeredEndpointAlert(ep *endpoint.Endpoint, alert *alert.Alert) (exists bool, resolveKey string, numberOfSuccessesInARow int, err error) {
//log.Printf("[sql.GetTriggeredEndpointAlert] Getting triggered alert with checksum=%s for endpoint with key=%s", alert.Checksum(), ep.Key())
//logr.Debugf("[sql.GetTriggeredEndpointAlert] Getting triggered alert with checksum=%s for endpoint with key=%s", alert.Checksum(), ep.Key())
err = s.db.QueryRow(
"SELECT resolve_key, number_of_successes_in_a_row FROM endpoint_alerts_triggered WHERE endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $1 LIMIT 1) AND configuration_checksum = $2",
ep.Key(),
@ -421,7 +421,7 @@ func (s *Store) GetTriggeredEndpointAlert(ep *endpoint.Endpoint, alert *alert.Al
// UpsertTriggeredEndpointAlert inserts/updates a triggered alert for an endpoint
// Used for persistence of triggered alerts across application restarts
func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error {
//log.Printf("[sql.UpsertTriggeredEndpointAlert] Upserting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
//logr.Debugf("[sql.UpsertTriggeredEndpointAlert] Upserting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
tx, err := s.db.Begin()
if err != nil {
return err
@ -433,12 +433,12 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
// This shouldn't happen, but we'll handle it anyway
if endpointID, err = s.insertEndpoint(tx, ep); err != nil {
_ = tx.Rollback()
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
return err
}
} else {
_ = tx.Rollback()
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
return err
}
}
@ -457,7 +457,7 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
)
if err != nil {
_ = tx.Rollback()
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to persist triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to persist triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error())
return err
}
if err = tx.Commit(); err != nil {
@ -468,7 +468,7 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
// DeleteTriggeredEndpointAlert deletes a triggered alert for an endpoint
func (s *Store) DeleteTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error {
//log.Printf("[sql.DeleteTriggeredEndpointAlert] Deleting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
//logr.Debugf("[sql.DeleteTriggeredEndpointAlert] Deleting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
_, err := s.db.Exec("DELETE FROM endpoint_alerts_triggered WHERE configuration_checksum = $1 AND endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $2 LIMIT 1)", triggeredAlert.Checksum(), ep.Key())
return err
}
@ -477,7 +477,7 @@ func (s *Store) DeleteTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
// configurations are not provided in the checksums list.
// This prevents triggered alerts that have been removed or modified from lingering in the database.
func (s *Store) DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep *endpoint.Endpoint, checksums []string) int {
//log.Printf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Deleting triggered alerts for endpoint with key=%s that do not belong to any of checksums=%v", ep.Key(), checksums)
//logr.Debugf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Deleting triggered alerts for endpoint with key=%s that do not belong to any of checksums=%v", ep.Key(), checksums)
var err error
var result sql.Result
if len(checksums) == 0 {
@ -498,7 +498,7 @@ func (s *Store) DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep *endpoint.En
result, err = s.db.Exec(query, args...)
}
if err != nil {
log.Printf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Failed to delete rows for endpoint with key=%s that do not belong to any of checksums=%v: %s", ep.Key(), checksums, err.Error())
logr.Errorf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Failed to delete rows for endpoint with key=%s that do not belong to any of checksums=%v: %s", ep.Key(), checksums, err.Error())
return 0
}
// Return number of rows deleted
@ -530,7 +530,7 @@ func (s *Store) Close() {
// insertEndpoint inserts an endpoint in the store and returns the generated id of said endpoint
func (s *Store) insertEndpoint(tx *sql.Tx, ep *endpoint.Endpoint) (int64, error) {
//log.Printf("[sql.insertEndpoint] Inserting endpoint with group=%s and name=%s", ep.Group, ep.Name)
//logr.Debugf("[sql.insertEndpoint] Inserting endpoint with group=%s and name=%s", ep.Group, ep.Name)
var id int64
err := tx.QueryRow(
"INSERT INTO endpoints (endpoint_key, endpoint_name, endpoint_group) VALUES ($1, $2, $3) RETURNING endpoint_id",
@ -655,12 +655,12 @@ func (s *Store) getEndpointStatusByKey(tx *sql.Tx, key string, parameters *pagin
endpointStatus := endpoint.NewStatus(group, endpointName)
if parameters.EventsPageSize > 0 {
if endpointStatus.Events, err = s.getEndpointEventsByEndpointID(tx, endpointID, parameters.EventsPage, parameters.EventsPageSize); err != nil {
log.Printf("[sql.getEndpointStatusByKey] Failed to retrieve events for key=%s: %s", key, err.Error())
logr.Errorf("[sql.getEndpointStatusByKey] Failed to retrieve events for key=%s: %s", key, err.Error())
}
}
if parameters.ResultsPageSize > 0 {
if endpointStatus.Results, err = s.getEndpointResultsByEndpointID(tx, endpointID, parameters.ResultsPage, parameters.ResultsPageSize); err != nil {
log.Printf("[sql.getEndpointStatusByKey] Failed to retrieve results for key=%s: %s", key, err.Error())
logr.Errorf("[sql.getEndpointStatusByKey] Failed to retrieve results for key=%s: %s", key, err.Error())
}
}
if s.writeThroughCache != nil {
@ -735,7 +735,7 @@ func (s *Store) getEndpointResultsByEndpointID(tx *sql.Tx, endpointID int64, pag
var joinedErrors string
err = rows.Scan(&id, &result.Success, &joinedErrors, &result.Connected, &result.HTTPStatus, &result.DNSRCode, &result.CertificateExpiration, &result.DomainExpiration, &result.Hostname, &result.IP, &result.Duration, &result.Timestamp)
if err != nil {
log.Printf("[sql.getEndpointResultsByEndpointID] Silently failed to retrieve endpoint result for endpointID=%d: %s", endpointID, err.Error())
logr.Errorf("[sql.getEndpointResultsByEndpointID] Silently failed to retrieve endpoint result for endpointID=%d: %s", endpointID, err.Error())
err = nil
}
if len(joinedErrors) != 0 {

View File

@ -2,7 +2,6 @@ package store
import (
"context"
"log"
"time"
"github.com/TwiN/gatus/v5/alerting/alert"
@ -11,6 +10,7 @@ import (
"github.com/TwiN/gatus/v5/storage/store/common/paging"
"github.com/TwiN/gatus/v5/storage/store/memory"
"github.com/TwiN/gatus/v5/storage/store/sql"
"github.com/TwiN/logr"
)
// Store is the interface that each store should implement
@ -91,7 +91,7 @@ var (
func Get() Store {
if !initialized {
// This only happens in tests
log.Println("[store.Get] Provider requested before it was initialized, automatically initializing")
logr.Info("[store.Get] Provider requested before it was initialized, automatically initializing")
err := Initialize(nil)
if err != nil {
panic("failed to automatically initialize store: " + err.Error())
@ -110,11 +110,11 @@ func Initialize(cfg *storage.Config) error {
}
if cfg == nil {
// This only happens in tests
log.Println("[store.Initialize] nil storage config passed as parameter. This should only happen in tests. Defaulting to an empty config.")
logr.Warn("[store.Initialize] nil storage config passed as parameter. This should only happen in tests. Defaulting to an empty config.")
cfg = &storage.Config{}
}
if len(cfg.Path) == 0 && cfg.Type != storage.TypePostgres {
log.Printf("[store.Initialize] Creating storage provider of type=%s", cfg.Type)
logr.Infof("[store.Initialize] Creating storage provider of type=%s", cfg.Type)
}
ctx, cancelFunc = context.WithCancel(context.Background())
switch cfg.Type {
@ -136,13 +136,12 @@ func autoSave(ctx context.Context, store Store, interval time.Duration) {
for {
select {
case <-ctx.Done():
log.Printf("[store.autoSave] Stopping active job")
logr.Info("[store.autoSave] Stopping active job")
return
case <-time.After(interval):
log.Printf("[store.autoSave] Saving")
err := store.Save()
if err != nil {
log.Println("[store.autoSave] Save failed:", err.Error())
logr.Info("[store.autoSave] Saving")
if err := store.Save(); err != nil {
logr.Errorf("[store.autoSave] Save failed: %s", err.Error())
}
}
}

View File

@ -2,27 +2,27 @@ package watchdog
import (
"errors"
"log"
"os"
"github.com/TwiN/gatus/v5/alerting"
"github.com/TwiN/gatus/v5/config/endpoint"
"github.com/TwiN/gatus/v5/storage/store"
"github.com/TwiN/logr"
)
// HandleAlerting takes care of alerts to resolve and alerts to trigger based on result success or failure
func HandleAlerting(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) {
func HandleAlerting(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) {
if alertingConfig == nil {
return
}
if result.Success {
handleAlertsToResolve(ep, result, alertingConfig, debug)
handleAlertsToResolve(ep, result, alertingConfig)
} else {
handleAlertsToTrigger(ep, result, alertingConfig, debug)
handleAlertsToTrigger(ep, result, alertingConfig)
}
}
func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) {
func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) {
ep.NumberOfSuccessesInARow = 0
ep.NumberOfFailuresInARow++
for _, endpointAlert := range ep.Alerts {
@ -31,14 +31,12 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert
continue
}
if endpointAlert.Triggered {
if debug {
log.Printf("[watchdog.handleAlertsToTrigger] Alert for endpoint=%s with description='%s' has already been TRIGGERED, skipping", ep.Name, endpointAlert.GetDescription())
}
logr.Debugf("[watchdog.handleAlertsToTrigger] Alert for endpoint with key=%s with description='%s' has already been TRIGGERED, skipping", ep.Key(), endpointAlert.GetDescription())
continue
}
alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
if alertProvider != nil {
log.Printf("[watchdog.handleAlertsToTrigger] Sending %s alert because alert for endpoint=%s with description='%s' has been TRIGGERED", endpointAlert.Type, ep.Name, endpointAlert.GetDescription())
logr.Infof("[watchdog.handleAlertsToTrigger] Sending %s alert because alert for endpoint with key=%s with description='%s' has been TRIGGERED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
var err error
if os.Getenv("MOCK_ALERT_PROVIDER") == "true" {
if os.Getenv("MOCK_ALERT_PROVIDER_ERROR") == "true" {
@ -48,27 +46,27 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert
err = alertProvider.Send(ep, endpointAlert, result, false)
}
if err != nil {
log.Printf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint=%s: %s", ep.Name, err.Error())
logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
} else {
endpointAlert.Triggered = true
if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
log.Printf("[watchdog.handleAlertsToTrigger] Failed to persist triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to persist triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
}
}
} else {
log.Printf("[watchdog.handleAlertsToTrigger] Not sending alert of type=%s despite being TRIGGERED, because the provider wasn't configured properly", endpointAlert.Type)
logr.Warnf("[watchdog.handleAlertsToTrigger] Not sending alert of type=%s endpoint with key=%s despite being TRIGGERED, because the provider wasn't configured properly", endpointAlert.Type, ep.Key())
}
}
}
func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) {
func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) {
ep.NumberOfSuccessesInARow++
for _, endpointAlert := range ep.Alerts {
isStillBelowSuccessThreshold := endpointAlert.SuccessThreshold > ep.NumberOfSuccessesInARow
if isStillBelowSuccessThreshold && endpointAlert.IsEnabled() && endpointAlert.Triggered {
// Persist NumberOfSuccessesInARow
if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
log.Printf("[watchdog.handleAlertsToResolve] Failed to update triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[watchdog.handleAlertsToResolve] Failed to update triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
}
}
if !endpointAlert.IsEnabled() || !endpointAlert.Triggered || isStillBelowSuccessThreshold {
@ -78,20 +76,20 @@ func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alert
// Further explanation can be found on Alert's Triggered field.
endpointAlert.Triggered = false
if err := store.Get().DeleteTriggeredEndpointAlert(ep, endpointAlert); err != nil {
log.Printf("[watchdog.handleAlertsToResolve] Failed to delete persisted triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[watchdog.handleAlertsToResolve] Failed to delete persisted triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
}
if !endpointAlert.IsSendingOnResolved() {
continue
}
alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
if alertProvider != nil {
log.Printf("[watchdog.handleAlertsToResolve] Sending %s alert because alert for endpoint with key=%s with description='%s' has been RESOLVED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
logr.Infof("[watchdog.handleAlertsToResolve] Sending %s alert because alert for endpoint with key=%s with description='%s' has been RESOLVED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
err := alertProvider.Send(ep, endpointAlert, result, true)
if err != nil {
log.Printf("[watchdog.handleAlertsToResolve] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[watchdog.handleAlertsToResolve] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
}
} else {
log.Printf("[watchdog.handleAlertsToResolve] Not sending alert of type=%s despite being RESOLVED, because the provider wasn't configured properly", endpointAlert.Type)
logr.Warnf("[watchdog.handleAlertsToResolve] Not sending alert of type=%s for endpoint with key=%s despite being RESOLVED, because the provider wasn't configured properly", endpointAlert.Type, ep.Key())
}
}
ep.NumberOfFailuresInARow = 0

View File

@ -28,7 +28,6 @@ func TestHandleAlerting(t *testing.T) {
defer os.Clearenv()
cfg := &config.Config{
Debug: true,
Alerting: &alerting.Config{
Custom: &custom.AlertProvider{
URL: "https://twin.sh/health",
@ -52,28 +51,28 @@ func TestHandleAlerting(t *testing.T) {
}
verify(t, ep, 0, 0, false, "The alert shouldn't start triggered")
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 1, 0, false, "The alert shouldn't have triggered")
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 2, 0, true, "The alert should've triggered")
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 3, 0, true, "The alert should still be triggered")
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 4, 0, true, "The alert should still be triggered")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 1, true, "The alert should still be triggered (because endpoint.Alerts[0].SuccessThreshold is 3)")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 2, true, "The alert should still be triggered (because endpoint.Alerts[0].SuccessThreshold is 3)")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 3, false, "The alert should've been resolved")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 4, false, "The alert should no longer be triggered")
}
func TestHandleAlertingWhenAlertingConfigIsNil(t *testing.T) {
_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
defer os.Clearenv()
HandleAlerting(nil, nil, nil, true)
HandleAlerting(nil, nil, nil)
}
func TestHandleAlertingWithBadAlertProvider(t *testing.T) {
@ -96,9 +95,9 @@ func TestHandleAlertingWithBadAlertProvider(t *testing.T) {
}
verify(t, ep, 0, 0, false, "The alert shouldn't start triggered")
HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{}, false)
HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{})
verify(t, ep, 1, 0, false, "The alert shouldn't have triggered")
HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{}, false)
HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{})
verify(t, ep, 2, 0, false, "The alert shouldn't have triggered, because the provider wasn't configured properly")
}
@ -107,7 +106,6 @@ func TestHandleAlertingWhenTriggeredAlertIsAlmostResolvedButendpointStartFailing
defer os.Clearenv()
cfg := &config.Config{
Debug: true,
Alerting: &alerting.Config{
Custom: &custom.AlertProvider{
URL: "https://twin.sh/health",
@ -132,7 +130,7 @@ func TestHandleAlertingWhenTriggeredAlertIsAlmostResolvedButendpointStartFailing
}
// This test simulate an alert that was already triggered
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 2, 0, true, "The alert was already triggered at the beginning of this test")
}
@ -141,7 +139,6 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedButSendOnResolvedIsFalse(t *t
defer os.Clearenv()
cfg := &config.Config{
Debug: true,
Alerting: &alerting.Config{
Custom: &custom.AlertProvider{
URL: "https://twin.sh/health",
@ -166,7 +163,7 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedButSendOnResolvedIsFalse(t *t
NumberOfFailuresInARow: 1,
}
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 1, false, "The alert should've been resolved")
}
@ -175,7 +172,6 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPagerDuty(t *testing.T) {
defer os.Clearenv()
cfg := &config.Config{
Debug: true,
Alerting: &alerting.Config{
PagerDuty: &pagerduty.AlertProvider{
IntegrationKey: "00000000000000000000000000000000",
@ -198,10 +194,10 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPagerDuty(t *testing.T) {
NumberOfFailuresInARow: 0,
}
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 1, 0, true, "")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 1, false, "The alert should've been resolved")
}
@ -210,7 +206,6 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPushover(t *testing.T) {
defer os.Clearenv()
cfg := &config.Config{
Debug: true,
Alerting: &alerting.Config{
Pushover: &pushover.AlertProvider{
ApplicationToken: "000000000000000000000000000000",
@ -234,10 +229,10 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPushover(t *testing.T) {
NumberOfFailuresInARow: 0,
}
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 1, 0, true, "")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 1, false, "The alert should've been resolved")
}
@ -403,32 +398,32 @@ func TestHandleAlertingWithProviderThatReturnsAnError(t *testing.T) {
},
}
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true)
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
verify(t, ep, 1, 0, false, "")
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true)
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
verify(t, ep, 2, 0, false, "The alert should have failed to trigger, because the alert provider is returning an error")
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true)
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
verify(t, ep, 3, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error")
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true)
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
verify(t, ep, 4, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true)
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
verify(t, ep, 5, 0, true, "The alert should've been triggered because the alert provider is no longer returning an error")
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true)
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
verify(t, ep, 0, 1, true, "The alert should've still been triggered")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true)
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
verify(t, ep, 0, 2, false, "The alert should've been resolved DESPITE THE ALERT PROVIDER RETURNING AN ERROR. See Alert.Triggered for further explanation.")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
// Make sure that everything's working as expected after a rough patch
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true)
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
verify(t, ep, 1, 0, false, "")
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true)
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
verify(t, ep, 2, 0, true, "The alert should have triggered")
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true)
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
verify(t, ep, 0, 1, true, "The alert should still be triggered")
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true)
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
verify(t, ep, 0, 2, false, "The alert should have been resolved")
})
}
@ -440,7 +435,6 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) {
defer os.Clearenv()
cfg := &config.Config{
Debug: true,
Alerting: &alerting.Config{
Custom: &custom.AlertProvider{
URL: "https://twin.sh/health",
@ -463,27 +457,27 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) {
},
}
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 1, 0, true, "")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 1, false, "")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 1, 0, true, "")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 1, false, "")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
// Make sure that everything's working as expected after a rough patch
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 1, 0, true, "")
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
verify(t, ep, 2, 0, true, "")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 1, false, "")
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
verify(t, ep, 0, 2, false, "")
}

View File

@ -2,7 +2,6 @@ package watchdog
import (
"context"
"log"
"sync"
"time"
@ -45,7 +44,7 @@ func monitor(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
for {
select {
case <-ctx.Done():
log.Printf("[watchdog.monitor] Canceling current execution of group=%s; endpoint=%s", ep.Group, ep.Name)
logr.Warnf("[watchdog.monitor] Canceling current execution of group=%s; endpoint=%s; key=%s", ep.Group, ep.Name, ep.Key())
return
case <-time.After(ep.Interval):
execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics)
@ -68,30 +67,30 @@ func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
logr.Infof("[watchdog.execute] No connectivity; skipping execution")
return
}
logr.Debugf("[watchdog.execute] Monitoring group=%s; endpoint=%s", ep.Group, ep.Name)
logr.Debugf("[watchdog.execute] Monitoring group=%s; endpoint=%s; key=%s", ep.Group, ep.Name, ep.Key())
result := ep.EvaluateHealth()
if enabledMetrics {
metrics.PublishMetricsForEndpoint(ep, result)
}
UpdateEndpointStatuses(ep, result)
if logr.GetThreshold() == logr.LevelDebug && !result.Success {
logr.Debugf("[watchdog.execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s; body=%s", ep.Group, ep.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond), result.Body)
logr.Debugf("[watchdog.execute] Monitored group=%s; endpoint=%s; key=%s; success=%v; errors=%d; duration=%s; body=%s", ep.Group, ep.Name, ep.Key(), result.Success, len(result.Errors), result.Duration.Round(time.Millisecond), result.Body)
} else {
logr.Infof("[watchdog.execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s", ep.Group, ep.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond))
logr.Infof("[watchdog.execute] Monitored group=%s; endpoint=%s; key=%s; success=%v; errors=%d; duration=%s", ep.Group, ep.Name, ep.Key(), result.Success, len(result.Errors), result.Duration.Round(time.Millisecond))
}
if !maintenanceConfig.IsUnderMaintenance() {
// TODO: Consider moving this after the monitoring lock is unlocked? I mean, how much noise can a single alerting provider cause...
HandleAlerting(ep, result, alertingConfig, logr.GetThreshold() == logr.LevelDebug)
HandleAlerting(ep, result, alertingConfig)
} else {
logr.Debugf("[watchdog.execute] Not handling alerting because currently in the maintenance window")
logr.Debug("[watchdog.execute] Not handling alerting because currently in the maintenance window")
}
logr.Debugf("[watchdog.execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", ep.Interval, ep.Group, ep.Name)
logr.Debugf("[watchdog.execute] Waiting for interval=%s before monitoring group=%s endpoint=%s (key=%s) again", ep.Interval, ep.Group, ep.Name, ep.Key())
}
// UpdateEndpointStatuses updates the slice of endpoint statuses
func UpdateEndpointStatuses(ep *endpoint.Endpoint, result *endpoint.Result) {
if err := store.Get().Insert(ep, result); err != nil {
logr.Errorf("[watchdog.UpdateEndpointStatuses] Failed to insert result in storage:", err.Error())
logr.Errorf("[watchdog.UpdateEndpointStatuses] Failed to insert result in storage: %s", err.Error())
}
}