mirror of
https://github.com/TwiN/gatus.git
synced 2024-11-21 07:23:53 +01:00
fix(logging): Replace log-level parameter by GATUS_LOG_LEVEL env var (#895)
* fix(logging): Replace log-level parameter by GATUS_LOG_LEVEL env var * Improve log message if GATUS_LOG_LEVEL isn't set
This commit is contained in:
parent
8060a77b1f
commit
01131755bc
@ -1,5 +1,4 @@
|
||||
metrics: true
|
||||
debug: false
|
||||
ui:
|
||||
header: Example Company
|
||||
link: https://example.org
|
||||
|
2
.github/workflows/benchmark.yml
vendored
2
.github/workflows/benchmark.yml
vendored
@ -22,7 +22,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 1.22.2
|
||||
go-version: 1.23.3
|
||||
repository: "${{ github.event.inputs.repository || 'TwiN/gatus' }}"
|
||||
ref: "${{ github.event.inputs.ref || 'master' }}"
|
||||
- uses: actions/checkout@v4
|
||||
|
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@ -18,7 +18,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 1.22.2
|
||||
go-version: 1.23.3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build binary to make sure it works
|
||||
run: go build
|
||||
|
@ -15,6 +15,8 @@ FROM scratch
|
||||
COPY --from=builder /app/gatus .
|
||||
COPY --from=builder /app/config.yaml ./config/config.yaml
|
||||
COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
ENV PORT=8080
|
||||
ENV GATUS_CONFIG_PATH=""
|
||||
ENV GATUS_LOG_LEVEL="INFO"
|
||||
ENV PORT="8080"
|
||||
EXPOSE ${PORT}
|
||||
ENTRYPOINT ["/gatus"]
|
||||
|
@ -202,7 +202,7 @@ If `GATUS_CONFIG_PATH` points to a directory, all `*.yaml` and `*.yml` files ins
|
||||
subdirectories are merged like so:
|
||||
- All maps/objects are deep merged (i.e. you could define `alerting.slack` in one file and `alerting.pagerduty` in another file)
|
||||
- All slices/arrays are appended (i.e. you can define `endpoints` in multiple files and each endpoint will be added to the final list of endpoints)
|
||||
- Parameters with a primitive value (e.g. `debug`, `metrics`, `alerting.slack.webhook-url`, etc.) may only be defined once to forcefully avoid any ambiguity
|
||||
- Parameters with a primitive value (e.g. `metrics`, `alerting.slack.webhook-url`, etc.) may only be defined once to forcefully avoid any ambiguity
|
||||
- To clarify, this also means that you could not define `alerting.slack.webhook-url` in two files with different values. All files are merged into one before they are processed. This is by design.
|
||||
|
||||
> 💡 You can also use environment variables in the configuration file (e.g. `$DOMAIN`, `${DOMAIN}`)
|
||||
@ -215,8 +215,6 @@ If you want to test it locally, see [Docker](#docker).
|
||||
## Configuration
|
||||
| Parameter | Description | Default |
|
||||
|:-----------------------------|:-------------------------------------------------------------------------------------------------------------------------------------|:---------------------------|
|
||||
| `debug` | Whether to enable debug logs. | `false` |
|
||||
| `log-level` | Log level: DEBUG, INFO, WARN, ERROR. | `INFO` |
|
||||
| `metrics` | Whether to expose metrics at `/metrics`. | `false` |
|
||||
| `storage` | [Storage configuration](#storage). | `{}` |
|
||||
| `alerting` | [Alerting configuration](#alerting). | `{}` |
|
||||
@ -242,6 +240,9 @@ If you want to test it locally, see [Docker](#docker).
|
||||
| `ui.buttons[].link` | Link to open when the button is clicked. | Required `""` |
|
||||
| `maintenance` | [Maintenance configuration](#maintenance). | `{}` |
|
||||
|
||||
If you want more verbose logging, you may set the `GATUS_LOG_LEVEL` environment variable to `DEBUG`.
|
||||
Conversely, if you want less verbose logging, you can set the aforementioned environment variable to `WARN`, `ERROR` or `FATAL`.
|
||||
The default value for `GATUS_LOG_LEVEL` is `INFO`.
|
||||
|
||||
### Endpoints
|
||||
Endpoints are URLs, applications, or services that you want to monitor. Each endpoint has a list of conditions that are
|
||||
|
@ -1,7 +1,6 @@
|
||||
package alerting
|
||||
|
||||
import (
|
||||
"log"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
@ -30,6 +29,7 @@ import (
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/telegram"
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/twilio"
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/zulip"
|
||||
"github.com/TwiN/logr"
|
||||
)
|
||||
|
||||
// Config is the configuration for alerting providers
|
||||
@ -118,7 +118,7 @@ func (config *Config) GetAlertingProviderByAlertType(alertType alert.Type) provi
|
||||
return fieldValue.Interface().(provider.AlertProvider)
|
||||
}
|
||||
}
|
||||
log.Printf("[alerting.GetAlertingProviderByAlertType] No alerting provider found for alert type %s", alertType)
|
||||
logr.Infof("[alerting.GetAlertingProviderByAlertType] No alerting provider found for alert type %s", alertType)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -5,12 +5,12 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
|
||||
"github.com/TwiN/gatus/v5/alerting/alert"
|
||||
"github.com/TwiN/gatus/v5/client"
|
||||
"github.com/TwiN/gatus/v5/config/endpoint"
|
||||
"github.com/TwiN/logr"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -74,11 +74,10 @@ func (provider *AlertProvider) Send(ep *endpoint.Endpoint, alert *alert.Alert, r
|
||||
alert.ResolveKey = ""
|
||||
} else {
|
||||
// We need to retrieve the resolve key from the response
|
||||
body, err := io.ReadAll(response.Body)
|
||||
var payload pagerDutyResponsePayload
|
||||
if err = json.Unmarshal(body, &payload); err != nil {
|
||||
if err = json.NewDecoder(response.Body).Decode(&payload); err != nil {
|
||||
// Silently fail. We don't want to create tons of alerts just because we failed to parse the body.
|
||||
log.Printf("[pagerduty.Send] Ran into error unmarshaling pagerduty response: %s", err.Error())
|
||||
logr.Errorf("[pagerduty.Send] Ran into error decoding pagerduty response: %s", err.Error())
|
||||
} else {
|
||||
alert.ResolveKey = payload.DedupKey
|
||||
}
|
||||
|
@ -2,7 +2,6 @@ package api
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
|
||||
@ -10,6 +9,7 @@ import (
|
||||
"github.com/TwiN/gatus/v5/config/web"
|
||||
static "github.com/TwiN/gatus/v5/web"
|
||||
"github.com/TwiN/health"
|
||||
"github.com/TwiN/logr"
|
||||
fiber "github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/adaptor"
|
||||
"github.com/gofiber/fiber/v2/middleware/compress"
|
||||
@ -28,7 +28,7 @@ type API struct {
|
||||
func New(cfg *config.Config) *API {
|
||||
api := &API{}
|
||||
if cfg.Web == nil {
|
||||
log.Println("[api.New] nil web config passed as parameter. This should only happen in tests. Using default web configuration")
|
||||
logr.Warnf("[api.New] nil web config passed as parameter. This should only happen in tests. Using default web configuration")
|
||||
cfg.Web = web.GetDefaultConfig()
|
||||
}
|
||||
api.router = api.createRouter(cfg)
|
||||
@ -42,7 +42,7 @@ func (a *API) Router() *fiber.App {
|
||||
func (a *API) createRouter(cfg *config.Config) *fiber.App {
|
||||
app := fiber.New(fiber.Config{
|
||||
ErrorHandler: func(c *fiber.Ctx, err error) error {
|
||||
log.Printf("[api.ErrorHandler] %s", err.Error())
|
||||
logr.Errorf("[api.ErrorHandler] %s", err.Error())
|
||||
return fiber.DefaultErrorHandler(c, err)
|
||||
},
|
||||
ReadBufferSize: cfg.Web.ReadBufferSize,
|
||||
|
@ -2,7 +2,6 @@ package api
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log"
|
||||
"math"
|
||||
"net/http"
|
||||
"sort"
|
||||
@ -10,6 +9,7 @@ import (
|
||||
|
||||
"github.com/TwiN/gatus/v5/storage/store"
|
||||
"github.com/TwiN/gatus/v5/storage/store/common"
|
||||
"github.com/TwiN/logr"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/wcharczuk/go-chart/v2"
|
||||
"github.com/wcharczuk/go-chart/v2/drawing"
|
||||
@ -116,7 +116,7 @@ func ResponseTimeChart(c *fiber.Ctx) error {
|
||||
c.Set("Expires", "0")
|
||||
c.Status(http.StatusOK)
|
||||
if err := graph.Render(chart.SVG, c); err != nil {
|
||||
log.Println("[api.ResponseTimeChart] Failed to render response time chart:", err.Error())
|
||||
logr.Errorf("[api.ResponseTimeChart] Failed to render response time chart: %s", err.Error())
|
||||
return c.Status(500).SendString(err.Error())
|
||||
}
|
||||
return nil
|
||||
|
@ -4,7 +4,6 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"github.com/TwiN/gatus/v5/client"
|
||||
"github.com/TwiN/gatus/v5/config"
|
||||
@ -13,6 +12,7 @@ import (
|
||||
"github.com/TwiN/gatus/v5/storage/store"
|
||||
"github.com/TwiN/gatus/v5/storage/store/common"
|
||||
"github.com/TwiN/gatus/v5/storage/store/common/paging"
|
||||
"github.com/TwiN/logr"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
)
|
||||
|
||||
@ -26,19 +26,19 @@ func EndpointStatuses(cfg *config.Config) fiber.Handler {
|
||||
if !exists {
|
||||
endpointStatuses, err := store.Get().GetAllEndpointStatuses(paging.NewEndpointStatusParams().WithResults(page, pageSize))
|
||||
if err != nil {
|
||||
log.Printf("[api.EndpointStatuses] Failed to retrieve endpoint statuses: %s", err.Error())
|
||||
logr.Errorf("[api.EndpointStatuses] Failed to retrieve endpoint statuses: %s", err.Error())
|
||||
return c.Status(500).SendString(err.Error())
|
||||
}
|
||||
// ALPHA: Retrieve endpoint statuses from remote instances
|
||||
if endpointStatusesFromRemote, err := getEndpointStatusesFromRemoteInstances(cfg.Remote); err != nil {
|
||||
log.Printf("[handler.EndpointStatuses] Silently failed to retrieve endpoint statuses from remote: %s", err.Error())
|
||||
logr.Errorf("[handler.EndpointStatuses] Silently failed to retrieve endpoint statuses from remote: %s", err.Error())
|
||||
} else if endpointStatusesFromRemote != nil {
|
||||
endpointStatuses = append(endpointStatuses, endpointStatusesFromRemote...)
|
||||
}
|
||||
// Marshal endpoint statuses to JSON
|
||||
data, err = json.Marshal(endpointStatuses)
|
||||
if err != nil {
|
||||
log.Printf("[api.EndpointStatuses] Unable to marshal object to JSON: %s", err.Error())
|
||||
logr.Errorf("[api.EndpointStatuses] Unable to marshal object to JSON: %s", err.Error())
|
||||
return c.Status(500).SendString("unable to marshal object to JSON")
|
||||
}
|
||||
cache.SetWithTTL(fmt.Sprintf("endpoint-status-%d-%d", page, pageSize), data, cacheTTL)
|
||||
@ -64,7 +64,7 @@ func getEndpointStatusesFromRemoteInstances(remoteConfig *remote.Config) ([]*end
|
||||
var endpointStatuses []*endpoint.Status
|
||||
if err = json.NewDecoder(response.Body).Decode(&endpointStatuses); err != nil {
|
||||
_ = response.Body.Close()
|
||||
log.Printf("[api.getEndpointStatusesFromRemoteInstances] Silently failed to retrieve endpoint statuses from %s: %s", instance.URL, err.Error())
|
||||
logr.Errorf("[api.getEndpointStatusesFromRemoteInstances] Silently failed to retrieve endpoint statuses from %s: %s", instance.URL, err.Error())
|
||||
continue
|
||||
}
|
||||
_ = response.Body.Close()
|
||||
@ -84,16 +84,16 @@ func EndpointStatus(c *fiber.Ctx) error {
|
||||
if errors.Is(err, common.ErrEndpointNotFound) {
|
||||
return c.Status(404).SendString(err.Error())
|
||||
}
|
||||
log.Printf("[api.EndpointStatus] Failed to retrieve endpoint status: %s", err.Error())
|
||||
logr.Errorf("[api.EndpointStatus] Failed to retrieve endpoint status: %s", err.Error())
|
||||
return c.Status(500).SendString(err.Error())
|
||||
}
|
||||
if endpointStatus == nil { // XXX: is this check necessary?
|
||||
log.Printf("[api.EndpointStatus] Endpoint with key=%s not found", c.Params("key"))
|
||||
logr.Errorf("[api.EndpointStatus] Endpoint with key=%s not found", c.Params("key"))
|
||||
return c.Status(404).SendString("not found")
|
||||
}
|
||||
output, err := json.Marshal(endpointStatus)
|
||||
if err != nil {
|
||||
log.Printf("[api.EndpointStatus] Unable to marshal object to JSON: %s", err.Error())
|
||||
logr.Errorf("[api.EndpointStatus] Unable to marshal object to JSON: %s", err.Error())
|
||||
return c.Status(500).SendString("unable to marshal object to JSON")
|
||||
}
|
||||
c.Set("Content-Type", "application/json")
|
||||
|
@ -2,7 +2,6 @@ package api
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@ -11,6 +10,7 @@ import (
|
||||
"github.com/TwiN/gatus/v5/storage/store"
|
||||
"github.com/TwiN/gatus/v5/storage/store/common"
|
||||
"github.com/TwiN/gatus/v5/watchdog"
|
||||
"github.com/TwiN/logr"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
)
|
||||
|
||||
@ -33,11 +33,11 @@ func CreateExternalEndpointResult(cfg *config.Config) fiber.Handler {
|
||||
key := c.Params("key")
|
||||
externalEndpoint := cfg.GetExternalEndpointByKey(key)
|
||||
if externalEndpoint == nil {
|
||||
log.Printf("[api.CreateExternalEndpointResult] External endpoint with key=%s not found", key)
|
||||
logr.Errorf("[api.CreateExternalEndpointResult] External endpoint with key=%s not found", key)
|
||||
return c.Status(404).SendString("not found")
|
||||
}
|
||||
if externalEndpoint.Token != token {
|
||||
log.Printf("[api.CreateExternalEndpointResult] Invalid token for external endpoint with key=%s", key)
|
||||
logr.Errorf("[api.CreateExternalEndpointResult] Invalid token for external endpoint with key=%s", key)
|
||||
return c.Status(401).SendString("invalid token")
|
||||
}
|
||||
// Persist the result in the storage
|
||||
@ -54,13 +54,13 @@ func CreateExternalEndpointResult(cfg *config.Config) fiber.Handler {
|
||||
if errors.Is(err, common.ErrEndpointNotFound) {
|
||||
return c.Status(404).SendString(err.Error())
|
||||
}
|
||||
log.Printf("[api.CreateExternalEndpointResult] Failed to insert result in storage: %s", err.Error())
|
||||
logr.Errorf("[api.CreateExternalEndpointResult] Failed to insert result in storage: %s", err.Error())
|
||||
return c.Status(500).SendString(err.Error())
|
||||
}
|
||||
log.Printf("[api.CreateExternalEndpointResult] Successfully inserted result for external endpoint with key=%s and success=%s", c.Params("key"), success)
|
||||
logr.Infof("[api.CreateExternalEndpointResult] Successfully inserted result for external endpoint with key=%s and success=%s", c.Params("key"), success)
|
||||
// Check if an alert should be triggered or resolved
|
||||
if !cfg.Maintenance.IsUnderMaintenance() {
|
||||
watchdog.HandleAlerting(convertedEndpoint, result, cfg.Alerting, cfg.Debug)
|
||||
watchdog.HandleAlerting(convertedEndpoint, result, cfg.Alerting)
|
||||
externalEndpoint.NumberOfSuccessesInARow = convertedEndpoint.NumberOfSuccessesInARow
|
||||
externalEndpoint.NumberOfFailuresInARow = convertedEndpoint.NumberOfFailuresInARow
|
||||
}
|
||||
|
@ -3,10 +3,10 @@ package api
|
||||
import (
|
||||
_ "embed"
|
||||
"html/template"
|
||||
"log"
|
||||
|
||||
"github.com/TwiN/gatus/v5/config/ui"
|
||||
static "github.com/TwiN/gatus/v5/web"
|
||||
"github.com/TwiN/logr"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
)
|
||||
|
||||
@ -15,14 +15,14 @@ func SinglePageApplication(ui *ui.Config) fiber.Handler {
|
||||
t, err := template.ParseFS(static.FileSystem, static.IndexPath)
|
||||
if err != nil {
|
||||
// This should never happen, because ui.ValidateAndSetDefaults validates that the template works.
|
||||
log.Println("[api.SinglePageApplication] Failed to parse template. This should never happen, because the template is validated on start. Error:", err.Error())
|
||||
logr.Errorf("[api.SinglePageApplication] Failed to parse template. This should never happen, because the template is validated on start. Error: %s", err.Error())
|
||||
return c.Status(500).SendString("Failed to parse template. This should never happen, because the template is validated on start.")
|
||||
}
|
||||
c.Set("Content-Type", "text/html")
|
||||
err = t.Execute(c, ui)
|
||||
if err != nil {
|
||||
// This should never happen, because ui.ValidateAndSetDefaults validates that the template works.
|
||||
log.Println("[api.SinglePageApplication] Failed to execute template. This should never happen, because the template is validated on start. Error:", err.Error())
|
||||
logr.Errorf("[api.SinglePageApplication] Failed to execute template. This should never happen, because the template is validated on start. Error: %s", err.Error())
|
||||
return c.Status(500).SendString("Failed to parse template. This should never happen, because the template is validated on start.")
|
||||
}
|
||||
return c.SendStatus(200)
|
||||
|
@ -110,7 +110,6 @@ func CanCreateSCTPConnection(address string, config *Config) bool {
|
||||
_ = conn.Close()
|
||||
res <- true
|
||||
})(ch)
|
||||
|
||||
select {
|
||||
case result := <-ch:
|
||||
return result
|
||||
@ -182,7 +181,6 @@ func CanCreateSSHConnection(address, username, password string, config *Config)
|
||||
} else {
|
||||
port = "22"
|
||||
}
|
||||
|
||||
cli, err := ssh.Dial("tcp", strings.Join([]string{address, port}, ":"), &ssh.ClientConfig{
|
||||
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
|
||||
User: username,
|
||||
@ -194,7 +192,6 @@ func CanCreateSSHConnection(address, username, password string, config *Config)
|
||||
if err != nil {
|
||||
return false, nil, err
|
||||
}
|
||||
|
||||
return true, cli, nil
|
||||
}
|
||||
|
||||
@ -203,37 +200,29 @@ func ExecuteSSHCommand(sshClient *ssh.Client, body string, config *Config) (bool
|
||||
type Body struct {
|
||||
Command string `json:"command"`
|
||||
}
|
||||
|
||||
defer sshClient.Close()
|
||||
|
||||
var b Body
|
||||
if err := json.Unmarshal([]byte(body), &b); err != nil {
|
||||
return false, 0, err
|
||||
}
|
||||
|
||||
sess, err := sshClient.NewSession()
|
||||
if err != nil {
|
||||
return false, 0, err
|
||||
}
|
||||
|
||||
err = sess.Start(b.Command)
|
||||
if err != nil {
|
||||
return false, 0, err
|
||||
}
|
||||
|
||||
defer sess.Close()
|
||||
|
||||
err = sess.Wait()
|
||||
if err == nil {
|
||||
return true, 0, nil
|
||||
}
|
||||
|
||||
e, ok := err.(*ssh.ExitError)
|
||||
if !ok {
|
||||
var exitErr *ssh.ExitError
|
||||
if ok := errors.As(err, &exitErr); !ok {
|
||||
return false, 0, err
|
||||
}
|
||||
|
||||
return true, e.ExitStatus(), nil
|
||||
return true, exitErr.ExitStatus(), nil
|
||||
}
|
||||
|
||||
// Ping checks if an address can be pinged and returns the round-trip time if the address can be pinged
|
||||
|
@ -4,7 +4,6 @@ import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"log"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
@ -12,6 +11,7 @@ import (
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/TwiN/logr"
|
||||
"golang.org/x/oauth2"
|
||||
"golang.org/x/oauth2/clientcredentials"
|
||||
"google.golang.org/api/idtoken"
|
||||
@ -232,7 +232,7 @@ func (c *Config) getHTTPClient() *http.Client {
|
||||
if c.ProxyURL != "" {
|
||||
proxyURL, err := url.Parse(c.ProxyURL)
|
||||
if err != nil {
|
||||
log.Println("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring custom proxy due to error:", err.Error())
|
||||
logr.Errorf("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring custom proxy due to error: %s", err.Error())
|
||||
} else {
|
||||
c.httpClient.Transport.(*http.Transport).Proxy = http.ProxyURL(proxyURL)
|
||||
}
|
||||
@ -242,7 +242,7 @@ func (c *Config) getHTTPClient() *http.Client {
|
||||
if err != nil {
|
||||
// We're ignoring the error, because it should have been validated on startup ValidateAndSetDefaults.
|
||||
// It shouldn't happen, but if it does, we'll log it... Better safe than sorry ;)
|
||||
log.Println("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring invalid DNS resolver due to error:", err.Error())
|
||||
logr.Errorf("[client.getHTTPClient] THIS SHOULD NOT HAPPEN. Silently ignoring invalid DNS resolver due to error: %s", err.Error())
|
||||
} else {
|
||||
dialer := &net.Dialer{
|
||||
Resolver: &net.Resolver{
|
||||
@ -259,7 +259,7 @@ func (c *Config) getHTTPClient() *http.Client {
|
||||
}
|
||||
}
|
||||
if c.HasOAuth2Config() && c.HasIAPConfig() {
|
||||
log.Println("[client.getHTTPClient] Error: Both Identity-Aware-Proxy and Oauth2 configuration are present.")
|
||||
logr.Errorf("[client.getHTTPClient] Error: Both Identity-Aware-Proxy and Oauth2 configuration are present.")
|
||||
} else if c.HasOAuth2Config() {
|
||||
c.httpClient = configureOAuth2(c.httpClient, *c.OAuth2Config)
|
||||
} else if c.HasIAPConfig() {
|
||||
@ -269,23 +269,22 @@ func (c *Config) getHTTPClient() *http.Client {
|
||||
return c.httpClient
|
||||
}
|
||||
|
||||
// validateIAPToken returns a boolean that will define if the google identity-aware-proxy token can be fetch
|
||||
// validateIAPToken returns a boolean that will define if the Google identity-aware-proxy token can be fetched
|
||||
// and if is it valid.
|
||||
func validateIAPToken(ctx context.Context, c IAPConfig) bool {
|
||||
ts, err := idtoken.NewTokenSource(ctx, c.Audience)
|
||||
if err != nil {
|
||||
log.Println("[client.ValidateIAPToken] Claiming Identity token failed. error:", err.Error())
|
||||
logr.Errorf("[client.ValidateIAPToken] Claiming Identity token failed: %s", err.Error())
|
||||
return false
|
||||
}
|
||||
tok, err := ts.Token()
|
||||
if err != nil {
|
||||
log.Println("[client.ValidateIAPToken] Get Identity-Aware-Proxy token failed. error:", err.Error())
|
||||
logr.Errorf("[client.ValidateIAPToken] Get Identity-Aware-Proxy token failed: %s", err.Error())
|
||||
return false
|
||||
}
|
||||
payload, err := idtoken.Validate(ctx, tok.AccessToken, c.Audience)
|
||||
_ = payload
|
||||
_, err = idtoken.Validate(ctx, tok.AccessToken, c.Audience)
|
||||
if err != nil {
|
||||
log.Println("[client.ValidateIAPToken] Token Validation failed. error:", err.Error())
|
||||
logr.Errorf("[client.ValidateIAPToken] Token Validation failed: %s", err.Error())
|
||||
return false
|
||||
}
|
||||
return true
|
||||
@ -298,7 +297,7 @@ func configureIAP(httpClient *http.Client, c IAPConfig) *http.Client {
|
||||
if validateIAPToken(ctx, c) {
|
||||
ts, err := idtoken.NewTokenSource(ctx, c.Audience)
|
||||
if err != nil {
|
||||
log.Println("[client.ConfigureIAP] Claiming Token Source failed. error:", err.Error())
|
||||
logr.Errorf("[client.configureIAP] Claiming Token Source failed: %s", err.Error())
|
||||
return httpClient
|
||||
}
|
||||
client := oauth2.NewClient(ctx, ts)
|
||||
@ -327,17 +326,17 @@ func configureOAuth2(httpClient *http.Client, c OAuth2Config) *http.Client {
|
||||
func configureTLS(tlsConfig *tls.Config, c TLSConfig) *tls.Config {
|
||||
clientTLSCert, err := tls.LoadX509KeyPair(c.CertificateFile, c.PrivateKeyFile)
|
||||
if err != nil {
|
||||
logr.Errorf("[client.configureTLS] Failed to load certificate: %s", err.Error())
|
||||
return nil
|
||||
}
|
||||
tlsConfig.Certificates = []tls.Certificate{clientTLSCert}
|
||||
tlsConfig.Renegotiation = tls.RenegotiateNever
|
||||
|
||||
renegotionSupport := map[string]tls.RenegotiationSupport{
|
||||
renegotiationSupport := map[string]tls.RenegotiationSupport{
|
||||
"once": tls.RenegotiateOnceAsClient,
|
||||
"freely": tls.RenegotiateFreelyAsClient,
|
||||
"never": tls.RenegotiateNever,
|
||||
}
|
||||
if val, ok := renegotionSupport[c.RenegotiationSupport]; ok {
|
||||
if val, ok := renegotiationSupport[c.RenegotiationSupport]; ok {
|
||||
tlsConfig.Renegotiation = val
|
||||
}
|
||||
return tlsConfig
|
||||
|
@ -4,7 +4,6 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@ -53,11 +52,9 @@ var (
|
||||
// Config is the main configuration structure
|
||||
type Config struct {
|
||||
// Debug Whether to enable debug logs
|
||||
// Deprecated: Use the GATUS_LOG_LEVEL environment variable instead
|
||||
Debug bool `yaml:"debug,omitempty"`
|
||||
|
||||
// LogLevel is one of DEBUG, INFO, WARN and ERROR. Defaults to INFO
|
||||
LogLevel logr.Level `yaml:"log-level,omitempty"`
|
||||
|
||||
// Metrics Whether to expose metrics at /metrics
|
||||
Metrics bool `yaml:"metrics,omitempty"`
|
||||
|
||||
@ -176,13 +173,11 @@ func LoadConfiguration(configPath string) (*Config, error) {
|
||||
if fileInfo.IsDir() {
|
||||
err := walkConfigDir(configPath, func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
log.Printf("[config.LoadConfiguration] Error walking path=%s: %s", path, err)
|
||||
return err
|
||||
return fmt.Errorf("error walking path %s: %w", path, err)
|
||||
}
|
||||
log.Printf("[config.LoadConfiguration] Reading configuration from %s", path)
|
||||
logr.Infof("[config.LoadConfiguration] Reading configuration from %s", path)
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
log.Printf("[config.LoadConfiguration] Error reading configuration from %s: %s", path, err)
|
||||
return fmt.Errorf("error reading configuration from file %s: %w", path, err)
|
||||
}
|
||||
configBytes, err = deepmerge.YAML(configBytes, data)
|
||||
@ -192,9 +187,9 @@ func LoadConfiguration(configPath string) (*Config, error) {
|
||||
return nil, fmt.Errorf("error reading configuration from directory %s: %w", usedConfigPath, err)
|
||||
}
|
||||
} else {
|
||||
log.Printf("[config.LoadConfiguration] Reading configuration from configFile=%s", usedConfigPath)
|
||||
logr.Infof("[config.LoadConfiguration] Reading configuration from configFile=%s", usedConfigPath)
|
||||
if data, err := os.ReadFile(usedConfigPath); err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("error reading configuration from directory %s: %w", usedConfigPath, err)
|
||||
} else {
|
||||
configBytes = data
|
||||
}
|
||||
@ -204,11 +199,11 @@ func LoadConfiguration(configPath string) (*Config, error) {
|
||||
}
|
||||
config, err := parseAndValidateConfigBytes(configBytes)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("error parsing config: %w", err)
|
||||
}
|
||||
config.configPath = usedConfigPath
|
||||
config.UpdateLastFileModTime()
|
||||
return config, err
|
||||
return config, nil
|
||||
}
|
||||
|
||||
// walkConfigDir is a wrapper for filepath.WalkDir that strips directories and non-config files
|
||||
@ -249,7 +244,13 @@ func parseAndValidateConfigBytes(yamlBytes []byte) (config *Config, err error) {
|
||||
if config == nil || config.Endpoints == nil || len(config.Endpoints) == 0 {
|
||||
err = ErrNoEndpointInConfig
|
||||
} else {
|
||||
validateAlertingConfig(config.Alerting, config.Endpoints, config.ExternalEndpoints, config.Debug)
|
||||
// XXX: Remove this in v6.0.0
|
||||
if config.Debug {
|
||||
logr.Warn("WARNING: The 'debug' configuration has been deprecated and will be removed in v6.0.0")
|
||||
logr.Warn("WARNING: Please use the GATUS_LOG_LEVEL environment variable instead")
|
||||
}
|
||||
// XXX: End of v6.0.0 removals
|
||||
validateAlertingConfig(config.Alerting, config.Endpoints, config.ExternalEndpoints)
|
||||
if err := validateSecurityConfig(config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -342,9 +343,7 @@ func validateEndpointsConfig(config *Config) error {
|
||||
duplicateValidationMap := make(map[string]bool)
|
||||
// Validate endpoints
|
||||
for _, ep := range config.Endpoints {
|
||||
if config.Debug {
|
||||
log.Printf("[config.validateEndpointsConfig] Validating endpoint '%s'", ep.Name)
|
||||
}
|
||||
logr.Debugf("[config.validateEndpointsConfig] Validating endpoint with key %s", ep.Key())
|
||||
if endpointKey := ep.Key(); duplicateValidationMap[endpointKey] {
|
||||
return fmt.Errorf("invalid endpoint %s: name and group combination must be unique", ep.Key())
|
||||
} else {
|
||||
@ -354,12 +353,10 @@ func validateEndpointsConfig(config *Config) error {
|
||||
return fmt.Errorf("invalid endpoint %s: %w", ep.Key(), err)
|
||||
}
|
||||
}
|
||||
log.Printf("[config.validateEndpointsConfig] Validated %d endpoints", len(config.Endpoints))
|
||||
logr.Infof("[config.validateEndpointsConfig] Validated %d endpoints", len(config.Endpoints))
|
||||
// Validate external endpoints
|
||||
for _, ee := range config.ExternalEndpoints {
|
||||
if config.Debug {
|
||||
log.Printf("[config.validateEndpointsConfig] Validating external endpoint '%s'", ee.Name)
|
||||
}
|
||||
logr.Debugf("[config.validateEndpointsConfig] Validating external endpoint '%s'", ee.Name)
|
||||
if endpointKey := ee.Key(); duplicateValidationMap[endpointKey] {
|
||||
return fmt.Errorf("invalid external endpoint %s: name and group combination must be unique", ee.Key())
|
||||
} else {
|
||||
@ -369,16 +366,14 @@ func validateEndpointsConfig(config *Config) error {
|
||||
return fmt.Errorf("invalid external endpoint %s: %w", ee.Key(), err)
|
||||
}
|
||||
}
|
||||
log.Printf("[config.validateEndpointsConfig] Validated %d external endpoints", len(config.ExternalEndpoints))
|
||||
logr.Infof("[config.validateEndpointsConfig] Validated %d external endpoints", len(config.ExternalEndpoints))
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateSecurityConfig(config *Config) error {
|
||||
if config.Security != nil {
|
||||
if config.Security.IsValid() {
|
||||
if config.Debug {
|
||||
log.Printf("[config.validateSecurityConfig] Basic security configuration has been validated")
|
||||
}
|
||||
logr.Debug("[config.validateSecurityConfig] Basic security configuration has been validated")
|
||||
} else {
|
||||
// If there was an attempt to configure security, then it must mean that some confidential or private
|
||||
// data are exposed. As a result, we'll force a panic because it's better to be safe than sorry.
|
||||
@ -392,9 +387,9 @@ func validateSecurityConfig(config *Config) error {
|
||||
// Note that the alerting configuration has to be validated before the endpoint configuration, because the default alert
|
||||
// returned by provider.AlertProvider.GetDefaultAlert() must be parsed before endpoint.Endpoint.ValidateAndSetDefaults()
|
||||
// sets the default alert values when none are set.
|
||||
func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoint.Endpoint, externalEndpoints []*endpoint.ExternalEndpoint, debug bool) {
|
||||
func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoint.Endpoint, externalEndpoints []*endpoint.ExternalEndpoint) {
|
||||
if alertingConfig == nil {
|
||||
log.Printf("[config.validateAlertingConfig] Alerting is not configured")
|
||||
logr.Info("[config.validateAlertingConfig] Alerting is not configured")
|
||||
return
|
||||
}
|
||||
alertTypes := []alert.Type{
|
||||
@ -432,9 +427,7 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
|
||||
for _, ep := range endpoints {
|
||||
for alertIndex, endpointAlert := range ep.Alerts {
|
||||
if alertType == endpointAlert.Type {
|
||||
if debug {
|
||||
log.Printf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ep.Key())
|
||||
}
|
||||
logr.Debugf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ep.Key())
|
||||
provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), endpointAlert)
|
||||
}
|
||||
}
|
||||
@ -442,9 +435,7 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
|
||||
for _, ee := range externalEndpoints {
|
||||
for alertIndex, endpointAlert := range ee.Alerts {
|
||||
if alertType == endpointAlert.Type {
|
||||
if debug {
|
||||
log.Printf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ee.Key())
|
||||
}
|
||||
logr.Debugf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ee.Key())
|
||||
provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), endpointAlert)
|
||||
}
|
||||
}
|
||||
@ -452,7 +443,7 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
|
||||
}
|
||||
validProviders = append(validProviders, alertType)
|
||||
} else {
|
||||
log.Printf("[config.validateAlertingConfig] Ignoring provider=%s because configuration is invalid", alertType)
|
||||
logr.Warnf("[config.validateAlertingConfig] Ignoring provider=%s because configuration is invalid", alertType)
|
||||
invalidProviders = append(invalidProviders, alertType)
|
||||
alertingConfig.SetAlertingProviderToNil(alertProvider)
|
||||
}
|
||||
@ -460,5 +451,5 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
|
||||
invalidProviders = append(invalidProviders, alertType)
|
||||
}
|
||||
}
|
||||
log.Printf("[config.validateAlertingConfig] configuredProviders=%s; ignoredProviders=%s", validProviders, invalidProviders)
|
||||
logr.Infof("[config.validateAlertingConfig] configuredProviders=%s; ignoredProviders=%s", validProviders, invalidProviders)
|
||||
}
|
||||
|
@ -27,6 +27,7 @@ import (
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/pushover"
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/slack"
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/teams"
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/teamsworkflows"
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/telegram"
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/twilio"
|
||||
"github.com/TwiN/gatus/v5/client"
|
||||
@ -177,7 +178,6 @@ endpoints:
|
||||
conditions:
|
||||
- "[STATUS] == 200"`,
|
||||
"b.yaml": `
|
||||
debug: true
|
||||
|
||||
alerting:
|
||||
discord:
|
||||
@ -196,7 +196,6 @@ endpoints:
|
||||
- "[STATUS] == 200"`,
|
||||
},
|
||||
expectedConfig: &Config{
|
||||
Debug: true,
|
||||
Metrics: true,
|
||||
Alerting: &alerting.Config{
|
||||
Discord: &discord.AlertProvider{WebhookURL: "https://discord.com/api/webhooks/xxx/yyy"},
|
||||
@ -719,7 +718,6 @@ badconfig:
|
||||
|
||||
func TestParseAndValidateConfigBytesWithAlerting(t *testing.T) {
|
||||
config, err := parseAndValidateConfigBytes([]byte(`
|
||||
debug: true
|
||||
alerting:
|
||||
slack:
|
||||
webhook-url: "http://example.com"
|
||||
@ -919,8 +917,6 @@ endpoints:
|
||||
|
||||
func TestParseAndValidateConfigBytesWithAlertingAndDefaultAlert(t *testing.T) {
|
||||
config, err := parseAndValidateConfigBytes([]byte(`
|
||||
debug: true
|
||||
|
||||
alerting:
|
||||
slack:
|
||||
webhook-url: "http://example.com"
|
||||
@ -1493,6 +1489,7 @@ endpoints:
|
||||
t.Fatal("PagerDuty alerting config should've been set to nil, because its IsValid() method returned false and therefore alerting.Config.SetAlertingProviderToNil() should've been called")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAndValidateConfigBytesWithInvalidPushoverAlertingConfig(t *testing.T) {
|
||||
config, err := parseAndValidateConfigBytes([]byte(`
|
||||
alerting:
|
||||
@ -1801,7 +1798,7 @@ endpoints:
|
||||
func TestParseAndValidateConfigBytesWithValidSecurityConfig(t *testing.T) {
|
||||
const expectedUsername = "admin"
|
||||
const expectedPasswordHash = "JDJhJDEwJHRiMnRFakxWazZLdXBzRERQazB1TE8vckRLY05Yb1hSdnoxWU0yQ1FaYXZRSW1McmladDYu"
|
||||
config, err := parseAndValidateConfigBytes([]byte(fmt.Sprintf(`debug: true
|
||||
config, err := parseAndValidateConfigBytes([]byte(fmt.Sprintf(`
|
||||
security:
|
||||
basic:
|
||||
username: "%s"
|
||||
@ -1889,6 +1886,7 @@ func TestGetAlertingProviderByAlertType(t *testing.T) {
|
||||
Telegram: &telegram.AlertProvider{},
|
||||
Twilio: &twilio.AlertProvider{},
|
||||
Teams: &teams.AlertProvider{},
|
||||
TeamsWorkflows: &teamsworkflows.AlertProvider{},
|
||||
}
|
||||
scenarios := []struct {
|
||||
alertType alert.Type
|
||||
@ -1912,6 +1910,7 @@ func TestGetAlertingProviderByAlertType(t *testing.T) {
|
||||
{alertType: alert.TypeTelegram, expected: alertingConfig.Telegram},
|
||||
{alertType: alert.TypeTwilio, expected: alertingConfig.Twilio},
|
||||
{alertType: alert.TypeTeams, expected: alertingConfig.Teams},
|
||||
{alertType: alert.TypeTeamsWorkflows, expected: alertingConfig.TeamsWorkflows},
|
||||
}
|
||||
for _, scenario := range scenarios {
|
||||
t.Run(string(scenario.alertType), func(t *testing.T) {
|
||||
|
@ -150,7 +150,7 @@ func (c Condition) evaluate(result *Result, dontResolveFailedConditions bool) bo
|
||||
return false
|
||||
}
|
||||
if !success {
|
||||
//log.Printf("[Condition.evaluate] Condition '%s' did not succeed because '%s' is false", condition, condition)
|
||||
//logr.Debugf("[Condition.evaluate] Condition '%s' did not succeed because '%s' is false", condition, condition)
|
||||
}
|
||||
result.ConditionResults = append(result.ConditionResults, &ConditionResult{Condition: conditionToDisplay, Success: success})
|
||||
return success
|
||||
|
@ -1,9 +1,8 @@
|
||||
package remote
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/TwiN/gatus/v5/client"
|
||||
"github.com/TwiN/logr"
|
||||
)
|
||||
|
||||
// NOTICE: This is an experimental alpha feature and may be updated/removed in future versions.
|
||||
@ -31,9 +30,8 @@ func (c *Config) ValidateAndSetDefaults() error {
|
||||
}
|
||||
}
|
||||
if len(c.Instances) > 0 {
|
||||
log.Println("WARNING: Your configuration is using 'remote', which is in alpha and may be updated/removed in future versions.")
|
||||
log.Println("WARNING: See https://github.com/TwiN/gatus/issues/64 for more information")
|
||||
log.Println("WARNING: This feature is a candidate for removal in future versions. Please comment on the issue above if you need this feature.")
|
||||
logr.Warn("WARNING: Your configuration is using 'remote', which is in alpha and may be updated/removed in future versions.")
|
||||
logr.Warn("WARNING: See https://github.com/TwiN/gatus/issues/64 for more information")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -1,12 +1,12 @@
|
||||
package controller
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/TwiN/gatus/v5/api"
|
||||
"github.com/TwiN/gatus/v5/config"
|
||||
"github.com/TwiN/logr"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
)
|
||||
|
||||
@ -25,19 +25,19 @@ func Handle(cfg *config.Config) {
|
||||
if os.Getenv("ROUTER_TEST") == "true" {
|
||||
return
|
||||
}
|
||||
log.Println("[controller.Handle] Listening on " + cfg.Web.SocketAddress())
|
||||
logr.Info("[controller.Handle] Listening on " + cfg.Web.SocketAddress())
|
||||
if cfg.Web.HasTLS() {
|
||||
err := app.ListenTLS(cfg.Web.SocketAddress(), cfg.Web.TLS.CertificateFile, cfg.Web.TLS.PrivateKeyFile)
|
||||
if err != nil {
|
||||
log.Fatal("[controller.Handle]", err)
|
||||
logr.Fatalf("[controller.Handle] %s", err.Error())
|
||||
}
|
||||
} else {
|
||||
err := app.Listen(cfg.Web.SocketAddress())
|
||||
if err != nil {
|
||||
log.Fatal("[controller.Handle]", err)
|
||||
logr.Fatalf("[controller.Handle] %s", err.Error())
|
||||
}
|
||||
}
|
||||
log.Println("[controller.Handle] Server has shut down successfully")
|
||||
logr.Info("[controller.Handle] Server has shut down successfully")
|
||||
}
|
||||
|
||||
// Shutdown stops the server
|
||||
|
4
go.mod
4
go.mod
@ -1,6 +1,6 @@
|
||||
module github.com/TwiN/gatus/v5
|
||||
|
||||
go 1.22.4
|
||||
go 1.23.3
|
||||
|
||||
require (
|
||||
code.gitea.io/sdk/gitea v0.19.0
|
||||
@ -8,7 +8,7 @@ require (
|
||||
github.com/TwiN/g8/v2 v2.0.0
|
||||
github.com/TwiN/gocache/v2 v2.2.2
|
||||
github.com/TwiN/health v1.6.0
|
||||
github.com/TwiN/logr v0.2.1
|
||||
github.com/TwiN/logr v0.3.1
|
||||
github.com/TwiN/whois v1.1.9
|
||||
github.com/aws/aws-sdk-go v1.54.10
|
||||
github.com/coreos/go-oidc/v3 v3.11.0
|
||||
|
4
go.sum
4
go.sum
@ -16,8 +16,8 @@ github.com/TwiN/gocache/v2 v2.2.2 h1:4HToPfDV8FSbaYO5kkbhLpEllUYse5rAf+hVU/mSsuI
|
||||
github.com/TwiN/gocache/v2 v2.2.2/go.mod h1:WfIuwd7GR82/7EfQqEtmLFC3a2vqaKbs4Pe6neB7Gyc=
|
||||
github.com/TwiN/health v1.6.0 h1:L2ks575JhRgQqWWOfKjw9B0ec172hx7GdToqkYUycQM=
|
||||
github.com/TwiN/health v1.6.0/go.mod h1:Z6TszwQPMvtSiVx1QMidVRgvVr4KZGfiwqcD7/Z+3iw=
|
||||
github.com/TwiN/logr v0.2.1 h1:kMhUmBBVlFxzqTvyHuNoYQ/uwqg8BW4y0AyZxI5JB3Q=
|
||||
github.com/TwiN/logr v0.2.1/go.mod h1:oldDOkRjFXjZqiMP0+ca5NAQHXTiJ02zHirsuBJJH6k=
|
||||
github.com/TwiN/logr v0.3.1 h1:CfTKA83jUmsAoxqrr3p4JxEkqXOBnEE9/f35L5MODy4=
|
||||
github.com/TwiN/logr v0.3.1/go.mod h1:BZgZFYq6fQdU3KtR8qYato3zUEw53yQDaIuujHb55Jw=
|
||||
github.com/TwiN/whois v1.1.9 h1:m20+m1CXnrstie+tW2ZmAJkfcT9zgwpVRUFsKeMw+ng=
|
||||
github.com/TwiN/whois v1.1.9/go.mod h1:TjipCMpJRAJYKmtz/rXQBU6UGxMh6bk8SHazu7OMnQE=
|
||||
github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
|
||||
|
62
main.go
62
main.go
@ -1,7 +1,6 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strconv"
|
||||
@ -15,16 +14,22 @@ import (
|
||||
"github.com/TwiN/logr"
|
||||
)
|
||||
|
||||
const (
|
||||
GatusConfigPathEnvVar = "GATUS_CONFIG_PATH"
|
||||
GatusConfigFileEnvVar = "GATUS_CONFIG_FILE" // Deprecated in favor of GatusConfigPathEnvVar
|
||||
GatusLogLevelEnvVar = "GATUS_LOG_LEVEL"
|
||||
)
|
||||
|
||||
func main() {
|
||||
if delayInSeconds, _ := strconv.Atoi(os.Getenv("GATUS_DELAY_START_SECONDS")); delayInSeconds > 0 {
|
||||
log.Printf("Delaying start by %d seconds", delayInSeconds)
|
||||
logr.Infof("Delaying start by %d seconds", delayInSeconds)
|
||||
time.Sleep(time.Duration(delayInSeconds) * time.Second)
|
||||
}
|
||||
configureLogging()
|
||||
cfg, err := loadConfiguration()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
configureLogging(cfg)
|
||||
initializeStorage(cfg)
|
||||
start(cfg)
|
||||
// Wait for termination signal
|
||||
@ -33,13 +38,13 @@ func main() {
|
||||
signal.Notify(signalChannel, os.Interrupt, syscall.SIGTERM)
|
||||
go func() {
|
||||
<-signalChannel
|
||||
log.Println("Received termination signal, attempting to gracefully shut down")
|
||||
logr.Info("Received termination signal, attempting to gracefully shut down")
|
||||
stop(cfg)
|
||||
save()
|
||||
done <- true
|
||||
}()
|
||||
<-done
|
||||
log.Println("Shutting down")
|
||||
logr.Info("Shutting down")
|
||||
}
|
||||
|
||||
func start(cfg *config.Config) {
|
||||
@ -55,21 +60,32 @@ func stop(cfg *config.Config) {
|
||||
|
||||
func save() {
|
||||
if err := store.Get().Save(); err != nil {
|
||||
log.Println("Failed to save storage provider:", err.Error())
|
||||
logr.Errorf("Failed to save storage provider: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func configureLogging(cfg *config.Config) {
|
||||
logr.SetThreshold(cfg.LogLevel)
|
||||
logr.Infof("[main.configureLogging] Log Level is %s", logr.GetThreshold())
|
||||
func configureLogging() {
|
||||
logLevelAsString := os.Getenv(GatusLogLevelEnvVar)
|
||||
if logLevel, err := logr.LevelFromString(logLevelAsString); err != nil {
|
||||
logr.SetThreshold(logr.LevelInfo)
|
||||
if len(logLevelAsString) == 0 {
|
||||
logr.Infof("[main.configureLogging] Defaulting log level to %s", logr.LevelInfo)
|
||||
} else {
|
||||
logr.Warnf("[main.configureLogging] Invalid log level '%s', defaulting to %s", logLevelAsString, logr.LevelInfo)
|
||||
}
|
||||
} else {
|
||||
logr.SetThreshold(logLevel)
|
||||
logr.Infof("[main.configureLogging] Log Level is set to %s", logr.GetThreshold())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func loadConfiguration() (*config.Config, error) {
|
||||
configPath := os.Getenv("GATUS_CONFIG_PATH")
|
||||
configPath := os.Getenv(GatusConfigPathEnvVar)
|
||||
// Backwards compatibility
|
||||
if len(configPath) == 0 {
|
||||
if configPath = os.Getenv("GATUS_CONFIG_FILE"); len(configPath) > 0 {
|
||||
log.Println("WARNING: GATUS_CONFIG_FILE is deprecated. Please use GATUS_CONFIG_PATH instead.")
|
||||
if configPath = os.Getenv(GatusConfigFileEnvVar); len(configPath) > 0 {
|
||||
logr.Warnf("WARNING: %s is deprecated. Please use %s instead.", GatusConfigFileEnvVar, GatusConfigPathEnvVar)
|
||||
}
|
||||
}
|
||||
return config.LoadConfiguration(configPath)
|
||||
@ -95,7 +111,7 @@ func initializeStorage(cfg *config.Config) {
|
||||
}
|
||||
numberOfEndpointStatusesDeleted := store.Get().DeleteAllEndpointStatusesNotInKeys(keys)
|
||||
if numberOfEndpointStatusesDeleted > 0 {
|
||||
log.Printf("[main.initializeStorage] Deleted %d endpoint statuses because their matching endpoints no longer existed", numberOfEndpointStatusesDeleted)
|
||||
logr.Infof("[main.initializeStorage] Deleted %d endpoint statuses because their matching endpoints no longer existed", numberOfEndpointStatusesDeleted)
|
||||
}
|
||||
// Clean up the triggered alerts from the storage provider and load valid triggered endpoint alerts
|
||||
numberOfPersistedTriggeredAlertsLoaded := 0
|
||||
@ -107,13 +123,13 @@ func initializeStorage(cfg *config.Config) {
|
||||
}
|
||||
}
|
||||
numberOfTriggeredAlertsDeleted := store.Get().DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep, checksums)
|
||||
if cfg.Debug && numberOfTriggeredAlertsDeleted > 0 {
|
||||
log.Printf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ep.Key())
|
||||
if numberOfTriggeredAlertsDeleted > 0 {
|
||||
logr.Debugf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ep.Key())
|
||||
}
|
||||
for _, alert := range ep.Alerts {
|
||||
exists, resolveKey, numberOfSuccessesInARow, err := store.Get().GetTriggeredEndpointAlert(ep, alert)
|
||||
if err != nil {
|
||||
log.Printf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
continue
|
||||
}
|
||||
if exists {
|
||||
@ -132,13 +148,13 @@ func initializeStorage(cfg *config.Config) {
|
||||
}
|
||||
convertedEndpoint := ee.ToEndpoint()
|
||||
numberOfTriggeredAlertsDeleted := store.Get().DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(convertedEndpoint, checksums)
|
||||
if cfg.Debug && numberOfTriggeredAlertsDeleted > 0 {
|
||||
log.Printf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ee.Key())
|
||||
if numberOfTriggeredAlertsDeleted > 0 {
|
||||
logr.Debugf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ee.Key())
|
||||
}
|
||||
for _, alert := range ee.Alerts {
|
||||
exists, resolveKey, numberOfSuccessesInARow, err := store.Get().GetTriggeredEndpointAlert(convertedEndpoint, alert)
|
||||
if err != nil {
|
||||
log.Printf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ee.Key(), err.Error())
|
||||
logr.Errorf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ee.Key(), err.Error())
|
||||
continue
|
||||
}
|
||||
if exists {
|
||||
@ -149,7 +165,7 @@ func initializeStorage(cfg *config.Config) {
|
||||
}
|
||||
}
|
||||
if numberOfPersistedTriggeredAlertsLoaded > 0 {
|
||||
log.Printf("[main.initializeStorage] Loaded %d persisted triggered alerts", numberOfPersistedTriggeredAlertsLoaded)
|
||||
logr.Infof("[main.initializeStorage] Loaded %d persisted triggered alerts", numberOfPersistedTriggeredAlertsLoaded)
|
||||
}
|
||||
}
|
||||
|
||||
@ -157,15 +173,15 @@ func listenToConfigurationFileChanges(cfg *config.Config) {
|
||||
for {
|
||||
time.Sleep(30 * time.Second)
|
||||
if cfg.HasLoadedConfigurationBeenModified() {
|
||||
log.Println("[main.listenToConfigurationFileChanges] Configuration file has been modified")
|
||||
logr.Info("[main.listenToConfigurationFileChanges] Configuration file has been modified")
|
||||
stop(cfg)
|
||||
time.Sleep(time.Second) // Wait a bit to make sure everything is done.
|
||||
save()
|
||||
updatedConfig, err := loadConfiguration()
|
||||
if err != nil {
|
||||
if cfg.SkipInvalidConfigUpdate {
|
||||
log.Println("[main.listenToConfigurationFileChanges] Failed to load new configuration:", err.Error())
|
||||
log.Println("[main.listenToConfigurationFileChanges] The configuration file was updated, but it is not valid. The old configuration will continue being used.")
|
||||
logr.Errorf("[main.listenToConfigurationFileChanges] Failed to load new configuration: %s", err.Error())
|
||||
logr.Error("[main.listenToConfigurationFileChanges] The configuration file was updated, but it is not valid. The old configuration will continue being used.")
|
||||
// Update the last file modification time to avoid trying to process the same invalid configuration again
|
||||
cfg.UpdateLastFileModTime()
|
||||
continue
|
||||
|
@ -2,10 +2,10 @@ package security
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"log"
|
||||
"net/http"
|
||||
|
||||
g8 "github.com/TwiN/g8/v2"
|
||||
"github.com/TwiN/logr"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/adaptor"
|
||||
"github.com/gofiber/fiber/v2/middleware/basicauth"
|
||||
@ -99,7 +99,7 @@ func (c *Config) IsAuthenticated(ctx *fiber.Ctx) bool {
|
||||
// TODO: Update g8 to support fasthttp natively? (see g8's fasthttp branch)
|
||||
request, err := adaptor.ConvertRequest(ctx, false)
|
||||
if err != nil {
|
||||
log.Printf("[security.IsAuthenticated] Unexpected error converting request: %v", err)
|
||||
logr.Errorf("[security.IsAuthenticated] Unexpected error converting request: %v", err)
|
||||
return false
|
||||
}
|
||||
token := c.gate.ExtractTokenFromRequest(request)
|
||||
|
@ -2,11 +2,11 @@ package security
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/TwiN/logr"
|
||||
"github.com/coreos/go-oidc/v3/oidc"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
@ -124,7 +124,7 @@ func (c *OIDCConfig) callbackHandler(w http.ResponseWriter, r *http.Request) { /
|
||||
return
|
||||
}
|
||||
}
|
||||
log.Printf("[security.callbackHandler] Subject %s is not in the list of allowed subjects", idToken.Subject)
|
||||
logr.Debugf("[security.callbackHandler] Subject %s is not in the list of allowed subjects", idToken.Subject)
|
||||
http.Redirect(w, r, "/?error=access_denied", http.StatusFound)
|
||||
}
|
||||
|
||||
|
@ -4,7 +4,6 @@ import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@ -14,6 +13,7 @@ import (
|
||||
"github.com/TwiN/gatus/v5/storage/store/common"
|
||||
"github.com/TwiN/gatus/v5/storage/store/common/paging"
|
||||
"github.com/TwiN/gocache/v2"
|
||||
"github.com/TwiN/logr"
|
||||
_ "github.com/lib/pq"
|
||||
_ "modernc.org/sqlite"
|
||||
)
|
||||
@ -237,12 +237,12 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
|
||||
// Endpoint doesn't exist in the database, insert it
|
||||
if endpointID, err = s.insertEndpoint(tx, ep); err != nil {
|
||||
_ = tx.Rollback()
|
||||
log.Printf("[sql.Insert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
_ = tx.Rollback()
|
||||
log.Printf("[sql.Insert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -253,12 +253,12 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
|
||||
// of type EventStart, in which case we will have to create a new event of type EventHealthy or EventUnhealthy
|
||||
// based on result.Success.
|
||||
// 2. The lastResult.Success != result.Success. This implies that the endpoint went from healthy to unhealthy or
|
||||
// vice-versa, in which case we will have to create a new event of type EventHealthy or EventUnhealthy
|
||||
// vice versa, in which case we will have to create a new event of type EventHealthy or EventUnhealthy
|
||||
// based on result.Success.
|
||||
numberOfEvents, err := s.getNumberOfEventsByEndpointID(tx, endpointID)
|
||||
if err != nil {
|
||||
// Silently fail
|
||||
log.Printf("[sql.Insert] Failed to retrieve total number of events for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to retrieve total number of events for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
if numberOfEvents == 0 {
|
||||
// There's no events yet, which means we need to add the EventStart and the first healthy/unhealthy event
|
||||
@ -268,18 +268,18 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
|
||||
})
|
||||
if err != nil {
|
||||
// Silently fail
|
||||
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", endpoint.EventStart, ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", endpoint.EventStart, ep.Key(), err.Error())
|
||||
}
|
||||
event := endpoint.NewEventFromResult(result)
|
||||
if err = s.insertEndpointEvent(tx, endpointID, event); err != nil {
|
||||
// Silently fail
|
||||
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
|
||||
}
|
||||
} else {
|
||||
// Get the success value of the previous result
|
||||
var lastResultSuccess bool
|
||||
if lastResultSuccess, err = s.getLastEndpointResultSuccessValue(tx, endpointID); err != nil {
|
||||
log.Printf("[sql.Insert] Failed to retrieve outcome of previous result for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to retrieve outcome of previous result for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
} else {
|
||||
// If we managed to retrieve the outcome of the previous result, we'll compare it with the new result.
|
||||
// If the final outcome (success or failure) of the previous and the new result aren't the same, it means
|
||||
@ -289,7 +289,7 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
|
||||
event := endpoint.NewEventFromResult(result)
|
||||
if err = s.insertEndpointEvent(tx, endpointID, event); err != nil {
|
||||
// Silently fail
|
||||
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -298,42 +298,42 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
|
||||
// (since we're only deleting MaximumNumberOfEvents at a time instead of 1)
|
||||
if numberOfEvents > eventsCleanUpThreshold {
|
||||
if err = s.deleteOldEndpointEvents(tx, endpointID); err != nil {
|
||||
log.Printf("[sql.Insert] Failed to delete old events for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to delete old events for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
// Second, we need to insert the result.
|
||||
if err = s.insertEndpointResult(tx, endpointID, result); err != nil {
|
||||
log.Printf("[sql.Insert] Failed to insert result for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to insert result for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
_ = tx.Rollback() // If we can't insert the result, we'll rollback now since there's no point continuing
|
||||
return err
|
||||
}
|
||||
// Clean up old results
|
||||
numberOfResults, err := s.getNumberOfResultsByEndpointID(tx, endpointID)
|
||||
if err != nil {
|
||||
log.Printf("[sql.Insert] Failed to retrieve total number of results for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to retrieve total number of results for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
} else {
|
||||
if numberOfResults > resultsCleanUpThreshold {
|
||||
if err = s.deleteOldEndpointResults(tx, endpointID); err != nil {
|
||||
log.Printf("[sql.Insert] Failed to delete old results for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to delete old results for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
// Finally, we need to insert the uptime data.
|
||||
// Because the uptime data significantly outlives the results, we can't rely on the results for determining the uptime
|
||||
if err = s.updateEndpointUptime(tx, endpointID, result); err != nil {
|
||||
log.Printf("[sql.Insert] Failed to update uptime for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to update uptime for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
// Merge hourly uptime entries that can be merged into daily entries and clean up old uptime entries
|
||||
numberOfUptimeEntries, err := s.getNumberOfUptimeEntriesByEndpointID(tx, endpointID)
|
||||
if err != nil {
|
||||
log.Printf("[sql.Insert] Failed to retrieve total number of uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to retrieve total number of uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
} else {
|
||||
// Merge older hourly uptime entries into daily uptime entries if we have more than uptimeTotalEntriesMergeThreshold
|
||||
if numberOfUptimeEntries >= uptimeTotalEntriesMergeThreshold {
|
||||
log.Printf("[sql.Insert] Merging hourly uptime entries for endpoint with key=%s; This is a lot of work, it shouldn't happen too often", ep.Key())
|
||||
logr.Infof("[sql.Insert] Merging hourly uptime entries for endpoint with key=%s; This is a lot of work, it shouldn't happen too often", ep.Key())
|
||||
if err = s.mergeHourlyUptimeEntriesOlderThanMergeThresholdIntoDailyUptimeEntries(tx, endpointID); err != nil {
|
||||
log.Printf("[sql.Insert] Failed to merge hourly uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to merge hourly uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -342,11 +342,11 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
|
||||
// but if Gatus was temporarily shut down, we might have some old entries that need to be cleaned up
|
||||
ageOfOldestUptimeEntry, err := s.getAgeOfOldestEndpointUptimeEntry(tx, endpointID)
|
||||
if err != nil {
|
||||
log.Printf("[sql.Insert] Failed to retrieve oldest endpoint uptime entry for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to retrieve oldest endpoint uptime entry for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
} else {
|
||||
if ageOfOldestUptimeEntry > uptimeAgeCleanUpThreshold {
|
||||
if err = s.deleteOldUptimeEntries(tx, endpointID, time.Now().Add(-(uptimeRetention + time.Hour))); err != nil {
|
||||
log.Printf("[sql.Insert] Failed to delete old uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.Insert] Failed to delete old uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -356,7 +356,7 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
|
||||
s.writeThroughCache.Delete(cacheKey)
|
||||
endpointKey, params, err := extractKeyAndParamsFromCacheKey(cacheKey)
|
||||
if err != nil {
|
||||
log.Printf("[sql.Insert] Silently deleting cache key %s instead of refreshing due to error: %s", cacheKey, err.Error())
|
||||
logr.Errorf("[sql.Insert] Silently deleting cache key %s instead of refreshing due to error: %s", cacheKey, err.Error())
|
||||
continue
|
||||
}
|
||||
// Retrieve the endpoint status by key, which will in turn refresh the cache
|
||||
@ -387,7 +387,7 @@ func (s *Store) DeleteAllEndpointStatusesNotInKeys(keys []string) int {
|
||||
result, err = s.db.Exec(query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("[sql.DeleteAllEndpointStatusesNotInKeys] Failed to delete rows that do not belong to any of keys=%v: %s", keys, err.Error())
|
||||
logr.Errorf("[sql.DeleteAllEndpointStatusesNotInKeys] Failed to delete rows that do not belong to any of keys=%v: %s", keys, err.Error())
|
||||
return 0
|
||||
}
|
||||
if s.writeThroughCache != nil {
|
||||
@ -403,7 +403,7 @@ func (s *Store) DeleteAllEndpointStatusesNotInKeys(keys []string) int {
|
||||
|
||||
// GetTriggeredEndpointAlert returns whether the triggered alert for the specified endpoint as well as the necessary information to resolve it
|
||||
func (s *Store) GetTriggeredEndpointAlert(ep *endpoint.Endpoint, alert *alert.Alert) (exists bool, resolveKey string, numberOfSuccessesInARow int, err error) {
|
||||
//log.Printf("[sql.GetTriggeredEndpointAlert] Getting triggered alert with checksum=%s for endpoint with key=%s", alert.Checksum(), ep.Key())
|
||||
//logr.Debugf("[sql.GetTriggeredEndpointAlert] Getting triggered alert with checksum=%s for endpoint with key=%s", alert.Checksum(), ep.Key())
|
||||
err = s.db.QueryRow(
|
||||
"SELECT resolve_key, number_of_successes_in_a_row FROM endpoint_alerts_triggered WHERE endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $1 LIMIT 1) AND configuration_checksum = $2",
|
||||
ep.Key(),
|
||||
@ -421,7 +421,7 @@ func (s *Store) GetTriggeredEndpointAlert(ep *endpoint.Endpoint, alert *alert.Al
|
||||
// UpsertTriggeredEndpointAlert inserts/updates a triggered alert for an endpoint
|
||||
// Used for persistence of triggered alerts across application restarts
|
||||
func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error {
|
||||
//log.Printf("[sql.UpsertTriggeredEndpointAlert] Upserting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
|
||||
//logr.Debugf("[sql.UpsertTriggeredEndpointAlert] Upserting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
|
||||
tx, err := s.db.Begin()
|
||||
if err != nil {
|
||||
return err
|
||||
@ -433,12 +433,12 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
|
||||
// This shouldn't happen, but we'll handle it anyway
|
||||
if endpointID, err = s.insertEndpoint(tx, ep); err != nil {
|
||||
_ = tx.Rollback()
|
||||
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
_ = tx.Rollback()
|
||||
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -457,7 +457,7 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
|
||||
)
|
||||
if err != nil {
|
||||
_ = tx.Rollback()
|
||||
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to persist triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to persist triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
return err
|
||||
}
|
||||
if err = tx.Commit(); err != nil {
|
||||
@ -468,7 +468,7 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
|
||||
|
||||
// DeleteTriggeredEndpointAlert deletes a triggered alert for an endpoint
|
||||
func (s *Store) DeleteTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error {
|
||||
//log.Printf("[sql.DeleteTriggeredEndpointAlert] Deleting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
|
||||
//logr.Debugf("[sql.DeleteTriggeredEndpointAlert] Deleting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
|
||||
_, err := s.db.Exec("DELETE FROM endpoint_alerts_triggered WHERE configuration_checksum = $1 AND endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $2 LIMIT 1)", triggeredAlert.Checksum(), ep.Key())
|
||||
return err
|
||||
}
|
||||
@ -477,7 +477,7 @@ func (s *Store) DeleteTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
|
||||
// configurations are not provided in the checksums list.
|
||||
// This prevents triggered alerts that have been removed or modified from lingering in the database.
|
||||
func (s *Store) DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep *endpoint.Endpoint, checksums []string) int {
|
||||
//log.Printf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Deleting triggered alerts for endpoint with key=%s that do not belong to any of checksums=%v", ep.Key(), checksums)
|
||||
//logr.Debugf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Deleting triggered alerts for endpoint with key=%s that do not belong to any of checksums=%v", ep.Key(), checksums)
|
||||
var err error
|
||||
var result sql.Result
|
||||
if len(checksums) == 0 {
|
||||
@ -498,7 +498,7 @@ func (s *Store) DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep *endpoint.En
|
||||
result, err = s.db.Exec(query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Failed to delete rows for endpoint with key=%s that do not belong to any of checksums=%v: %s", ep.Key(), checksums, err.Error())
|
||||
logr.Errorf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Failed to delete rows for endpoint with key=%s that do not belong to any of checksums=%v: %s", ep.Key(), checksums, err.Error())
|
||||
return 0
|
||||
}
|
||||
// Return number of rows deleted
|
||||
@ -530,7 +530,7 @@ func (s *Store) Close() {
|
||||
|
||||
// insertEndpoint inserts an endpoint in the store and returns the generated id of said endpoint
|
||||
func (s *Store) insertEndpoint(tx *sql.Tx, ep *endpoint.Endpoint) (int64, error) {
|
||||
//log.Printf("[sql.insertEndpoint] Inserting endpoint with group=%s and name=%s", ep.Group, ep.Name)
|
||||
//logr.Debugf("[sql.insertEndpoint] Inserting endpoint with group=%s and name=%s", ep.Group, ep.Name)
|
||||
var id int64
|
||||
err := tx.QueryRow(
|
||||
"INSERT INTO endpoints (endpoint_key, endpoint_name, endpoint_group) VALUES ($1, $2, $3) RETURNING endpoint_id",
|
||||
@ -655,12 +655,12 @@ func (s *Store) getEndpointStatusByKey(tx *sql.Tx, key string, parameters *pagin
|
||||
endpointStatus := endpoint.NewStatus(group, endpointName)
|
||||
if parameters.EventsPageSize > 0 {
|
||||
if endpointStatus.Events, err = s.getEndpointEventsByEndpointID(tx, endpointID, parameters.EventsPage, parameters.EventsPageSize); err != nil {
|
||||
log.Printf("[sql.getEndpointStatusByKey] Failed to retrieve events for key=%s: %s", key, err.Error())
|
||||
logr.Errorf("[sql.getEndpointStatusByKey] Failed to retrieve events for key=%s: %s", key, err.Error())
|
||||
}
|
||||
}
|
||||
if parameters.ResultsPageSize > 0 {
|
||||
if endpointStatus.Results, err = s.getEndpointResultsByEndpointID(tx, endpointID, parameters.ResultsPage, parameters.ResultsPageSize); err != nil {
|
||||
log.Printf("[sql.getEndpointStatusByKey] Failed to retrieve results for key=%s: %s", key, err.Error())
|
||||
logr.Errorf("[sql.getEndpointStatusByKey] Failed to retrieve results for key=%s: %s", key, err.Error())
|
||||
}
|
||||
}
|
||||
if s.writeThroughCache != nil {
|
||||
@ -735,7 +735,7 @@ func (s *Store) getEndpointResultsByEndpointID(tx *sql.Tx, endpointID int64, pag
|
||||
var joinedErrors string
|
||||
err = rows.Scan(&id, &result.Success, &joinedErrors, &result.Connected, &result.HTTPStatus, &result.DNSRCode, &result.CertificateExpiration, &result.DomainExpiration, &result.Hostname, &result.IP, &result.Duration, &result.Timestamp)
|
||||
if err != nil {
|
||||
log.Printf("[sql.getEndpointResultsByEndpointID] Silently failed to retrieve endpoint result for endpointID=%d: %s", endpointID, err.Error())
|
||||
logr.Errorf("[sql.getEndpointResultsByEndpointID] Silently failed to retrieve endpoint result for endpointID=%d: %s", endpointID, err.Error())
|
||||
err = nil
|
||||
}
|
||||
if len(joinedErrors) != 0 {
|
||||
|
@ -2,7 +2,6 @@ package store
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/TwiN/gatus/v5/alerting/alert"
|
||||
@ -11,6 +10,7 @@ import (
|
||||
"github.com/TwiN/gatus/v5/storage/store/common/paging"
|
||||
"github.com/TwiN/gatus/v5/storage/store/memory"
|
||||
"github.com/TwiN/gatus/v5/storage/store/sql"
|
||||
"github.com/TwiN/logr"
|
||||
)
|
||||
|
||||
// Store is the interface that each store should implement
|
||||
@ -91,7 +91,7 @@ var (
|
||||
func Get() Store {
|
||||
if !initialized {
|
||||
// This only happens in tests
|
||||
log.Println("[store.Get] Provider requested before it was initialized, automatically initializing")
|
||||
logr.Info("[store.Get] Provider requested before it was initialized, automatically initializing")
|
||||
err := Initialize(nil)
|
||||
if err != nil {
|
||||
panic("failed to automatically initialize store: " + err.Error())
|
||||
@ -110,11 +110,11 @@ func Initialize(cfg *storage.Config) error {
|
||||
}
|
||||
if cfg == nil {
|
||||
// This only happens in tests
|
||||
log.Println("[store.Initialize] nil storage config passed as parameter. This should only happen in tests. Defaulting to an empty config.")
|
||||
logr.Warn("[store.Initialize] nil storage config passed as parameter. This should only happen in tests. Defaulting to an empty config.")
|
||||
cfg = &storage.Config{}
|
||||
}
|
||||
if len(cfg.Path) == 0 && cfg.Type != storage.TypePostgres {
|
||||
log.Printf("[store.Initialize] Creating storage provider of type=%s", cfg.Type)
|
||||
logr.Infof("[store.Initialize] Creating storage provider of type=%s", cfg.Type)
|
||||
}
|
||||
ctx, cancelFunc = context.WithCancel(context.Background())
|
||||
switch cfg.Type {
|
||||
@ -136,13 +136,12 @@ func autoSave(ctx context.Context, store Store, interval time.Duration) {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Printf("[store.autoSave] Stopping active job")
|
||||
logr.Info("[store.autoSave] Stopping active job")
|
||||
return
|
||||
case <-time.After(interval):
|
||||
log.Printf("[store.autoSave] Saving")
|
||||
err := store.Save()
|
||||
if err != nil {
|
||||
log.Println("[store.autoSave] Save failed:", err.Error())
|
||||
logr.Info("[store.autoSave] Saving")
|
||||
if err := store.Save(); err != nil {
|
||||
logr.Errorf("[store.autoSave] Save failed: %s", err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2,27 +2,27 @@ package watchdog
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"github.com/TwiN/gatus/v5/alerting"
|
||||
"github.com/TwiN/gatus/v5/config/endpoint"
|
||||
"github.com/TwiN/gatus/v5/storage/store"
|
||||
"github.com/TwiN/logr"
|
||||
)
|
||||
|
||||
// HandleAlerting takes care of alerts to resolve and alerts to trigger based on result success or failure
|
||||
func HandleAlerting(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) {
|
||||
func HandleAlerting(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) {
|
||||
if alertingConfig == nil {
|
||||
return
|
||||
}
|
||||
if result.Success {
|
||||
handleAlertsToResolve(ep, result, alertingConfig, debug)
|
||||
handleAlertsToResolve(ep, result, alertingConfig)
|
||||
} else {
|
||||
handleAlertsToTrigger(ep, result, alertingConfig, debug)
|
||||
handleAlertsToTrigger(ep, result, alertingConfig)
|
||||
}
|
||||
}
|
||||
|
||||
func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) {
|
||||
func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) {
|
||||
ep.NumberOfSuccessesInARow = 0
|
||||
ep.NumberOfFailuresInARow++
|
||||
for _, endpointAlert := range ep.Alerts {
|
||||
@ -31,14 +31,12 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert
|
||||
continue
|
||||
}
|
||||
if endpointAlert.Triggered {
|
||||
if debug {
|
||||
log.Printf("[watchdog.handleAlertsToTrigger] Alert for endpoint=%s with description='%s' has already been TRIGGERED, skipping", ep.Name, endpointAlert.GetDescription())
|
||||
}
|
||||
logr.Debugf("[watchdog.handleAlertsToTrigger] Alert for endpoint with key=%s with description='%s' has already been TRIGGERED, skipping", ep.Key(), endpointAlert.GetDescription())
|
||||
continue
|
||||
}
|
||||
alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
|
||||
if alertProvider != nil {
|
||||
log.Printf("[watchdog.handleAlertsToTrigger] Sending %s alert because alert for endpoint=%s with description='%s' has been TRIGGERED", endpointAlert.Type, ep.Name, endpointAlert.GetDescription())
|
||||
logr.Infof("[watchdog.handleAlertsToTrigger] Sending %s alert because alert for endpoint with key=%s with description='%s' has been TRIGGERED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
|
||||
var err error
|
||||
if os.Getenv("MOCK_ALERT_PROVIDER") == "true" {
|
||||
if os.Getenv("MOCK_ALERT_PROVIDER_ERROR") == "true" {
|
||||
@ -48,27 +46,27 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert
|
||||
err = alertProvider.Send(ep, endpointAlert, result, false)
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint=%s: %s", ep.Name, err.Error())
|
||||
logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
} else {
|
||||
endpointAlert.Triggered = true
|
||||
if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
|
||||
log.Printf("[watchdog.handleAlertsToTrigger] Failed to persist triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to persist triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log.Printf("[watchdog.handleAlertsToTrigger] Not sending alert of type=%s despite being TRIGGERED, because the provider wasn't configured properly", endpointAlert.Type)
|
||||
logr.Warnf("[watchdog.handleAlertsToTrigger] Not sending alert of type=%s endpoint with key=%s despite being TRIGGERED, because the provider wasn't configured properly", endpointAlert.Type, ep.Key())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) {
|
||||
func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) {
|
||||
ep.NumberOfSuccessesInARow++
|
||||
for _, endpointAlert := range ep.Alerts {
|
||||
isStillBelowSuccessThreshold := endpointAlert.SuccessThreshold > ep.NumberOfSuccessesInARow
|
||||
if isStillBelowSuccessThreshold && endpointAlert.IsEnabled() && endpointAlert.Triggered {
|
||||
// Persist NumberOfSuccessesInARow
|
||||
if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
|
||||
log.Printf("[watchdog.handleAlertsToResolve] Failed to update triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[watchdog.handleAlertsToResolve] Failed to update triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
}
|
||||
if !endpointAlert.IsEnabled() || !endpointAlert.Triggered || isStillBelowSuccessThreshold {
|
||||
@ -78,20 +76,20 @@ func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alert
|
||||
// Further explanation can be found on Alert's Triggered field.
|
||||
endpointAlert.Triggered = false
|
||||
if err := store.Get().DeleteTriggeredEndpointAlert(ep, endpointAlert); err != nil {
|
||||
log.Printf("[watchdog.handleAlertsToResolve] Failed to delete persisted triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[watchdog.handleAlertsToResolve] Failed to delete persisted triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
if !endpointAlert.IsSendingOnResolved() {
|
||||
continue
|
||||
}
|
||||
alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
|
||||
if alertProvider != nil {
|
||||
log.Printf("[watchdog.handleAlertsToResolve] Sending %s alert because alert for endpoint with key=%s with description='%s' has been RESOLVED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
|
||||
logr.Infof("[watchdog.handleAlertsToResolve] Sending %s alert because alert for endpoint with key=%s with description='%s' has been RESOLVED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
|
||||
err := alertProvider.Send(ep, endpointAlert, result, true)
|
||||
if err != nil {
|
||||
log.Printf("[watchdog.handleAlertsToResolve] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
logr.Errorf("[watchdog.handleAlertsToResolve] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
} else {
|
||||
log.Printf("[watchdog.handleAlertsToResolve] Not sending alert of type=%s despite being RESOLVED, because the provider wasn't configured properly", endpointAlert.Type)
|
||||
logr.Warnf("[watchdog.handleAlertsToResolve] Not sending alert of type=%s for endpoint with key=%s despite being RESOLVED, because the provider wasn't configured properly", endpointAlert.Type, ep.Key())
|
||||
}
|
||||
}
|
||||
ep.NumberOfFailuresInARow = 0
|
||||
|
@ -28,7 +28,6 @@ func TestHandleAlerting(t *testing.T) {
|
||||
defer os.Clearenv()
|
||||
|
||||
cfg := &config.Config{
|
||||
Debug: true,
|
||||
Alerting: &alerting.Config{
|
||||
Custom: &custom.AlertProvider{
|
||||
URL: "https://twin.sh/health",
|
||||
@ -52,28 +51,28 @@ func TestHandleAlerting(t *testing.T) {
|
||||
}
|
||||
|
||||
verify(t, ep, 0, 0, false, "The alert shouldn't start triggered")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 1, 0, false, "The alert shouldn't have triggered")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 2, 0, true, "The alert should've triggered")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 3, 0, true, "The alert should still be triggered")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 4, 0, true, "The alert should still be triggered")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
|
||||
verify(t, ep, 0, 1, true, "The alert should still be triggered (because endpoint.Alerts[0].SuccessThreshold is 3)")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
|
||||
verify(t, ep, 0, 2, true, "The alert should still be triggered (because endpoint.Alerts[0].SuccessThreshold is 3)")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
|
||||
verify(t, ep, 0, 3, false, "The alert should've been resolved")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
|
||||
verify(t, ep, 0, 4, false, "The alert should no longer be triggered")
|
||||
}
|
||||
|
||||
func TestHandleAlertingWhenAlertingConfigIsNil(t *testing.T) {
|
||||
_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
|
||||
defer os.Clearenv()
|
||||
HandleAlerting(nil, nil, nil, true)
|
||||
HandleAlerting(nil, nil, nil)
|
||||
}
|
||||
|
||||
func TestHandleAlertingWithBadAlertProvider(t *testing.T) {
|
||||
@ -96,9 +95,9 @@ func TestHandleAlertingWithBadAlertProvider(t *testing.T) {
|
||||
}
|
||||
|
||||
verify(t, ep, 0, 0, false, "The alert shouldn't start triggered")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{}, false)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{})
|
||||
verify(t, ep, 1, 0, false, "The alert shouldn't have triggered")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{}, false)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{})
|
||||
verify(t, ep, 2, 0, false, "The alert shouldn't have triggered, because the provider wasn't configured properly")
|
||||
}
|
||||
|
||||
@ -107,7 +106,6 @@ func TestHandleAlertingWhenTriggeredAlertIsAlmostResolvedButendpointStartFailing
|
||||
defer os.Clearenv()
|
||||
|
||||
cfg := &config.Config{
|
||||
Debug: true,
|
||||
Alerting: &alerting.Config{
|
||||
Custom: &custom.AlertProvider{
|
||||
URL: "https://twin.sh/health",
|
||||
@ -132,7 +130,7 @@ func TestHandleAlertingWhenTriggeredAlertIsAlmostResolvedButendpointStartFailing
|
||||
}
|
||||
|
||||
// This test simulate an alert that was already triggered
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 2, 0, true, "The alert was already triggered at the beginning of this test")
|
||||
}
|
||||
|
||||
@ -141,7 +139,6 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedButSendOnResolvedIsFalse(t *t
|
||||
defer os.Clearenv()
|
||||
|
||||
cfg := &config.Config{
|
||||
Debug: true,
|
||||
Alerting: &alerting.Config{
|
||||
Custom: &custom.AlertProvider{
|
||||
URL: "https://twin.sh/health",
|
||||
@ -166,7 +163,7 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedButSendOnResolvedIsFalse(t *t
|
||||
NumberOfFailuresInARow: 1,
|
||||
}
|
||||
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
|
||||
verify(t, ep, 0, 1, false, "The alert should've been resolved")
|
||||
}
|
||||
|
||||
@ -175,7 +172,6 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPagerDuty(t *testing.T) {
|
||||
defer os.Clearenv()
|
||||
|
||||
cfg := &config.Config{
|
||||
Debug: true,
|
||||
Alerting: &alerting.Config{
|
||||
PagerDuty: &pagerduty.AlertProvider{
|
||||
IntegrationKey: "00000000000000000000000000000000",
|
||||
@ -198,10 +194,10 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPagerDuty(t *testing.T) {
|
||||
NumberOfFailuresInARow: 0,
|
||||
}
|
||||
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 1, 0, true, "")
|
||||
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
|
||||
verify(t, ep, 0, 1, false, "The alert should've been resolved")
|
||||
}
|
||||
|
||||
@ -210,7 +206,6 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPushover(t *testing.T) {
|
||||
defer os.Clearenv()
|
||||
|
||||
cfg := &config.Config{
|
||||
Debug: true,
|
||||
Alerting: &alerting.Config{
|
||||
Pushover: &pushover.AlertProvider{
|
||||
ApplicationToken: "000000000000000000000000000000",
|
||||
@ -234,10 +229,10 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPushover(t *testing.T) {
|
||||
NumberOfFailuresInARow: 0,
|
||||
}
|
||||
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 1, 0, true, "")
|
||||
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
|
||||
verify(t, ep, 0, 1, false, "The alert should've been resolved")
|
||||
}
|
||||
|
||||
@ -403,32 +398,32 @@ func TestHandleAlertingWithProviderThatReturnsAnError(t *testing.T) {
|
||||
},
|
||||
}
|
||||
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
|
||||
verify(t, ep, 1, 0, false, "")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
|
||||
verify(t, ep, 2, 0, false, "The alert should have failed to trigger, because the alert provider is returning an error")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
|
||||
verify(t, ep, 3, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
|
||||
verify(t, ep, 4, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error")
|
||||
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
|
||||
verify(t, ep, 5, 0, true, "The alert should've been triggered because the alert provider is no longer returning an error")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
|
||||
verify(t, ep, 0, 1, true, "The alert should've still been triggered")
|
||||
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
|
||||
verify(t, ep, 0, 2, false, "The alert should've been resolved DESPITE THE ALERT PROVIDER RETURNING AN ERROR. See Alert.Triggered for further explanation.")
|
||||
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
|
||||
|
||||
// Make sure that everything's working as expected after a rough patch
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
|
||||
verify(t, ep, 1, 0, false, "")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig, true)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
|
||||
verify(t, ep, 2, 0, true, "The alert should have triggered")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
|
||||
verify(t, ep, 0, 1, true, "The alert should still be triggered")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig, true)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
|
||||
verify(t, ep, 0, 2, false, "The alert should have been resolved")
|
||||
})
|
||||
}
|
||||
@ -440,7 +435,6 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) {
|
||||
defer os.Clearenv()
|
||||
|
||||
cfg := &config.Config{
|
||||
Debug: true,
|
||||
Alerting: &alerting.Config{
|
||||
Custom: &custom.AlertProvider{
|
||||
URL: "https://twin.sh/health",
|
||||
@ -463,27 +457,27 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 1, 0, true, "")
|
||||
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
|
||||
verify(t, ep, 0, 1, false, "")
|
||||
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 1, 0, true, "")
|
||||
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
|
||||
verify(t, ep, 0, 1, false, "")
|
||||
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
|
||||
|
||||
// Make sure that everything's working as expected after a rough patch
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 1, 0, true, "")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 2, 0, true, "")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
|
||||
verify(t, ep, 0, 1, false, "")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting, cfg.Debug)
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
|
||||
verify(t, ep, 0, 2, false, "")
|
||||
}
|
||||
|
||||
|
@ -2,7 +2,6 @@ package watchdog
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@ -45,7 +44,7 @@ func monitor(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Printf("[watchdog.monitor] Canceling current execution of group=%s; endpoint=%s", ep.Group, ep.Name)
|
||||
logr.Warnf("[watchdog.monitor] Canceling current execution of group=%s; endpoint=%s; key=%s", ep.Group, ep.Name, ep.Key())
|
||||
return
|
||||
case <-time.After(ep.Interval):
|
||||
execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics)
|
||||
@ -68,30 +67,30 @@ func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
|
||||
logr.Infof("[watchdog.execute] No connectivity; skipping execution")
|
||||
return
|
||||
}
|
||||
logr.Debugf("[watchdog.execute] Monitoring group=%s; endpoint=%s", ep.Group, ep.Name)
|
||||
logr.Debugf("[watchdog.execute] Monitoring group=%s; endpoint=%s; key=%s", ep.Group, ep.Name, ep.Key())
|
||||
result := ep.EvaluateHealth()
|
||||
if enabledMetrics {
|
||||
metrics.PublishMetricsForEndpoint(ep, result)
|
||||
}
|
||||
UpdateEndpointStatuses(ep, result)
|
||||
if logr.GetThreshold() == logr.LevelDebug && !result.Success {
|
||||
logr.Debugf("[watchdog.execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s; body=%s", ep.Group, ep.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond), result.Body)
|
||||
logr.Debugf("[watchdog.execute] Monitored group=%s; endpoint=%s; key=%s; success=%v; errors=%d; duration=%s; body=%s", ep.Group, ep.Name, ep.Key(), result.Success, len(result.Errors), result.Duration.Round(time.Millisecond), result.Body)
|
||||
} else {
|
||||
logr.Infof("[watchdog.execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s", ep.Group, ep.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond))
|
||||
logr.Infof("[watchdog.execute] Monitored group=%s; endpoint=%s; key=%s; success=%v; errors=%d; duration=%s", ep.Group, ep.Name, ep.Key(), result.Success, len(result.Errors), result.Duration.Round(time.Millisecond))
|
||||
}
|
||||
if !maintenanceConfig.IsUnderMaintenance() {
|
||||
// TODO: Consider moving this after the monitoring lock is unlocked? I mean, how much noise can a single alerting provider cause...
|
||||
HandleAlerting(ep, result, alertingConfig, logr.GetThreshold() == logr.LevelDebug)
|
||||
HandleAlerting(ep, result, alertingConfig)
|
||||
} else {
|
||||
logr.Debugf("[watchdog.execute] Not handling alerting because currently in the maintenance window")
|
||||
logr.Debug("[watchdog.execute] Not handling alerting because currently in the maintenance window")
|
||||
}
|
||||
logr.Debugf("[watchdog.execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", ep.Interval, ep.Group, ep.Name)
|
||||
logr.Debugf("[watchdog.execute] Waiting for interval=%s before monitoring group=%s endpoint=%s (key=%s) again", ep.Interval, ep.Group, ep.Name, ep.Key())
|
||||
}
|
||||
|
||||
// UpdateEndpointStatuses updates the slice of endpoint statuses
|
||||
func UpdateEndpointStatuses(ep *endpoint.Endpoint, result *endpoint.Result) {
|
||||
if err := store.Get().Insert(ep, result); err != nil {
|
||||
logr.Errorf("[watchdog.UpdateEndpointStatuses] Failed to insert result in storage:", err.Error())
|
||||
logr.Errorf("[watchdog.UpdateEndpointStatuses] Failed to insert result in storage: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user