#29: Automatically reload on configuration file update

This commit is contained in:
TwinProduction 2021-05-18 22:29:15 -04:00
parent 40dc1cc270
commit db23bd9073
37 changed files with 616 additions and 496 deletions

View File

@ -49,6 +49,7 @@ core applications: https://status.twinnation.org/
- [Monitoring a service using DNS queries](#monitoring-a-service-using-dns-queries)
- [Basic authentication](#basic-authentication)
- [disable-monitoring-lock](#disable-monitoring-lock)
- [Reloading configuration on the fly](#reloading-configuration-on-the-fly)
- [Service groups](#service-groups)
- [Exposing Gatus on a custom port](#exposing-gatus-on-a-custom-port)
- [Uptime Badges (ALPHA)](#uptime-badges)
@ -153,6 +154,7 @@ Note that you can also use environment variables in the configuration file (e.g.
| `security.basic.username` | Username for Basic authentication | Required `""` |
| `security.basic.password-sha512` | Password's SHA512 hash for Basic authentication | Required `""` |
| `disable-monitoring-lock` | Whether to [disable the monitoring lock](#disable-monitoring-lock) | `false` |
| `skip-invalid-config-update` | Whether to ignore invalid configuration updates. See [Reloading configuration on the fly](#reloading-configuration-on-the-fly). | `false` |
| `web` | Web configuration | `{}` |
| `web.address` | Address to listen on | `0.0.0.0` |
| `web.port` | Port to listen on | `8080` |
@ -818,6 +820,30 @@ technically, if you create 100 services with a 1 seconds interval, Gatus will se
- You want to test multiple services at very short interval (< 5s)
### Reloading configuration on the fly
For the sake of convenience, Gatus automatically reloads the configuration on the fly if the loaded configuration file
is updated while Gatus is running.
By default, the application will exit if the updated configuration is invalid, but you can configure
Gatus to continue running if the configuration file is updated with an invalid configuration by
setting `skip-invalid-config-update` to `true`.
Keep in mind that it is in your best interest to ensure the validity of the configuration file after each update you
apply to the configuration file while Gatus is running by looking at the log and making sure that you do not see the
following message:
```
The configuration file was updated, but it is not valid. The old configuration will continue being used.
```
Failure to do so may result in Gatus being unable to start if the application is restarted for whatever reason.
I recommend not setting `skip-invalid-config-update` to `true` to avoid a situation like this, but the choice is yours
to make.
Note that if you are not using a file storage, updating the configuration while Gatus is running is effectively
the same as restarting the application.
### Service groups
Service groups are used for grouping multiple services together on the dashboard.

View File

@ -1,9 +1,9 @@
package core
package alert
// Alert is the service's alert configuration
type Alert struct {
// Type of alert (required)
Type AlertType `yaml:"type"`
Type Type `yaml:"type"`
// Enabled defines whether or not the alert is enabled
//
@ -67,33 +67,3 @@ func (alert Alert) IsSendingOnResolved() bool {
}
return *alert.SendOnResolved
}
// AlertType is the type of the alert.
// The value will generally be the name of the alert provider
type AlertType string
const (
// CustomAlert is the AlertType for the custom alerting provider
CustomAlert AlertType = "custom"
// DiscordAlert is the AlertType for the discord alerting provider
DiscordAlert AlertType = "discord"
// MattermostAlert is the AlertType for the mattermost alerting provider
MattermostAlert AlertType = "mattermost"
// MessagebirdAlert is the AlertType for the messagebird alerting provider
MessagebirdAlert AlertType = "messagebird"
// PagerDutyAlert is the AlertType for the pagerduty alerting provider
PagerDutyAlert AlertType = "pagerduty"
// SlackAlert is the AlertType for the slack alerting provider
SlackAlert AlertType = "slack"
// TelegramAlert is the AlertType for the telegram alerting provider
TelegramAlert AlertType = "telegram"
// TwilioAlert is the AlertType for the twilio alerting provider
TwilioAlert AlertType = "twilio"
)

View File

@ -1,4 +1,4 @@
package core
package alert
import "testing"

31
alerting/alert/type.go Normal file
View File

@ -0,0 +1,31 @@
package alert
// Type identifies the kind of an alert.
//
// It is a plain string whose value will generally be the name of the
// alerting provider responsible for the alert (see the Type* constants).
type Type string
// Known alert types, one per alerting provider.
// Each constant's string value is the lowercase name of its provider.
const (
// TypeCustom is the Type for the custom alerting provider
TypeCustom Type = "custom"
// TypeDiscord is the Type for the discord alerting provider
TypeDiscord Type = "discord"
// TypeMattermost is the Type for the mattermost alerting provider
TypeMattermost Type = "mattermost"
// TypeMessagebird is the Type for the messagebird alerting provider
TypeMessagebird Type = "messagebird"
// TypePagerDuty is the Type for the pagerduty alerting provider
TypePagerDuty Type = "pagerduty"
// TypeSlack is the Type for the slack alerting provider
TypeSlack Type = "slack"
// TypeTelegram is the Type for the telegram alerting provider
TypeTelegram Type = "telegram"
// TypeTwilio is the Type for the twilio alerting provider
TypeTwilio Type = "twilio"
)

View File

@ -1,6 +1,8 @@
package alerting
import (
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/alerting/provider"
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/alerting/provider/discord"
"github.com/TwinProduction/gatus/alerting/provider/mattermost"
@ -37,3 +39,58 @@ type Config struct {
// Twilio is the configuration for the twilio alerting provider
Twilio *twilio.AlertProvider `yaml:"twilio"`
}
// GetAlertingProviderByAlertType looks up the provider.AlertProvider that corresponds to the given alert.Type.
//
// It returns nil when the alert type matches none of the known types, or when the
// matching provider has not been set on the Config.
func (config Config) GetAlertingProviderByAlertType(alertType alert.Type) provider.AlertProvider {
	// A provider is only handed back once it is known to be non-nil: returning a nil
	// provider through the interface return type would yield a non-nil interface value.
	// Every other path leaves the switch and reaches the explicit nil at the bottom.
	switch alertType {
	case alert.TypeCustom:
		if config.Custom != nil {
			return config.Custom
		}
	case alert.TypeDiscord:
		if config.Discord != nil {
			return config.Discord
		}
	case alert.TypeMattermost:
		if config.Mattermost != nil {
			return config.Mattermost
		}
	case alert.TypeMessagebird:
		if config.Messagebird != nil {
			return config.Messagebird
		}
	case alert.TypePagerDuty:
		if config.PagerDuty != nil {
			return config.PagerDuty
		}
	case alert.TypeSlack:
		if config.Slack != nil {
			return config.Slack
		}
	case alert.TypeTelegram:
		if config.Telegram != nil {
			return config.Telegram
		}
	case alert.TypeTwilio:
		if config.Twilio != nil {
			return config.Twilio
		}
	}
	return nil
}

View File

@ -9,6 +9,7 @@ import (
"os"
"strings"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/client"
"github.com/TwinProduction/gatus/core"
)
@ -24,7 +25,7 @@ type AlertProvider struct {
Placeholders map[string]map[string]string `yaml:"placeholders,omitempty"`
// DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type
DefaultAlert *core.Alert `yaml:"default-alert"`
DefaultAlert *alert.Alert `yaml:"default-alert"`
}
// IsValid returns whether the provider's configuration is valid
@ -33,7 +34,7 @@ func (provider *AlertProvider) IsValid() bool {
}
// ToCustomAlertProvider converts the provider into a custom.AlertProvider
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *AlertProvider {
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, result *core.Result, resolved bool) *AlertProvider {
return provider
}
@ -117,6 +118,6 @@ func (provider *AlertProvider) Send(serviceName, alertDescription string, resolv
}
// GetDefaultAlert returns the provider's default alert configuration
func (provider AlertProvider) GetDefaultAlert() *core.Alert {
func (provider AlertProvider) GetDefaultAlert() *alert.Alert {
return provider.DefaultAlert
}

View File

@ -4,6 +4,7 @@ import (
"io/ioutil"
"testing"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/core"
)
@ -60,7 +61,7 @@ func TestAlertProvider_buildHTTPRequestWhenTriggered(t *testing.T) {
func TestAlertProvider_ToCustomAlertProvider(t *testing.T) {
provider := AlertProvider{URL: "http://example.com"}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{}, true)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{}, true)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}

View File

@ -4,6 +4,7 @@ import (
"fmt"
"net/http"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/core"
)
@ -13,7 +14,7 @@ type AlertProvider struct {
WebhookURL string `yaml:"webhook-url"`
// DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type
DefaultAlert *core.Alert `yaml:"default-alert"`
DefaultAlert *alert.Alert `yaml:"default-alert"`
}
// IsValid returns whether the provider's configuration is valid
@ -22,7 +23,7 @@ func (provider *AlertProvider) IsValid() bool {
}
// ToCustomAlertProvider converts the provider into a custom.AlertProvider
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
var message, results string
var colorCode int
if resolved {
@ -66,6 +67,6 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler
}
// GetDefaultAlert returns the provider's default alert configuration
func (provider AlertProvider) GetDefaultAlert() *core.Alert {
func (provider AlertProvider) GetDefaultAlert() *alert.Alert {
return provider.DefaultAlert
}

View File

@ -6,6 +6,7 @@ import (
"strings"
"testing"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/core"
)
@ -22,7 +23,7 @@ func TestAlertProvider_IsValid(t *testing.T) {
func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) {
provider := AlertProvider{WebhookURL: "http://example.com"}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}
@ -44,7 +45,7 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) {
func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) {
provider := AlertProvider{WebhookURL: "http://example.com"}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}

View File

@ -4,6 +4,7 @@ import (
"fmt"
"net/http"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/core"
)
@ -14,7 +15,7 @@ type AlertProvider struct {
Insecure bool `yaml:"insecure,omitempty"`
// DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type
DefaultAlert *core.Alert `yaml:"default-alert"`
DefaultAlert *alert.Alert `yaml:"default-alert"`
}
// IsValid returns whether the provider's configuration is valid
@ -23,7 +24,7 @@ func (provider *AlertProvider) IsValid() bool {
}
// ToCustomAlertProvider converts the provider into a custom.AlertProvider
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
var message string
var color string
if resolved {
@ -78,6 +79,6 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler
}
// GetDefaultAlert returns the provider's default alert configuration
func (provider AlertProvider) GetDefaultAlert() *core.Alert {
func (provider AlertProvider) GetDefaultAlert() *alert.Alert {
return provider.DefaultAlert
}

View File

@ -6,6 +6,7 @@ import (
"strings"
"testing"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/core"
)
@ -22,7 +23,7 @@ func TestAlertProvider_IsValid(t *testing.T) {
func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) {
provider := AlertProvider{WebhookURL: "http://example.org"}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}
@ -44,7 +45,7 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) {
func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) {
provider := AlertProvider{WebhookURL: "http://example.org"}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}

View File

@ -4,6 +4,7 @@ import (
"fmt"
"net/http"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/core"
)
@ -19,7 +20,7 @@ type AlertProvider struct {
Recipients string `yaml:"recipients"`
// DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type
DefaultAlert *core.Alert `yaml:"default-alert"`
DefaultAlert *alert.Alert `yaml:"default-alert"`
}
// IsValid returns whether the provider's configuration is valid
@ -29,7 +30,7 @@ func (provider *AlertProvider) IsValid() bool {
// ToCustomAlertProvider converts the provider into a custom.AlertProvider
// Reference doc for messagebird https://developers.messagebird.com/api/sms-messaging/#send-outbound-sms
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, _ *core.Result, resolved bool) *custom.AlertProvider {
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, _ *core.Result, resolved bool) *custom.AlertProvider {
var message string
if resolved {
message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.GetDescription())
@ -53,6 +54,6 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler
}
// GetDefaultAlert returns the provider's default alert configuration
func (provider AlertProvider) GetDefaultAlert() *core.Alert {
func (provider AlertProvider) GetDefaultAlert() *alert.Alert {
return provider.DefaultAlert
}

View File

@ -6,6 +6,7 @@ import (
"strings"
"testing"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/core"
)
@ -30,7 +31,7 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) {
Originator: "1",
Recipients: "1",
}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{}, true)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{}, true)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}
@ -56,7 +57,7 @@ func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) {
Originator: "1",
Recipients: "1",
}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{}, false)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{}, false)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}

View File

@ -4,6 +4,7 @@ import (
"fmt"
"net/http"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/core"
)
@ -13,7 +14,7 @@ type AlertProvider struct {
IntegrationKey string `yaml:"integration-key"`
// DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type
DefaultAlert *core.Alert `yaml:"default-alert"`
DefaultAlert *alert.Alert `yaml:"default-alert"`
}
// IsValid returns whether the provider's configuration is valid
@ -24,7 +25,7 @@ func (provider *AlertProvider) IsValid() bool {
// ToCustomAlertProvider converts the provider into a custom.AlertProvider
//
// relevant: https://developer.pagerduty.com/docs/events-api-v2/trigger-events/
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, _ *core.Result, resolved bool) *custom.AlertProvider {
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, _ *core.Result, resolved bool) *custom.AlertProvider {
var message, eventAction, resolveKey string
if resolved {
message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.GetDescription())
@ -55,6 +56,6 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler
}
// GetDefaultAlert returns the provider's default alert configuration
func (provider AlertProvider) GetDefaultAlert() *core.Alert {
func (provider AlertProvider) GetDefaultAlert() *alert.Alert {
return provider.DefaultAlert
}

View File

@ -6,6 +6,7 @@ import (
"strings"
"testing"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/core"
)
@ -22,7 +23,7 @@ func TestAlertProvider_IsValid(t *testing.T) {
func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) {
provider := AlertProvider{IntegrationKey: "00000000000000000000000000000000"}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{}, true)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{}, true)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}
@ -44,7 +45,7 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) {
func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) {
provider := AlertProvider{IntegrationKey: "00000000000000000000000000000000"}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{}, false)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{}, false)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}

View File

@ -1,6 +1,7 @@
package provider
import (
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/alerting/provider/discord"
"github.com/TwinProduction/gatus/alerting/provider/mattermost"
@ -18,14 +19,14 @@ type AlertProvider interface {
IsValid() bool
// ToCustomAlertProvider converts the provider into a custom.AlertProvider
ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider
ToCustomAlertProvider(service *core.Service, alert *alert.Alert, result *core.Result, resolved bool) *custom.AlertProvider
// GetDefaultAlert returns the provider's default alert configuration
GetDefaultAlert() *core.Alert
GetDefaultAlert() *alert.Alert
}
// ParseWithDefaultAlert parses a service alert by using the provider's default alert as a baseline
func ParseWithDefaultAlert(providerDefaultAlert, serviceAlert *core.Alert) {
func ParseWithDefaultAlert(providerDefaultAlert, serviceAlert *alert.Alert) {
if providerDefaultAlert == nil || serviceAlert == nil {
return
}

View File

@ -3,13 +3,13 @@ package provider
import (
"testing"
"github.com/TwinProduction/gatus/core"
"github.com/TwinProduction/gatus/alerting/alert"
)
func TestParseWithDefaultAlert(t *testing.T) {
type Scenario struct {
Name string
DefaultAlert, ServiceAlert, ExpectedOutputAlert *core.Alert
DefaultAlert, ServiceAlert, ExpectedOutputAlert *alert.Alert
}
enabled := true
disabled := false
@ -18,18 +18,18 @@ func TestParseWithDefaultAlert(t *testing.T) {
scenarios := []Scenario{
{
Name: "service-alert-type-only",
DefaultAlert: &core.Alert{
DefaultAlert: &alert.Alert{
Enabled: &enabled,
SendOnResolved: &enabled,
Description: &firstDescription,
FailureThreshold: 5,
SuccessThreshold: 10,
},
ServiceAlert: &core.Alert{
Type: core.DiscordAlert,
ServiceAlert: &alert.Alert{
Type: alert.TypeDiscord,
},
ExpectedOutputAlert: &core.Alert{
Type: core.DiscordAlert,
ExpectedOutputAlert: &alert.Alert{
Type: alert.TypeDiscord,
Enabled: &enabled,
SendOnResolved: &enabled,
Description: &firstDescription,
@ -39,23 +39,23 @@ func TestParseWithDefaultAlert(t *testing.T) {
},
{
Name: "service-alert-overwrites-default-alert",
DefaultAlert: &core.Alert{
DefaultAlert: &alert.Alert{
Enabled: &disabled,
SendOnResolved: &disabled,
Description: &firstDescription,
FailureThreshold: 5,
SuccessThreshold: 10,
},
ServiceAlert: &core.Alert{
Type: core.TelegramAlert,
ServiceAlert: &alert.Alert{
Type: alert.TypeTelegram,
Enabled: &enabled,
SendOnResolved: &enabled,
Description: &secondDescription,
FailureThreshold: 6,
SuccessThreshold: 11,
},
ExpectedOutputAlert: &core.Alert{
Type: core.TelegramAlert,
ExpectedOutputAlert: &alert.Alert{
Type: alert.TypeTelegram,
Enabled: &enabled,
SendOnResolved: &enabled,
Description: &secondDescription,
@ -65,22 +65,22 @@ func TestParseWithDefaultAlert(t *testing.T) {
},
{
Name: "service-alert-partially-overwrites-default-alert",
DefaultAlert: &core.Alert{
DefaultAlert: &alert.Alert{
Enabled: &enabled,
SendOnResolved: &enabled,
Description: &firstDescription,
FailureThreshold: 5,
SuccessThreshold: 10,
},
ServiceAlert: &core.Alert{
Type: core.DiscordAlert,
ServiceAlert: &alert.Alert{
Type: alert.TypeDiscord,
Enabled: nil,
SendOnResolved: nil,
FailureThreshold: 6,
SuccessThreshold: 11,
},
ExpectedOutputAlert: &core.Alert{
Type: core.DiscordAlert,
ExpectedOutputAlert: &alert.Alert{
Type: alert.TypeDiscord,
Enabled: &enabled,
SendOnResolved: &enabled,
Description: &firstDescription,
@ -90,19 +90,19 @@ func TestParseWithDefaultAlert(t *testing.T) {
},
{
Name: "default-alert-type-should-be-ignored",
DefaultAlert: &core.Alert{
Type: core.TelegramAlert,
DefaultAlert: &alert.Alert{
Type: alert.TypeTelegram,
Enabled: &enabled,
SendOnResolved: &enabled,
Description: &firstDescription,
FailureThreshold: 5,
SuccessThreshold: 10,
},
ServiceAlert: &core.Alert{
Type: core.DiscordAlert,
ServiceAlert: &alert.Alert{
Type: alert.TypeDiscord,
},
ExpectedOutputAlert: &core.Alert{
Type: core.DiscordAlert,
ExpectedOutputAlert: &alert.Alert{
Type: alert.TypeDiscord,
Enabled: &enabled,
SendOnResolved: &enabled,
Description: &firstDescription,
@ -112,8 +112,8 @@ func TestParseWithDefaultAlert(t *testing.T) {
},
{
Name: "no-default-alert",
DefaultAlert: &core.Alert{
Type: core.DiscordAlert,
DefaultAlert: &alert.Alert{
Type: alert.TypeDiscord,
Enabled: nil,
SendOnResolved: nil,
Description: &firstDescription,

View File

@ -4,6 +4,7 @@ import (
"fmt"
"net/http"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/core"
)
@ -13,7 +14,7 @@ type AlertProvider struct {
WebhookURL string `yaml:"webhook-url"` // Slack webhook URL
// DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type
DefaultAlert *core.Alert `yaml:"default-alert"`
DefaultAlert *alert.Alert `yaml:"default-alert"`
}
// IsValid returns whether the provider's configuration is valid
@ -22,7 +23,7 @@ func (provider *AlertProvider) IsValid() bool {
}
// ToCustomAlertProvider converts the provider into a custom.AlertProvider
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
var message, color, results string
if resolved {
message = fmt.Sprintf("An alert for *%s* has been resolved after passing successfully %d time(s) in a row", service.Name, alert.SuccessThreshold)
@ -66,6 +67,6 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler
}
// GetDefaultAlert returns the provider's default alert configuration
func (provider AlertProvider) GetDefaultAlert() *core.Alert {
func (provider AlertProvider) GetDefaultAlert() *alert.Alert {
return provider.DefaultAlert
}

View File

@ -6,6 +6,7 @@ import (
"strings"
"testing"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/core"
)
@ -22,7 +23,7 @@ func TestAlertProvider_IsValid(t *testing.T) {
func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) {
provider := AlertProvider{WebhookURL: "http://example.com"}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}
@ -44,7 +45,7 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) {
func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) {
provider := AlertProvider{WebhookURL: "http://example.com"}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}

View File

@ -4,6 +4,7 @@ import (
"fmt"
"net/http"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/core"
)
@ -14,7 +15,7 @@ type AlertProvider struct {
ID string `yaml:"id"`
// DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type
DefaultAlert *core.Alert `yaml:"default-alert"`
DefaultAlert *alert.Alert `yaml:"default-alert"`
}
// IsValid returns whether the provider's configuration is valid
@ -23,7 +24,7 @@ func (provider *AlertProvider) IsValid() bool {
}
// ToCustomAlertProvider converts the provider into a custom.AlertProvider
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
var message, results string
if resolved {
message = fmt.Sprintf("An alert for *%s* has been resolved:\\n—\\n _healthcheck passing successfully %d time(s) in a row_\\n— ", service.Name, alert.FailureThreshold)
@ -54,6 +55,6 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler
}
// GetDefaultAlert returns the provider's default alert configuration
func (provider AlertProvider) GetDefaultAlert() *core.Alert {
func (provider AlertProvider) GetDefaultAlert() *alert.Alert {
return provider.DefaultAlert
}

View File

@ -7,6 +7,7 @@ import (
"strings"
"testing"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/core"
)
@ -23,7 +24,7 @@ func TestAlertProvider_IsValid(t *testing.T) {
func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) {
provider := AlertProvider{Token: "123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11", ID: "12345678"}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}
@ -47,7 +48,7 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) {
func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) {
provider := AlertProvider{Token: "123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11", ID: "0123456789"}
description := "Healthcheck Successful"
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{Description: &description}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{Description: &description}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}
@ -69,7 +70,7 @@ func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) {
func TestAlertProvider_ToCustomAlertProviderWithDescription(t *testing.T) {
provider := AlertProvider{Token: "123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11", ID: "0123456789"}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}

View File

@ -6,6 +6,7 @@ import (
"net/http"
"net/url"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/core"
)
@ -18,7 +19,7 @@ type AlertProvider struct {
To string `yaml:"to"`
// DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type
DefaultAlert *core.Alert `yaml:"default-alert"`
DefaultAlert *alert.Alert `yaml:"default-alert"`
}
// IsValid returns whether the provider's configuration is valid
@ -27,7 +28,7 @@ func (provider *AlertProvider) IsValid() bool {
}
// ToCustomAlertProvider converts the provider into a custom.AlertProvider
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, _ *core.Result, resolved bool) *custom.AlertProvider {
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, _ *core.Result, resolved bool) *custom.AlertProvider {
var message string
if resolved {
message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.GetDescription())
@ -50,6 +51,6 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler
}
// GetDefaultAlert returns the provider's default alert configuration
func (provider AlertProvider) GetDefaultAlert() *core.Alert {
func (provider AlertProvider) GetDefaultAlert() *alert.Alert {
return provider.DefaultAlert
}

View File

@ -5,6 +5,7 @@ import (
"strings"
"testing"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/core"
)
@ -32,7 +33,7 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) {
To: "4",
}
description := "alert-description"
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{Name: "service-name"}, &core.Alert{Description: &description}, &core.Result{}, true)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{Name: "service-name"}, &alert.Alert{Description: &description}, &core.Result{}, true)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}
@ -58,7 +59,7 @@ func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) {
To: "1",
}
description := "alert-description"
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{Name: "service-name"}, &core.Alert{Description: &description}, &core.Result{}, false)
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{Name: "service-name"}, &alert.Alert{Description: &description}, &core.Result{}, false)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}

View File

@ -5,8 +5,10 @@ import (
"io/ioutil"
"log"
"os"
"time"
"github.com/TwinProduction/gatus/alerting"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/alerting/provider"
"github.com/TwinProduction/gatus/core"
"github.com/TwinProduction/gatus/k8s"
@ -39,13 +41,8 @@ var (
// ErrConfigFileNotFound is an error returned when the configuration file could not be found
ErrConfigFileNotFound = errors.New("configuration file not found")
// ErrConfigNotLoaded is an error returned when an attempt to Get() the configuration before loading it is made
ErrConfigNotLoaded = errors.New("configuration is nil")
// ErrInvalidSecurityConfig is an error returned when the security configuration is invalid
ErrInvalidSecurityConfig = errors.New("invalid security configuration")
config *Config
)
// Config is the main configuration structure
@ -56,6 +53,10 @@ type Config struct {
// Metrics Whether to expose metrics at /metrics
Metrics bool `yaml:"metrics"`
// SkipInvalidConfigUpdate Whether to make the application ignore invalid configuration
// if the configuration file is updated while the application is running
SkipInvalidConfigUpdate bool `yaml:"skip-invalid-config-update"`
// DisableMonitoringLock Whether to disable the monitoring lock
// The monitoring lock is what prevents multiple services from being processed at the same time.
// Disabling this may lead to inaccurate response times
@ -78,47 +79,57 @@ type Config struct {
// Web is the configuration for the web listener
Web *WebConfig `yaml:"web"`
filePath string // path to the file from which the config was loaded
lastFileModTime time.Time // last modification time
}
// Get returns the configuration, or panics if the configuration hasn't loaded yet
func Get() *Config {
if config == nil {
panic(ErrConfigNotLoaded)
// HasLoadedConfigurationFileBeenModified returns whether the file that the
// configuration has been loaded from has been modified since it was last read
func (config Config) HasLoadedConfigurationFileBeenModified() bool {
if fileInfo, err := os.Stat(config.filePath); err == nil {
if !fileInfo.ModTime().IsZero() {
return config.lastFileModTime.Unix() != fileInfo.ModTime().Unix()
}
return config
}
return false
}
// Set sets the configuration
// Used only for testing
func Set(cfg *Config) {
config = cfg
// UpdateLastFileModTime refreshes Config.lastFileModTime
func (config *Config) UpdateLastFileModTime() {
if fileInfo, err := os.Stat(config.filePath); err == nil {
if !fileInfo.ModTime().IsZero() {
config.lastFileModTime = fileInfo.ModTime()
}
}
}
// Load loads a custom configuration file
// Note that the misconfiguration of some fields may lead to panics. This is on purpose.
func Load(configFile string) error {
func Load(configFile string) (*Config, error) {
log.Printf("[config][Load] Reading configuration from configFile=%s", configFile)
cfg, err := readConfigurationFile(configFile)
if err != nil {
if os.IsNotExist(err) {
return ErrConfigFileNotFound
return nil, ErrConfigFileNotFound
}
return err
return nil, err
}
config = cfg
return nil
cfg.filePath = configFile
cfg.UpdateLastFileModTime()
return cfg, nil
}
// LoadDefaultConfiguration loads the default configuration file
func LoadDefaultConfiguration() error {
err := Load(DefaultConfigurationFilePath)
func LoadDefaultConfiguration() (*Config, error) {
cfg, err := Load(DefaultConfigurationFilePath)
if err != nil {
if err == ErrConfigFileNotFound {
return Load(DefaultFallbackConfigurationFilePath)
}
return err
return nil, err
}
return nil
return cfg, nil
}
func readConfigurationFile(fileName string) (config *Config, err error) {
@ -144,23 +155,33 @@ func parseAndValidateConfigBytes(yamlBytes []byte) (config *Config, err error) {
} else {
// Note that the functions below may panic or return an error, and this is on purpose to prevent Gatus
// from starting with invalid configurations
validateAlertingConfig(config)
validateSecurityConfig(config)
validateServicesConfig(config)
validateKubernetesConfig(config)
validateWebConfig(config)
validateStorageConfig(config)
validateAlertingConfig(config.Alerting, config.Services, config.Debug)
if err := validateSecurityConfig(config); err != nil {
return nil, err
}
if err := validateServicesConfig(config); err != nil {
return nil, err
}
if err := validateKubernetesConfig(config); err != nil {
return nil, err
}
if err := validateWebConfig(config); err != nil {
return nil, err
}
if err := validateStorageConfig(config); err != nil {
return nil, err
}
}
return
}
func validateStorageConfig(config *Config) {
func validateStorageConfig(config *Config) error {
if config.Storage == nil {
config.Storage = &storage.Config{}
}
err := storage.Initialize(config.Storage)
if err != nil {
panic(err)
return err
}
// Remove all ServiceStatus that represent services which no longer exist in the configuration
var keys []string
@ -171,44 +192,52 @@ func validateStorageConfig(config *Config) {
if numberOfServiceStatusesDeleted > 0 {
log.Printf("[config][validateStorageConfig] Deleted %d service statuses because their matching services no longer existed", numberOfServiceStatusesDeleted)
}
return nil
}
func validateWebConfig(config *Config) {
func validateWebConfig(config *Config) error {
if config.Web == nil {
config.Web = &WebConfig{Address: DefaultAddress, Port: DefaultPort}
} else {
config.Web.validateAndSetDefaults()
return config.Web.validateAndSetDefaults()
}
return nil
}
func validateKubernetesConfig(config *Config) {
// deprecated
// I don't like the current implementation.
func validateKubernetesConfig(config *Config) error {
if config.Kubernetes != nil && config.Kubernetes.AutoDiscover {
if config.Kubernetes.ServiceTemplate == nil {
panic("kubernetes.service-template cannot be nil")
return errors.New("kubernetes.service-template cannot be nil")
}
if config.Debug {
log.Println("[config][validateKubernetesConfig] Automatically discovering Kubernetes services...")
}
discoveredServices, err := k8s.DiscoverServices(config.Kubernetes)
if err != nil {
panic(err)
return err
}
config.Services = append(config.Services, discoveredServices...)
log.Printf("[config][validateKubernetesConfig] Discovered %d Kubernetes services", len(discoveredServices))
}
return nil
}
func validateServicesConfig(config *Config) {
func validateServicesConfig(config *Config) error {
for _, service := range config.Services {
if config.Debug {
log.Printf("[config][validateServicesConfig] Validating service '%s'", service.Name)
}
service.ValidateAndSetDefaults()
if err := service.ValidateAndSetDefaults(); err != nil {
return err
}
}
log.Printf("[config][validateServicesConfig] Validated %d services", len(config.Services))
return nil
}
func validateSecurityConfig(config *Config) {
func validateSecurityConfig(config *Config) error {
if config.Security != nil {
if config.Security.IsValid() {
if config.Debug {
@ -217,44 +246,45 @@ func validateSecurityConfig(config *Config) {
} else {
// If there was an attempt to configure security, then it must mean that some confidential or private
// data are exposed. As a result, we'll reject the configuration because it's better to be safe than sorry.
panic(ErrInvalidSecurityConfig)
return ErrInvalidSecurityConfig
}
}
return nil
}
// validateAlertingConfig validates the alerting configuration
// Note that the alerting configuration has to be validated before the service configuration, because the default alert
// returned by provider.AlertProvider.GetDefaultAlert() must be parsed before core.Service.ValidateAndSetDefaults()
// sets the default alert values when none are set.
func validateAlertingConfig(config *Config) {
if config.Alerting == nil {
func validateAlertingConfig(alertingConfig *alerting.Config, services []*core.Service, debug bool) {
if alertingConfig == nil {
log.Printf("[config][validateAlertingConfig] Alerting is not configured")
return
}
alertTypes := []core.AlertType{
core.CustomAlert,
core.DiscordAlert,
core.MattermostAlert,
core.MessagebirdAlert,
core.PagerDutyAlert,
core.SlackAlert,
core.TelegramAlert,
core.TwilioAlert,
alertTypes := []alert.Type{
alert.TypeCustom,
alert.TypeDiscord,
alert.TypeMattermost,
alert.TypeMessagebird,
alert.TypePagerDuty,
alert.TypeSlack,
alert.TypeTelegram,
alert.TypeTwilio,
}
var validProviders, invalidProviders []core.AlertType
var validProviders, invalidProviders []alert.Type
for _, alertType := range alertTypes {
alertProvider := GetAlertingProviderByAlertType(config, alertType)
alertProvider := alertingConfig.GetAlertingProviderByAlertType(alertType)
if alertProvider != nil {
if alertProvider.IsValid() {
// Parse alerts with the provider's default alert
if alertProvider.GetDefaultAlert() != nil {
for _, service := range config.Services {
for alertIndex, alert := range service.Alerts {
if alertType == alert.Type {
if config.Debug {
for _, service := range services {
for alertIndex, serviceAlert := range service.Alerts {
if alertType == serviceAlert.Type {
if debug {
log.Printf("[config][validateAlertingConfig] Parsing alert %d with provider's default alert for provider=%s in service=%s", alertIndex, alertType, service.Name)
}
provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), alert)
provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), serviceAlert)
}
}
}
@ -270,58 +300,3 @@ func validateAlertingConfig(config *Config) {
}
log.Printf("[config][validateAlertingConfig] configuredProviders=%s; ignoredProviders=%s", validProviders, invalidProviders)
}
// GetAlertingProviderByAlertType returns a provider.AlertProvider by its corresponding core.AlertType
func GetAlertingProviderByAlertType(config *Config, alertType core.AlertType) provider.AlertProvider {
switch alertType {
case core.CustomAlert:
if config.Alerting.Custom == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
return nil
}
return config.Alerting.Custom
case core.DiscordAlert:
if config.Alerting.Discord == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
return nil
}
return config.Alerting.Discord
case core.MattermostAlert:
if config.Alerting.Mattermost == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
return nil
}
return config.Alerting.Mattermost
case core.MessagebirdAlert:
if config.Alerting.Messagebird == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
return nil
}
return config.Alerting.Messagebird
case core.PagerDutyAlert:
if config.Alerting.PagerDuty == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
return nil
}
return config.Alerting.PagerDuty
case core.SlackAlert:
if config.Alerting.Slack == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
return nil
}
return config.Alerting.Slack
case core.TelegramAlert:
if config.Alerting.Telegram == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
return nil
}
return config.Alerting.Telegram
case core.TwilioAlert:
if config.Alerting.Twilio == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
return nil
}
return config.Alerting.Twilio
}
return nil
}

View File

@ -7,6 +7,7 @@ import (
"time"
"github.com/TwinProduction/gatus/alerting"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/alerting/provider/discord"
"github.com/TwinProduction/gatus/alerting/provider/mattermost"
@ -20,31 +21,15 @@ import (
v1 "k8s.io/api/core/v1"
)
func TestGetBeforeConfigIsLoaded(t *testing.T) {
defer func() { recover() }()
Get()
t.Fatal("Should've panicked because the configuration hasn't been loaded yet")
}
func TestSet(t *testing.T) {
if config != nil {
t.Fatal("config should've been nil")
}
Set(&Config{})
if config == nil {
t.Fatal("config shouldn't have been nil")
}
}
func TestLoadFileThatDoesNotExist(t *testing.T) {
err := Load("file-that-does-not-exist.yaml")
_, err := Load("file-that-does-not-exist.yaml")
if err == nil {
t.Error("Should've returned an error, because the file specified doesn't exist")
}
}
func TestLoadDefaultConfigurationFile(t *testing.T) {
err := LoadDefaultConfiguration()
_, err := LoadDefaultConfiguration()
if err == nil {
t.Error("Should've returned an error, because there's no configuration files at the default path nor the default fallback path")
}
@ -419,8 +404,8 @@ services:
t.Fatal("There should've been 7 alerts configured")
}
if config.Services[0].Alerts[0].Type != core.SlackAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[0].Type)
if config.Services[0].Alerts[0].Type != alert.TypeSlack {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeSlack, config.Services[0].Alerts[0].Type)
}
if !config.Services[0].Alerts[0].IsEnabled() {
t.Error("The alert should've been enabled")
@ -432,8 +417,8 @@ services:
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[0].SuccessThreshold)
}
if config.Services[0].Alerts[1].Type != core.PagerDutyAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.PagerDutyAlert, config.Services[0].Alerts[1].Type)
if config.Services[0].Alerts[1].Type != alert.TypePagerDuty {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypePagerDuty, config.Services[0].Alerts[1].Type)
}
if config.Services[0].Alerts[1].GetDescription() != "Healthcheck failed 7 times in a row" {
t.Errorf("The description of the alert should've been %s, but it was %s", "Healthcheck failed 7 times in a row", config.Services[0].Alerts[1].GetDescription())
@ -445,8 +430,8 @@ services:
t.Errorf("The success threshold of the alert should've been %d, but it was %d", 5, config.Services[0].Alerts[1].SuccessThreshold)
}
if config.Services[0].Alerts[2].Type != core.MattermostAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.MattermostAlert, config.Services[0].Alerts[2].Type)
if config.Services[0].Alerts[2].Type != alert.TypeMattermost {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeMattermost, config.Services[0].Alerts[2].Type)
}
if !config.Services[0].Alerts[2].IsEnabled() {
t.Error("The alert should've been enabled")
@ -458,15 +443,15 @@ services:
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[2].SuccessThreshold)
}
if config.Services[0].Alerts[3].Type != core.MessagebirdAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.MessagebirdAlert, config.Services[0].Alerts[3].Type)
if config.Services[0].Alerts[3].Type != alert.TypeMessagebird {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeMessagebird, config.Services[0].Alerts[3].Type)
}
if config.Services[0].Alerts[3].IsEnabled() {
t.Error("The alert should've been disabled")
}
if config.Services[0].Alerts[4].Type != core.DiscordAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.DiscordAlert, config.Services[0].Alerts[4].Type)
if config.Services[0].Alerts[4].Type != alert.TypeDiscord {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeDiscord, config.Services[0].Alerts[4].Type)
}
if !config.Services[0].Alerts[4].IsEnabled() {
t.Error("The alert should've been enabled")
@ -478,8 +463,8 @@ services:
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[4].SuccessThreshold)
}
if config.Services[0].Alerts[5].Type != core.TelegramAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.TelegramAlert, config.Services[0].Alerts[5].Type)
if config.Services[0].Alerts[5].Type != alert.TypeTelegram {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeTelegram, config.Services[0].Alerts[5].Type)
}
if !config.Services[0].Alerts[5].IsEnabled() {
t.Error("The alert should've been enabled")
@ -491,8 +476,8 @@ services:
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[5].SuccessThreshold)
}
if config.Services[0].Alerts[6].Type != core.TwilioAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.TwilioAlert, config.Services[0].Alerts[6].Type)
if config.Services[0].Alerts[6].Type != alert.TypeTwilio {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeTwilio, config.Services[0].Alerts[6].Type)
}
if !config.Services[0].Alerts[6].IsEnabled() {
t.Error("The alert should've been enabled")
@ -633,7 +618,7 @@ services:
if config.Alerting.Discord.WebhookURL != "http://example.org" {
t.Errorf("Discord webhook should've been %s, but was %s", "http://example.org", config.Alerting.Discord.WebhookURL)
}
if GetAlertingProviderByAlertType(config, core.DiscordAlert) != config.Alerting.Discord {
if config.Alerting.GetAlertingProviderByAlertType(alert.TypeDiscord) != config.Alerting.Discord {
t.Error("expected discord configuration")
}
@ -670,8 +655,8 @@ services:
t.Fatal("There should've been 7 alerts configured")
}
if config.Services[0].Alerts[0].Type != core.SlackAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[0].Type)
if config.Services[0].Alerts[0].Type != alert.TypeSlack {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeSlack, config.Services[0].Alerts[0].Type)
}
if !config.Services[0].Alerts[0].IsEnabled() {
t.Error("The alert should've been enabled")
@ -683,8 +668,8 @@ services:
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[0].SuccessThreshold)
}
if config.Services[0].Alerts[1].Type != core.PagerDutyAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.PagerDutyAlert, config.Services[0].Alerts[1].Type)
if config.Services[0].Alerts[1].Type != alert.TypePagerDuty {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypePagerDuty, config.Services[0].Alerts[1].Type)
}
if config.Services[0].Alerts[1].GetDescription() != "default description" {
t.Errorf("The description of the alert should've been %s, but it was %s", "default description", config.Services[0].Alerts[1].GetDescription())
@ -696,8 +681,8 @@ services:
t.Errorf("The success threshold of the alert should've been %d, but it was %d", 5, config.Services[0].Alerts[1].SuccessThreshold)
}
if config.Services[0].Alerts[2].Type != core.MattermostAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.MattermostAlert, config.Services[0].Alerts[2].Type)
if config.Services[0].Alerts[2].Type != alert.TypeMattermost {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeMattermost, config.Services[0].Alerts[2].Type)
}
if !config.Services[0].Alerts[2].IsEnabled() {
t.Error("The alert should've been enabled")
@ -709,8 +694,8 @@ services:
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[2].SuccessThreshold)
}
if config.Services[0].Alerts[3].Type != core.MessagebirdAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.MessagebirdAlert, config.Services[0].Alerts[3].Type)
if config.Services[0].Alerts[3].Type != alert.TypeMessagebird {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeMessagebird, config.Services[0].Alerts[3].Type)
}
if config.Services[0].Alerts[3].IsEnabled() {
t.Error("The alert should've been disabled")
@ -719,8 +704,8 @@ services:
t.Error("The alert should be sending on resolve")
}
if config.Services[0].Alerts[4].Type != core.DiscordAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.DiscordAlert, config.Services[0].Alerts[4].Type)
if config.Services[0].Alerts[4].Type != alert.TypeDiscord {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeDiscord, config.Services[0].Alerts[4].Type)
}
if !config.Services[0].Alerts[4].IsEnabled() {
t.Error("The alert should've been enabled")
@ -732,8 +717,8 @@ services:
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[4].SuccessThreshold)
}
if config.Services[0].Alerts[5].Type != core.TelegramAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.TelegramAlert, config.Services[0].Alerts[5].Type)
if config.Services[0].Alerts[5].Type != alert.TypeTelegram {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeTelegram, config.Services[0].Alerts[5].Type)
}
if !config.Services[0].Alerts[5].IsEnabled() {
t.Error("The alert should've been enabled")
@ -745,8 +730,8 @@ services:
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[5].SuccessThreshold)
}
if config.Services[0].Alerts[6].Type != core.TwilioAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.TwilioAlert, config.Services[0].Alerts[6].Type)
if config.Services[0].Alerts[6].Type != alert.TypeTwilio {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeTwilio, config.Services[0].Alerts[6].Type)
}
if !config.Services[0].Alerts[6].IsEnabled() {
t.Error("The alert should've been enabled")
@ -800,14 +785,14 @@ services:
if len(config.Services) != 1 {
t.Error("There should've been 2 services")
}
if config.Services[0].Alerts[0].Type != core.SlackAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[0].Type)
if config.Services[0].Alerts[0].Type != alert.TypeSlack {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeSlack, config.Services[0].Alerts[0].Type)
}
if config.Services[0].Alerts[1].Type != core.SlackAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[1].Type)
if config.Services[0].Alerts[1].Type != alert.TypeSlack {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeSlack, config.Services[0].Alerts[1].Type)
}
if config.Services[0].Alerts[2].Type != core.SlackAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[2].Type)
if config.Services[0].Alerts[2].Type != alert.TypeSlack {
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeSlack, config.Services[0].Alerts[2].Type)
}
if !config.Services[0].Alerts[0].IsEnabled() {
t.Error("The alert should've been enabled")
@ -1209,8 +1194,7 @@ kubernetes:
}
func TestGetAlertingProviderByAlertType(t *testing.T) {
cfg := &Config{
Alerting: &alerting.Config{
alertingConfig := &alerting.Config{
Custom: &custom.AlertProvider{},
Discord: &discord.AlertProvider{},
Mattermost: &mattermost.AlertProvider{},
@ -1219,30 +1203,29 @@ func TestGetAlertingProviderByAlertType(t *testing.T) {
Slack: &slack.AlertProvider{},
Telegram: &telegram.AlertProvider{},
Twilio: &twilio.AlertProvider{},
},
}
if GetAlertingProviderByAlertType(cfg, core.CustomAlert) != cfg.Alerting.Custom {
if alertingConfig.GetAlertingProviderByAlertType(alert.TypeCustom) != alertingConfig.Custom {
t.Error("expected Custom configuration")
}
if GetAlertingProviderByAlertType(cfg, core.DiscordAlert) != cfg.Alerting.Discord {
if alertingConfig.GetAlertingProviderByAlertType(alert.TypeDiscord) != alertingConfig.Discord {
t.Error("expected Discord configuration")
}
if GetAlertingProviderByAlertType(cfg, core.MattermostAlert) != cfg.Alerting.Mattermost {
if alertingConfig.GetAlertingProviderByAlertType(alert.TypeMattermost) != alertingConfig.Mattermost {
t.Error("expected Mattermost configuration")
}
if GetAlertingProviderByAlertType(cfg, core.MessagebirdAlert) != cfg.Alerting.Messagebird {
if alertingConfig.GetAlertingProviderByAlertType(alert.TypeMessagebird) != alertingConfig.Messagebird {
t.Error("expected Messagebird configuration")
}
if GetAlertingProviderByAlertType(cfg, core.PagerDutyAlert) != cfg.Alerting.PagerDuty {
if alertingConfig.GetAlertingProviderByAlertType(alert.TypePagerDuty) != alertingConfig.PagerDuty {
t.Error("expected PagerDuty configuration")
}
if GetAlertingProviderByAlertType(cfg, core.SlackAlert) != cfg.Alerting.Slack {
if alertingConfig.GetAlertingProviderByAlertType(alert.TypeSlack) != alertingConfig.Slack {
t.Error("expected Slack configuration")
}
if GetAlertingProviderByAlertType(cfg, core.TelegramAlert) != cfg.Alerting.Telegram {
if alertingConfig.GetAlertingProviderByAlertType(alert.TypeTelegram) != alertingConfig.Telegram {
t.Error("expected Telegram configuration")
}
if GetAlertingProviderByAlertType(cfg, core.TwilioAlert) != cfg.Alerting.Twilio {
if alertingConfig.GetAlertingProviderByAlertType(alert.TypeTwilio) != alertingConfig.Twilio {
t.Error("expected Twilio configuration")
}
}

View File

@ -16,7 +16,7 @@ type WebConfig struct {
}
// validateAndSetDefaults checks and sets the default values for fields that are not set
func (web *WebConfig) validateAndSetDefaults() {
func (web *WebConfig) validateAndSetDefaults() error {
// Validate the Address
if len(web.Address) == 0 {
web.Address = DefaultAddress
@ -25,8 +25,9 @@ func (web *WebConfig) validateAndSetDefaults() {
if web.Port == 0 {
web.Port = DefaultPort
} else if web.Port < 0 || web.Port > math.MaxUint16 {
panic(fmt.Sprintf("invalid port: value should be between %d and %d", 0, math.MaxUint16))
return fmt.Errorf("invalid port: value should be between %d and %d", 0, math.MaxUint16)
}
return nil
}
// SocketAddress returns the combination of the Address and the Port

View File

@ -44,20 +44,19 @@ func init() {
}
// Handle creates the router and starts the server
func Handle() {
cfg := config.Get()
var router http.Handler = CreateRouter(cfg)
func Handle(securityConfig *security.Config, webConfig *config.WebConfig, enableMetrics bool) {
var router http.Handler = CreateRouter(securityConfig, enableMetrics)
if os.Getenv("ENVIRONMENT") == "dev" {
router = developmentCorsHandler(router)
}
server = &http.Server{
Addr: fmt.Sprintf("%s:%d", cfg.Web.Address, cfg.Web.Port),
Addr: fmt.Sprintf("%s:%d", webConfig.Address, webConfig.Port),
Handler: router,
ReadTimeout: 15 * time.Second,
WriteTimeout: 15 * time.Second,
IdleTimeout: 15 * time.Second,
}
log.Println("[controller][Handle] Listening on " + cfg.Web.SocketAddress())
log.Println("[controller][Handle] Listening on " + webConfig.SocketAddress())
if os.Getenv("ROUTER_TEST") == "true" {
return
}
@ -73,15 +72,15 @@ func Shutdown() {
}
// CreateRouter creates the router for the http server
func CreateRouter(cfg *config.Config) *mux.Router {
func CreateRouter(securityConfig *security.Config, enabledMetrics bool) *mux.Router {
router := mux.NewRouter()
if cfg.Metrics {
if enabledMetrics {
router.Handle("/metrics", promhttp.Handler()).Methods("GET")
}
router.Handle("/health", health.Handler().WithJSON(true)).Methods("GET")
router.HandleFunc("/favicon.ico", favIconHandler).Methods("GET")
router.HandleFunc("/api/v1/statuses", secureIfNecessary(cfg, serviceStatusesHandler)).Methods("GET") // No GzipHandler for this one, because we cache the content
router.HandleFunc("/api/v1/statuses/{key}", secureIfNecessary(cfg, GzipHandlerFunc(serviceStatusHandler))).Methods("GET")
router.HandleFunc("/api/v1/statuses", secureIfNecessary(securityConfig, serviceStatusesHandler)).Methods("GET") // No GzipHandler for this one, because we cache the content
router.HandleFunc("/api/v1/statuses/{key}", secureIfNecessary(securityConfig, GzipHandlerFunc(serviceStatusHandler))).Methods("GET")
router.HandleFunc("/api/v1/badges/uptime/{duration}/{identifier}", badgeHandler).Methods("GET")
// SPA
router.HandleFunc("/services/{service}", spaHandler).Methods("GET")
@ -90,9 +89,9 @@ func CreateRouter(cfg *config.Config) *mux.Router {
return router
}
func secureIfNecessary(cfg *config.Config, handler http.HandlerFunc) http.HandlerFunc {
if cfg.Security != nil && cfg.Security.IsValid() {
return security.Handler(handler, cfg.Security)
func secureIfNecessary(securityConfig *security.Config, handler http.HandlerFunc) http.HandlerFunc {
if securityConfig != nil && securityConfig.IsValid() {
return security.Handler(handler, securityConfig)
}
return handler
}

View File

@ -105,7 +105,7 @@ func TestCreateRouter(t *testing.T) {
}
watchdog.UpdateServiceStatuses(cfg.Services[0], &core.Result{Success: true, Duration: time.Millisecond, Timestamp: time.Now()})
watchdog.UpdateServiceStatuses(cfg.Services[1], &core.Result{Success: false, Duration: time.Second, Timestamp: time.Now()})
router := CreateRouter(cfg)
router := CreateRouter(cfg.Security, cfg.Metrics)
type Scenario struct {
Name string
Path string
@ -235,12 +235,10 @@ func TestHandle(t *testing.T) {
},
},
}
config.Set(cfg)
defer config.Set(nil)
_ = os.Setenv("ROUTER_TEST", "true")
_ = os.Setenv("ENVIRONMENT", "dev")
defer os.Clearenv()
Handle()
Handle(cfg.Security, cfg.Web, cfg.Metrics)
defer Shutdown()
request, _ := http.NewRequest("GET", "/health", nil)
responseRecorder := httptest.NewRecorder()
@ -273,7 +271,7 @@ func TestServiceStatusesHandler(t *testing.T) {
// Can't be bothered dealing with timezone issues on the worker that runs the automated tests
firstResult.Timestamp = time.Time{}
secondResult.Timestamp = time.Time{}
router := CreateRouter(&config.Config{})
router := CreateRouter(nil, false)
type Scenario struct {
Name string

View File

@ -29,16 +29,17 @@ type DNS struct {
QueryName string `yaml:"query-name"`
}
// validateAndSetDefault validates the DNS configuration and normalizes the query name.
//
// It reports ErrDNSWithNoQueryName when QueryName is empty and
// ErrDNSWithInvalidQueryType when QueryType is not a key of dns.StringToType.
// A trailing "." is appended to QueryName when it is missing.
// NOTE(review): this span is rendered from a commit diff; the panic(...) lines
// belong to the previous version and the return lines to the version that
// returns an error.
func (d *DNS) validateAndSetDefault() {
func (d *DNS) validateAndSetDefault() error {
if len(d.QueryName) == 0 {
panic(ErrDNSWithNoQueryName)
return ErrDNSWithNoQueryName
}
// Make sure the query name ends with a dot
if !strings.HasSuffix(d.QueryName, ".") {
d.QueryName += "."
}
if _, ok := dns.StringToType[d.QueryType]; !ok {
panic(ErrDNSWithInvalidQueryType)
return ErrDNSWithInvalidQueryType
}
return nil
}
func (d *DNS) query(url string, result *Result) {

View File

@ -11,6 +11,7 @@ import (
"strings"
"time"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/client"
)
@ -72,7 +73,7 @@ type Service struct {
Conditions []*Condition `yaml:"conditions"`
// Alerts is the alerting configuration for the service in case of failure
Alerts []*Alert `yaml:"alerts"`
Alerts []*alert.Alert `yaml:"alerts"`
// Insecure is whether to skip verifying the server's certificate chain and host name
Insecure bool `yaml:"insecure,omitempty"`
@ -85,7 +86,7 @@ type Service struct {
}
// ValidateAndSetDefaults validates the service's configuration and sets the default value of fields that have one
func (service *Service) ValidateAndSetDefaults() {
func (service *Service) ValidateAndSetDefaults() error {
// Set default values
if service.Interval == 0 {
service.Interval = 1 * time.Minute
@ -105,32 +106,32 @@ func (service *Service) ValidateAndSetDefaults() {
if _, contentTypeHeaderExists := service.Headers[ContentTypeHeader]; !contentTypeHeaderExists && service.GraphQL {
service.Headers[ContentTypeHeader] = "application/json"
}
for _, alert := range service.Alerts {
if alert.FailureThreshold <= 0 {
alert.FailureThreshold = 3
for _, serviceAlert := range service.Alerts {
if serviceAlert.FailureThreshold <= 0 {
serviceAlert.FailureThreshold = 3
}
if alert.SuccessThreshold <= 0 {
alert.SuccessThreshold = 2
if serviceAlert.SuccessThreshold <= 0 {
serviceAlert.SuccessThreshold = 2
}
}
if len(service.Name) == 0 {
panic(ErrServiceWithNoName)
return ErrServiceWithNoName
}
if len(service.URL) == 0 {
panic(ErrServiceWithNoURL)
return ErrServiceWithNoURL
}
if len(service.Conditions) == 0 {
panic(ErrServiceWithNoCondition)
return ErrServiceWithNoCondition
}
if service.DNS != nil {
service.DNS.validateAndSetDefault()
return
return service.DNS.validateAndSetDefault()
}
// Make sure that the request can be created
_, err := http.NewRequest(service.Method, service.URL, bytes.NewBuffer([]byte(service.Body)))
if err != nil {
panic(err)
return err
}
return nil
}
// EvaluateHealth sends a request to the service's URL and evaluates the conditions of the service.
@ -155,8 +156,8 @@ func (service *Service) EvaluateHealth() *Result {
}
// GetAlertsTriggered returns a slice of alerts that have been triggered
func (service *Service) GetAlertsTriggered() []Alert {
var alerts []Alert
func (service *Service) GetAlertsTriggered() []alert.Alert {
var alerts []alert.Alert
if service.NumberOfFailuresInARow == 0 {
return alerts
}

View File

@ -5,6 +5,8 @@ import (
"strings"
"testing"
"time"
"github.com/TwinProduction/gatus/alerting/alert"
)
func TestService_ValidateAndSetDefaults(t *testing.T) {
@ -13,7 +15,7 @@ func TestService_ValidateAndSetDefaults(t *testing.T) {
Name: "twinnation-health",
URL: "https://twinnation.org/health",
Conditions: []*Condition{&condition},
Alerts: []*Alert{{Type: PagerDutyAlert}},
Alerts: []*alert.Alert{{Type: alert.TypePagerDuty}},
}
service.ValidateAndSetDefaults()
if service.Method != "GET" {
@ -98,7 +100,7 @@ func TestService_GetAlertsTriggered(t *testing.T) {
Name: "twinnation-health",
URL: "https://twinnation.org/health",
Conditions: []*Condition{&condition},
Alerts: []*Alert{{Type: PagerDutyAlert, Enabled: &enabled}},
Alerts: []*alert.Alert{{Type: alert.TypePagerDuty, Enabled: &enabled}},
}
service.ValidateAndSetDefaults()
if service.NumberOfFailuresInARow != 0 {

75
main.go
View File

@ -5,6 +5,7 @@ import (
"os"
"os/signal"
"syscall"
"time"
"github.com/TwinProduction/gatus/config"
"github.com/TwinProduction/gatus/controller"
@ -13,37 +14,75 @@ import (
)
// main loads the configuration (panicking if loading fails), starts the
// application, then blocks until os.Interrupt or SIGTERM is received. On
// termination it stops the running components, saves the storage provider and
// logs that it is shutting down.
// NOTE(review): this span is rendered from a commit diff; lines from the
// previous implementation (sig, controller.Shutdown(), storage.Get().Save(), ...)
// are interleaved with their replacements (signalChannel, stop(), save(), ...).
func main() {
cfg := loadConfiguration()
go watchdog.Monitor(cfg)
go controller.Handle()
// Wait for termination signal
sig := make(chan os.Signal, 1)
done := make(chan bool, 1)
signal.Notify(sig, os.Interrupt, syscall.SIGTERM)
go func() {
<-sig
log.Println("Received termination signal, attempting to gracefully shut down")
controller.Shutdown()
err := storage.Get().Save()
cfg, err := loadConfiguration()
if err != nil {
log.Println("Failed to save storage provider:", err.Error())
panic(err)
}
start(cfg)
// Wait for termination signal
signalChannel := make(chan os.Signal, 1)
done := make(chan bool, 1)
signal.Notify(signalChannel, os.Interrupt, syscall.SIGTERM)
go func() {
<-signalChannel
log.Println("Received termination signal, attempting to gracefully shut down")
stop()
save()
// Unblock main now that the shutdown steps have run
done <- true
}()
<-done
log.Println("Shutting down")
}
func loadConfiguration() *config.Config {
var err error
// stop shuts down the watchdog first, then the controller.
func stop() {
watchdog.Shutdown()
controller.Shutdown()
}
// save asks the storage provider to persist its data.
// A failure is only logged; it is not returned to the caller.
func save() {
	if saveErr := storage.Get().Save(); saveErr != nil {
		log.Println("Failed to save storage provider:", saveErr.Error())
	}
}
// start launches the application for the given configuration:
// the controller is started in its own goroutine, watchdog.Monitor is called
// synchronously, and only once it returns is the goroutine that watches the
// configuration file for changes started.
func start(cfg *config.Config) {
go controller.Handle(cfg.Security, cfg.Web, cfg.Metrics)
watchdog.Monitor(cfg)
go listenToConfigurationFileChanges(cfg)
}
// loadConfiguration loads the configuration from the file named by the
// GATUS_CONFIG_FILE environment variable when it is set to a non-empty value,
// and falls back to config.LoadDefaultConfiguration otherwise.
// The loaded configuration and any loading error are returned through the
// named results.
// NOTE(review): this span is rendered from a commit diff; the `err = ...`
// assignments belong to the previous version and the `cfg, err = ...`
// assignments are their replacements.
func loadConfiguration() (cfg *config.Config, err error) {
customConfigFile := os.Getenv("GATUS_CONFIG_FILE")
if len(customConfigFile) > 0 {
err = config.Load(customConfigFile)
cfg, err = config.Load(customConfigFile)
} else {
err = config.LoadDefaultConfiguration()
cfg, err = config.LoadDefaultConfiguration()
}
return
}
// listenToConfigurationFileChanges checks every 30 seconds whether the loaded
// configuration file has been modified.
//
// When a modification is detected, the storage provider is saved and the
// configuration is loaded again:
//   - if loading fails and cfg.SkipInvalidConfigUpdate is true, the error is
//     logged, the last modification time is updated and polling continues with
//     the old configuration;
//   - if loading fails and cfg.SkipInvalidConfigUpdate is false, it panics;
//   - if loading succeeds, the application is stopped, started again with the
//     updated configuration, and this function returns.
func listenToConfigurationFileChanges(cfg *config.Config) {
for {
time.Sleep(30 * time.Second)
if cfg.HasLoadedConfigurationFileBeenModified() {
log.Println("[main][listenToConfigurationFileChanges] Configuration file has been modified")
save()
updatedConfig, err := loadConfiguration()
if err != nil {
if cfg.SkipInvalidConfigUpdate {
log.Println("[main][listenToConfigurationFileChanges] Failed to load new configuration:", err.Error())
log.Println("[main][listenToConfigurationFileChanges] The configuration file was updated, but it is not valid. The old configuration will continue being used.")
// Update the last file modification time to avoid trying to process the same invalid configuration again
cfg.UpdateLastFileModTime()
continue
} else {
panic(err)
}
// NOTE(review): the `return config.Get()` line below looks like a leftover of the
// previous loadConfiguration implementation shown by the diff -- confirm.
return config.Get()
}
// The new configuration is valid: restart everything with it
stop()
start(updatedConfig)
return
}
}
}

View File

@ -5,7 +5,6 @@ import (
"strconv"
"sync"
"github.com/TwinProduction/gatus/config"
"github.com/TwinProduction/gatus/core"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
@ -19,7 +18,6 @@ var (
// PublishMetricsForService publishes metrics for the given service and its result.
// These metrics will be exposed at /metrics if the metrics are enabled
func PublishMetricsForService(service *core.Service, result *core.Result) {
if config.Get().Metrics {
rwLock.Lock()
gauge, exists := gauges[fmt.Sprintf("%s_%s", service.Name, service.URL)]
if !exists {
@ -32,5 +30,4 @@ func PublishMetricsForService(service *core.Service, result *core.Result) {
}
rwLock.Unlock()
gauge.WithLabelValues(strconv.Itoa(result.HTTPStatus), strconv.FormatBool(result.Success)).Inc()
}
}

View File

@ -1,6 +1,7 @@
package storage
import (
"context"
"log"
"time"
@ -15,6 +16,9 @@ var (
// Because store.Store is an interface, a nil check wouldn't be sufficient, so instead of doing reflection
// every single time Get is called, we'll just lazily keep track of its existence through this variable
initialized bool
ctx context.Context
cancelFunc context.CancelFunc
)
// Get retrieves the storage provider
@ -40,24 +44,34 @@ func Initialize(cfg *Config) error {
return err
}
} else {
if cancelFunc != nil {
// Stop the active autoSave task
cancelFunc()
}
ctx, cancelFunc = context.WithCancel(context.Background())
log.Printf("[storage][Initialize] Creating storage provider with file=%s", cfg.File)
provider, err = memory.NewStore(cfg.File)
if err != nil {
return err
}
go autoSave(7 * time.Minute)
go autoSave(7*time.Minute, ctx)
}
return nil
}
// autoSave calls the provider's Save method every time interval elapses
// NOTE(review): this span is rendered from a commit diff; the signature without
// ctx and the time.Sleep call belong to the previous version.
func autoSave(interval time.Duration) {
// autoSave calls the provider's Save method every time interval elapses, until
// ctx is done. A failed save is logged and does not stop the loop.
func autoSave(interval time.Duration, ctx context.Context) {
for {
time.Sleep(interval)
select {
case <-ctx.Done():
log.Printf("[storage][autoSave] Stopping active job")
return
case <-time.After(interval):
log.Printf("[storage][autoSave] Saving")
err := provider.Save()
if err != nil {
log.Println("[storage][autoSave] Save failed:", err.Error())
}
}
}
}

View File

@ -4,96 +4,96 @@ import (
"encoding/json"
"log"
"github.com/TwinProduction/gatus/config"
"github.com/TwinProduction/gatus/alerting"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/core"
)
// HandleAlerting takes care of alerts to resolve and alerts to trigger based on result success or failure.
//
// Nothing is done when the alerting configuration is nil. Otherwise a
// successful result is passed to handleAlertsToResolve and a failed one to
// handleAlertsToTrigger.
// NOTE(review): this span is rendered from a commit diff; the lines reading the
// configuration through config.Get()/cfg belong to the previous version, and
// the lines using alertingConfig/debug are their replacements.
func HandleAlerting(service *core.Service, result *core.Result) {
cfg := config.Get()
if cfg.Alerting == nil {
func HandleAlerting(service *core.Service, result *core.Result, alertingConfig *alerting.Config, debug bool) {
if alertingConfig == nil {
return
}
if result.Success {
handleAlertsToResolve(service, result, cfg)
handleAlertsToResolve(service, result, alertingConfig, debug)
} else {
handleAlertsToTrigger(service, result, cfg)
handleAlertsToTrigger(service, result, alertingConfig, debug)
}
}
// handleAlertsToTrigger records a failed result on the service and sends every
// alert whose failure threshold has been reached.
//
// The success streak is reset and the failure streak incremented. Each alert of
// the service is then skipped when it is disabled, when its FailureThreshold is
// above the current failure streak, or when it is already triggered. Otherwise
// the alert is sent through the provider configured for its type and marked as
// triggered only if no error occurred. For PagerDuty alerts, the DedupKey of the
// response is stored in the alert's ResolveKey.
// NOTE(review): this span is rendered from a commit diff; every line using
// `alert`/`cfg` belongs to the previous version and is directly followed by its
// replacement using `serviceAlert`/`alertingConfig`/`debug`.
func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *config.Config) {
func handleAlertsToTrigger(service *core.Service, result *core.Result, alertingConfig *alerting.Config, debug bool) {
service.NumberOfSuccessesInARow = 0
service.NumberOfFailuresInARow++
for _, alert := range service.Alerts {
// If the alert hasn't been triggered, move to the next one
if !alert.IsEnabled() || alert.FailureThreshold > service.NumberOfFailuresInARow {
for _, serviceAlert := range service.Alerts {
// If the alert hasn't been triggered, move to the next one
if !serviceAlert.IsEnabled() || serviceAlert.FailureThreshold > service.NumberOfFailuresInARow {
continue
}
if alert.Triggered {
if cfg.Debug {
log.Printf("[watchdog][handleAlertsToTrigger] Alert for service=%s with description='%s' has already been TRIGGERED, skipping", service.Name, alert.GetDescription())
if serviceAlert.Triggered {
if debug {
log.Printf("[watchdog][handleAlertsToTrigger] Alert for service=%s with description='%s' has already been TRIGGERED, skipping", service.Name, serviceAlert.GetDescription())
}
continue
}
alertProvider := config.GetAlertingProviderByAlertType(cfg, alert.Type)
alertProvider := alertingConfig.GetAlertingProviderByAlertType(serviceAlert.Type)
if alertProvider != nil && alertProvider.IsValid() {
log.Printf("[watchdog][handleAlertsToTrigger] Sending %s alert because alert for service=%s with description='%s' has been TRIGGERED", alert.Type, service.Name, alert.GetDescription())
customAlertProvider := alertProvider.ToCustomAlertProvider(service, alert, result, false)
log.Printf("[watchdog][handleAlertsToTrigger] Sending %s alert because alert for service=%s with description='%s' has been TRIGGERED", serviceAlert.Type, service.Name, serviceAlert.GetDescription())
customAlertProvider := alertProvider.ToCustomAlertProvider(service, serviceAlert, result, false)
// TODO: retry on error
var err error
// We need to extract the DedupKey from PagerDuty's response
if alert.Type == core.PagerDutyAlert {
if serviceAlert.Type == alert.TypePagerDuty {
var body []byte
if body, err = customAlertProvider.Send(service.Name, alert.GetDescription(), false); err == nil {
if body, err = customAlertProvider.Send(service.Name, serviceAlert.GetDescription(), false); err == nil {
var response pagerDutyResponse
if err = json.Unmarshal(body, &response); err != nil {
log.Printf("[watchdog][handleAlertsToTrigger] Ran into error unmarshaling pagerduty response: %s", err.Error())
} else {
alert.ResolveKey = response.DedupKey
serviceAlert.ResolveKey = response.DedupKey
}
}
} else {
// All other alert types don't need to extract anything from the body, so we can just send the request right away
_, err = customAlertProvider.Send(service.Name, alert.GetDescription(), false)
// All other alert types don't need to extract anything from the body, so we can just send the request right away
_, err = customAlertProvider.Send(service.Name, serviceAlert.GetDescription(), false)
}
// An unmarshaling error above also lands here, so the alert stays untriggered in that case too
if err != nil {
log.Printf("[watchdog][handleAlertsToTrigger] Failed to send an alert for service=%s: %s", service.Name, err.Error())
log.Printf("[watchdog][handleAlertsToTrigger] Failed to send an alert for service=%s: %s", service.Name, err.Error())
} else {
alert.Triggered = true
serviceAlert.Triggered = true
}
} else {
log.Printf("[watchdog][handleAlertsToResolve] Not sending alert of type=%s despite being TRIGGERED, because the provider wasn't configured properly", alert.Type)
log.Printf("[watchdog][handleAlertsToTrigger] Not sending alert of type=%s despite being TRIGGERED, because the provider wasn't configured properly", serviceAlert.Type)
}
}
}
func handleAlertsToResolve(service *core.Service, result *core.Result, cfg *config.Config) {
func handleAlertsToResolve(service *core.Service, result *core.Result, alertingConfig *alerting.Config, debug bool) {
service.NumberOfSuccessesInARow++
for _, alert := range service.Alerts {
if !alert.IsEnabled() || !alert.Triggered || alert.SuccessThreshold > service.NumberOfSuccessesInARow {
for _, serviceAlert := range service.Alerts {
if !serviceAlert.IsEnabled() || !serviceAlert.Triggered || serviceAlert.SuccessThreshold > service.NumberOfSuccessesInARow {
continue
}
// Even if the alert provider returns an error, we still set the alert's Triggered variable to false.
// Even if the serviceAlert provider returns an error, we still set the serviceAlert's Triggered variable to false.
// Further explanation can be found on Alert's Triggered field.
alert.Triggered = false
if !alert.IsSendingOnResolved() {
serviceAlert.Triggered = false
if !serviceAlert.IsSendingOnResolved() {
continue
}
alertProvider := config.GetAlertingProviderByAlertType(cfg, alert.Type)
alertProvider := alertingConfig.GetAlertingProviderByAlertType(serviceAlert.Type)
if alertProvider != nil && alertProvider.IsValid() {
log.Printf("[watchdog][handleAlertsToResolve] Sending %s alert because alert for service=%s with description='%s' has been RESOLVED", alert.Type, service.Name, alert.GetDescription())
customAlertProvider := alertProvider.ToCustomAlertProvider(service, alert, result, true)
log.Printf("[watchdog][handleAlertsToResolve] Sending %s serviceAlert because serviceAlert for service=%s with description='%s' has been RESOLVED", serviceAlert.Type, service.Name, serviceAlert.GetDescription())
customAlertProvider := alertProvider.ToCustomAlertProvider(service, serviceAlert, result, true)
// TODO: retry on error
_, err := customAlertProvider.Send(service.Name, alert.GetDescription(), true)
_, err := customAlertProvider.Send(service.Name, serviceAlert.GetDescription(), true)
if err != nil {
log.Printf("[watchdog][handleAlertsToResolve] Failed to send an alert for service=%s: %s", service.Name, err.Error())
log.Printf("[watchdog][handleAlertsToResolve] Failed to send an serviceAlert for service=%s: %s", service.Name, err.Error())
} else {
if alert.Type == core.PagerDutyAlert {
alert.ResolveKey = ""
if serviceAlert.Type == alert.TypePagerDuty {
serviceAlert.ResolveKey = ""
}
}
} else {
log.Printf("[watchdog][handleAlertsToResolve] Not sending alert of type=%s despite being RESOLVED, because the provider wasn't configured properly", alert.Type)
log.Printf("[watchdog][handleAlertsToResolve] Not sending serviceAlert of type=%s despite being RESOLVED, because the provider wasn't configured properly", serviceAlert.Type)
}
}
service.NumberOfFailuresInARow = 0

View File

@ -5,6 +5,7 @@ import (
"testing"
"github.com/TwinProduction/gatus/alerting"
"github.com/TwinProduction/gatus/alerting/alert"
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/alerting/provider/pagerduty"
"github.com/TwinProduction/gatus/config"
@ -24,13 +25,12 @@ func TestHandleAlerting(t *testing.T) {
},
},
}
config.Set(cfg)
enabled := true
service := &core.Service{
URL: "http://example.com",
Alerts: []*core.Alert{
Alerts: []*alert.Alert{
{
Type: core.CustomAlert,
Type: alert.TypeCustom,
Enabled: &enabled,
FailureThreshold: 2,
SuccessThreshold: 3,
@ -41,50 +41,40 @@ func TestHandleAlerting(t *testing.T) {
}
verify(t, service, 0, 0, false, "The alert shouldn't start triggered")
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 1, 0, false, "The alert shouldn't have triggered")
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 2, 0, true, "The alert should've triggered")
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 3, 0, true, "The alert should still be triggered")
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 4, 0, true, "The alert should still be triggered")
HandleAlerting(service, &core.Result{Success: true})
HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug)
verify(t, service, 0, 1, true, "The alert should still be triggered (because service.Alerts[0].SuccessThreshold is 3)")
HandleAlerting(service, &core.Result{Success: true})
HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug)
verify(t, service, 0, 2, true, "The alert should still be triggered (because service.Alerts[0].SuccessThreshold is 3)")
HandleAlerting(service, &core.Result{Success: true})
HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug)
verify(t, service, 0, 3, false, "The alert should've been resolved")
HandleAlerting(service, &core.Result{Success: true})
HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug)
verify(t, service, 0, 4, false, "The alert should no longer be triggered")
}
// TestHandleAlertingWhenAlertingConfigIsNil calls HandleAlerting with a nil
// alerting configuration (and a nil service and result). There is no explicit
// assertion: the test passes as long as the call returns without panicking.
// NOTE(review): this span is rendered from a commit diff; the cfg/config.Set
// setup and the two-argument call belong to the previous version.
func TestHandleAlertingWhenAlertingConfigIsNil(t *testing.T) {
_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
defer os.Clearenv()
cfg := &config.Config{
Debug: true,
Alerting: nil,
}
config.Set(cfg)
HandleAlerting(nil, nil)
HandleAlerting(nil, nil, nil, true)
}
func TestHandleAlertingWithBadAlertProvider(t *testing.T) {
_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
defer os.Clearenv()
cfg := &config.Config{
Alerting: &alerting.Config{},
}
config.Set(cfg)
enabled := true
service := &core.Service{
URL: "http://example.com",
Alerts: []*core.Alert{
Alerts: []*alert.Alert{
{
Type: core.CustomAlert,
Type: alert.TypeCustom,
Enabled: &enabled,
FailureThreshold: 1,
SuccessThreshold: 1,
@ -95,9 +85,9 @@ func TestHandleAlertingWithBadAlertProvider(t *testing.T) {
}
verify(t, service, 0, 0, false, "The alert shouldn't start triggered")
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, &alerting.Config{}, false)
verify(t, service, 1, 0, false, "The alert shouldn't have triggered")
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, &alerting.Config{}, false)
verify(t, service, 2, 0, false, "The alert shouldn't have triggered, because the provider wasn't configured properly")
}
@ -114,13 +104,12 @@ func TestHandleAlertingWhenTriggeredAlertIsAlmostResolvedButServiceStartFailingA
},
},
}
config.Set(cfg)
enabled := true
service := &core.Service{
URL: "http://example.com",
Alerts: []*core.Alert{
Alerts: []*alert.Alert{
{
Type: core.CustomAlert,
Type: alert.TypeCustom,
Enabled: &enabled,
FailureThreshold: 2,
SuccessThreshold: 3,
@ -132,7 +121,7 @@ func TestHandleAlertingWhenTriggeredAlertIsAlmostResolvedButServiceStartFailingA
}
// This test simulate an alert that was already triggered
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 2, 0, true, "The alert was already triggered at the beginning of this test")
}
@ -149,14 +138,13 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedButSendOnResolvedIsFalse(t *t
},
},
}
config.Set(cfg)
enabled := true
disabled := false
service := &core.Service{
URL: "http://example.com",
Alerts: []*core.Alert{
Alerts: []*alert.Alert{
{
Type: core.CustomAlert,
Type: alert.TypeCustom,
Enabled: &enabled,
FailureThreshold: 1,
SuccessThreshold: 1,
@ -167,7 +155,7 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedButSendOnResolvedIsFalse(t *t
NumberOfFailuresInARow: 1,
}
HandleAlerting(service, &core.Result{Success: true})
HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug)
verify(t, service, 0, 1, false, "The alert should've been resolved")
}
@ -183,13 +171,12 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPagerDuty(t *testing.T) {
},
},
}
config.Set(cfg)
enabled := true
service := &core.Service{
URL: "http://example.com",
Alerts: []*core.Alert{
Alerts: []*alert.Alert{
{
Type: core.PagerDutyAlert,
Type: alert.TypePagerDuty,
Enabled: &enabled,
FailureThreshold: 1,
SuccessThreshold: 1,
@ -200,10 +187,10 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPagerDuty(t *testing.T) {
NumberOfFailuresInARow: 0,
}
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 1, 0, true, "")
HandleAlerting(service, &core.Result{Success: true})
HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug)
verify(t, service, 0, 1, false, "The alert should've been resolved")
}
@ -220,13 +207,12 @@ func TestHandleAlertingWithProviderThatReturnsAnError(t *testing.T) {
},
},
}
config.Set(cfg)
enabled := true
service := &core.Service{
URL: "http://example.com",
Alerts: []*core.Alert{
Alerts: []*alert.Alert{
{
Type: core.CustomAlert,
Type: alert.TypeCustom,
Enabled: &enabled,
FailureThreshold: 2,
SuccessThreshold: 2,
@ -237,32 +223,32 @@ func TestHandleAlertingWithProviderThatReturnsAnError(t *testing.T) {
}
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 1, 0, false, "")
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 2, 0, false, "The alert should have failed to trigger, because the alert provider is returning an error")
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 3, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error")
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 4, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 5, 0, true, "The alert should've been triggered because the alert provider is no longer returning an error")
HandleAlerting(service, &core.Result{Success: true})
HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug)
verify(t, service, 0, 1, true, "The alert should've still been triggered")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
HandleAlerting(service, &core.Result{Success: true})
HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug)
verify(t, service, 0, 2, false, "The alert should've been resolved DESPITE THE ALERT PROVIDER RETURNING AN ERROR. See Alert.Triggered for further explanation.")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
// Make sure that everything's working as expected after a rough patch
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 1, 0, false, "")
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 2, 0, true, "The alert should have triggered")
HandleAlerting(service, &core.Result{Success: true})
HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug)
verify(t, service, 0, 1, true, "The alert should still be triggered")
HandleAlerting(service, &core.Result{Success: true})
HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug)
verify(t, service, 0, 2, false, "The alert should have been resolved")
}
@ -279,13 +265,12 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) {
},
},
}
config.Set(cfg)
enabled := true
service := &core.Service{
URL: "http://example.com",
Alerts: []*core.Alert{
Alerts: []*alert.Alert{
{
Type: core.CustomAlert,
Type: alert.TypeCustom,
Enabled: &enabled,
FailureThreshold: 1,
SuccessThreshold: 1,
@ -295,27 +280,27 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) {
},
}
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 1, 0, true, "")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
HandleAlerting(service, &core.Result{Success: true})
HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug)
verify(t, service, 0, 1, false, "")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 1, 0, true, "")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
HandleAlerting(service, &core.Result{Success: true})
HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug)
verify(t, service, 0, 1, false, "")
_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
// Make sure that everything's working as expected after a rough patch
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 1, 0, true, "")
HandleAlerting(service, &core.Result{Success: false})
HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug)
verify(t, service, 2, 0, true, "")
HandleAlerting(service, &core.Result{Success: true})
HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug)
verify(t, service, 0, 1, false, "")
HandleAlerting(service, &core.Result{Success: true})
HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug)
verify(t, service, 0, 2, false, "")
}

View File

@ -1,10 +1,12 @@
package watchdog
import (
"context"
"log"
"sync"
"time"
"github.com/TwinProduction/gatus/alerting"
"github.com/TwinProduction/gatus/config"
"github.com/TwinProduction/gatus/core"
"github.com/TwinProduction/gatus/metric"
@ -15,52 +17,74 @@ var (
// monitoringMutex is used to prevent multiple services from being evaluated at the same time.
// Without this, conditions using response time may become inaccurate.
monitoringMutex sync.Mutex
ctx context.Context
cancelFunc context.CancelFunc
)
// Monitor loops over each service and starts a goroutine to monitor each service separately.
//
// A new cancellable context is created on every call and stored, together with
// its cancel function, in the package-level ctx and cancelFunc variables; the
// context is handed to every monitoring goroutine. Because of the pause before
// each launch, the call itself only returns after roughly 1.1 seconds per
// configured service.
// NOTE(review): this span is rendered from a commit diff; `go monitor(service)`
// is the previous version of the call that follows it.
func Monitor(cfg *config.Config) {
ctx, cancelFunc = context.WithCancel(context.Background())
for _, service := range cfg.Services {
// To prevent multiple requests from running at the same time, we'll wait for a little bit before each iteration
time.Sleep(1111 * time.Millisecond)
go monitor(service)
go monitor(service, cfg.Alerting, cfg.DisableMonitoringLock, cfg.Metrics, cfg.Debug, ctx)
}
}
// monitor monitors a single service in a loop.
//
// The service is evaluated once immediately, then again every time
// service.Interval elapses, until ctx is done.
// NOTE(review): this span is rendered from a commit diff; the one-argument
// signature, `cfg := config.Get()` and `if !cfg.DisableMonitoringLock {` belong
// to the previous version.
func monitor(service *core.Service) {
cfg := config.Get()
func monitor(service *core.Service, alertingConfig *alerting.Config, disableMonitoringLock, enabledMetrics, debug bool, ctx context.Context) {
// Run it immediately on start
execute(service, alertingConfig, disableMonitoringLock, enabledMetrics, debug)
// Loop for the next executions
for {
if !cfg.DisableMonitoringLock {
select {
case <-ctx.Done():
log.Printf("[watchdog][monitor] Canceling current execution of group=%s; service=%s", service.Group, service.Name)
return
case <-time.After(service.Interval):
execute(service, alertingConfig, disableMonitoringLock, enabledMetrics, debug)
}
}
}
// execute performs a single monitoring pass for the given service: it evaluates
// the service's health, publishes metrics when enabledMetrics is true, stores
// the result, logs a summary and hands the result to HandleAlerting.
// Unless disableMonitoringLock is true, the whole pass runs while holding
// monitoringMutex.
// NOTE(review): this span is rendered from a commit diff; the lines reading
// cfg.Debug / cfg.DisableMonitoringLock, the "[watchdog][monitor]" log lines,
// the two-argument HandleAlerting call and the trailing time.Sleep belong to
// the previous monitor implementation.
func execute(service *core.Service, alertingConfig *alerting.Config, disableMonitoringLock, enabledMetrics, debug bool) {
if !disableMonitoringLock {
// By placing the lock here, we prevent multiple services from being monitored at the exact same time, which
// could cause performance issues and return inaccurate results
monitoringMutex.Lock()
}
if cfg.Debug {
log.Printf("[watchdog][monitor] Monitoring group=%s; service=%s", service.Group, service.Name)
if debug {
log.Printf("[watchdog][execute] Monitoring group=%s; service=%s", service.Group, service.Name)
}
result := service.EvaluateHealth()
if enabledMetrics {
metric.PublishMetricsForService(service, result)
}
UpdateServiceStatuses(service, result)
log.Printf(
"[watchdog][monitor] Monitored group=%s; service=%s; success=%v; errors=%d; duration=%s",
"[watchdog][execute] Monitored group=%s; service=%s; success=%v; errors=%d; duration=%s",
service.Group,
service.Name,
result.Success,
len(result.Errors),
result.Duration.Round(time.Millisecond),
)
HandleAlerting(service, result)
if cfg.Debug {
log.Printf("[watchdog][monitor] Waiting for interval=%s before monitoring group=%s service=%s again", service.Interval, service.Group, service.Name)
HandleAlerting(service, result, alertingConfig, debug)
if debug {
log.Printf("[watchdog][execute] Waiting for interval=%s before monitoring group=%s service=%s again", service.Interval, service.Group, service.Name)
}
if !cfg.DisableMonitoringLock {
if !disableMonitoringLock {
monitoringMutex.Unlock()
}
time.Sleep(service.Interval)
}
}
// UpdateServiceStatuses records the given result for the given service by
// inserting it into the storage provider.
func UpdateServiceStatuses(service *core.Service, result *core.Result) {
	store := storage.Get()
	store.Insert(service, result)
}
// Shutdown stops monitoring all services by cancelling the context that was
// handed to the monitoring goroutines.
//
// It is a no-op when no cancel function has been set yet (i.e. Monitor has not
// been called), instead of panicking on a nil function call.
func Shutdown() {
	if cancelFunc == nil {
		return
	}
	cancelFunc()
}