From db23bd90736bc58c7772bcfa8df1f2cb64501c40 Mon Sep 17 00:00:00 2001 From: TwinProduction Date: Tue, 18 May 2021 22:29:15 -0400 Subject: [PATCH] #29: Automatically reload on configuration file update --- README.md | 26 +++ {core => alerting/alert}/alert.go | 34 +-- {core => alerting/alert}/alert_test.go | 2 +- alerting/alert/type.go | 31 +++ alerting/config.go | 57 +++++ alerting/provider/custom/custom.go | 7 +- alerting/provider/custom/custom_test.go | 3 +- alerting/provider/discord/discord.go | 7 +- alerting/provider/discord/discord_test.go | 5 +- alerting/provider/mattermost/mattermost.go | 7 +- .../provider/mattermost/mattermost_test.go | 5 +- alerting/provider/messagebird/messagebird.go | 7 +- .../provider/messagebird/messagebird_test.go | 5 +- alerting/provider/pagerduty/pagerduty.go | 7 +- alerting/provider/pagerduty/pagerduty_test.go | 5 +- alerting/provider/provider.go | 7 +- alerting/provider/provider_test.go | 50 ++--- alerting/provider/slack/slack.go | 7 +- alerting/provider/slack/slack_test.go | 5 +- alerting/provider/telegram/telegram.go | 7 +- alerting/provider/telegram/telegram_test.go | 7 +- alerting/provider/twilio/twilio.go | 7 +- alerting/provider/twilio/twilio_test.go | 5 +- config/config.go | 201 ++++++++---------- config/config_test.go | 127 +++++------ config/web.go | 5 +- controller/controller.go | 23 +- controller/controller_test.go | 8 +- core/dns.go | 7 +- core/service.go | 31 +-- core/service_test.go | 6 +- main.go | 77 +++++-- metric/metric.go | 25 +-- storage/storage.go | 30 ++- watchdog/alerting.go | 76 +++---- watchdog/alerting_test.go | 113 +++++----- watchdog/watchdog.go | 80 ++++--- 37 files changed, 616 insertions(+), 496 deletions(-) rename {core => alerting/alert}/alert.go (72%) rename {core => alerting/alert}/alert_test.go (99%) create mode 100644 alerting/alert/type.go diff --git a/README.md b/README.md index b30799c7..eba13f47 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ core applications: https://status.twinnation.org/ - [Monitoring a service using DNS queries](#monitoring-a-service-using-dns-queries) - [Basic authentication](#basic-authentication) - [disable-monitoring-lock](#disable-monitoring-lock) + - [Reloading configuration on the fly](#reloading-configuration-on-the-fly) - [Service groups](#service-groups) - [Exposing Gatus on a custom port](#exposing-gatus-on-a-custom-port) - [Uptime Badges (ALPHA)](#uptime-badges) @@ -153,6 +154,7 @@ Note that you can also use environment variables in the configuration file (e.g. | `security.basic.username` | Username for Basic authentication | Required `""` | | `security.basic.password-sha512` | Password's SHA512 hash for Basic authentication | Required `""` | | `disable-monitoring-lock` | Whether to [disable the monitoring lock](#disable-monitoring-lock) | `false` | +| `skip-invalid-config-update` | Whether to ignore invalid configuration updates. See [Reloading configuration on the fly](#reloading-configuration-on-the-fly). | `false` | | `web` | Web configuration | `{}` | | `web.address` | Address to listen on | `0.0.0.0` | | `web.port` | Port to listen on | `8080` | @@ -818,6 +820,30 @@ technically, if you create 100 services with a 1 seconds interval, Gatus will se - You want to test multiple services at very short interval (< 5s) +### Reloading configuration on the fly + +For the sake of convenience, Gatus automatically reloads the configuration on the fly if the loaded configuration file +is updated while Gatus is running.
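+
+As a quick illustration, a configuration that opts into the more lenient behavior described below simply sets
+`skip-invalid-config-update` at the top level of the configuration file (the service shown here is only an example):
+```yaml
+skip-invalid-config-update: true
+
+services:
+  - name: example
+    url: "https://example.org"
+    interval: 30s
+    conditions:
+      - "[STATUS] == 200"
+```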
+ +By default, the application will exit if the updated configuration is invalid, but you can configure +Gatus to continue running if the configuration file is updated with an invalid configuration by +setting `skip-invalid-config-update` to `true`. + +Keep in mind that it is in your best interest to validate the configuration file after each update you apply +while Gatus is running, by checking the log and making sure that you do not see the +following message: +``` +The configuration file was updated, but it is not valid. The old configuration will continue being used. +``` +Failure to do so may result in Gatus being unable to start if the application is restarted for whatever reason. + +To avoid a situation like this, I recommend not setting `skip-invalid-config-update` to `true`, but the choice is yours +to make. + +Note that if you are not using a file storage, updating the configuration while Gatus is running is effectively +the same as restarting the application. + + ### Service groups Service groups are used for grouping multiple services together on the dashboard. diff --git a/core/alert.go b/alerting/alert/alert.go similarity index 72% rename from core/alert.go rename to alerting/alert/alert.go index 18dee849..9f8f811e 100644 --- a/core/alert.go +++ b/alerting/alert/alert.go @@ -1,9 +1,9 @@ -package core +package alert // Alert is the service's alert configuration type Alert struct { // Type of alert (required) - Type AlertType `yaml:"type"` + Type Type `yaml:"type"` // Enabled defines whether or not the alert is enabled // @@ -67,33 +67,3 @@ func (alert Alert) IsSendingOnResolved() bool { } return *alert.SendOnResolved } - -// AlertType is the type of the alert. -// The value will generally be the name of the alert provider -type AlertType string - -const ( - // CustomAlert is the AlertType for the custom alerting provider - CustomAlert AlertType = "custom" - - // DiscordAlert is the AlertType for the discord alerting provider - DiscordAlert AlertType = "discord" - - // MattermostAlert is the AlertType for the mattermost alerting provider - MattermostAlert AlertType = "mattermost" - - // MessagebirdAlert is the AlertType for the messagebird alerting provider - MessagebirdAlert AlertType = "messagebird" - - // PagerDutyAlert is the AlertType for the pagerduty alerting provider - PagerDutyAlert AlertType = "pagerduty" - - // SlackAlert is the AlertType for the slack alerting provider - SlackAlert AlertType = "slack" - - // TelegramAlert is the AlertType for the telegram alerting provider - TelegramAlert AlertType = "telegram" - - // TwilioAlert is the AlertType for the twilio alerting provider - TwilioAlert AlertType = "twilio" -) diff --git a/core/alert_test.go b/alerting/alert/alert_test.go similarity index 99% rename from core/alert_test.go rename to alerting/alert/alert_test.go index 6c11891f..a061a53d 100644 --- a/core/alert_test.go +++ b/alerting/alert/alert_test.go @@ -1,4 +1,4 @@ -package core +package alert import "testing" diff --git a/alerting/alert/type.go b/alerting/alert/type.go new file mode 100644 index 00000000..bbf226b6 --- /dev/null +++ b/alerting/alert/type.go @@ -0,0 +1,31 @@ +package alert + +// Type is the type of the alert.
+// The value will generally be the name of the alert provider +type Type string + +const ( + // TypeCustom is the Type for the custom alerting provider + TypeCustom Type = "custom" + + // TypeDiscord is the Type for the discord alerting provider + TypeDiscord Type = "discord" + + // TypeMattermost is the Type for the mattermost alerting provider + TypeMattermost Type = "mattermost" + + // TypeMessagebird is the Type for the messagebird alerting provider + TypeMessagebird Type = "messagebird" + + // TypePagerDuty is the Type for the pagerduty alerting provider + TypePagerDuty Type = "pagerduty" + + // TypeSlack is the Type for the slack alerting provider + TypeSlack Type = "slack" + + // TypeTelegram is the Type for the telegram alerting provider + TypeTelegram Type = "telegram" + + // TypeTwilio is the Type for the twilio alerting provider + TypeTwilio Type = "twilio" +) diff --git a/alerting/config.go b/alerting/config.go index ca51c9a6..43cff00a 100644 --- a/alerting/config.go +++ b/alerting/config.go @@ -1,6 +1,8 @@ package alerting import ( + "github.com/TwinProduction/gatus/alerting/alert" + "github.com/TwinProduction/gatus/alerting/provider" "github.com/TwinProduction/gatus/alerting/provider/custom" "github.com/TwinProduction/gatus/alerting/provider/discord" "github.com/TwinProduction/gatus/alerting/provider/mattermost" @@ -37,3 +39,58 @@ type Config struct { // Twilio is the configuration for the twilio alerting provider Twilio *twilio.AlertProvider `yaml:"twilio"` } + +// GetAlertingProviderByAlertType returns an provider.AlertProvider by its corresponding alert.Type +func (config Config) GetAlertingProviderByAlertType(alertType alert.Type) provider.AlertProvider { + switch alertType { + case alert.TypeCustom: + if config.Custom == nil { + // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil + return nil + } + return config.Custom + case alert.TypeDiscord: + if config.Discord == nil { + // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil + return nil + } + return config.Discord + case alert.TypeMattermost: + if config.Mattermost == nil { + // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil + return nil + } + return config.Mattermost + case alert.TypeMessagebird: + if config.Messagebird == nil { + // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil + return nil + } + return config.Messagebird + case alert.TypePagerDuty: + if config.PagerDuty == nil { + // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil + return nil + } + return config.PagerDuty + case alert.TypeSlack: + if config.Slack == nil { + // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil + return nil + } + return config.Slack + case alert.TypeTelegram: + if config.Telegram == nil { + // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil + return nil + } + return config.Telegram + case alert.TypeTwilio: + if config.Twilio == nil { + // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil + return nil + } + return config.Twilio + } + return nil +} diff --git a/alerting/provider/custom/custom.go b/alerting/provider/custom/custom.go index 06c63bcf..344754f9 100644 --- 
a/alerting/provider/custom/custom.go +++ b/alerting/provider/custom/custom.go @@ -9,6 +9,7 @@ import ( "os" "strings" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/client" "github.com/TwinProduction/gatus/core" ) @@ -24,7 +25,7 @@ type AlertProvider struct { Placeholders map[string]map[string]string `yaml:"placeholders,omitempty"` // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type - DefaultAlert *core.Alert `yaml:"default-alert"` + DefaultAlert *alert.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -33,7 +34,7 @@ func (provider *AlertProvider) IsValid() bool { } // ToCustomAlertProvider converts the provider into a custom.AlertProvider -func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *AlertProvider { +func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, result *core.Result, resolved bool) *AlertProvider { return provider } @@ -117,6 +118,6 @@ func (provider *AlertProvider) Send(serviceName, alertDescription string, resolv } // GetDefaultAlert returns the provider's default alert configuration -func (provider AlertProvider) GetDefaultAlert() *core.Alert { +func (provider AlertProvider) GetDefaultAlert() *alert.Alert { return provider.DefaultAlert } diff --git a/alerting/provider/custom/custom_test.go b/alerting/provider/custom/custom_test.go index 815a0ee0..18ac1431 100644 --- a/alerting/provider/custom/custom_test.go +++ b/alerting/provider/custom/custom_test.go @@ -4,6 +4,7 @@ import ( "io/ioutil" "testing" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/core" ) @@ -60,7 +61,7 @@ func TestAlertProvider_buildHTTPRequestWhenTriggered(t *testing.T) { func TestAlertProvider_ToCustomAlertProvider(t *testing.T) { provider := AlertProvider{URL: "http://example.com"} - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{}, true) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{}, true) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } diff --git a/alerting/provider/discord/discord.go b/alerting/provider/discord/discord.go index 348a5299..ec93cab1 100644 --- a/alerting/provider/discord/discord.go +++ b/alerting/provider/discord/discord.go @@ -4,6 +4,7 @@ import ( "fmt" "net/http" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/alerting/provider/custom" "github.com/TwinProduction/gatus/core" ) @@ -13,7 +14,7 @@ type AlertProvider struct { WebhookURL string `yaml:"webhook-url"` // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type - DefaultAlert *core.Alert `yaml:"default-alert"` + DefaultAlert *alert.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -22,7 +23,7 @@ func (provider *AlertProvider) IsValid() bool { } // ToCustomAlertProvider converts the provider into a custom.AlertProvider -func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider { +func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, result *core.Result, resolved bool) *custom.AlertProvider { var message, results string 
var colorCode int if resolved { @@ -66,6 +67,6 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler } // GetDefaultAlert returns the provider's default alert configuration -func (provider AlertProvider) GetDefaultAlert() *core.Alert { +func (provider AlertProvider) GetDefaultAlert() *alert.Alert { return provider.DefaultAlert } diff --git a/alerting/provider/discord/discord_test.go b/alerting/provider/discord/discord_test.go index 84abd070..9243e890 100644 --- a/alerting/provider/discord/discord_test.go +++ b/alerting/provider/discord/discord_test.go @@ -6,6 +6,7 @@ import ( "strings" "testing" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/core" ) @@ -22,7 +23,7 @@ func TestAlertProvider_IsValid(t *testing.T) { func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) { provider := AlertProvider{WebhookURL: "http://example.com"} - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } @@ -44,7 +45,7 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) { func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) { provider := AlertProvider{WebhookURL: "http://example.com"} - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } diff --git a/alerting/provider/mattermost/mattermost.go b/alerting/provider/mattermost/mattermost.go index 35bc4820..4b0f4271 100644 --- a/alerting/provider/mattermost/mattermost.go +++ b/alerting/provider/mattermost/mattermost.go @@ -4,6 +4,7 @@ import ( "fmt" "net/http" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/alerting/provider/custom" "github.com/TwinProduction/gatus/core" ) @@ -14,7 +15,7 @@ type AlertProvider struct { Insecure bool `yaml:"insecure,omitempty"` // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type - DefaultAlert *core.Alert `yaml:"default-alert"` + DefaultAlert *alert.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -23,7 +24,7 @@ func (provider *AlertProvider) IsValid() bool { } // ToCustomAlertProvider converts the provider into a custom.AlertProvider -func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider { +func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, result *core.Result, resolved bool) *custom.AlertProvider { var message string var color string if resolved { @@ -78,6 +79,6 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, 
aler } // GetDefaultAlert returns the provider's default alert configuration -func (provider AlertProvider) GetDefaultAlert() *core.Alert { +func (provider AlertProvider) GetDefaultAlert() *alert.Alert { return provider.DefaultAlert } diff --git a/alerting/provider/mattermost/mattermost_test.go b/alerting/provider/mattermost/mattermost_test.go index 59630032..b4df04c5 100644 --- a/alerting/provider/mattermost/mattermost_test.go +++ b/alerting/provider/mattermost/mattermost_test.go @@ -6,6 +6,7 @@ import ( "strings" "testing" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/core" ) @@ -22,7 +23,7 @@ func TestAlertProvider_IsValid(t *testing.T) { func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) { provider := AlertProvider{WebhookURL: "http://example.org"} - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } @@ -44,7 +45,7 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) { func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) { provider := AlertProvider{WebhookURL: "http://example.org"} - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } diff --git a/alerting/provider/messagebird/messagebird.go b/alerting/provider/messagebird/messagebird.go index 1dbe56c9..866a6be1 100644 --- a/alerting/provider/messagebird/messagebird.go +++ b/alerting/provider/messagebird/messagebird.go @@ -4,6 +4,7 @@ import ( "fmt" "net/http" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/alerting/provider/custom" "github.com/TwinProduction/gatus/core" ) @@ -19,7 +20,7 @@ type AlertProvider struct { Recipients string `yaml:"recipients"` // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type - DefaultAlert *core.Alert `yaml:"default-alert"` + DefaultAlert *alert.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -29,7 +30,7 @@ func (provider *AlertProvider) IsValid() bool { // ToCustomAlertProvider converts the provider into a custom.AlertProvider // Reference doc for messagebird https://developers.messagebird.com/api/sms-messaging/#send-outbound-sms -func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, _ *core.Result, resolved bool) *custom.AlertProvider { +func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, _ *core.Result, resolved bool) *custom.AlertProvider { var message string if resolved { message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.GetDescription()) @@ -53,6 +54,6 @@ func (provider 
*AlertProvider) ToCustomAlertProvider(service *core.Service, aler } // GetDefaultAlert returns the provider's default alert configuration -func (provider AlertProvider) GetDefaultAlert() *core.Alert { +func (provider AlertProvider) GetDefaultAlert() *alert.Alert { return provider.DefaultAlert } diff --git a/alerting/provider/messagebird/messagebird_test.go b/alerting/provider/messagebird/messagebird_test.go index 85188941..03ec1a2c 100644 --- a/alerting/provider/messagebird/messagebird_test.go +++ b/alerting/provider/messagebird/messagebird_test.go @@ -6,6 +6,7 @@ import ( "strings" "testing" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/core" ) @@ -30,7 +31,7 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) { Originator: "1", Recipients: "1", } - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{}, true) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{}, true) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } @@ -56,7 +57,7 @@ func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) { Originator: "1", Recipients: "1", } - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{}, false) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{}, false) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } diff --git a/alerting/provider/pagerduty/pagerduty.go b/alerting/provider/pagerduty/pagerduty.go index b5b1baec..09583e65 100644 --- a/alerting/provider/pagerduty/pagerduty.go +++ b/alerting/provider/pagerduty/pagerduty.go @@ -4,6 +4,7 @@ import ( "fmt" "net/http" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/alerting/provider/custom" "github.com/TwinProduction/gatus/core" ) @@ -13,7 +14,7 @@ type AlertProvider struct { IntegrationKey string `yaml:"integration-key"` // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type - DefaultAlert *core.Alert `yaml:"default-alert"` + DefaultAlert *alert.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -24,7 +25,7 @@ func (provider *AlertProvider) IsValid() bool { // ToCustomAlertProvider converts the provider into a custom.AlertProvider // // relevant: https://developer.pagerduty.com/docs/events-api-v2/trigger-events/ -func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, _ *core.Result, resolved bool) *custom.AlertProvider { +func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, _ *core.Result, resolved bool) *custom.AlertProvider { var message, eventAction, resolveKey string if resolved { message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.GetDescription()) @@ -55,6 +56,6 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler } // GetDefaultAlert returns the provider's default alert configuration -func (provider AlertProvider) GetDefaultAlert() *core.Alert { +func (provider AlertProvider) GetDefaultAlert() *alert.Alert { return provider.DefaultAlert } diff --git a/alerting/provider/pagerduty/pagerduty_test.go b/alerting/provider/pagerduty/pagerduty_test.go index 4a0f60ed..08d65de2 100644 --- a/alerting/provider/pagerduty/pagerduty_test.go +++ 
b/alerting/provider/pagerduty/pagerduty_test.go @@ -6,6 +6,7 @@ import ( "strings" "testing" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/core" ) @@ -22,7 +23,7 @@ func TestAlertProvider_IsValid(t *testing.T) { func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) { provider := AlertProvider{IntegrationKey: "00000000000000000000000000000000"} - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{}, true) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{}, true) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } @@ -44,7 +45,7 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) { func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) { provider := AlertProvider{IntegrationKey: "00000000000000000000000000000000"} - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{}, false) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{}, false) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } diff --git a/alerting/provider/provider.go b/alerting/provider/provider.go index 1c381356..b1a65188 100644 --- a/alerting/provider/provider.go +++ b/alerting/provider/provider.go @@ -1,6 +1,7 @@ package provider import ( + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/alerting/provider/custom" "github.com/TwinProduction/gatus/alerting/provider/discord" "github.com/TwinProduction/gatus/alerting/provider/mattermost" @@ -18,14 +19,14 @@ type AlertProvider interface { IsValid() bool // ToCustomAlertProvider converts the provider into a custom.AlertProvider - ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider + ToCustomAlertProvider(service *core.Service, alert *alert.Alert, result *core.Result, resolved bool) *custom.AlertProvider // GetDefaultAlert returns the provider's default alert configuration - GetDefaultAlert() *core.Alert + GetDefaultAlert() *alert.Alert } // ParseWithDefaultAlert parses a service alert by using the provider's default alert as a baseline -func ParseWithDefaultAlert(providerDefaultAlert, serviceAlert *core.Alert) { +func ParseWithDefaultAlert(providerDefaultAlert, serviceAlert *alert.Alert) { if providerDefaultAlert == nil || serviceAlert == nil { return } diff --git a/alerting/provider/provider_test.go b/alerting/provider/provider_test.go index 8b98b1ed..187b0541 100644 --- a/alerting/provider/provider_test.go +++ b/alerting/provider/provider_test.go @@ -3,13 +3,13 @@ package provider import ( "testing" - "github.com/TwinProduction/gatus/core" + "github.com/TwinProduction/gatus/alerting/alert" ) func TestParseWithDefaultAlert(t *testing.T) { type Scenario struct { Name string - DefaultAlert, ServiceAlert, ExpectedOutputAlert *core.Alert + DefaultAlert, ServiceAlert, ExpectedOutputAlert *alert.Alert } enabled := true disabled := false @@ -18,18 +18,18 @@ func TestParseWithDefaultAlert(t *testing.T) { scenarios := []Scenario{ { Name: "service-alert-type-only", - DefaultAlert: &core.Alert{ + DefaultAlert: &alert.Alert{ Enabled: &enabled, SendOnResolved: &enabled, Description: &firstDescription, FailureThreshold: 5, SuccessThreshold: 10, }, - ServiceAlert: &core.Alert{ - Type: core.DiscordAlert, + ServiceAlert: 
&alert.Alert{ + Type: alert.TypeDiscord, }, - ExpectedOutputAlert: &core.Alert{ - Type: core.DiscordAlert, + ExpectedOutputAlert: &alert.Alert{ + Type: alert.TypeDiscord, Enabled: &enabled, SendOnResolved: &enabled, Description: &firstDescription, @@ -39,23 +39,23 @@ func TestParseWithDefaultAlert(t *testing.T) { }, { Name: "service-alert-overwrites-default-alert", - DefaultAlert: &core.Alert{ + DefaultAlert: &alert.Alert{ Enabled: &disabled, SendOnResolved: &disabled, Description: &firstDescription, FailureThreshold: 5, SuccessThreshold: 10, }, - ServiceAlert: &core.Alert{ - Type: core.TelegramAlert, + ServiceAlert: &alert.Alert{ + Type: alert.TypeTelegram, Enabled: &enabled, SendOnResolved: &enabled, Description: &secondDescription, FailureThreshold: 6, SuccessThreshold: 11, }, - ExpectedOutputAlert: &core.Alert{ - Type: core.TelegramAlert, + ExpectedOutputAlert: &alert.Alert{ + Type: alert.TypeTelegram, Enabled: &enabled, SendOnResolved: &enabled, Description: &secondDescription, @@ -65,22 +65,22 @@ func TestParseWithDefaultAlert(t *testing.T) { }, { Name: "service-alert-partially-overwrites-default-alert", - DefaultAlert: &core.Alert{ + DefaultAlert: &alert.Alert{ Enabled: &enabled, SendOnResolved: &enabled, Description: &firstDescription, FailureThreshold: 5, SuccessThreshold: 10, }, - ServiceAlert: &core.Alert{ - Type: core.DiscordAlert, + ServiceAlert: &alert.Alert{ + Type: alert.TypeDiscord, Enabled: nil, SendOnResolved: nil, FailureThreshold: 6, SuccessThreshold: 11, }, - ExpectedOutputAlert: &core.Alert{ - Type: core.DiscordAlert, + ExpectedOutputAlert: &alert.Alert{ + Type: alert.TypeDiscord, Enabled: &enabled, SendOnResolved: &enabled, Description: &firstDescription, @@ -90,19 +90,19 @@ func TestParseWithDefaultAlert(t *testing.T) { }, { Name: "default-alert-type-should-be-ignored", - DefaultAlert: &core.Alert{ - Type: core.TelegramAlert, + DefaultAlert: &alert.Alert{ + Type: alert.TypeTelegram, Enabled: &enabled, SendOnResolved: &enabled, Description: &firstDescription, FailureThreshold: 5, SuccessThreshold: 10, }, - ServiceAlert: &core.Alert{ - Type: core.DiscordAlert, + ServiceAlert: &alert.Alert{ + Type: alert.TypeDiscord, }, - ExpectedOutputAlert: &core.Alert{ - Type: core.DiscordAlert, + ExpectedOutputAlert: &alert.Alert{ + Type: alert.TypeDiscord, Enabled: &enabled, SendOnResolved: &enabled, Description: &firstDescription, @@ -112,8 +112,8 @@ func TestParseWithDefaultAlert(t *testing.T) { }, { Name: "no-default-alert", - DefaultAlert: &core.Alert{ - Type: core.DiscordAlert, + DefaultAlert: &alert.Alert{ + Type: alert.TypeDiscord, Enabled: nil, SendOnResolved: nil, Description: &firstDescription, diff --git a/alerting/provider/slack/slack.go b/alerting/provider/slack/slack.go index ed5eb825..4b3bcf99 100644 --- a/alerting/provider/slack/slack.go +++ b/alerting/provider/slack/slack.go @@ -4,6 +4,7 @@ import ( "fmt" "net/http" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/alerting/provider/custom" "github.com/TwinProduction/gatus/core" ) @@ -13,7 +14,7 @@ type AlertProvider struct { WebhookURL string `yaml:"webhook-url"` // Slack webhook URL // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type - DefaultAlert *core.Alert `yaml:"default-alert"` + DefaultAlert *alert.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -22,7 +23,7 @@ func (provider *AlertProvider) IsValid() bool { } // ToCustomAlertProvider converts the provider into a 
custom.AlertProvider -func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider { +func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, result *core.Result, resolved bool) *custom.AlertProvider { var message, color, results string if resolved { message = fmt.Sprintf("An alert for *%s* has been resolved after passing successfully %d time(s) in a row", service.Name, alert.SuccessThreshold) @@ -66,6 +67,6 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler } // GetDefaultAlert returns the provider's default alert configuration -func (provider AlertProvider) GetDefaultAlert() *core.Alert { +func (provider AlertProvider) GetDefaultAlert() *alert.Alert { return provider.DefaultAlert } diff --git a/alerting/provider/slack/slack_test.go b/alerting/provider/slack/slack_test.go index ed74dd5b..798cf8b4 100644 --- a/alerting/provider/slack/slack_test.go +++ b/alerting/provider/slack/slack_test.go @@ -6,6 +6,7 @@ import ( "strings" "testing" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/core" ) @@ -22,7 +23,7 @@ func TestAlertProvider_IsValid(t *testing.T) { func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) { provider := AlertProvider{WebhookURL: "http://example.com"} - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } @@ -44,7 +45,7 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) { func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) { provider := AlertProvider{WebhookURL: "http://example.com"} - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } diff --git a/alerting/provider/telegram/telegram.go b/alerting/provider/telegram/telegram.go index 58608c56..bdb84076 100644 --- a/alerting/provider/telegram/telegram.go +++ b/alerting/provider/telegram/telegram.go @@ -4,6 +4,7 @@ import ( "fmt" "net/http" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/alerting/provider/custom" "github.com/TwinProduction/gatus/core" ) @@ -14,7 +15,7 @@ type AlertProvider struct { ID string `yaml:"id"` // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type - DefaultAlert *core.Alert `yaml:"default-alert"` + DefaultAlert *alert.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -23,7 +24,7 @@ func (provider *AlertProvider) IsValid() bool { } // ToCustomAlertProvider converts the provider into a custom.AlertProvider -func 
(provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider { +func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, result *core.Result, resolved bool) *custom.AlertProvider { var message, results string if resolved { message = fmt.Sprintf("An alert for *%s* has been resolved:\\n—\\n _healthcheck passing successfully %d time(s) in a row_\\n— ", service.Name, alert.FailureThreshold) @@ -54,6 +55,6 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler } // GetDefaultAlert returns the provider's default alert configuration -func (provider AlertProvider) GetDefaultAlert() *core.Alert { +func (provider AlertProvider) GetDefaultAlert() *alert.Alert { return provider.DefaultAlert } diff --git a/alerting/provider/telegram/telegram_test.go b/alerting/provider/telegram/telegram_test.go index 7a56df60..4098fbbf 100644 --- a/alerting/provider/telegram/telegram_test.go +++ b/alerting/provider/telegram/telegram_test.go @@ -7,6 +7,7 @@ import ( "strings" "testing" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/core" ) @@ -23,7 +24,7 @@ func TestAlertProvider_IsValid(t *testing.T) { func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) { provider := AlertProvider{Token: "123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11", ID: "12345678"} - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } @@ -47,7 +48,7 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) { func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) { provider := AlertProvider{Token: "123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11", ID: "0123456789"} description := "Healthcheck Successful" - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{Description: &description}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{Description: &description}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } @@ -69,7 +70,7 @@ func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) { func TestAlertProvider_ToCustomAlertProviderWithDescription(t *testing.T) { provider := AlertProvider{Token: "123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11", ID: "0123456789"} - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &alert.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false) if customAlertProvider == nil { 
t.Fatal("customAlertProvider shouldn't have been nil") } diff --git a/alerting/provider/twilio/twilio.go b/alerting/provider/twilio/twilio.go index 5439a779..cef05d8e 100644 --- a/alerting/provider/twilio/twilio.go +++ b/alerting/provider/twilio/twilio.go @@ -6,6 +6,7 @@ import ( "net/http" "net/url" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/alerting/provider/custom" "github.com/TwinProduction/gatus/core" ) @@ -18,7 +19,7 @@ type AlertProvider struct { To string `yaml:"to"` // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type - DefaultAlert *core.Alert `yaml:"default-alert"` + DefaultAlert *alert.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -27,7 +28,7 @@ func (provider *AlertProvider) IsValid() bool { } // ToCustomAlertProvider converts the provider into a custom.AlertProvider -func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, _ *core.Result, resolved bool) *custom.AlertProvider { +func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *alert.Alert, _ *core.Result, resolved bool) *custom.AlertProvider { var message string if resolved { message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.GetDescription()) @@ -50,6 +51,6 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler } // GetDefaultAlert returns the provider's default alert configuration -func (provider AlertProvider) GetDefaultAlert() *core.Alert { +func (provider AlertProvider) GetDefaultAlert() *alert.Alert { return provider.DefaultAlert } diff --git a/alerting/provider/twilio/twilio_test.go b/alerting/provider/twilio/twilio_test.go index 4b89a393..6da79a19 100644 --- a/alerting/provider/twilio/twilio_test.go +++ b/alerting/provider/twilio/twilio_test.go @@ -5,6 +5,7 @@ import ( "strings" "testing" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/core" ) @@ -32,7 +33,7 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) { To: "4", } description := "alert-description" - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{Name: "service-name"}, &core.Alert{Description: &description}, &core.Result{}, true) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{Name: "service-name"}, &alert.Alert{Description: &description}, &core.Result{}, true) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } @@ -58,7 +59,7 @@ func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) { To: "1", } description := "alert-description" - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{Name: "service-name"}, &core.Alert{Description: &description}, &core.Result{}, false) + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{Name: "service-name"}, &alert.Alert{Description: &description}, &core.Result{}, false) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } diff --git a/config/config.go b/config/config.go index ba8d5272..101ccb95 100644 --- a/config/config.go +++ b/config/config.go @@ -5,8 +5,10 @@ import ( "io/ioutil" "log" "os" + "time" "github.com/TwinProduction/gatus/alerting" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/alerting/provider" "github.com/TwinProduction/gatus/core" "github.com/TwinProduction/gatus/k8s" @@ -39,13 +41,8 
@@ var ( // ErrConfigFileNotFound is an error returned when the configuration file could not be found ErrConfigFileNotFound = errors.New("configuration file not found") - // ErrConfigNotLoaded is an error returned when an attempt to Get() the configuration before loading it is made - ErrConfigNotLoaded = errors.New("configuration is nil") - // ErrInvalidSecurityConfig is an error returned when the security configuration is invalid ErrInvalidSecurityConfig = errors.New("invalid security configuration") - - config *Config ) // Config is the main configuration structure @@ -56,6 +53,10 @@ type Config struct { // Metrics Whether to expose metrics at /metrics Metrics bool `yaml:"metrics"` + // SkipInvalidConfigUpdate Whether to make the application ignore invalid configuration + // if the configuration file is updated while the application is running + SkipInvalidConfigUpdate bool `yaml:"skip-invalid-config-update"` + // DisableMonitoringLock Whether to disable the monitoring lock // The monitoring lock is what prevents multiple services from being processed at the same time. // Disabling this may lead to inaccurate response times @@ -78,47 +79,57 @@ type Config struct { // Web is the configuration for the web listener Web *WebConfig `yaml:"web"` + + filePath string // path to the file from which config was loaded from + lastFileModTime time.Time // last modification time } -// Get returns the configuration, or panics if the configuration hasn't loaded yet -func Get() *Config { - if config == nil { - panic(ErrConfigNotLoaded) +// HasLoadedConfigurationFileBeenModified returns whether the file that the +// configuration has been loaded from has been modified since it was last read +func (config Config) HasLoadedConfigurationFileBeenModified() bool { + if fileInfo, err := os.Stat(config.filePath); err == nil { + if !fileInfo.ModTime().IsZero() { + return config.lastFileModTime.Unix() != fileInfo.ModTime().Unix() + } } - return config + return false } -// Set sets the configuration -// Used only for testing -func Set(cfg *Config) { - config = cfg +// UpdateLastFileModTime refreshes Config.lastFileModTime +func (config *Config) UpdateLastFileModTime() { + if fileInfo, err := os.Stat(config.filePath); err == nil { + if !fileInfo.ModTime().IsZero() { + config.lastFileModTime = fileInfo.ModTime() + } + } } // Load loads a custom configuration file // Note that the misconfiguration of some fields may lead to panics. This is on purpose. 
-func Load(configFile string) error { +func Load(configFile string) (*Config, error) { log.Printf("[config][Load] Reading configuration from configFile=%s", configFile) cfg, err := readConfigurationFile(configFile) if err != nil { if os.IsNotExist(err) { - return ErrConfigFileNotFound + return nil, ErrConfigFileNotFound } - return err + return nil, err } - config = cfg - return nil + cfg.filePath = configFile + cfg.UpdateLastFileModTime() + return cfg, nil } // LoadDefaultConfiguration loads the default configuration file -func LoadDefaultConfiguration() error { - err := Load(DefaultConfigurationFilePath) +func LoadDefaultConfiguration() (*Config, error) { + cfg, err := Load(DefaultConfigurationFilePath) if err != nil { if err == ErrConfigFileNotFound { return Load(DefaultFallbackConfigurationFilePath) } - return err + return nil, err } - return nil + return cfg, nil } func readConfigurationFile(fileName string) (config *Config, err error) { @@ -144,23 +155,33 @@ func parseAndValidateConfigBytes(yamlBytes []byte) (config *Config, err error) { } else { // Note that the functions below may panic, and this is on purpose to prevent Gatus from starting with // invalid configurations - validateAlertingConfig(config) - validateSecurityConfig(config) - validateServicesConfig(config) - validateKubernetesConfig(config) - validateWebConfig(config) - validateStorageConfig(config) + validateAlertingConfig(config.Alerting, config.Services, config.Debug) + if err := validateSecurityConfig(config); err != nil { + return nil, err + } + if err := validateServicesConfig(config); err != nil { + return nil, err + } + if err := validateKubernetesConfig(config); err != nil { + return nil, err + } + if err := validateWebConfig(config); err != nil { + return nil, err + } + if err := validateStorageConfig(config); err != nil { + return nil, err + } } return } -func validateStorageConfig(config *Config) { +func validateStorageConfig(config *Config) error { if config.Storage == nil { config.Storage = &storage.Config{} } err := storage.Initialize(config.Storage) if err != nil { - panic(err) + return err } // Remove all ServiceStatus that represent services which no longer exist in the configuration var keys []string @@ -171,44 +192,52 @@ func validateStorageConfig(config *Config) { if numberOfServiceStatusesDeleted > 0 { log.Printf("[config][validateStorageConfig] Deleted %d service statuses because their matching services no longer existed", numberOfServiceStatusesDeleted) } + return nil } -func validateWebConfig(config *Config) { +func validateWebConfig(config *Config) error { if config.Web == nil { config.Web = &WebConfig{Address: DefaultAddress, Port: DefaultPort} } else { - config.Web.validateAndSetDefaults() + return config.Web.validateAndSetDefaults() } + return nil } -func validateKubernetesConfig(config *Config) { +// deprecated +// I don't like the current implementation. +func validateKubernetesConfig(config *Config) error { if config.Kubernetes != nil && config.Kubernetes.AutoDiscover { if config.Kubernetes.ServiceTemplate == nil { - panic("kubernetes.service-template cannot be nil") + return errors.New("kubernetes.service-template cannot be nil") } if config.Debug { log.Println("[config][validateKubernetesConfig] Automatically discovering Kubernetes services...") } discoveredServices, err := k8s.DiscoverServices(config.Kubernetes) if err != nil { - panic(err) + return err } config.Services = append(config.Services, discoveredServices...) 
log.Printf("[config][validateKubernetesConfig] Discovered %d Kubernetes services", len(discoveredServices)) } + return nil } -func validateServicesConfig(config *Config) { +func validateServicesConfig(config *Config) error { for _, service := range config.Services { if config.Debug { log.Printf("[config][validateServicesConfig] Validating service '%s'", service.Name) } - service.ValidateAndSetDefaults() + if err := service.ValidateAndSetDefaults(); err != nil { + return err + } } log.Printf("[config][validateServicesConfig] Validated %d services", len(config.Services)) + return nil } -func validateSecurityConfig(config *Config) { +func validateSecurityConfig(config *Config) error { if config.Security != nil { if config.Security.IsValid() { if config.Debug { @@ -217,44 +246,45 @@ func validateSecurityConfig(config *Config) { } else { // If there was an attempt to configure security, then it must mean that some confidential or private // data are exposed. As a result, we'll force a panic because it's better to be safe than sorry. - panic(ErrInvalidSecurityConfig) + return ErrInvalidSecurityConfig } } + return nil } // validateAlertingConfig validates the alerting configuration // Note that the alerting configuration has to be validated before the service configuration, because the default alert // returned by provider.AlertProvider.GetDefaultAlert() must be parsed before core.Service.ValidateAndSetDefaults() // sets the default alert values when none are set. -func validateAlertingConfig(config *Config) { - if config.Alerting == nil { +func validateAlertingConfig(alertingConfig *alerting.Config, services []*core.Service, debug bool) { + if alertingConfig == nil { log.Printf("[config][validateAlertingConfig] Alerting is not configured") return } - alertTypes := []core.AlertType{ - core.CustomAlert, - core.DiscordAlert, - core.MattermostAlert, - core.MessagebirdAlert, - core.PagerDutyAlert, - core.SlackAlert, - core.TelegramAlert, - core.TwilioAlert, + alertTypes := []alert.Type{ + alert.TypeCustom, + alert.TypeDiscord, + alert.TypeMattermost, + alert.TypeMessagebird, + alert.TypePagerDuty, + alert.TypeSlack, + alert.TypeTelegram, + alert.TypeTwilio, } - var validProviders, invalidProviders []core.AlertType + var validProviders, invalidProviders []alert.Type for _, alertType := range alertTypes { - alertProvider := GetAlertingProviderByAlertType(config, alertType) + alertProvider := alertingConfig.GetAlertingProviderByAlertType(alertType) if alertProvider != nil { if alertProvider.IsValid() { // Parse alerts with the provider's default alert if alertProvider.GetDefaultAlert() != nil { - for _, service := range config.Services { - for alertIndex, alert := range service.Alerts { - if alertType == alert.Type { - if config.Debug { + for _, service := range services { + for alertIndex, serviceAlert := range service.Alerts { + if alertType == serviceAlert.Type { + if debug { log.Printf("[config][validateAlertingConfig] Parsing alert %d with provider's default alert for provider=%s in service=%s", alertIndex, alertType, service.Name) } - provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), alert) + provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), serviceAlert) } } } @@ -270,58 +300,3 @@ func validateAlertingConfig(config *Config) { } log.Printf("[config][validateAlertingConfig] configuredProviders=%s; ignoredProviders=%s", validProviders, invalidProviders) } - -// GetAlertingProviderByAlertType returns an provider.AlertProvider by its corresponding core.AlertType -func 
GetAlertingProviderByAlertType(config *Config, alertType core.AlertType) provider.AlertProvider { - switch alertType { - case core.CustomAlert: - if config.Alerting.Custom == nil { - // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil - return nil - } - return config.Alerting.Custom - case core.DiscordAlert: - if config.Alerting.Discord == nil { - // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil - return nil - } - return config.Alerting.Discord - case core.MattermostAlert: - if config.Alerting.Mattermost == nil { - // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil - return nil - } - return config.Alerting.Mattermost - case core.MessagebirdAlert: - if config.Alerting.Messagebird == nil { - // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil - return nil - } - return config.Alerting.Messagebird - case core.PagerDutyAlert: - if config.Alerting.PagerDuty == nil { - // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil - return nil - } - return config.Alerting.PagerDuty - case core.SlackAlert: - if config.Alerting.Slack == nil { - // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil - return nil - } - return config.Alerting.Slack - case core.TelegramAlert: - if config.Alerting.Telegram == nil { - // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil - return nil - } - return config.Alerting.Telegram - case core.TwilioAlert: - if config.Alerting.Twilio == nil { - // Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil - return nil - } - return config.Alerting.Twilio - } - return nil -} diff --git a/config/config_test.go b/config/config_test.go index f9d5b788..ba91bdcc 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -7,6 +7,7 @@ import ( "time" "github.com/TwinProduction/gatus/alerting" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/alerting/provider/custom" "github.com/TwinProduction/gatus/alerting/provider/discord" "github.com/TwinProduction/gatus/alerting/provider/mattermost" @@ -20,31 +21,15 @@ import ( v1 "k8s.io/api/core/v1" ) -func TestGetBeforeConfigIsLoaded(t *testing.T) { - defer func() { recover() }() - Get() - t.Fatal("Should've panicked because the configuration hasn't been loaded yet") -} - -func TestSet(t *testing.T) { - if config != nil { - t.Fatal("config should've been nil") - } - Set(&Config{}) - if config == nil { - t.Fatal("config shouldn't have been nil") - } -} - func TestLoadFileThatDoesNotExist(t *testing.T) { - err := Load("file-that-does-not-exist.yaml") + _, err := Load("file-that-does-not-exist.yaml") if err == nil { t.Error("Should've returned an error, because the file specified doesn't exist") } } func TestLoadDefaultConfigurationFile(t *testing.T) { - err := LoadDefaultConfiguration() + _, err := LoadDefaultConfiguration() if err == nil { t.Error("Should've returned an error, because there's no configuration files at the default path nor the default fallback path") } @@ -419,8 +404,8 @@ services: t.Fatal("There should've been 7 alerts configured") } - if config.Services[0].Alerts[0].Type != core.SlackAlert { - t.Errorf("The type of the alert should've been %s, but it was 
%s", core.SlackAlert, config.Services[0].Alerts[0].Type) + if config.Services[0].Alerts[0].Type != alert.TypeSlack { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeSlack, config.Services[0].Alerts[0].Type) } if !config.Services[0].Alerts[0].IsEnabled() { t.Error("The alert should've been enabled") @@ -432,8 +417,8 @@ services: t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[0].SuccessThreshold) } - if config.Services[0].Alerts[1].Type != core.PagerDutyAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.PagerDutyAlert, config.Services[0].Alerts[1].Type) + if config.Services[0].Alerts[1].Type != alert.TypePagerDuty { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypePagerDuty, config.Services[0].Alerts[1].Type) } if config.Services[0].Alerts[1].GetDescription() != "Healthcheck failed 7 times in a row" { t.Errorf("The description of the alert should've been %s, but it was %s", "Healthcheck failed 7 times in a row", config.Services[0].Alerts[1].GetDescription()) @@ -445,8 +430,8 @@ services: t.Errorf("The success threshold of the alert should've been %d, but it was %d", 5, config.Services[0].Alerts[1].SuccessThreshold) } - if config.Services[0].Alerts[2].Type != core.MattermostAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.MattermostAlert, config.Services[0].Alerts[2].Type) + if config.Services[0].Alerts[2].Type != alert.TypeMattermost { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeMattermost, config.Services[0].Alerts[2].Type) } if !config.Services[0].Alerts[2].IsEnabled() { t.Error("The alert should've been enabled") @@ -458,15 +443,15 @@ services: t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[2].SuccessThreshold) } - if config.Services[0].Alerts[3].Type != core.MessagebirdAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.MessagebirdAlert, config.Services[0].Alerts[3].Type) + if config.Services[0].Alerts[3].Type != alert.TypeMessagebird { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeMessagebird, config.Services[0].Alerts[3].Type) } if config.Services[0].Alerts[3].IsEnabled() { t.Error("The alert should've been disabled") } - if config.Services[0].Alerts[4].Type != core.DiscordAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.DiscordAlert, config.Services[0].Alerts[4].Type) + if config.Services[0].Alerts[4].Type != alert.TypeDiscord { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeDiscord, config.Services[0].Alerts[4].Type) } if !config.Services[0].Alerts[4].IsEnabled() { t.Error("The alert should've been enabled") @@ -478,8 +463,8 @@ services: t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[4].SuccessThreshold) } - if config.Services[0].Alerts[5].Type != core.TelegramAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.TelegramAlert, config.Services[0].Alerts[5].Type) + if config.Services[0].Alerts[5].Type != alert.TypeTelegram { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeTelegram, config.Services[0].Alerts[5].Type) } if !config.Services[0].Alerts[5].IsEnabled() { t.Error("The alert should've been enabled") @@ -491,8 +476,8 @@ services: 
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[5].SuccessThreshold) } - if config.Services[0].Alerts[6].Type != core.TwilioAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.TwilioAlert, config.Services[0].Alerts[6].Type) + if config.Services[0].Alerts[6].Type != alert.TypeTwilio { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeTwilio, config.Services[0].Alerts[6].Type) } if !config.Services[0].Alerts[6].IsEnabled() { t.Error("The alert should've been enabled") @@ -633,7 +618,7 @@ services: if config.Alerting.Discord.WebhookURL != "http://example.org" { t.Errorf("Discord webhook should've been %s, but was %s", "http://example.org", config.Alerting.Discord.WebhookURL) } - if GetAlertingProviderByAlertType(config, core.DiscordAlert) != config.Alerting.Discord { + if config.Alerting.GetAlertingProviderByAlertType(alert.TypeDiscord) != config.Alerting.Discord { t.Error("expected discord configuration") } @@ -670,8 +655,8 @@ services: t.Fatal("There should've been 7 alerts configured") } - if config.Services[0].Alerts[0].Type != core.SlackAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[0].Type) + if config.Services[0].Alerts[0].Type != alert.TypeSlack { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeSlack, config.Services[0].Alerts[0].Type) } if !config.Services[0].Alerts[0].IsEnabled() { t.Error("The alert should've been enabled") @@ -683,8 +668,8 @@ services: t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[0].SuccessThreshold) } - if config.Services[0].Alerts[1].Type != core.PagerDutyAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.PagerDutyAlert, config.Services[0].Alerts[1].Type) + if config.Services[0].Alerts[1].Type != alert.TypePagerDuty { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypePagerDuty, config.Services[0].Alerts[1].Type) } if config.Services[0].Alerts[1].GetDescription() != "default description" { t.Errorf("The description of the alert should've been %s, but it was %s", "default description", config.Services[0].Alerts[1].GetDescription()) @@ -696,8 +681,8 @@ services: t.Errorf("The success threshold of the alert should've been %d, but it was %d", 5, config.Services[0].Alerts[1].SuccessThreshold) } - if config.Services[0].Alerts[2].Type != core.MattermostAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.MattermostAlert, config.Services[0].Alerts[2].Type) + if config.Services[0].Alerts[2].Type != alert.TypeMattermost { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeMattermost, config.Services[0].Alerts[2].Type) } if !config.Services[0].Alerts[2].IsEnabled() { t.Error("The alert should've been enabled") @@ -709,8 +694,8 @@ services: t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[2].SuccessThreshold) } - if config.Services[0].Alerts[3].Type != core.MessagebirdAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.MessagebirdAlert, config.Services[0].Alerts[3].Type) + if config.Services[0].Alerts[3].Type != alert.TypeMessagebird { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeMessagebird, config.Services[0].Alerts[3].Type) } if 
config.Services[0].Alerts[3].IsEnabled() { t.Error("The alert should've been disabled") @@ -719,8 +704,8 @@ services: t.Error("The alert should be sending on resolve") } - if config.Services[0].Alerts[4].Type != core.DiscordAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.DiscordAlert, config.Services[0].Alerts[4].Type) + if config.Services[0].Alerts[4].Type != alert.TypeDiscord { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeDiscord, config.Services[0].Alerts[4].Type) } if !config.Services[0].Alerts[4].IsEnabled() { t.Error("The alert should've been enabled") @@ -732,8 +717,8 @@ services: t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[4].SuccessThreshold) } - if config.Services[0].Alerts[5].Type != core.TelegramAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.TelegramAlert, config.Services[0].Alerts[5].Type) + if config.Services[0].Alerts[5].Type != alert.TypeTelegram { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeTelegram, config.Services[0].Alerts[5].Type) } if !config.Services[0].Alerts[5].IsEnabled() { t.Error("The alert should've been enabled") @@ -745,8 +730,8 @@ services: t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[5].SuccessThreshold) } - if config.Services[0].Alerts[6].Type != core.TwilioAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.TwilioAlert, config.Services[0].Alerts[6].Type) + if config.Services[0].Alerts[6].Type != alert.TypeTwilio { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeTwilio, config.Services[0].Alerts[6].Type) } if !config.Services[0].Alerts[6].IsEnabled() { t.Error("The alert should've been enabled") @@ -800,14 +785,14 @@ services: if len(config.Services) != 1 { t.Error("There should've been 2 services") } - if config.Services[0].Alerts[0].Type != core.SlackAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[0].Type) + if config.Services[0].Alerts[0].Type != alert.TypeSlack { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeSlack, config.Services[0].Alerts[0].Type) } - if config.Services[0].Alerts[1].Type != core.SlackAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[1].Type) + if config.Services[0].Alerts[1].Type != alert.TypeSlack { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeSlack, config.Services[0].Alerts[1].Type) } - if config.Services[0].Alerts[2].Type != core.SlackAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[2].Type) + if config.Services[0].Alerts[2].Type != alert.TypeSlack { + t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeSlack, config.Services[0].Alerts[2].Type) } if !config.Services[0].Alerts[0].IsEnabled() { t.Error("The alert should've been enabled") @@ -1209,40 +1194,38 @@ kubernetes: } func TestGetAlertingProviderByAlertType(t *testing.T) { - cfg := &Config{ - Alerting: &alerting.Config{ - Custom: &custom.AlertProvider{}, - Discord: &discord.AlertProvider{}, - Mattermost: &mattermost.AlertProvider{}, - Messagebird: &messagebird.AlertProvider{}, - PagerDuty: &pagerduty.AlertProvider{}, - Slack: 
&slack.AlertProvider{}, - Telegram: &telegram.AlertProvider{}, - Twilio: &twilio.AlertProvider{}, - }, + alertingConfig := &alerting.Config{ + Custom: &custom.AlertProvider{}, + Discord: &discord.AlertProvider{}, + Mattermost: &mattermost.AlertProvider{}, + Messagebird: &messagebird.AlertProvider{}, + PagerDuty: &pagerduty.AlertProvider{}, + Slack: &slack.AlertProvider{}, + Telegram: &telegram.AlertProvider{}, + Twilio: &twilio.AlertProvider{}, } - if GetAlertingProviderByAlertType(cfg, core.CustomAlert) != cfg.Alerting.Custom { + if alertingConfig.GetAlertingProviderByAlertType(alert.TypeCustom) != alertingConfig.Custom { t.Error("expected Custom configuration") } - if GetAlertingProviderByAlertType(cfg, core.DiscordAlert) != cfg.Alerting.Discord { + if alertingConfig.GetAlertingProviderByAlertType(alert.TypeDiscord) != alertingConfig.Discord { t.Error("expected Discord configuration") } - if GetAlertingProviderByAlertType(cfg, core.MattermostAlert) != cfg.Alerting.Mattermost { + if alertingConfig.GetAlertingProviderByAlertType(alert.TypeMattermost) != alertingConfig.Mattermost { t.Error("expected Mattermost configuration") } - if GetAlertingProviderByAlertType(cfg, core.MessagebirdAlert) != cfg.Alerting.Messagebird { + if alertingConfig.GetAlertingProviderByAlertType(alert.TypeMessagebird) != alertingConfig.Messagebird { t.Error("expected Messagebird configuration") } - if GetAlertingProviderByAlertType(cfg, core.PagerDutyAlert) != cfg.Alerting.PagerDuty { + if alertingConfig.GetAlertingProviderByAlertType(alert.TypePagerDuty) != alertingConfig.PagerDuty { t.Error("expected PagerDuty configuration") } - if GetAlertingProviderByAlertType(cfg, core.SlackAlert) != cfg.Alerting.Slack { + if alertingConfig.GetAlertingProviderByAlertType(alert.TypeSlack) != alertingConfig.Slack { t.Error("expected Slack configuration") } - if GetAlertingProviderByAlertType(cfg, core.TelegramAlert) != cfg.Alerting.Telegram { + if alertingConfig.GetAlertingProviderByAlertType(alert.TypeTelegram) != alertingConfig.Telegram { t.Error("expected Telegram configuration") } - if GetAlertingProviderByAlertType(cfg, core.TwilioAlert) != cfg.Alerting.Twilio { + if alertingConfig.GetAlertingProviderByAlertType(alert.TypeTwilio) != alertingConfig.Twilio { t.Error("expected Twilio configuration") } } diff --git a/config/web.go b/config/web.go index 3e7309e2..55adf3ef 100644 --- a/config/web.go +++ b/config/web.go @@ -16,7 +16,7 @@ type WebConfig struct { } // validateAndSetDefaults checks and sets the default values for fields that are not set -func (web *WebConfig) validateAndSetDefaults() { +func (web *WebConfig) validateAndSetDefaults() error { // Validate the Address if len(web.Address) == 0 { web.Address = DefaultAddress @@ -25,8 +25,9 @@ func (web *WebConfig) validateAndSetDefaults() { if web.Port == 0 { web.Port = DefaultPort } else if web.Port < 0 || web.Port > math.MaxUint16 { - panic(fmt.Sprintf("invalid port: value should be between %d and %d", 0, math.MaxUint16)) + return fmt.Errorf("invalid port: value should be between %d and %d", 0, math.MaxUint16) } + return nil } // SocketAddress returns the combination of the Address and the Port diff --git a/controller/controller.go b/controller/controller.go index 036089be..861006fb 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -44,20 +44,19 @@ func init() { } // Handle creates the router and starts the server -func Handle() { - cfg := config.Get() - var router http.Handler = CreateRouter(cfg) +func Handle(securityConfig *security.Config, 
webConfig *config.WebConfig, enableMetrics bool) { + var router http.Handler = CreateRouter(securityConfig, enableMetrics) if os.Getenv("ENVIRONMENT") == "dev" { router = developmentCorsHandler(router) } server = &http.Server{ - Addr: fmt.Sprintf("%s:%d", cfg.Web.Address, cfg.Web.Port), + Addr: fmt.Sprintf("%s:%d", webConfig.Address, webConfig.Port), Handler: router, ReadTimeout: 15 * time.Second, WriteTimeout: 15 * time.Second, IdleTimeout: 15 * time.Second, } - log.Println("[controller][Handle] Listening on " + cfg.Web.SocketAddress()) + log.Println("[controller][Handle] Listening on " + webConfig.SocketAddress()) if os.Getenv("ROUTER_TEST") == "true" { return } @@ -73,15 +72,15 @@ func Shutdown() { } // CreateRouter creates the router for the http server -func CreateRouter(cfg *config.Config) *mux.Router { +func CreateRouter(securityConfig *security.Config, enabledMetrics bool) *mux.Router { router := mux.NewRouter() - if cfg.Metrics { + if enabledMetrics { router.Handle("/metrics", promhttp.Handler()).Methods("GET") } router.Handle("/health", health.Handler().WithJSON(true)).Methods("GET") router.HandleFunc("/favicon.ico", favIconHandler).Methods("GET") - router.HandleFunc("/api/v1/statuses", secureIfNecessary(cfg, serviceStatusesHandler)).Methods("GET") // No GzipHandler for this one, because we cache the content - router.HandleFunc("/api/v1/statuses/{key}", secureIfNecessary(cfg, GzipHandlerFunc(serviceStatusHandler))).Methods("GET") + router.HandleFunc("/api/v1/statuses", secureIfNecessary(securityConfig, serviceStatusesHandler)).Methods("GET") // No GzipHandler for this one, because we cache the content + router.HandleFunc("/api/v1/statuses/{key}", secureIfNecessary(securityConfig, GzipHandlerFunc(serviceStatusHandler))).Methods("GET") router.HandleFunc("/api/v1/badges/uptime/{duration}/{identifier}", badgeHandler).Methods("GET") // SPA router.HandleFunc("/services/{service}", spaHandler).Methods("GET") @@ -90,9 +89,9 @@ func CreateRouter(cfg *config.Config) *mux.Router { return router } -func secureIfNecessary(cfg *config.Config, handler http.HandlerFunc) http.HandlerFunc { - if cfg.Security != nil && cfg.Security.IsValid() { - return security.Handler(handler, cfg.Security) +func secureIfNecessary(securityConfig *security.Config, handler http.HandlerFunc) http.HandlerFunc { + if securityConfig != nil && securityConfig.IsValid() { + return security.Handler(handler, securityConfig) } return handler } diff --git a/controller/controller_test.go b/controller/controller_test.go index d6f3ea36..269ca5fe 100644 --- a/controller/controller_test.go +++ b/controller/controller_test.go @@ -105,7 +105,7 @@ func TestCreateRouter(t *testing.T) { } watchdog.UpdateServiceStatuses(cfg.Services[0], &core.Result{Success: true, Duration: time.Millisecond, Timestamp: time.Now()}) watchdog.UpdateServiceStatuses(cfg.Services[1], &core.Result{Success: false, Duration: time.Second, Timestamp: time.Now()}) - router := CreateRouter(cfg) + router := CreateRouter(cfg.Security, cfg.Metrics) type Scenario struct { Name string Path string @@ -235,12 +235,10 @@ func TestHandle(t *testing.T) { }, }, } - config.Set(cfg) - defer config.Set(nil) _ = os.Setenv("ROUTER_TEST", "true") _ = os.Setenv("ENVIRONMENT", "dev") defer os.Clearenv() - Handle() + Handle(cfg.Security, cfg.Web, cfg.Metrics) defer Shutdown() request, _ := http.NewRequest("GET", "/health", nil) responseRecorder := httptest.NewRecorder() @@ -273,7 +271,7 @@ func TestServiceStatusesHandler(t *testing.T) { // Can't be bothered dealing with timezone issues 
on the worker that runs the automated tests firstResult.Timestamp = time.Time{} secondResult.Timestamp = time.Time{} - router := CreateRouter(&config.Config{}) + router := CreateRouter(nil, false) type Scenario struct { Name string diff --git a/core/dns.go b/core/dns.go index 8a77ba85..d8011925 100644 --- a/core/dns.go +++ b/core/dns.go @@ -29,16 +29,17 @@ type DNS struct { QueryName string `yaml:"query-name"` } -func (d *DNS) validateAndSetDefault() { +func (d *DNS) validateAndSetDefault() error { if len(d.QueryName) == 0 { - panic(ErrDNSWithNoQueryName) + return ErrDNSWithNoQueryName } if !strings.HasSuffix(d.QueryName, ".") { d.QueryName += "." } if _, ok := dns.StringToType[d.QueryType]; !ok { - panic(ErrDNSWithInvalidQueryType) + return ErrDNSWithInvalidQueryType } + return nil } func (d *DNS) query(url string, result *Result) { diff --git a/core/service.go b/core/service.go index 6db7290a..2e3f8dbc 100644 --- a/core/service.go +++ b/core/service.go @@ -11,6 +11,7 @@ import ( "strings" "time" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/client" ) @@ -72,7 +73,7 @@ type Service struct { Conditions []*Condition `yaml:"conditions"` // Alerts is the alerting configuration for the service in case of failure - Alerts []*Alert `yaml:"alerts"` + Alerts []*alert.Alert `yaml:"alerts"` // Insecure is whether to skip verifying the server's certificate chain and host name Insecure bool `yaml:"insecure,omitempty"` @@ -85,7 +86,7 @@ type Service struct { } // ValidateAndSetDefaults validates the service's configuration and sets the default value of fields that have one -func (service *Service) ValidateAndSetDefaults() { +func (service *Service) ValidateAndSetDefaults() error { // Set default values if service.Interval == 0 { service.Interval = 1 * time.Minute @@ -105,32 +106,32 @@ func (service *Service) ValidateAndSetDefaults() { if _, contentTypeHeaderExists := service.Headers[ContentTypeHeader]; !contentTypeHeaderExists && service.GraphQL { service.Headers[ContentTypeHeader] = "application/json" } - for _, alert := range service.Alerts { - if alert.FailureThreshold <= 0 { - alert.FailureThreshold = 3 + for _, serviceAlert := range service.Alerts { + if serviceAlert.FailureThreshold <= 0 { + serviceAlert.FailureThreshold = 3 } - if alert.SuccessThreshold <= 0 { - alert.SuccessThreshold = 2 + if serviceAlert.SuccessThreshold <= 0 { + serviceAlert.SuccessThreshold = 2 } } if len(service.Name) == 0 { - panic(ErrServiceWithNoName) + return ErrServiceWithNoName } if len(service.URL) == 0 { - panic(ErrServiceWithNoURL) + return ErrServiceWithNoURL } if len(service.Conditions) == 0 { - panic(ErrServiceWithNoCondition) + return ErrServiceWithNoCondition } if service.DNS != nil { - service.DNS.validateAndSetDefault() - return + return service.DNS.validateAndSetDefault() } // Make sure that the request can be created _, err := http.NewRequest(service.Method, service.URL, bytes.NewBuffer([]byte(service.Body))) if err != nil { - panic(err) + return err } + return nil } // EvaluateHealth sends a request to the service's URL and evaluates the conditions of the service. 
@@ -155,8 +156,8 @@ func (service *Service) EvaluateHealth() *Result { } // GetAlertsTriggered returns a slice of alerts that have been triggered -func (service *Service) GetAlertsTriggered() []Alert { - var alerts []Alert +func (service *Service) GetAlertsTriggered() []alert.Alert { + var alerts []alert.Alert if service.NumberOfFailuresInARow == 0 { return alerts } diff --git a/core/service_test.go b/core/service_test.go index 65dbcff5..348bbb45 100644 --- a/core/service_test.go +++ b/core/service_test.go @@ -5,6 +5,8 @@ import ( "strings" "testing" "time" + + "github.com/TwinProduction/gatus/alerting/alert" ) func TestService_ValidateAndSetDefaults(t *testing.T) { @@ -13,7 +15,7 @@ func TestService_ValidateAndSetDefaults(t *testing.T) { Name: "twinnation-health", URL: "https://twinnation.org/health", Conditions: []*Condition{&condition}, - Alerts: []*Alert{{Type: PagerDutyAlert}}, + Alerts: []*alert.Alert{{Type: alert.TypePagerDuty}}, } service.ValidateAndSetDefaults() if service.Method != "GET" { @@ -98,7 +100,7 @@ func TestService_GetAlertsTriggered(t *testing.T) { Name: "twinnation-health", URL: "https://twinnation.org/health", Conditions: []*Condition{&condition}, - Alerts: []*Alert{{Type: PagerDutyAlert, Enabled: &enabled}}, + Alerts: []*alert.Alert{{Type: alert.TypePagerDuty, Enabled: &enabled}}, } service.ValidateAndSetDefaults() if service.NumberOfFailuresInARow != 0 { diff --git a/main.go b/main.go index b9d87758..1fec8406 100644 --- a/main.go +++ b/main.go @@ -5,6 +5,7 @@ import ( "os" "os/signal" "syscall" + "time" "github.com/TwinProduction/gatus/config" "github.com/TwinProduction/gatus/controller" @@ -13,37 +14,75 @@ import ( ) func main() { - cfg := loadConfiguration() - go watchdog.Monitor(cfg) - go controller.Handle() + cfg, err := loadConfiguration() + if err != nil { + panic(err) + } + start(cfg) // Wait for termination signal - sig := make(chan os.Signal, 1) + signalChannel := make(chan os.Signal, 1) done := make(chan bool, 1) - signal.Notify(sig, os.Interrupt, syscall.SIGTERM) + signal.Notify(signalChannel, os.Interrupt, syscall.SIGTERM) go func() { - <-sig + <-signalChannel log.Println("Received termination signal, attempting to gracefully shut down") - controller.Shutdown() - err := storage.Get().Save() - if err != nil { - log.Println("Failed to save storage provider:", err.Error()) - } + stop() + save() done <- true }() <-done log.Println("Shutting down") } -func loadConfiguration() *config.Config { - var err error +func stop() { + watchdog.Shutdown() + controller.Shutdown() +} + +func save() { + err := storage.Get().Save() + if err != nil { + log.Println("Failed to save storage provider:", err.Error()) + } +} + +func start(cfg *config.Config) { + go controller.Handle(cfg.Security, cfg.Web, cfg.Metrics) + watchdog.Monitor(cfg) + go listenToConfigurationFileChanges(cfg) +} + +func loadConfiguration() (cfg *config.Config, err error) { customConfigFile := os.Getenv("GATUS_CONFIG_FILE") if len(customConfigFile) > 0 { - err = config.Load(customConfigFile) + cfg, err = config.Load(customConfigFile) } else { - err = config.LoadDefaultConfiguration() + cfg, err = config.LoadDefaultConfiguration() + } + return +} + +func listenToConfigurationFileChanges(cfg *config.Config) { + for { + time.Sleep(30 * time.Second) + if cfg.HasLoadedConfigurationFileBeenModified() { + log.Println("[main][listenToConfigurationFileChanges] Configuration file has been modified") + save() + updatedConfig, err := loadConfiguration() + if err != nil { + if cfg.SkipInvalidConfigUpdate { + 
log.Println("[main][listenToConfigurationFileChanges] Failed to load new configuration:", err.Error()) + log.Println("[main][listenToConfigurationFileChanges] The configuration file was updated, but it is not valid. The old configuration will continue being used.") + // Update the last file modification time to avoid trying to process the same invalid configuration again + cfg.UpdateLastFileModTime() + continue + } else { + panic(err) + } + } + stop() + start(updatedConfig) + return + } } - if err != nil { - panic(err) - } - return config.Get() } diff --git a/metric/metric.go b/metric/metric.go index e02a7310..a98e5eb8 100644 --- a/metric/metric.go +++ b/metric/metric.go @@ -5,7 +5,6 @@ import ( "strconv" "sync" - "github.com/TwinProduction/gatus/config" "github.com/TwinProduction/gatus/core" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" @@ -19,18 +18,16 @@ var ( // PublishMetricsForService publishes metrics for the given service and its result. // These metrics will be exposed at /metrics if the metrics are enabled func PublishMetricsForService(service *core.Service, result *core.Result) { - if config.Get().Metrics { - rwLock.Lock() - gauge, exists := gauges[fmt.Sprintf("%s_%s", service.Name, service.URL)] - if !exists { - gauge = promauto.NewGaugeVec(prometheus.GaugeOpts{ - Subsystem: "gatus", - Name: "tasks", - ConstLabels: prometheus.Labels{"service": service.Name, "url": service.URL}, - }, []string{"status", "success"}) - gauges[fmt.Sprintf("%s_%s", service.Name, service.URL)] = gauge - } - rwLock.Unlock() - gauge.WithLabelValues(strconv.Itoa(result.HTTPStatus), strconv.FormatBool(result.Success)).Inc() + rwLock.Lock() + gauge, exists := gauges[fmt.Sprintf("%s_%s", service.Name, service.URL)] + if !exists { + gauge = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Subsystem: "gatus", + Name: "tasks", + ConstLabels: prometheus.Labels{"service": service.Name, "url": service.URL}, + }, []string{"status", "success"}) + gauges[fmt.Sprintf("%s_%s", service.Name, service.URL)] = gauge } + rwLock.Unlock() + gauge.WithLabelValues(strconv.Itoa(result.HTTPStatus), strconv.FormatBool(result.Success)).Inc() } diff --git a/storage/storage.go b/storage/storage.go index e96b9a22..9adc1f87 100644 --- a/storage/storage.go +++ b/storage/storage.go @@ -1,6 +1,7 @@ package storage import ( + "context" "log" "time" @@ -15,6 +16,9 @@ var ( // Because store.Store is an interface, a nil check wouldn't be sufficient, so instead of doing reflection // every single time Get is called, we'll just lazily keep track of its existence through this variable initialized bool + + ctx context.Context + cancelFunc context.CancelFunc ) // Get retrieves the storage provider @@ -40,24 +44,34 @@ func Initialize(cfg *Config) error { return err } } else { + if cancelFunc != nil { + // Stop the active autoSave task + cancelFunc() + } + ctx, cancelFunc = context.WithCancel(context.Background()) log.Printf("[storage][Initialize] Creating storage provider with file=%s", cfg.File) provider, err = memory.NewStore(cfg.File) if err != nil { return err } - go autoSave(7 * time.Minute) + go autoSave(7*time.Minute, ctx) } return nil } -// autoSave automatically calls the Save function of the provider at every interval -func autoSave(interval time.Duration) { +// autoSave automatically calls the SaveFunc function of the provider at every interval +func autoSave(interval time.Duration, ctx context.Context) { for { - time.Sleep(interval) - log.Printf("[storage][autoSave] Saving") - 
err := provider.Save() - if err != nil { - log.Println("[storage][autoSave] Save failed:", err.Error()) + select { + case <-ctx.Done(): + log.Printf("[storage][autoSave] Stopping active job") + return + case <-time.After(interval): + log.Printf("[storage][autoSave] Saving") + err := provider.Save() + if err != nil { + log.Println("[storage][autoSave] Save failed:", err.Error()) + } } } } diff --git a/watchdog/alerting.go b/watchdog/alerting.go index addbd2d2..05672cf7 100644 --- a/watchdog/alerting.go +++ b/watchdog/alerting.go @@ -4,96 +4,96 @@ import ( "encoding/json" "log" - "github.com/TwinProduction/gatus/config" + "github.com/TwinProduction/gatus/alerting" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/core" ) // HandleAlerting takes care of alerts to resolve and alerts to trigger based on result success or failure -func HandleAlerting(service *core.Service, result *core.Result) { - cfg := config.Get() - if cfg.Alerting == nil { +func HandleAlerting(service *core.Service, result *core.Result, alertingConfig *alerting.Config, debug bool) { + if alertingConfig == nil { return } if result.Success { - handleAlertsToResolve(service, result, cfg) + handleAlertsToResolve(service, result, alertingConfig, debug) } else { - handleAlertsToTrigger(service, result, cfg) + handleAlertsToTrigger(service, result, alertingConfig, debug) } } -func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *config.Config) { +func handleAlertsToTrigger(service *core.Service, result *core.Result, alertingConfig *alerting.Config, debug bool) { service.NumberOfSuccessesInARow = 0 service.NumberOfFailuresInARow++ - for _, alert := range service.Alerts { - // If the alert hasn't been triggered, move to the next one - if !alert.IsEnabled() || alert.FailureThreshold > service.NumberOfFailuresInARow { + for _, serviceAlert := range service.Alerts { + // If the serviceAlert hasn't been triggered, move to the next one + if !serviceAlert.IsEnabled() || serviceAlert.FailureThreshold > service.NumberOfFailuresInARow { continue } - if alert.Triggered { - if cfg.Debug { - log.Printf("[watchdog][handleAlertsToTrigger] Alert for service=%s with description='%s' has already been TRIGGERED, skipping", service.Name, alert.GetDescription()) + if serviceAlert.Triggered { + if debug { + log.Printf("[watchdog][handleAlertsToTrigger] Alert for service=%s with description='%s' has already been TRIGGERED, skipping", service.Name, serviceAlert.GetDescription()) } continue } - alertProvider := config.GetAlertingProviderByAlertType(cfg, alert.Type) + alertProvider := alertingConfig.GetAlertingProviderByAlertType(serviceAlert.Type) if alertProvider != nil && alertProvider.IsValid() { - log.Printf("[watchdog][handleAlertsToTrigger] Sending %s alert because alert for service=%s with description='%s' has been TRIGGERED", alert.Type, service.Name, alert.GetDescription()) - customAlertProvider := alertProvider.ToCustomAlertProvider(service, alert, result, false) + log.Printf("[watchdog][handleAlertsToTrigger] Sending %s serviceAlert because serviceAlert for service=%s with description='%s' has been TRIGGERED", serviceAlert.Type, service.Name, serviceAlert.GetDescription()) + customAlertProvider := alertProvider.ToCustomAlertProvider(service, serviceAlert, result, false) // TODO: retry on error var err error // We need to extract the DedupKey from PagerDuty's response - if alert.Type == core.PagerDutyAlert { + if serviceAlert.Type == alert.TypePagerDuty { var body []byte - if body, err = 
customAlertProvider.Send(service.Name, alert.GetDescription(), false); err == nil { + if body, err = customAlertProvider.Send(service.Name, serviceAlert.GetDescription(), false); err == nil { var response pagerDutyResponse if err = json.Unmarshal(body, &response); err != nil { log.Printf("[watchdog][handleAlertsToTrigger] Ran into error unmarshaling pagerduty response: %s", err.Error()) } else { - alert.ResolveKey = response.DedupKey + serviceAlert.ResolveKey = response.DedupKey } } } else { - // All other alert types don't need to extract anything from the body, so we can just send the request right away - _, err = customAlertProvider.Send(service.Name, alert.GetDescription(), false) + // All other serviceAlert types don't need to extract anything from the body, so we can just send the request right away + _, err = customAlertProvider.Send(service.Name, serviceAlert.GetDescription(), false) } if err != nil { - log.Printf("[watchdog][handleAlertsToTrigger] Failed to send an alert for service=%s: %s", service.Name, err.Error()) + log.Printf("[watchdog][handleAlertsToTrigger] Failed to send an serviceAlert for service=%s: %s", service.Name, err.Error()) } else { - alert.Triggered = true + serviceAlert.Triggered = true } } else { - log.Printf("[watchdog][handleAlertsToResolve] Not sending alert of type=%s despite being TRIGGERED, because the provider wasn't configured properly", alert.Type) + log.Printf("[watchdog][handleAlertsToResolve] Not sending serviceAlert of type=%s despite being TRIGGERED, because the provider wasn't configured properly", serviceAlert.Type) } } } -func handleAlertsToResolve(service *core.Service, result *core.Result, cfg *config.Config) { +func handleAlertsToResolve(service *core.Service, result *core.Result, alertingConfig *alerting.Config, debug bool) { service.NumberOfSuccessesInARow++ - for _, alert := range service.Alerts { - if !alert.IsEnabled() || !alert.Triggered || alert.SuccessThreshold > service.NumberOfSuccessesInARow { + for _, serviceAlert := range service.Alerts { + if !serviceAlert.IsEnabled() || !serviceAlert.Triggered || serviceAlert.SuccessThreshold > service.NumberOfSuccessesInARow { continue } - // Even if the alert provider returns an error, we still set the alert's Triggered variable to false. + // Even if the serviceAlert provider returns an error, we still set the serviceAlert's Triggered variable to false. // Further explanation can be found on Alert's Triggered field. 
- alert.Triggered = false - if !alert.IsSendingOnResolved() { + serviceAlert.Triggered = false + if !serviceAlert.IsSendingOnResolved() { continue } - alertProvider := config.GetAlertingProviderByAlertType(cfg, alert.Type) + alertProvider := alertingConfig.GetAlertingProviderByAlertType(serviceAlert.Type) if alertProvider != nil && alertProvider.IsValid() { - log.Printf("[watchdog][handleAlertsToResolve] Sending %s alert because alert for service=%s with description='%s' has been RESOLVED", alert.Type, service.Name, alert.GetDescription()) - customAlertProvider := alertProvider.ToCustomAlertProvider(service, alert, result, true) + log.Printf("[watchdog][handleAlertsToResolve] Sending %s serviceAlert because serviceAlert for service=%s with description='%s' has been RESOLVED", serviceAlert.Type, service.Name, serviceAlert.GetDescription()) + customAlertProvider := alertProvider.ToCustomAlertProvider(service, serviceAlert, result, true) // TODO: retry on error - _, err := customAlertProvider.Send(service.Name, alert.GetDescription(), true) + _, err := customAlertProvider.Send(service.Name, serviceAlert.GetDescription(), true) if err != nil { - log.Printf("[watchdog][handleAlertsToResolve] Failed to send an alert for service=%s: %s", service.Name, err.Error()) + log.Printf("[watchdog][handleAlertsToResolve] Failed to send an serviceAlert for service=%s: %s", service.Name, err.Error()) } else { - if alert.Type == core.PagerDutyAlert { - alert.ResolveKey = "" + if serviceAlert.Type == alert.TypePagerDuty { + serviceAlert.ResolveKey = "" } } } else { - log.Printf("[watchdog][handleAlertsToResolve] Not sending alert of type=%s despite being RESOLVED, because the provider wasn't configured properly", alert.Type) + log.Printf("[watchdog][handleAlertsToResolve] Not sending serviceAlert of type=%s despite being RESOLVED, because the provider wasn't configured properly", serviceAlert.Type) } } service.NumberOfFailuresInARow = 0 diff --git a/watchdog/alerting_test.go b/watchdog/alerting_test.go index acf056b2..f5618609 100644 --- a/watchdog/alerting_test.go +++ b/watchdog/alerting_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/TwinProduction/gatus/alerting" + "github.com/TwinProduction/gatus/alerting/alert" "github.com/TwinProduction/gatus/alerting/provider/custom" "github.com/TwinProduction/gatus/alerting/provider/pagerduty" "github.com/TwinProduction/gatus/config" @@ -24,13 +25,12 @@ func TestHandleAlerting(t *testing.T) { }, }, } - config.Set(cfg) enabled := true service := &core.Service{ URL: "http://example.com", - Alerts: []*core.Alert{ + Alerts: []*alert.Alert{ { - Type: core.CustomAlert, + Type: alert.TypeCustom, Enabled: &enabled, FailureThreshold: 2, SuccessThreshold: 3, @@ -41,50 +41,40 @@ func TestHandleAlerting(t *testing.T) { } verify(t, service, 0, 0, false, "The alert shouldn't start triggered") - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 1, 0, false, "The alert shouldn't have triggered") - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 2, 0, true, "The alert should've triggered") - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 3, 0, true, "The alert should still be triggered") - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, 
&core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 4, 0, true, "The alert should still be triggered") - HandleAlerting(service, &core.Result{Success: true}) + HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug) verify(t, service, 0, 1, true, "The alert should still be triggered (because service.Alerts[0].SuccessThreshold is 3)") - HandleAlerting(service, &core.Result{Success: true}) + HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug) verify(t, service, 0, 2, true, "The alert should still be triggered (because service.Alerts[0].SuccessThreshold is 3)") - HandleAlerting(service, &core.Result{Success: true}) + HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug) verify(t, service, 0, 3, false, "The alert should've been resolved") - HandleAlerting(service, &core.Result{Success: true}) + HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug) verify(t, service, 0, 4, false, "The alert should no longer be triggered") } func TestHandleAlertingWhenAlertingConfigIsNil(t *testing.T) { _ = os.Setenv("MOCK_ALERT_PROVIDER", "true") defer os.Clearenv() - - cfg := &config.Config{ - Debug: true, - Alerting: nil, - } - config.Set(cfg) - HandleAlerting(nil, nil) + HandleAlerting(nil, nil, nil, true) } func TestHandleAlertingWithBadAlertProvider(t *testing.T) { _ = os.Setenv("MOCK_ALERT_PROVIDER", "true") defer os.Clearenv() - cfg := &config.Config{ - Alerting: &alerting.Config{}, - } - config.Set(cfg) enabled := true service := &core.Service{ URL: "http://example.com", - Alerts: []*core.Alert{ + Alerts: []*alert.Alert{ { - Type: core.CustomAlert, + Type: alert.TypeCustom, Enabled: &enabled, FailureThreshold: 1, SuccessThreshold: 1, @@ -95,9 +85,9 @@ func TestHandleAlertingWithBadAlertProvider(t *testing.T) { } verify(t, service, 0, 0, false, "The alert shouldn't start triggered") - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, &alerting.Config{}, false) verify(t, service, 1, 0, false, "The alert shouldn't have triggered") - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, &alerting.Config{}, false) verify(t, service, 2, 0, false, "The alert shouldn't have triggered, because the provider wasn't configured properly") } @@ -114,13 +104,12 @@ func TestHandleAlertingWhenTriggeredAlertIsAlmostResolvedButServiceStartFailingA }, }, } - config.Set(cfg) enabled := true service := &core.Service{ URL: "http://example.com", - Alerts: []*core.Alert{ + Alerts: []*alert.Alert{ { - Type: core.CustomAlert, + Type: alert.TypeCustom, Enabled: &enabled, FailureThreshold: 2, SuccessThreshold: 3, @@ -132,7 +121,7 @@ func TestHandleAlertingWhenTriggeredAlertIsAlmostResolvedButServiceStartFailingA } // This test simulate an alert that was already triggered - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 2, 0, true, "The alert was already triggered at the beginning of this test") } @@ -149,14 +138,13 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedButSendOnResolvedIsFalse(t *t }, }, } - config.Set(cfg) enabled := true disabled := false service := &core.Service{ URL: "http://example.com", - Alerts: []*core.Alert{ + Alerts: []*alert.Alert{ { - Type: core.CustomAlert, + Type: alert.TypeCustom, Enabled: &enabled, FailureThreshold: 1, SuccessThreshold: 1, @@ -167,7 +155,7 @@ func 
TestHandleAlertingWhenTriggeredAlertIsResolvedButSendOnResolvedIsFalse(t *t NumberOfFailuresInARow: 1, } - HandleAlerting(service, &core.Result{Success: true}) + HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug) verify(t, service, 0, 1, false, "The alert should've been resolved") } @@ -183,13 +171,12 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPagerDuty(t *testing.T) { }, }, } - config.Set(cfg) enabled := true service := &core.Service{ URL: "http://example.com", - Alerts: []*core.Alert{ + Alerts: []*alert.Alert{ { - Type: core.PagerDutyAlert, + Type: alert.TypePagerDuty, Enabled: &enabled, FailureThreshold: 1, SuccessThreshold: 1, @@ -200,10 +187,10 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPagerDuty(t *testing.T) { NumberOfFailuresInARow: 0, } - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 1, 0, true, "") - HandleAlerting(service, &core.Result{Success: true}) + HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug) verify(t, service, 0, 1, false, "The alert should've been resolved") } @@ -220,13 +207,12 @@ func TestHandleAlertingWithProviderThatReturnsAnError(t *testing.T) { }, }, } - config.Set(cfg) enabled := true service := &core.Service{ URL: "http://example.com", - Alerts: []*core.Alert{ + Alerts: []*alert.Alert{ { - Type: core.CustomAlert, + Type: alert.TypeCustom, Enabled: &enabled, FailureThreshold: 2, SuccessThreshold: 2, @@ -237,32 +223,32 @@ func TestHandleAlertingWithProviderThatReturnsAnError(t *testing.T) { } _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true") - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 1, 0, false, "") - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 2, 0, false, "The alert should have failed to trigger, because the alert provider is returning an error") - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 3, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error") - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 4, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false") - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 5, 0, true, "The alert should've been triggered because the alert provider is no longer returning an error") - HandleAlerting(service, &core.Result{Success: true}) + HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug) verify(t, service, 0, 1, true, "The alert should've still been triggered") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true") - HandleAlerting(service, &core.Result{Success: true}) + HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug) verify(t, service, 0, 2, false, "The alert should've been resolved DESPITE THE ALERT PROVIDER RETURNING AN ERROR. 
See Alert.Triggered for further explanation.") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false") // Make sure that everything's working as expected after a rough patch - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 1, 0, false, "") - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 2, 0, true, "The alert should have triggered") - HandleAlerting(service, &core.Result{Success: true}) + HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug) verify(t, service, 0, 1, true, "The alert should still be triggered") - HandleAlerting(service, &core.Result{Success: true}) + HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug) verify(t, service, 0, 2, false, "The alert should have been resolved") } @@ -279,13 +265,12 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) { }, }, } - config.Set(cfg) enabled := true service := &core.Service{ URL: "http://example.com", - Alerts: []*core.Alert{ + Alerts: []*alert.Alert{ { - Type: core.CustomAlert, + Type: alert.TypeCustom, Enabled: &enabled, FailureThreshold: 1, SuccessThreshold: 1, @@ -295,27 +280,27 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) { }, } - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 1, 0, true, "") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true") - HandleAlerting(service, &core.Result{Success: true}) + HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug) verify(t, service, 0, 1, false, "") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false") - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 1, 0, true, "") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true") - HandleAlerting(service, &core.Result{Success: true}) + HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug) verify(t, service, 0, 1, false, "") _ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false") // Make sure that everything's working as expected after a rough patch - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 1, 0, true, "") - HandleAlerting(service, &core.Result{Success: false}) + HandleAlerting(service, &core.Result{Success: false}, cfg.Alerting, cfg.Debug) verify(t, service, 2, 0, true, "") - HandleAlerting(service, &core.Result{Success: true}) + HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug) verify(t, service, 0, 1, false, "") - HandleAlerting(service, &core.Result{Success: true}) + HandleAlerting(service, &core.Result{Success: true}, cfg.Alerting, cfg.Debug) verify(t, service, 0, 2, false, "") } diff --git a/watchdog/watchdog.go b/watchdog/watchdog.go index 5781756c..d4ff1642 100644 --- a/watchdog/watchdog.go +++ b/watchdog/watchdog.go @@ -1,10 +1,12 @@ package watchdog import ( + "context" "log" "sync" "time" + "github.com/TwinProduction/gatus/alerting" "github.com/TwinProduction/gatus/config" "github.com/TwinProduction/gatus/core" "github.com/TwinProduction/gatus/metric" @@ -15,48 +17,65 @@ var ( // monitoringMutex is used to prevent multiple services from 
being evaluated at the same time. // Without this, conditions using response time may become inaccurate. monitoringMutex sync.Mutex + + ctx context.Context + cancelFunc context.CancelFunc ) // Monitor loops over each services and starts a goroutine to monitor each services separately func Monitor(cfg *config.Config) { + ctx, cancelFunc = context.WithCancel(context.Background()) for _, service := range cfg.Services { // To prevent multiple requests from running at the same time, we'll wait for a little bit before each iteration time.Sleep(1111 * time.Millisecond) - go monitor(service) + go monitor(service, cfg.Alerting, cfg.DisableMonitoringLock, cfg.Metrics, cfg.Debug, ctx) } } // monitor monitors a single service in a loop -func monitor(service *core.Service) { - cfg := config.Get() +func monitor(service *core.Service, alertingConfig *alerting.Config, disableMonitoringLock, enabledMetrics, debug bool, ctx context.Context) { + // Run it immediately on start + execute(service, alertingConfig, disableMonitoringLock, enabledMetrics, debug) + // Loop for the next executions for { - if !cfg.DisableMonitoringLock { - // By placing the lock here, we prevent multiple services from being monitored at the exact same time, which - // could cause performance issues and return inaccurate results - monitoringMutex.Lock() + select { + case <-ctx.Done(): + log.Printf("[watchdog][monitor] Canceling current execution of group=%s; service=%s", service.Group, service.Name) + return + case <-time.After(service.Interval): + execute(service, alertingConfig, disableMonitoringLock, enabledMetrics, debug) } - if cfg.Debug { - log.Printf("[watchdog][monitor] Monitoring group=%s; service=%s", service.Group, service.Name) - } - result := service.EvaluateHealth() + } +} + +func execute(service *core.Service, alertingConfig *alerting.Config, disableMonitoringLock, enabledMetrics, debug bool) { + if !disableMonitoringLock { + // By placing the lock here, we prevent multiple services from being monitored at the exact same time, which + // could cause performance issues and return inaccurate results + monitoringMutex.Lock() + } + if debug { + log.Printf("[watchdog][execute] Monitoring group=%s; service=%s", service.Group, service.Name) + } + result := service.EvaluateHealth() + if enabledMetrics { metric.PublishMetricsForService(service, result) - UpdateServiceStatuses(service, result) - log.Printf( - "[watchdog][monitor] Monitored group=%s; service=%s; success=%v; errors=%d; duration=%s", - service.Group, - service.Name, - result.Success, - len(result.Errors), - result.Duration.Round(time.Millisecond), - ) - HandleAlerting(service, result) - if cfg.Debug { - log.Printf("[watchdog][monitor] Waiting for interval=%s before monitoring group=%s service=%s again", service.Interval, service.Group, service.Name) - } - if !cfg.DisableMonitoringLock { - monitoringMutex.Unlock() - } - time.Sleep(service.Interval) + } + UpdateServiceStatuses(service, result) + log.Printf( + "[watchdog][execute] Monitored group=%s; service=%s; success=%v; errors=%d; duration=%s", + service.Group, + service.Name, + result.Success, + len(result.Errors), + result.Duration.Round(time.Millisecond), + ) + HandleAlerting(service, result, alertingConfig, debug) + if debug { + log.Printf("[watchdog][execute] Waiting for interval=%s before monitoring group=%s service=%s again", service.Interval, service.Group, service.Name) + } + if !disableMonitoringLock { + monitoringMutex.Unlock() } } @@ -64,3 +83,8 @@ func monitor(service *core.Service) { func 
UpdateServiceStatuses(service *core.Service, result *core.Result) { storage.Get().Insert(service, result) } + +// Shutdown stops monitoring all services +func Shutdown() { + cancelFunc() +}
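
Note: the `config/config.go` hunk that implements `HasLoadedConfigurationFileBeenModified` and `UpdateLastFileModTime` (used by `listenToConfigurationFileChanges` in `main.go` above) is not included in this part of the patch. The following is only a minimal sketch of how such a check could work, assuming it compares the configuration file's last modification time against the one recorded when the file was loaded; the type and field names here are hypothetical and not part of the patch.

```go
package main

import (
	"log"
	"os"
	"time"
)

// fileWatcher is a hypothetical stand-in for the state the real Config would need
// to support HasLoadedConfigurationFileBeenModified / UpdateLastFileModTime.
type fileWatcher struct {
	filePath        string
	lastFileModTime time.Time
}

// hasBeenModified reports whether the file's mtime is newer than the mtime
// recorded when the configuration was last loaded.
func (w *fileWatcher) hasBeenModified() bool {
	fileInfo, err := os.Stat(w.filePath)
	if err != nil {
		log.Println("unable to stat configuration file:", err)
		return false
	}
	return fileInfo.ModTime().After(w.lastFileModTime)
}

// updateLastFileModTime records the current mtime so the same update
// (valid or not) is not processed again on the next poll.
func (w *fileWatcher) updateLastFileModTime() {
	if fileInfo, err := os.Stat(w.filePath); err == nil {
		w.lastFileModTime = fileInfo.ModTime()
	}
}

func main() {
	watcher := &fileWatcher{filePath: "config.yaml"}
	watcher.updateLastFileModTime()
	// Poll at a fixed interval, mirroring the 30-second loop in listenToConfigurationFileChanges.
	for {
		time.Sleep(30 * time.Second)
		if watcher.hasBeenModified() {
			log.Println("configuration file has been modified")
			watcher.updateLastFileModTime()
		}
	}
}
```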