From 51ea912cf95cb125d639a18102c4887dcacc3ac9 Mon Sep 17 00:00:00 2001 From: TwinProduction Date: Fri, 4 Sep 2020 18:23:56 -0400 Subject: [PATCH] Start working on notifications when service is back to healthy (#9) --- core/alert.go | 3 + core/alerting.go | 8 +++ core/service.go | 12 +--- go.sum | 1 + watchdog/watchdog.go | 137 ++++++++++++++++++++++++------------------- 5 files changed, 93 insertions(+), 68 deletions(-) diff --git a/core/alert.go b/core/alert.go index bc8a12db..ebded0a7 100644 --- a/core/alert.go +++ b/core/alert.go @@ -13,6 +13,9 @@ type Alert struct { // Description of the alert. Will be included in the alert sent. Description string `yaml:"description"` + + // SendOnResolved defines whether to send a second notification when the issue has been resolved + SendOnResolved bool `yaml:"send-on-resolved"` } type AlertType string diff --git a/core/alerting.go b/core/alerting.go index 9cd83b6a..530a6dda 100644 --- a/core/alerting.go +++ b/core/alerting.go @@ -21,6 +21,10 @@ type TwilioAlertProvider struct { To string `yaml:"to"` } +func (provider *TwilioAlertProvider) IsValid() bool { + return len(provider.Token) > 0 && len(provider.SID) > 0 && len(provider.From) > 0 && len(provider.To) > 0 +} + type CustomAlertProvider struct { Url string `yaml:"url"` Method string `yaml:"method,omitempty"` @@ -28,6 +32,10 @@ type CustomAlertProvider struct { Headers map[string]string `yaml:"headers,omitempty"` } +func (provider *CustomAlertProvider) IsValid() bool { + return len(provider.Url) > 0 +} + func (provider *CustomAlertProvider) buildRequest(serviceName, alertDescription string) *http.Request { body := provider.Body url := provider.Url diff --git a/core/service.go b/core/service.go index 1a5850c3..2df3b05e 100644 --- a/core/service.go +++ b/core/service.go @@ -46,7 +46,7 @@ type Service struct { // Alerts is the alerting configuration for the service in case of failure Alerts []*Alert `yaml:"alerts"` - numberOfFailuresInARow int + NumberOfFailuresInARow int } func (service *Service) Validate() { @@ -94,22 +94,16 @@ func (service *Service) EvaluateConditions() *Result { } } result.Timestamp = time.Now() - if result.Success { - service.numberOfFailuresInARow = 0 - // TODO: Send notification that alert has been resolved? - } else { - service.numberOfFailuresInARow++ - } return result } func (service *Service) GetAlertsTriggered() []Alert { var alerts []Alert - if service.numberOfFailuresInARow == 0 { + if service.NumberOfFailuresInARow == 0 { return alerts } for _, alert := range service.Alerts { - if alert.Enabled && alert.Threshold == service.numberOfFailuresInARow { + if alert.Enabled && alert.Threshold == service.NumberOfFailuresInARow { alerts = append(alerts, *alert) continue } diff --git a/go.sum b/go.sum index 69d7b4ec..61c6e3a2 100644 --- a/go.sum +++ b/go.sum @@ -18,6 +18,7 @@ github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/me github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= diff --git a/watchdog/watchdog.go b/watchdog/watchdog.go index 97707575..df8403cf 100644 --- a/watchdog/watchdog.go +++ b/watchdog/watchdog.go @@ -57,67 +57,86 @@ func monitor(service *core.Service) { extra, ) - cfg := config.Get() - if cfg.Alerting != nil { - for _, alertTriggered := range service.GetAlertsTriggered() { - var alertProvider *core.CustomAlertProvider - if alertTriggered.Type == core.SlackAlert { - if len(cfg.Alerting.Slack) > 0 { - log.Printf("[watchdog][monitor] Sending Slack alert because alert with description=%s has been triggered", alertTriggered.Description) - alertProvider = &core.CustomAlertProvider{ - Url: cfg.Alerting.Slack, - Method: "POST", - Body: fmt.Sprintf(`{"text":"*[Gatus]*\n*service:* %s\n*description:* %s"}`, service.Name, alertTriggered.Description), - Headers: map[string]string{"Content-Type": "application/json"}, - } - } else { - log.Printf("[watchdog][monitor] Not sending Slack alert despite being triggered, because there is no Slack webhook configured") - } - } else if alertTriggered.Type == core.TwilioAlert { - if len(cfg.Alerting.Twilio.Token) > 0 && - len(cfg.Alerting.Twilio.SID) > 0 && - len(cfg.Alerting.Twilio.From) > 0 && - len(cfg.Alerting.Twilio.To) > 0 { - log.Printf("[watchdog][monitor] Sending Twilio alert because alert with description=%s has been triggered", alertTriggered.Description) - alertProvider = &core.CustomAlertProvider{ - Url: fmt.Sprintf("https://api.twilio.com/2010-04-01/Accounts/%s/Messages.json", cfg.Alerting.Twilio.SID), - Method: "POST", - Body: url.Values{ - "To": {cfg.Alerting.Twilio.To}, - "From": {cfg.Alerting.Twilio.From}, - "Body": {fmt.Sprintf("%s - %s", service.Name, alertTriggered.Description)}, - }.Encode(), - Headers: map[string]string{ - "Content-Type": "application/x-www-form-urlencoded", - "Authorization": fmt.Sprintf("Basic %s", base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s:%s", cfg.Alerting.Twilio.SID, cfg.Alerting.Twilio.Token)))), - }, - } - } else { - log.Printf("[watchdog][monitor] Not sending Twilio alert despite being triggered, because twilio config settings missing") - } - } else if alertTriggered.Type == core.CustomAlert { - if cfg.Alerting.Custom != nil && len(cfg.Alerting.Custom.Url) > 0 { - log.Printf("[watchdog][monitor] Sending custom alert because alert with description=%s has been triggered", alertTriggered.Description) - alertProvider = &core.CustomAlertProvider{ - Url: cfg.Alerting.Custom.Url, - Method: cfg.Alerting.Custom.Method, - Body: cfg.Alerting.Custom.Body, - Headers: cfg.Alerting.Custom.Headers, - } - } else { - log.Printf("[watchdog][monitor] Not sending custom alert despite being triggered, because there is no custom url configured") - } - } - if alertProvider != nil { - err := alertProvider.Send(service.Name, alertTriggered.Description) - if err != nil { - log.Printf("[watchdog][monitor] Ran into error sending an alert: %s", err.Error()) - } - } - } - } + handleAlerting(service, result) log.Printf("[watchdog][monitor] Waiting for interval=%s before monitoring serviceName=%s", service.Interval, service.Name) time.Sleep(service.Interval) } } + +func handleAlerting(service *core.Service, result *core.Result) { + cfg := config.Get() + if cfg.Alerting == nil { + return + } + if result.Success { + if service.NumberOfFailuresInARow > 0 { + for _, alert := range service.Alerts { + if !alert.Enabled || !alert.SendOnResolved || alert.Threshold < service.NumberOfFailuresInARow { + continue + } + // TODO + } + } + service.NumberOfFailuresInARow = 0 + } else { + service.NumberOfFailuresInARow++ + for _, alert := range service.Alerts { + // If the alert hasn't been triggered, move to the next one + if !alert.Enabled || alert.Threshold != service.NumberOfFailuresInARow { + continue + } + var alertProvider *core.CustomAlertProvider + if alert.Type == core.SlackAlert { + if len(cfg.Alerting.Slack) > 0 { + log.Printf("[watchdog][monitor] Sending Slack alert because alert with description=%s has been triggered", alert.Description) + alertProvider = &core.CustomAlertProvider{ + Url: cfg.Alerting.Slack, + Method: "POST", + Body: fmt.Sprintf(`{"text":"*[Gatus]*\n*service:* %s\n*description:* %s"}`, service.Name, alert.Description), + Headers: map[string]string{"Content-Type": "application/json"}, + } + } else { + log.Printf("[watchdog][monitor] Not sending Slack alert despite being triggered, because there is no Slack webhook configured") + } + } else if alert.Type == core.TwilioAlert { + if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() { + log.Printf("[watchdog][monitor] Sending Twilio alert because alert with description=%s has been triggered", alert.Description) + alertProvider = &core.CustomAlertProvider{ + Url: fmt.Sprintf("https://api.twilio.com/2010-04-01/Accounts/%s/Messages.json", cfg.Alerting.Twilio.SID), + Method: "POST", + Body: url.Values{ + "To": {cfg.Alerting.Twilio.To}, + "From": {cfg.Alerting.Twilio.From}, + "Body": {fmt.Sprintf("%s - %s", service.Name, alert.Description)}, + }.Encode(), + Headers: map[string]string{ + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": fmt.Sprintf("Basic %s", base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s:%s", cfg.Alerting.Twilio.SID, cfg.Alerting.Twilio.Token)))), + }, + } + } else { + log.Printf("[watchdog][monitor] Not sending Twilio alert despite being triggered, because twilio config settings missing") + } + } else if alert.Type == core.CustomAlert { + if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() { + log.Printf("[watchdog][monitor] Sending custom alert because alert with description=%s has been triggered", alert.Description) + alertProvider = &core.CustomAlertProvider{ + Url: cfg.Alerting.Custom.Url, + Method: cfg.Alerting.Custom.Method, + Body: cfg.Alerting.Custom.Body, + Headers: cfg.Alerting.Custom.Headers, + } + } else { + log.Printf("[watchdog][monitor] Not sending custom alert despite being triggered, because there is no custom url configured") + } + } + if alertProvider != nil { + err := alertProvider.Send(service.Name, alert.Description) + if err != nil { + log.Printf("[watchdog][monitor] Ran into error sending an alert: %s", err.Error()) + } + } + } + } +}