Add support for PagerDuty

TwinProduction 2020-09-16 19:26:19 -04:00
parent cf6a74f862
commit 75b7a41c9d
7 changed files with 390 additions and 237 deletions

README.md

@@ -17,14 +17,16 @@ core applications: https://status.twinnation.org/
- [Usage](#usage)
- [Configuration](#configuration)
- [Conditions](#conditions)
- [Alerting](#alerting)
  - [Configuring Slack alerts](#configuring-slack-alerts)
  - [Configuring PagerDuty alerts](#configuring-pagerduty-alerts)
  - [Configuring Twilio alerts](#configuring-twilio-alerts)
  - [Configuring custom alerts](#configuring-custom-alerts)
- [Docker](#docker)
- [Running the tests](#running-the-tests)
- [Using in Production](#using-in-production)
- [FAQ](#faq)
  - [Sending a GraphQL request](#sending-a-graphql-request)
  - [Configuring Slack alerts](#configuring-slack-alerts)
  - [Configuring Twilio alerts](#configuring-twilio-alerts)
  - [Configuring custom alerts](#configuring-custom-alerts)
## Features
@@ -72,35 +74,36 @@ Note that you can also add environment variables in the configuration file (i.e.
### Configuration
| Parameter | Description | Default |
| -------------------------------------- | --------------------------------------------------------------- | -------------- |
| `debug` | Whether to enable debug logs | `false` |
| `metrics` | Whether to expose metrics at /metrics | `false` |
| `services` | List of services to monitor | Required `[]` |
| `services[].name` | Name of the service. Can be anything. | Required `""` |
| `services[].url` | URL to send the request to | Required `""` |
| `services[].conditions` | Conditions used to determine the health of the service | `[]` |
| `services[].interval` | Duration to wait between every status check | `60s` |
| `services[].method` | Request method | `GET` |
| `services[].graphql` | Whether to wrap the body in a query param (`{"query":"$body"}`) | `false` |
| `services[].body` | Request body | `""` |
| `services[].headers` | Request headers | `{}` |
| `services[].alerts[].type` | Type of alert. Valid types: `slack`, `twilio`, `custom` | Required `""` |
| `services[].alerts[].enabled` | Whether to enable the alert | `false` |
| `services[].alerts[].threshold` | Number of failures in a row needed before triggering the alert | `3` |
| `services[].alerts[].description` | Description of the alert. Will be included in the alert sent | `""` |
| `services[].alerts[].send-on-resolved` | Whether to send a notification once a triggered alert subsides | `false` |
| `alerting` | Configuration for alerting | `{}` |
| `alerting.slack` | Webhook to use for alerts of type `slack` | `""` |
| `alerting.twilio` | Settings for alerts of type `twilio` | `""` |
| `alerting.twilio.sid` | Twilio account SID | Required `""` |
| `alerting.twilio.token` | Twilio auth token | Required `""` |
| `alerting.twilio.from` | Number to send Twilio alerts from | Required `""` |
| `alerting.twilio.to` | Number to send twilio alerts to | Required `""` |
| `alerting.custom` | Configuration for custom actions on failure or alerts | `""` |
| `alerting.custom.url` | Custom alerting request url | `""` |
| `alerting.custom.body` | Custom alerting request body. | `""` |
| `alerting.custom.headers` | Custom alerting request headers | `{}` |
| Parameter | Description | Default |
| --------------------------------------------- | -------------------------------------------------------------------------- | -------------- |
| `debug` | Whether to enable debug logs | `false` |
| `metrics` | Whether to expose metrics at /metrics | `false` |
| `services` | List of services to monitor | Required `[]` |
| `services[].name` | Name of the service. Can be anything. | Required `""` |
| `services[].url` | URL to send the request to | Required `""` |
| `services[].conditions` | Conditions used to determine the health of the service | `[]` |
| `services[].interval` | Duration to wait between every status check | `60s` |
| `services[].method` | Request method | `GET` |
| `services[].graphql` | Whether to wrap the body in a query param (`{"query":"$body"}`) | `false` |
| `services[].body` | Request body | `""` |
| `services[].headers` | Request headers | `{}` |
| `services[].alerts[].type`                    | Type of alert. Valid types: `slack`, `pagerduty`, `twilio`, `custom`        | Required `""`  |
| `services[].alerts[].enabled` | Whether to enable the alert | `false` |
| `services[].alerts[].threshold` | Number of failures in a row needed before triggering the alert | `3` |
| `services[].alerts[].description` | Description of the alert. Will be included in the alert sent | `""` |
| `services[].alerts[].send-on-resolved` | Whether to send a notification once a triggered alert subsides | `false` |
| `services[].alerts[].success-before-resolved` | Number of successes in a row needed before sending a resolved notification | `2` |
| `alerting` | Configuration for alerting | `{}` |
| `alerting.slack`                               | Webhook to use for alerts of type `slack`                                    | `""`           |
| `alerting.pagerduty`                           | PagerDuty Events API v2 integration (routing) key for alerts of type `pagerduty` | `""`           |
| `alerting.twilio` | Settings for alerts of type `twilio` | `""` |
| `alerting.twilio.sid` | Twilio account SID | Required `""` |
| `alerting.twilio.token` | Twilio auth token | Required `""` |
| `alerting.twilio.from` | Number to send Twilio alerts from | Required `""` |
| `alerting.twilio.to`                           | Number to send Twilio alerts to                                              | Required `""`  |
| `alerting.custom`                              | Configuration for custom actions on failure or alerts                        | `""`           |
| `alerting.custom.url`                          | Custom alerting request URL                                                  | `""`           |
| `alerting.custom.body`                         | Custom alerting request body                                                 | `""`           |
| `alerting.custom.headers` | Custom alerting request headers | `{}` |
### Conditions
@@ -121,6 +124,136 @@ Here are some examples of conditions you can use:
| `len([BODY].name) == 8` | String at jsonpath `$.name` has a length of 8 | `{"name":"john.doe"}` | `{"name":"bob"}` |
### Alerting
#### Configuring Slack alerts
```yaml
alerting:
  slack: "https://hooks.slack.com/services/**********/**********/**********"
services:
  - name: twinnation
    interval: 30s
    url: "https://twinnation.org/health"
    alerts:
      - type: slack
        enabled: true
        description: "healthcheck failed 3 times in a row"
        send-on-resolved: true
      - type: slack
        enabled: true
        threshold: 5
        description: "healthcheck failed 5 times in a row"
        send-on-resolved: true
    conditions:
      - "[STATUS] == 200"
      - "[BODY].status == UP"
      - "[RESPONSE_TIME] < 300"
```
Here's an example of what the notifications look like:
![Slack notifications](.github/assets/slack-alerts.png)
#### Configuring PagerDuty alerts
It is highly recommended to set `services[].alerts[].send-on-resolved` to `true` for alerts
of type `pagerduty`: unlike other alert types, the resolved notification does not create a new
incident, it marks the previously triggered incident as resolved on PagerDuty instead.
```yaml
alerting:
  pagerduty: "********************************"
services:
  - name: twinnation
    interval: 30s
    url: "https://twinnation.org/health"
    alerts:
      - type: pagerduty
        enabled: true
        threshold: 3
        description: "healthcheck failed 3 times in a row"
        send-on-resolved: true
        success-before-resolved: 5
    conditions:
      - "[STATUS] == 200"
      - "[BODY].status == UP"
      - "[RESPONSE_TIME] < 300"
```
#### Configuring Twilio alerts
```yaml
alerting:
  twilio:
    sid: "..."
    token: "..."
    from: "+1-234-567-8901"
    to: "+1-234-567-8901"
services:
  - name: twinnation
    interval: 30s
    url: "https://twinnation.org/health"
    alerts:
      - type: twilio
        enabled: true
        threshold: 5
        description: "healthcheck failed 5 times in a row"
    conditions:
      - "[STATUS] == 200"
      - "[BODY].status == UP"
      - "[RESPONSE_TIME] < 300"
```
#### Configuring custom alerts
While they're called alerts, you can use this feature to call anything.
For instance, you could automate rollbacks by having an application that keeps track of new deployments, and by
leveraging Gatus, you could have Gatus call that application's endpoint when a service starts failing. Your application
would then check whether the service that started failing was recently deployed and, if it was, automatically
roll it back.
The values `[ALERT_DESCRIPTION]` and `[SERVICE_NAME]` are automatically replaced by the alert description and the
service name respectively, in both the body (`alerting.custom.body`) and the URL (`alerting.custom.url`).
If you have `send-on-resolved` set to `true`, you may want to use `[ALERT_TRIGGERED_OR_RESOLVED]` to differentiate
the notifications. It will be replaced by either `TRIGGERED` or `RESOLVED`, depending on the situation.
For all intents and purposes, we'll configure the custom alert with a Slack webhook, but you can call anything you want.
```yaml
alerting:
  custom:
    url: "https://hooks.slack.com/services/**********/**********/**********"
    method: "POST"
    body: |
      {
        "text": "[ALERT_TRIGGERED_OR_RESOLVED]: [SERVICE_NAME] - [ALERT_DESCRIPTION]"
      }
services:
  - name: twinnation
    interval: 30s
    url: "https://twinnation.org/health"
    alerts:
      - type: custom
        enabled: true
        threshold: 10
        send-on-resolved: true
        description: "healthcheck failed 10 times in a row"
    conditions:
      - "[STATUS] == 200"
      - "[BODY].status == UP"
      - "[RESPONSE_TIME] < 300"
```
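To make the substitution concrete, here is a minimal, self-contained sketch of the idea, assuming plain string replacement (the helper name `interpolatePlaceholders` is illustrative, not part of Gatus):
```go
package main

import (
	"fmt"
	"strings"
)

// interpolatePlaceholders is a hypothetical helper showing how the
// [SERVICE_NAME], [ALERT_DESCRIPTION] and [ALERT_TRIGGERED_OR_RESOLVED]
// placeholders could be filled in a custom alert body or URL.
func interpolatePlaceholders(template, serviceName, description string, resolved bool) string {
	status := "TRIGGERED"
	if resolved {
		status = "RESOLVED"
	}
	out := strings.ReplaceAll(template, "[SERVICE_NAME]", serviceName)
	out = strings.ReplaceAll(out, "[ALERT_DESCRIPTION]", description)
	return strings.ReplaceAll(out, "[ALERT_TRIGGERED_OR_RESOLVED]", status)
}

func main() {
	body := `{"text": "[ALERT_TRIGGERED_OR_RESOLVED]: [SERVICE_NAME] - [ALERT_DESCRIPTION]"}`
	fmt.Println(interpolatePlaceholders(body, "twinnation", "healthcheck failed 10 times in a row", true))
	// Prints: {"text": "RESOLVED: twinnation - healthcheck failed 10 times in a row"}
}
```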
## Docker
```
@@ -173,101 +306,3 @@ will send a `POST` request to `http://localhost:8080/playground` with the follow
```json
{"query":" {\n user(gender: \"female\") {\n id\n name\n gender\n avatar\n }\n }"}
```
### Configuring Slack alerts
```yaml
alerting:
  slack: "https://hooks.slack.com/services/**********/**********/**********"
services:
  - name: twinnation
    interval: 30s
    url: "https://twinnation.org/health"
    alerts:
      - type: slack
        enabled: true
        description: "healthcheck failed 3 times in a row"
        send-on-resolved: true
      - type: slack
        enabled: true
        threshold: 5
        description: "healthcheck failed 5 times in a row"
        send-on-resolved: true
    conditions:
      - "[STATUS] == 200"
      - "[BODY].status == UP"
      - "[RESPONSE_TIME] < 300"
```
Here's an example of what the notifications look like:
![Slack notifications](.github/assets/slack-alerts.png)
### Configuring Twilio alerts
```yaml
alerting:
  twilio:
    sid: "..."
    token: "..."
    from: "+1-234-567-8901"
    to: "+1-234-567-8901"
services:
  - name: twinnation
    interval: 30s
    url: "https://twinnation.org/health"
    alerts:
      - type: twilio
        enabled: true
        threshold: 5
        description: "healthcheck failed 5 times in a row"
    conditions:
      - "[STATUS] == 200"
      - "[BODY].status == UP"
      - "[RESPONSE_TIME] < 300"
```
### Configuring custom alerts
While they're called alerts, you can use this feature to call anything.
For instance, you could automate rollbacks by having an application that keeps track of new deployments, and by
leveraging Gatus, you could have Gatus call that application's endpoint when a service starts failing. Your application
would then check whether the service that started failing was recently deployed and, if it was, automatically
roll it back.
The values `[ALERT_DESCRIPTION]` and `[SERVICE_NAME]` are automatically replaced by the alert description and the
service name respectively, in both the body (`alerting.custom.body`) and the URL (`alerting.custom.url`).
If you have `send-on-resolved` set to `true`, you may want to use `[ALERT_TRIGGERED_OR_RESOLVED]` to differentiate
the notifications. It will be replaced by either `TRIGGERED` or `RESOLVED`, depending on the situation.
For all intents and purposes, we'll configure the custom alert with a Slack webhook, but you can call anything you want.
```yaml
alerting:
  custom:
    url: "https://hooks.slack.com/services/**********/**********/**********"
    method: "POST"
    body: |
      {
        "text": "[ALERT_TRIGGERED_OR_RESOLVED]: [SERVICE_NAME] - [ALERT_DESCRIPTION]"
      }
services:
  - name: twinnation
    interval: 30s
    url: "https://twinnation.org/health"
    alerts:
      - type: custom
        enabled: true
        threshold: 10
        send-on-resolved: true
        description: "healthcheck failed 10 times in a row"
    conditions:
      - "[STATUS] == 200"
      - "[BODY].status == UP"
      - "[RESPONSE_TIME] < 300"
```

alerting/alerting.go (new file)

@@ -0,0 +1,158 @@
package alerting

import (
	"encoding/json"
	"fmt"
	"github.com/TwinProduction/gatus/config"
	"github.com/TwinProduction/gatus/core"
	"log"
)

// Handle takes care of alerts to resolve and alerts to trigger based on result success or failure
func Handle(service *core.Service, result *core.Result) {
	cfg := config.Get()
	if cfg.Alerting == nil {
		return
	}
	if result.Success {
		handleAlertsToResolve(service, result, cfg)
	} else {
		handleAlertsToTrigger(service, result, cfg)
	}
}

// handleAlertsToTrigger increments the service's failure streak and triggers every enabled alert whose
// failure threshold has just been reached
func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *config.Config) {
	service.NumberOfSuccessesInARow = 0
	service.NumberOfFailuresInARow++
	for _, alert := range service.Alerts {
		// If the alert is disabled, or the failure threshold hasn't been reached yet, move to the next alert
		if !alert.Enabled || alert.Threshold != service.NumberOfFailuresInARow {
			continue
		}
		if alert.Triggered {
			if cfg.Debug {
				log.Printf("[alerting][handleAlertsToTrigger] Alert with description='%s' has already been triggered, skipping", alert.Description)
			}
			continue
		}
		var alertProvider *core.CustomAlertProvider
		if alert.Type == core.SlackAlert {
			if len(cfg.Alerting.Slack) > 0 {
				log.Printf("[alerting][handleAlertsToTrigger] Sending Slack alert because alert with description='%s' has been triggered", alert.Description)
				alertProvider = core.CreateSlackCustomAlertProvider(cfg.Alerting.Slack, service, alert, result, false)
			} else {
				log.Printf("[alerting][handleAlertsToTrigger] Not sending Slack alert despite being triggered, because there is no Slack webhook configured")
			}
		} else if alert.Type == core.PagerDutyAlert {
			if len(cfg.Alerting.PagerDuty) > 0 {
				log.Printf("[alerting][handleAlertsToTrigger] Sending PagerDuty alert because alert with description='%s' has been triggered", alert.Description)
				alertProvider = core.CreatePagerDutyCustomAlertProvider(cfg.Alerting.PagerDuty, "trigger", "", service, fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description))
			} else {
				log.Printf("[alerting][handleAlertsToTrigger] Not sending PagerDuty alert despite being triggered, because PagerDuty isn't configured properly")
			}
		} else if alert.Type == core.TwilioAlert {
			if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() {
				log.Printf("[alerting][handleAlertsToTrigger] Sending Twilio alert because alert with description='%s' has been triggered", alert.Description)
				alertProvider = core.CreateTwilioCustomAlertProvider(cfg.Alerting.Twilio, fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description))
			} else {
				log.Printf("[alerting][handleAlertsToTrigger] Not sending Twilio alert despite being triggered, because Twilio config settings missing")
			}
		} else if alert.Type == core.CustomAlert {
			if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() {
				log.Printf("[alerting][handleAlertsToTrigger] Sending custom alert because alert with description='%s' has been triggered", alert.Description)
				alertProvider = &core.CustomAlertProvider{
					Url:     cfg.Alerting.Custom.Url,
					Method:  cfg.Alerting.Custom.Method,
					Body:    cfg.Alerting.Custom.Body,
					Headers: cfg.Alerting.Custom.Headers,
				}
			} else {
				log.Printf("[alerting][handleAlertsToTrigger] Not sending custom alert despite being triggered, because there is no custom url configured")
			}
		}
		if alertProvider != nil {
			// TODO: retry on error
			var err error
			// PagerDuty is handled separately because its response contains the dedup_key needed to resolve the incident later
			if alert.Type == core.PagerDutyAlert {
				var body []byte
				body, err = alertProvider.Send(service.Name, alert.Description, true)
				if err == nil {
					var response pagerDutyResponse
					err = json.Unmarshal(body, &response)
					if err != nil {
						log.Printf("[alerting][handleAlertsToTrigger] Ran into error unmarshalling PagerDuty response: %s", err.Error())
					} else {
						alert.ResolveKey = response.DedupKey
					}
				}
			} else {
				_, err = alertProvider.Send(service.Name, alert.Description, false)
			}
			if err != nil {
				log.Printf("[alerting][handleAlertsToTrigger] Ran into error sending an alert: %s", err.Error())
			} else {
				alert.Triggered = true
			}
		}
	}
}

// handleAlertsToResolve increments the service's success streak and resolves previously triggered alerts
// once enough consecutive successes have accumulated
func handleAlertsToResolve(service *core.Service, result *core.Result, cfg *config.Config) {
	service.NumberOfSuccessesInARow++
	for _, alert := range service.Alerts {
		if !alert.Enabled || !alert.Triggered || alert.SuccessBeforeResolved > service.NumberOfSuccessesInARow {
			continue
		}
		alert.Triggered = false
		if !alert.SendOnResolved {
			continue
		}
		var alertProvider *core.CustomAlertProvider
		if alert.Type == core.SlackAlert {
			if len(cfg.Alerting.Slack) > 0 {
				log.Printf("[alerting][handleAlertsToResolve] Sending Slack alert because alert with description='%s' has been resolved", alert.Description)
				alertProvider = core.CreateSlackCustomAlertProvider(cfg.Alerting.Slack, service, alert, result, true)
			} else {
				log.Printf("[alerting][handleAlertsToResolve] Not sending Slack alert despite being resolved, because there is no Slack webhook configured")
			}
		} else if alert.Type == core.PagerDutyAlert {
			if len(cfg.Alerting.PagerDuty) > 0 {
				log.Printf("[alerting][handleAlertsToResolve] Sending PagerDuty alert because alert with description='%s' has been resolved", alert.Description)
				alertProvider = core.CreatePagerDutyCustomAlertProvider(cfg.Alerting.PagerDuty, "resolve", alert.ResolveKey, service, fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description))
			} else {
				log.Printf("[alerting][handleAlertsToResolve] Not sending PagerDuty alert despite being resolved, because PagerDuty isn't configured properly")
			}
		} else if alert.Type == core.TwilioAlert {
			if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() {
				log.Printf("[alerting][handleAlertsToResolve] Sending Twilio alert because alert with description='%s' has been resolved", alert.Description)
				alertProvider = core.CreateTwilioCustomAlertProvider(cfg.Alerting.Twilio, fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description))
			} else {
				log.Printf("[alerting][handleAlertsToResolve] Not sending Twilio alert despite being resolved, because Twilio isn't configured properly")
			}
		} else if alert.Type == core.CustomAlert {
			if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() {
				log.Printf("[alerting][handleAlertsToResolve] Sending custom alert because alert with description='%s' has been resolved", alert.Description)
				alertProvider = &core.CustomAlertProvider{
					Url:     cfg.Alerting.Custom.Url,
					Method:  cfg.Alerting.Custom.Method,
					Body:    cfg.Alerting.Custom.Body,
					Headers: cfg.Alerting.Custom.Headers,
				}
			} else {
				log.Printf("[alerting][handleAlertsToResolve] Not sending custom alert despite being resolved, because the custom provider isn't configured properly")
			}
		}
		if alertProvider != nil {
			// TODO: retry on error
			_, err := alertProvider.Send(service.Name, alert.Description, true)
			if err != nil {
				log.Printf("[alerting][handleAlertsToResolve] Ran into error sending an alert: %s", err.Error())
			} else {
				if alert.Type == core.PagerDutyAlert {
					alert.ResolveKey = ""
				}
			}
		}
	}
	service.NumberOfFailuresInARow = 0
}
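For reference, the trigger/resolve cycle above boils down to two per-service counters compared against `threshold` and `success-before-resolved`. The following is a stripped-down, self-contained sketch of that state machine (not code from this commit; it omits the `send-on-resolved` gating and the provider calls):
```go
package main

import "fmt"

// alert and service model only the counters used by handleAlertsToTrigger
// and handleAlertsToResolve; everything else is omitted for brevity.
type alert struct {
	Threshold             int
	SuccessBeforeResolved int
	Triggered             bool
}

type service struct {
	FailuresInARow  int
	SuccessesInARow int
	Alert           alert
}

// record mimics Handle: a failed check can trigger the alert once the
// threshold is reached, and a streak of successful checks resolves it.
func (s *service) record(success bool) {
	if success {
		s.SuccessesInARow++
		if s.Alert.Triggered && s.SuccessesInARow >= s.Alert.SuccessBeforeResolved {
			s.Alert.Triggered = false
			fmt.Println("resolved notification would be sent here")
		}
		s.FailuresInARow = 0
		return
	}
	s.SuccessesInARow = 0
	s.FailuresInARow++
	if !s.Alert.Triggered && s.FailuresInARow == s.Alert.Threshold {
		s.Alert.Triggered = true
		fmt.Println("trigger notification would be sent here")
	}
}

func main() {
	s := &service{Alert: alert{Threshold: 3, SuccessBeforeResolved: 2}}
	for _, ok := range []bool{false, false, false, true, true} {
		s.record(ok)
	}
	// Output:
	// trigger notification would be sent here
	// resolved notification would be sent here
}
```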

alerting/pagerduty.go (new file)

@@ -0,0 +1,7 @@
package alerting

type pagerDutyResponse struct {
	Status   string `json:"status"`
	Message  string `json:"message"`
	DedupKey string `json:"dedup_key"`
}
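The struct above maps the fields of a PagerDuty Events API v2 response that Gatus cares about. A self-contained example of decoding one (the JSON below is a representative sample, not output captured from PagerDuty):
```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
)

type pagerDutyResponse struct {
	Status   string `json:"status"`
	Message  string `json:"message"`
	DedupKey string `json:"dedup_key"`
}

func main() {
	// Representative body returned by a successful "trigger" event.
	sample := []byte(`{"status":"success","message":"Event processed","dedup_key":"01ABCDEFGHIJKL"}`)
	var response pagerDutyResponse
	if err := json.Unmarshal(sample, &response); err != nil {
		log.Fatalf("unable to unmarshal response: %s", err)
	}
	// alerting.Handle stores this value in alert.ResolveKey so that the
	// incident can later be resolved instead of re-triggered.
	fmt.Println(response.DedupKey)
}
```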


@@ -16,12 +16,24 @@ type Alert struct {
	// SendOnResolved defines whether to send a second notification when the issue has been resolved
	SendOnResolved bool `yaml:"send-on-resolved"`
	// SuccessBeforeResolved defines the number of successful checks in a row needed before a triggered alert is marked as resolved
	SuccessBeforeResolved int `yaml:"success-before-resolved"`
	// ResolveKey is an optional field that is used by some providers (i.e. PagerDuty's dedup_key) to resolve
	// ongoing/triggered incidents
	ResolveKey string
	// Triggered is used to determine whether an alert has been triggered. When an alert is resolved, this value
	// should be set back to false. It is used to prevent the same alert from going out twice.
	Triggered bool
}

type AlertType string

const (
	SlackAlert  AlertType = "slack"
	TwilioAlert AlertType = "twilio"
	CustomAlert AlertType = "custom"
	SlackAlert     AlertType = "slack"
	PagerDutyAlert AlertType = "pagerduty"
	TwilioAlert    AlertType = "twilio"
	CustomAlert    AlertType = "custom"
)


@@ -5,15 +5,17 @@ import (
	"encoding/base64"
	"fmt"
	"github.com/TwinProduction/gatus/client"
	"io/ioutil"
	"net/http"
	"net/url"
	"strings"
)

type AlertingConfig struct {
	Slack  string               `yaml:"slack"`
	Twilio *TwilioAlertProvider `yaml:"twilio"`
	Custom *CustomAlertProvider `yaml:"custom"`
	Slack     string               `yaml:"slack"`
	PagerDuty string               `yaml:"pagerduty"`
	Twilio    *TwilioAlertProvider `yaml:"twilio"`
	Custom    *CustomAlertProvider `yaml:"custom"`
}
type TwilioAlertProvider struct {
@@ -75,26 +77,32 @@ func (provider *CustomAlertProvider) buildRequest(serviceName, alertDescription
	return request
}
func (provider *CustomAlertProvider) Send(serviceName, alertDescription string, resolved bool) error {
// Send sends a request to the alert provider and returns the response body
func (provider *CustomAlertProvider) Send(serviceName, alertDescription string, resolved bool) ([]byte, error) {
	request := provider.buildRequest(serviceName, alertDescription, resolved)
	response, err := client.GetHttpClient().Do(request)
	if err != nil {
		return err
		return nil, err
	}
	if response.StatusCode > 399 {
		return fmt.Errorf("call to provider alert returned status code %d", response.StatusCode)
		body, err := ioutil.ReadAll(response.Body)
		if err != nil {
			return nil, fmt.Errorf("call to provider alert returned status code %d", response.StatusCode)
		} else {
			return nil, fmt.Errorf("call to provider alert returned status code %d: %s", response.StatusCode, string(body))
		}
	}
	return nil
	return ioutil.ReadAll(response.Body)
}
func CreateSlackCustomAlertProvider(slackWebHookUrl string, service *Service, alert *Alert, result *Result, resolved bool) *CustomAlertProvider {
	var message string
	var color string
	if resolved {
		message = fmt.Sprintf("An alert for *%s* has been resolved after %d failures in a row", service.Name, service.NumberOfFailuresInARow)
		message = fmt.Sprintf("An alert for *%s* has been resolved after passing successfully %d time(s) in a row", service.Name, alert.SuccessBeforeResolved)
		color = "#36A64F"
	} else {
		message = fmt.Sprintf("An alert for *%s* has been triggered", service.Name)
		message = fmt.Sprintf("An alert for *%s* has been triggered due to having failed %d time(s) in a row", service.Name, alert.Threshold)
		color = "#DD0000"
	}
	var results string
@@ -147,3 +155,24 @@ func CreateTwilioCustomAlertProvider(provider *TwilioAlertProvider, message stri
		},
	}
}

// CreatePagerDutyCustomAlertProvider creates a CustomAlertProvider that sends an event to PagerDuty's Events API v2
// See https://developer.pagerduty.com/docs/events-api-v2/trigger-events/
func CreatePagerDutyCustomAlertProvider(routingKey, eventAction, resolveKey string, service *Service, message string) *CustomAlertProvider {
	return &CustomAlertProvider{
		Url:    "https://events.pagerduty.com/v2/enqueue",
		Method: "POST",
		Body: fmt.Sprintf(`{
  "routing_key": "%s",
  "dedup_key": "%s",
  "event_action": "%s",
  "payload": {
    "summary": "%s",
    "source": "%s",
    "severity": "critical"
  }
}`, routingKey, resolveKey, eventAction, message, service.Name),
		Headers: map[string]string{
			"Content-Type": "application/json",
		},
	}
}
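To illustrate what ends up being POSTed to https://events.pagerduty.com/v2/enqueue, here is a self-contained sketch mirroring the body assembled above; the routing key and dedup key values are placeholders:
```go
package main

import "fmt"

// buildPagerDutyBody reproduces the JSON assembled by
// CreatePagerDutyCustomAlertProvider, for illustration purposes only.
func buildPagerDutyBody(routingKey, dedupKey, eventAction, summary, source string) string {
	return fmt.Sprintf(`{
  "routing_key": "%s",
  "dedup_key": "%s",
  "event_action": "%s",
  "payload": {
    "summary": "%s",
    "source": "%s",
    "severity": "critical"
  }
}`, routingKey, dedupKey, eventAction, summary, source)
}

func main() {
	// Triggering: no dedup_key is known yet, so PagerDuty generates one.
	fmt.Println(buildPagerDutyBody("ROUTING_KEY_PLACEHOLDER", "", "trigger",
		"TRIGGERED: twinnation - healthcheck failed 3 times in a row", "twinnation"))
	// Resolving: the dedup_key returned by the trigger call is sent back so the
	// existing incident is closed rather than a new one opened.
	fmt.Println(buildPagerDutyBody("ROUTING_KEY_PLACEHOLDER", "dedup-key-from-trigger", "resolve",
		"RESOLVED: twinnation - healthcheck failed 3 times in a row", "twinnation"))
}
```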


@@ -46,7 +46,8 @@ type Service struct {
	// Alerts is the alerting configuration for the service in case of failure
	Alerts []*Alert `yaml:"alerts"`
	NumberOfFailuresInARow int
	NumberOfFailuresInARow  int
	NumberOfSuccessesInARow int
}
func (service *Service) Validate() {
@@ -64,6 +65,9 @@ func (service *Service) Validate() {
		if alert.Threshold <= 0 {
			alert.Threshold = 3
		}
		if alert.SuccessBeforeResolved <= 0 {
			alert.SuccessBeforeResolved = 2
		}
	}
	if len(service.Url) == 0 {
		panic(ErrNoUrl)
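A small, hedged illustration of the defaulting behaviour added here (a standalone sketch, not the actual `Service.Validate` implementation): alerts that omit `threshold` or `success-before-resolved` fall back to 3 and 2 respectively.
```go
package main

import "fmt"

// alert is a stand-in for core.Alert, reduced to the two fields that receive
// defaults in Service.Validate.
type alert struct {
	Threshold             int
	SuccessBeforeResolved int
}

// applyDefaults mirrors the checks above: zero or negative values are
// replaced by the documented defaults.
func applyDefaults(a *alert) {
	if a.Threshold <= 0 {
		a.Threshold = 3
	}
	if a.SuccessBeforeResolved <= 0 {
		a.SuccessBeforeResolved = 2
	}
}

func main() {
	a := &alert{} // e.g. an alert declared with only "type" and "enabled"
	applyDefaults(a)
	fmt.Println(a.Threshold, a.SuccessBeforeResolved) // 3 2
}
```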


@@ -3,6 +3,7 @@ package watchdog
import (
	"encoding/json"
	"fmt"
	"github.com/TwinProduction/gatus/alerting"
	"github.com/TwinProduction/gatus/config"
	"github.com/TwinProduction/gatus/core"
	"github.com/TwinProduction/gatus/metric"
@@ -70,7 +71,7 @@ func monitor(service *core.Service) {
			result.Duration.Round(time.Millisecond),
			extra,
		)
		handleAlerting(service, result)
		alerting.Handle(service, result)
		if cfg.Debug {
			log.Printf("[watchdog][monitor] Waiting for interval=%s before monitoring serviceName=%s again", service.Interval, service.Name)
		}
@@ -78,96 +79,3 @@ func monitor(service *core.Service) {
		time.Sleep(service.Interval)
	}
}
func handleAlerting(service *core.Service, result *core.Result) {
	cfg := config.Get()
	if cfg.Alerting == nil {
		return
	}
	if result.Success {
		if service.NumberOfFailuresInARow > 0 {
			for _, alert := range service.Alerts {
				if !alert.Enabled || !alert.SendOnResolved || alert.Threshold > service.NumberOfFailuresInARow {
					continue
				}
				var alertProvider *core.CustomAlertProvider
				if alert.Type == core.SlackAlert {
					if len(cfg.Alerting.Slack) > 0 {
						log.Printf("[watchdog][handleAlerting] Sending Slack alert because alert with description=%s has been resolved", alert.Description)
						alertProvider = core.CreateSlackCustomAlertProvider(cfg.Alerting.Slack, service, alert, result, true)
					} else {
						log.Printf("[watchdog][handleAlerting] Not sending Slack alert despite being triggered, because there is no Slack webhook configured")
					}
				} else if alert.Type == core.TwilioAlert {
					if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() {
						log.Printf("[watchdog][handleAlerting] Sending Twilio alert because alert with description=%s has been resolved", alert.Description)
						alertProvider = core.CreateTwilioCustomAlertProvider(cfg.Alerting.Twilio, fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description))
					} else {
						log.Printf("[watchdog][handleAlerting] Not sending Twilio alert despite being resolved, because Twilio isn't configured properly")
					}
				} else if alert.Type == core.CustomAlert {
					if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() {
						log.Printf("[watchdog][handleAlerting] Sending custom alert because alert with description=%s has been resolved", alert.Description)
						alertProvider = &core.CustomAlertProvider{
							Url:     cfg.Alerting.Custom.Url,
							Method:  cfg.Alerting.Custom.Method,
							Body:    cfg.Alerting.Custom.Body,
							Headers: cfg.Alerting.Custom.Headers,
						}
					} else {
						log.Printf("[watchdog][handleAlerting] Not sending custom alert despite being resolved, because the custom provider isn't configured properly")
					}
				}
				if alertProvider != nil {
					err := alertProvider.Send(service.Name, alert.Description, true)
					if err != nil {
						log.Printf("[watchdog][handleAlerting] Ran into error sending an alert: %s", err.Error())
					}
				}
			}
		}
		service.NumberOfFailuresInARow = 0
	} else {
		service.NumberOfFailuresInARow++
		for _, alert := range service.Alerts {
			// If the alert hasn't been triggered, move to the next one
			if !alert.Enabled || alert.Threshold != service.NumberOfFailuresInARow {
				continue
			}
			var alertProvider *core.CustomAlertProvider
			if alert.Type == core.SlackAlert {
				if len(cfg.Alerting.Slack) > 0 {
					log.Printf("[watchdog][handleAlerting] Sending Slack alert because alert with description=%s has been triggered", alert.Description)
					alertProvider = core.CreateSlackCustomAlertProvider(cfg.Alerting.Slack, service, alert, result, false)
				} else {
					log.Printf("[watchdog][handleAlerting] Not sending Slack alert despite being triggered, because there is no Slack webhook configured")
				}
			} else if alert.Type == core.TwilioAlert {
				if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() {
					log.Printf("[watchdog][handleAlerting] Sending Twilio alert because alert with description=%s has been triggered", alert.Description)
					alertProvider = core.CreateTwilioCustomAlertProvider(cfg.Alerting.Twilio, fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description))
				} else {
					log.Printf("[watchdog][handleAlerting] Not sending Twilio alert despite being triggered, because Twilio config settings missing")
				}
			} else if alert.Type == core.CustomAlert {
				if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() {
					log.Printf("[watchdog][handleAlerting] Sending custom alert because alert with description=%s has been triggered", alert.Description)
					alertProvider = &core.CustomAlertProvider{
						Url:     cfg.Alerting.Custom.Url,
						Method:  cfg.Alerting.Custom.Method,
						Body:    cfg.Alerting.Custom.Body,
						Headers: cfg.Alerting.Custom.Headers,
					}
				} else {
					log.Printf("[watchdog][handleAlerting] Not sending custom alert despite being triggered, because there is no custom url configured")
				}
			}
			if alertProvider != nil {
				err := alertProvider.Send(service.Name, alert.Description, false)
				if err != nil {
					log.Printf("[watchdog][handleAlerting] Ran into error sending an alert: %s", err.Error())
				}
			}
		}
	}
}