mirror of
https://github.com/TwiN/gatus.git
synced 2024-11-21 23:43:27 +01:00
Rename services[].alerts[]'s threshold and success-before-resolved to failure-threshold and success-threshold
This commit is contained in:
parent
fefc728201
commit
c6f11e63e4
73
README.md
73
README.md
@ -74,36 +74,37 @@ Note that you can also add environment variables in the configuration file (i.e.
|
|||||||
|
|
||||||
### Configuration
|
### Configuration
|
||||||
|
|
||||||
| Parameter | Description | Default |
|
| Parameter | Description | Default |
|
||||||
| --------------------------------------------- | -------------------------------------------------------------------------- | -------------- |
|
| ---------------------------------------- | ----------------------------------------------------------------------------- | -------------- |
|
||||||
| `debug` | Whether to enable debug logs | `false` |
|
| `debug` | Whether to enable debug logs | `false` |
|
||||||
| `metrics` | Whether to expose metrics at /metrics | `false` |
|
| `metrics` | Whether to expose metrics at /metrics | `false` |
|
||||||
| `services` | List of services to monitor | Required `[]` |
|
| `services` | List of services to monitor | Required `[]` |
|
||||||
| `services[].name` | Name of the service. Can be anything. | Required `""` |
|
| `services[].name` | Name of the service. Can be anything. | Required `""` |
|
||||||
| `services[].url` | URL to send the request to | Required `""` |
|
| `services[].url` | URL to send the request to | Required `""` |
|
||||||
| `services[].conditions` | Conditions used to determine the health of the service | `[]` |
|
| `services[].conditions` | Conditions used to determine the health of the service | `[]` |
|
||||||
| `services[].interval` | Duration to wait between every status check | `60s` |
|
| `services[].interval` | Duration to wait between every status check | `60s` |
|
||||||
| `services[].method` | Request method | `GET` |
|
| `services[].method` | Request method | `GET` |
|
||||||
| `services[].graphql` | Whether to wrap the body in a query param (`{"query":"$body"}`) | `false` |
|
| `services[].graphql` | Whether to wrap the body in a query param (`{"query":"$body"}`) | `false` |
|
||||||
| `services[].body` | Request body | `""` |
|
| `services[].body` | Request body | `""` |
|
||||||
| `services[].headers` | Request headers | `{}` |
|
| `services[].headers` | Request headers | `{}` |
|
||||||
| `services[].alerts[].type` | Type of alert. Valid types: `slack`, `twilio`, `custom` | Required `""` |
|
| `services[].alerts[].type` | Type of alert. Valid types: `slack`, `pagerduty`, `twilio`, `custom` | Required `""` |
|
||||||
| `services[].alerts[].enabled` | Whether to enable the alert | `false` |
|
| `services[].alerts[].enabled` | Whether to enable the alert | `false` |
|
||||||
| `services[].alerts[].threshold` | Number of failures in a row needed before triggering the alert | `3` |
|
| `services[].alerts[].failure-threshold` | Number of failures in a row needed before triggering the alert | `3` |
|
||||||
| `services[].alerts[].description` | Description of the alert. Will be included in the alert sent | `""` |
|
| `services[].alerts[].success-threshold` | Number of successes in a row before an ongoing incident is marked as resolved | `2` |
|
||||||
| `services[].alerts[].send-on-resolved` | Whether to send a notification once a triggered alert subsides | `false` |
|
| `services[].alerts[].send-on-resolved` | Whether to send a notification once a triggered alert is marked as resolved | `false` |
|
||||||
| `services[].alerts[].success-before-resolved` | Number of successes in a row needed before sending a resolved notification | `2` |
|
| `services[].alerts[].description` | Description of the alert. Will be included in the alert sent | `""` |
|
||||||
| `alerting` | Configuration for alerting | `{}` |
|
| `alerting` | Configuration for alerting | `{}` |
|
||||||
| `alerting.slack` | Webhook to use for alerts of type `slack` | `""` |
|
| `alerting.slack` | Webhook to use for alerts of type `slack` | `""` |
|
||||||
| `alerting.twilio` | Settings for alerts of type `twilio` | `""` |
|
| `alerting.pagerduty` | PagerDuty Events API v2 integration key. Used for alerts of type `pagerduty` | `""` |
|
||||||
| `alerting.twilio.sid` | Twilio account SID | Required `""` |
|
| `alerting.twilio` | Settings for alerts of type `twilio` | `""` |
|
||||||
| `alerting.twilio.token` | Twilio auth token | Required `""` |
|
| `alerting.twilio.sid` | Twilio account SID | Required `""` |
|
||||||
| `alerting.twilio.from` | Number to send Twilio alerts from | Required `""` |
|
| `alerting.twilio.token` | Twilio auth token | Required `""` |
|
||||||
| `alerting.twilio.to` | Number to send Twilio alerts to | Required `""` |
|
| `alerting.twilio.from` | Number to send Twilio alerts from | Required `""` |
|
||||||
| `alerting.custom` | Configuration for custom actions on failure or alerts | `""` |
|
| `alerting.twilio.to` | Number to send Twilio alerts to | Required `""` |
|
||||||
| `alerting.custom.url` | Custom alerting request URL | `""` |
|
| `alerting.custom` | Configuration for custom actions on failure or alerts | `""` |
|
||||||
| `alerting.custom.body` | Custom alerting request body. | `""` |
|
| `alerting.custom.url` | Custom alerting request URL | `""` |
|
||||||
| `alerting.custom.headers` | Custom alerting request headers | `{}` |
|
| `alerting.custom.body` | Custom alerting request body. | `""` |
|
||||||
|
| `alerting.custom.headers` | Custom alerting request headers | `{}` |
|
||||||
|
|
||||||
|
|
||||||
### Conditions
|
### Conditions
|
||||||
@ -144,7 +145,7 @@ services:
|
|||||||
send-on-resolved: true
|
send-on-resolved: true
|
||||||
- type: slack
|
- type: slack
|
||||||
enabled: true
|
enabled: true
|
||||||
threshold: 5
|
failure-threshold: 5
|
||||||
description: "healthcheck failed 5 times in a row"
|
description: "healthcheck failed 5 times in a row"
|
||||||
send-on-resolved: true
|
send-on-resolved: true
|
||||||
conditions:
|
conditions:
|
||||||
@ -175,10 +176,10 @@ services:
|
|||||||
alerts:
|
alerts:
|
||||||
- type: pagerduty
|
- type: pagerduty
|
||||||
enabled: true
|
enabled: true
|
||||||
threshold: 3
|
failure-threshold: 3
|
||||||
|
success-threshold: 5
|
||||||
description: "healthcheck failed 3 times in a row"
|
description: "healthcheck failed 3 times in a row"
|
||||||
send-on-resolved: true
|
send-on-resolved: true
|
||||||
success-before-resolved: 5
|
|
||||||
conditions:
|
conditions:
|
||||||
- "[STATUS] == 200"
|
- "[STATUS] == 200"
|
||||||
- "[BODY].status == UP"
|
- "[BODY].status == UP"
|
||||||
@ -202,8 +203,9 @@ services:
|
|||||||
alerts:
|
alerts:
|
||||||
- type: twilio
|
- type: twilio
|
||||||
enabled: true
|
enabled: true
|
||||||
threshold: 5
|
failure-threshold: 5
|
||||||
description: "healthcheck failed 5 times in a row"
|
description: "healthcheck failed 5 times in a row"
|
||||||
|
send-on-resolved: true
|
||||||
conditions:
|
conditions:
|
||||||
- "[STATUS] == 200"
|
- "[STATUS] == 200"
|
||||||
- "[BODY].status == UP"
|
- "[BODY].status == UP"
|
||||||
@ -244,7 +246,8 @@ services:
|
|||||||
alerts:
|
alerts:
|
||||||
- type: custom
|
- type: custom
|
||||||
enabled: true
|
enabled: true
|
||||||
threshold: 10
|
failure-threshold: 10
|
||||||
|
success-threshold: 3
|
||||||
send-on-resolved: true
|
send-on-resolved: true
|
||||||
description: "healthcheck failed 10 times in a row"
|
description: "healthcheck failed 10 times in a row"
|
||||||
conditions:
|
conditions:
|
||||||
|
@ -26,7 +26,7 @@ func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *conf
|
|||||||
service.NumberOfFailuresInARow++
|
service.NumberOfFailuresInARow++
|
||||||
for _, alert := range service.Alerts {
|
for _, alert := range service.Alerts {
|
||||||
// If the alert hasn't been triggered, move to the next one
|
// If the alert hasn't been triggered, move to the next one
|
||||||
if !alert.Enabled || alert.Threshold != service.NumberOfFailuresInARow {
|
if !alert.Enabled || alert.FailureThreshold != service.NumberOfFailuresInARow {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if alert.Triggered {
|
if alert.Triggered {
|
||||||
@ -100,7 +100,7 @@ func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *conf
|
|||||||
func handleAlertsToResolve(service *core.Service, result *core.Result, cfg *config.Config) {
|
func handleAlertsToResolve(service *core.Service, result *core.Result, cfg *config.Config) {
|
||||||
service.NumberOfSuccessesInARow++
|
service.NumberOfSuccessesInARow++
|
||||||
for _, alert := range service.Alerts {
|
for _, alert := range service.Alerts {
|
||||||
if !alert.Enabled || !alert.Triggered || alert.SuccessBeforeResolved > service.NumberOfSuccessesInARow {
|
if !alert.Enabled || !alert.Triggered || alert.SuccessThreshold > service.NumberOfSuccessesInARow {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
alert.Triggered = false
|
alert.Triggered = false
|
||||||
|
@ -128,7 +128,7 @@ services:
|
|||||||
alerts:
|
alerts:
|
||||||
- type: slack
|
- type: slack
|
||||||
enabled: true
|
enabled: true
|
||||||
threshold: 7
|
failure-threshold: 7
|
||||||
description: "Healthcheck failed 7 times in a row"
|
description: "Healthcheck failed 7 times in a row"
|
||||||
conditions:
|
conditions:
|
||||||
- "[STATUS] == 200"
|
- "[STATUS] == 200"
|
||||||
@ -166,8 +166,11 @@ services:
|
|||||||
if !config.Services[0].Alerts[0].Enabled {
|
if !config.Services[0].Alerts[0].Enabled {
|
||||||
t.Error("The alert should've been enabled")
|
t.Error("The alert should've been enabled")
|
||||||
}
|
}
|
||||||
if config.Services[0].Alerts[0].Threshold != 7 {
|
if config.Services[0].Alerts[0].FailureThreshold != 7 {
|
||||||
t.Errorf("The threshold of the alert should've been %d, but it was %d", 7, config.Services[0].Alerts[0].Threshold)
|
t.Errorf("The failure threshold of the alert should've been %d, but it was %d", 7, config.Services[0].Alerts[0].FailureThreshold)
|
||||||
|
}
|
||||||
|
if config.Services[0].Alerts[0].FailureThreshold != 7 {
|
||||||
|
t.Errorf("The success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[0].SuccessThreshold)
|
||||||
}
|
}
|
||||||
if config.Services[0].Alerts[0].Type != core.SlackAlert {
|
if config.Services[0].Alerts[0].Type != core.SlackAlert {
|
||||||
t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[0].Type)
|
t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[0].Type)
|
||||||
|
@ -8,8 +8,8 @@ type Alert struct {
|
|||||||
// Enabled defines whether or not the alert is enabled
|
// Enabled defines whether or not the alert is enabled
|
||||||
Enabled bool `yaml:"enabled"`
|
Enabled bool `yaml:"enabled"`
|
||||||
|
|
||||||
// Threshold is the number of failures in a row needed before triggering the alert
|
// FailureThreshold is the number of failures in a row needed before triggering the alert
|
||||||
Threshold int `yaml:"threshold"`
|
FailureThreshold int `yaml:"failure-threshold"`
|
||||||
|
|
||||||
// Description of the alert. Will be included in the alert sent.
|
// Description of the alert. Will be included in the alert sent.
|
||||||
Description string `yaml:"description"`
|
Description string `yaml:"description"`
|
||||||
@ -17,8 +17,8 @@ type Alert struct {
|
|||||||
// SendOnResolved defines whether to send a second notification when the issue has been resolved
|
// SendOnResolved defines whether to send a second notification when the issue has been resolved
|
||||||
SendOnResolved bool `yaml:"send-on-resolved"`
|
SendOnResolved bool `yaml:"send-on-resolved"`
|
||||||
|
|
||||||
// SuccessBeforeResolved defines whether to send a second notification when the issue has been resolved
|
// SuccessThreshold defines how many successful executions must happen in a row before an ongoing incident is marked as resolved
|
||||||
SuccessBeforeResolved int `yaml:"success-before-resolved"`
|
SuccessThreshold int `yaml:"success-threshold"`
|
||||||
|
|
||||||
// ResolveKey is an optional field that is used by some providers (i.e. PagerDuty's dedup_key) to resolve
|
// ResolveKey is an optional field that is used by some providers (i.e. PagerDuty's dedup_key) to resolve
|
||||||
// ongoing/triggered incidents
|
// ongoing/triggered incidents
|
||||||
|
@ -99,10 +99,10 @@ func CreateSlackCustomAlertProvider(slackWebHookUrl string, service *Service, al
|
|||||||
var message string
|
var message string
|
||||||
var color string
|
var color string
|
||||||
if resolved {
|
if resolved {
|
||||||
message = fmt.Sprintf("An alert for *%s* has been resolved after passing successfully %d time(s) in a row", service.Name, alert.SuccessBeforeResolved)
|
message = fmt.Sprintf("An alert for *%s* has been resolved after passing successfully %d time(s) in a row", service.Name, alert.SuccessThreshold)
|
||||||
color = "#36A64F"
|
color = "#36A64F"
|
||||||
} else {
|
} else {
|
||||||
message = fmt.Sprintf("An alert for *%s* has been triggered due to having failed %d time(s) in a row", service.Name, alert.Threshold)
|
message = fmt.Sprintf("An alert for *%s* has been triggered due to having failed %d time(s) in a row", service.Name, alert.FailureThreshold)
|
||||||
color = "#DD0000"
|
color = "#DD0000"
|
||||||
}
|
}
|
||||||
var results string
|
var results string
|
||||||
|
@ -62,11 +62,11 @@ func (service *Service) Validate() {
|
|||||||
service.Headers = make(map[string]string)
|
service.Headers = make(map[string]string)
|
||||||
}
|
}
|
||||||
for _, alert := range service.Alerts {
|
for _, alert := range service.Alerts {
|
||||||
if alert.Threshold <= 0 {
|
if alert.FailureThreshold <= 0 {
|
||||||
alert.Threshold = 3
|
alert.FailureThreshold = 3
|
||||||
}
|
}
|
||||||
if alert.SuccessBeforeResolved <= 0 {
|
if alert.SuccessThreshold <= 0 {
|
||||||
alert.SuccessBeforeResolved = 2
|
alert.SuccessThreshold = 2
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(service.Url) == 0 {
|
if len(service.Url) == 0 {
|
||||||
@ -107,7 +107,7 @@ func (service *Service) GetAlertsTriggered() []Alert {
|
|||||||
return alerts
|
return alerts
|
||||||
}
|
}
|
||||||
for _, alert := range service.Alerts {
|
for _, alert := range service.Alerts {
|
||||||
if alert.Enabled && alert.Threshold == service.NumberOfFailuresInARow {
|
if alert.Enabled && alert.FailureThreshold == service.NumberOfFailuresInARow {
|
||||||
alerts = append(alerts, *alert)
|
alerts = append(alerts, *alert)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user