mirror of
https://github.com/TwiN/gatus.git
synced 2024-11-25 09:24:04 +01:00
Rename services[].alerts[]'s threshold and success-before-resolved to failure-threshold and success-threshold
This commit is contained in:
parent
fefc728201
commit
c6f11e63e4
23
README.md
23
README.md
@ -75,7 +75,7 @@ Note that you can also add environment variables in the configuration file (i.e.
|
||||
### Configuration
|
||||
|
||||
| Parameter | Description | Default |
|
||||
| --------------------------------------------- | -------------------------------------------------------------------------- | -------------- |
|
||||
| ---------------------------------------- | ----------------------------------------------------------------------------- | -------------- |
|
||||
| `debug` | Whether to enable debug logs | `false` |
|
||||
| `metrics` | Whether to expose metrics at /metrics | `false` |
|
||||
| `services` | List of services to monitor | Required `[]` |
|
||||
@ -87,14 +87,15 @@ Note that you can also add environment variables in the configuration file (i.e.
|
||||
| `services[].graphql` | Whether to wrap the body in a query param (`{"query":"$body"}`) | `false` |
|
||||
| `services[].body` | Request body | `""` |
|
||||
| `services[].headers` | Request headers | `{}` |
|
||||
| `services[].alerts[].type` | Type of alert. Valid types: `slack`, `twilio`, `custom` | Required `""` |
|
||||
| `services[].alerts[].type` | Type of alert. Valid types: `slack`, `pagerduty`, `twilio`, `custom` | Required `""` |
|
||||
| `services[].alerts[].enabled` | Whether to enable the alert | `false` |
|
||||
| `services[].alerts[].threshold` | Number of failures in a row needed before triggering the alert | `3` |
|
||||
| `services[].alerts[].failure-threshold` | Number of failures in a row needed before triggering the alert | `3` |
|
||||
| `services[].alerts[].success-threshold` | Number of successes in a row before an ongoing incident is marked as resolved | `2` |
|
||||
| `services[].alerts[].send-on-resolved` | Whether to send a notification once a triggered alert is marked as resolved | `false` |
|
||||
| `services[].alerts[].description` | Description of the alert. Will be included in the alert sent | `""` |
|
||||
| `services[].alerts[].send-on-resolved` | Whether to send a notification once a triggered alert subsides | `false` |
|
||||
| `services[].alerts[].success-before-resolved` | Number of successes in a row needed before sending a resolved notification | `2` |
|
||||
| `alerting` | Configuration for alerting | `{}` |
|
||||
| `alerting.slack` | Webhook to use for alerts of type `slack` | `""` |
|
||||
| `alerting.pagerduty` | PagerDuty Events API v2 integration key. Used for alerts of type `pagerduty` | `""` |
|
||||
| `alerting.twilio` | Settings for alerts of type `twilio` | `{}` |
|
||||
| `alerting.twilio.sid` | Twilio account SID | Required `""` |
|
||||
| `alerting.twilio.token` | Twilio auth token | Required `""` |
|
||||
@ -144,7 +145,7 @@ services:
|
||||
send-on-resolved: true
|
||||
- type: slack
|
||||
enabled: true
|
||||
threshold: 5
|
||||
failure-threshold: 5
|
||||
description: "healthcheck failed 5 times in a row"
|
||||
send-on-resolved: true
|
||||
conditions:
|
||||
@ -175,10 +176,10 @@ services:
|
||||
alerts:
|
||||
- type: pagerduty
|
||||
enabled: true
|
||||
threshold: 3
|
||||
failure-threshold: 3
|
||||
success-threshold: 5
|
||||
description: "healthcheck failed 3 times in a row"
|
||||
send-on-resolved: true
|
||||
success-before-resolved: 5
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- "[BODY].status == UP"
|
||||
@ -202,8 +203,9 @@ services:
|
||||
alerts:
|
||||
- type: twilio
|
||||
enabled: true
|
||||
threshold: 5
|
||||
failure-threshold: 5
|
||||
description: "healthcheck failed 5 times in a row"
|
||||
send-on-resolved: true
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- "[BODY].status == UP"
|
||||
@ -244,7 +246,8 @@ services:
|
||||
alerts:
|
||||
- type: custom
|
||||
enabled: true
|
||||
threshold: 10
|
||||
failure-threshold: 10
|
||||
success-threshold: 3
|
||||
send-on-resolved: true
|
||||
description: "healthcheck failed 10 times in a row"
|
||||
conditions:
|
||||
|
@ -26,7 +26,7 @@ func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *conf
|
||||
service.NumberOfFailuresInARow++
|
||||
for _, alert := range service.Alerts {
|
||||
// If the alert hasn't been triggered, move to the next one
|
||||
if !alert.Enabled || alert.Threshold != service.NumberOfFailuresInARow {
|
||||
if !alert.Enabled || alert.FailureThreshold != service.NumberOfFailuresInARow {
|
||||
continue
|
||||
}
|
||||
if alert.Triggered {
|
||||
@ -100,7 +100,7 @@ func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *conf
|
||||
func handleAlertsToResolve(service *core.Service, result *core.Result, cfg *config.Config) {
|
||||
service.NumberOfSuccessesInARow++
|
||||
for _, alert := range service.Alerts {
|
||||
if !alert.Enabled || !alert.Triggered || alert.SuccessBeforeResolved > service.NumberOfSuccessesInARow {
|
||||
if !alert.Enabled || !alert.Triggered || alert.SuccessThreshold > service.NumberOfSuccessesInARow {
|
||||
continue
|
||||
}
|
||||
alert.Triggered = false
|
||||
|
@ -128,7 +128,7 @@ services:
|
||||
alerts:
|
||||
- type: slack
|
||||
enabled: true
|
||||
threshold: 7
|
||||
failure-threshold: 7
|
||||
description: "Healthcheck failed 7 times in a row"
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
@ -166,8 +166,11 @@ services:
|
||||
if !config.Services[0].Alerts[0].Enabled {
|
||||
t.Error("The alert should've been enabled")
|
||||
}
|
||||
if config.Services[0].Alerts[0].Threshold != 7 {
|
||||
t.Errorf("The threshold of the alert should've been %d, but it was %d", 7, config.Services[0].Alerts[0].Threshold)
|
||||
if config.Services[0].Alerts[0].FailureThreshold != 7 {
|
||||
t.Errorf("The failure threshold of the alert should've been %d, but it was %d", 7, config.Services[0].Alerts[0].FailureThreshold)
|
||||
}
|
||||
if config.Services[0].Alerts[0].SuccessThreshold != 2 {
|
||||
t.Errorf("The success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[0].SuccessThreshold)
|
||||
}
|
||||
if config.Services[0].Alerts[0].Type != core.SlackAlert {
|
||||
t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[0].Type)
|
||||
|
@ -8,8 +8,8 @@ type Alert struct {
|
||||
// Enabled defines whether or not the alert is enabled
|
||||
Enabled bool `yaml:"enabled"`
|
||||
|
||||
// Threshold is the number of failures in a row needed before triggering the alert
|
||||
Threshold int `yaml:"threshold"`
|
||||
// FailureThreshold is the number of failures in a row needed before triggering the alert
|
||||
FailureThreshold int `yaml:"failure-threshold"`
|
||||
|
||||
// Description of the alert. Will be included in the alert sent.
|
||||
Description string `yaml:"description"`
|
||||
@ -17,8 +17,8 @@ type Alert struct {
|
||||
// SendOnResolved defines whether to send a second notification when the issue has been resolved
|
||||
SendOnResolved bool `yaml:"send-on-resolved"`
|
||||
|
||||
// SuccessBeforeResolved defines whether to send a second notification when the issue has been resolved
|
||||
SuccessBeforeResolved int `yaml:"success-before-resolved"`
|
||||
// SuccessThreshold defines how many successful executions must happen in a row before an ongoing incident is marked as resolved
|
||||
SuccessThreshold int `yaml:"success-threshold"`
|
||||
|
||||
// ResolveKey is an optional field that is used by some providers (i.e. PagerDuty's dedup_key) to resolve
|
||||
// ongoing/triggered incidents
|
||||
|
@ -99,10 +99,10 @@ func CreateSlackCustomAlertProvider(slackWebHookUrl string, service *Service, al
|
||||
var message string
|
||||
var color string
|
||||
if resolved {
|
||||
message = fmt.Sprintf("An alert for *%s* has been resolved after passing successfully %d time(s) in a row", service.Name, alert.SuccessBeforeResolved)
|
||||
message = fmt.Sprintf("An alert for *%s* has been resolved after passing successfully %d time(s) in a row", service.Name, alert.SuccessThreshold)
|
||||
color = "#36A64F"
|
||||
} else {
|
||||
message = fmt.Sprintf("An alert for *%s* has been triggered due to having failed %d time(s) in a row", service.Name, alert.Threshold)
|
||||
message = fmt.Sprintf("An alert for *%s* has been triggered due to having failed %d time(s) in a row", service.Name, alert.FailureThreshold)
|
||||
color = "#DD0000"
|
||||
}
|
||||
var results string
|
||||
|
@ -62,11 +62,11 @@ func (service *Service) Validate() {
|
||||
service.Headers = make(map[string]string)
|
||||
}
|
||||
for _, alert := range service.Alerts {
|
||||
if alert.Threshold <= 0 {
|
||||
alert.Threshold = 3
|
||||
if alert.FailureThreshold <= 0 {
|
||||
alert.FailureThreshold = 3
|
||||
}
|
||||
if alert.SuccessBeforeResolved <= 0 {
|
||||
alert.SuccessBeforeResolved = 2
|
||||
if alert.SuccessThreshold <= 0 {
|
||||
alert.SuccessThreshold = 2
|
||||
}
|
||||
}
|
||||
if len(service.Url) == 0 {
|
||||
@ -107,7 +107,7 @@ func (service *Service) GetAlertsTriggered() []Alert {
|
||||
return alerts
|
||||
}
|
||||
for _, alert := range service.Alerts {
|
||||
if alert.Enabled && alert.Threshold == service.NumberOfFailuresInARow {
|
||||
if alert.Enabled && alert.FailureThreshold == service.NumberOfFailuresInARow {
|
||||
alerts = append(alerts, *alert)
|
||||
continue
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user