package watchdog

import (
	"errors"
	"log"
	"os"

	"github.com/TwiN/gatus/v5/alerting"
	"github.com/TwiN/gatus/v5/config/endpoint"
	"github.com/TwiN/gatus/v5/storage/store"
)
// HandleAlerting takes care of alerts to resolve and alerts to trigger based on result success or failure
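//
// It is expected to be called after every health-check evaluation; a minimal, hypothetical invocation might look
// like HandleAlerting(ep, result, alertingConfig, false), where result is the outcome of the latest check for ep.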
func HandleAlerting(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) {
	if alertingConfig == nil {
		return
	}
	if result.Success {
		handleAlertsToResolve(ep, result, alertingConfig, debug)
	} else {
		handleAlertsToTrigger(ep, result, alertingConfig, debug)
	}
}
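// handleAlertsToTrigger increments the endpoint's consecutive-failure counter and, for every enabled alert whose
// failure threshold has been reached and that hasn't already been triggered, sends a notification through the
// configured provider and persists the alert's triggered state.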
func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) {
	ep.NumberOfSuccessesInARow = 0
	ep.NumberOfFailuresInARow++
	for _, endpointAlert := range ep.Alerts {
		// If the alert is disabled or its failure threshold hasn't been reached yet, move on to the next alert
		if !endpointAlert.IsEnabled() || endpointAlert.FailureThreshold > ep.NumberOfFailuresInARow {
			continue
		}
		if endpointAlert.Triggered {
			if debug {
				log.Printf("[watchdog.handleAlertsToTrigger] Alert for endpoint=%s with description='%s' has already been TRIGGERED, skipping", ep.Name, endpointAlert.GetDescription())
			}
			continue
		}
		alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
		if alertProvider != nil {
			log.Printf("[watchdog.handleAlertsToTrigger] Sending %s alert because alert for endpoint=%s with description='%s' has been TRIGGERED", endpointAlert.Type, ep.Name, endpointAlert.GetDescription())
			var err error
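			// MOCK_ALERT_PROVIDER and MOCK_ALERT_PROVIDER_ERROR appear to be test hooks: the former bypasses the
			// real provider entirely, while the latter simulates a send failure.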
			if os.Getenv("MOCK_ALERT_PROVIDER") == "true" {
				if os.Getenv("MOCK_ALERT_PROVIDER_ERROR") == "true" {
					err = errors.New("error")
				}
			} else {
				err = alertProvider.Send(ep, endpointAlert, result, false)
			}
			if err != nil {
				log.Printf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint=%s: %s", ep.Name, err.Error())
			} else {
				endpointAlert.Triggered = true
				if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
					log.Printf("[watchdog.handleAlertsToTrigger] Failed to persist triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
				}
			}
		} else {
			log.Printf("[watchdog.handleAlertsToTrigger] Not sending alert of type=%s despite being TRIGGERED, because the provider wasn't configured properly", endpointAlert.Type)
		}
	}
}
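// handleAlertsToResolve increments the endpoint's consecutive-success counter and, for every enabled alert that was
// previously triggered and has reached its success threshold, clears the alert's triggered state, removes it from
// the store, and sends a resolution notification if the alert is configured to do so.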
func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) {
	ep.NumberOfSuccessesInARow++
	for _, endpointAlert := range ep.Alerts {
		isStillBelowSuccessThreshold := endpointAlert.SuccessThreshold > ep.NumberOfSuccessesInARow
		if isStillBelowSuccessThreshold && endpointAlert.IsEnabled() && endpointAlert.Triggered {
			// Persist NumberOfSuccessesInARow
			if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
				log.Printf("[watchdog.handleAlertsToResolve] Failed to update triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
			}
		}
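		// Skip the alert unless it is enabled, was previously triggered, and has reached its success threshold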
		if !endpointAlert.IsEnabled() || !endpointAlert.Triggered || isStillBelowSuccessThreshold {
			continue
		}
		// Even if the alert provider returns an error, we still set the alert's Triggered variable to false.
		// Further explanation can be found on Alert's Triggered field.
		endpointAlert.Triggered = false
		if err := store.Get().DeleteTriggeredEndpointAlert(ep, endpointAlert); err != nil {
			log.Printf("[watchdog.handleAlertsToResolve] Failed to delete persisted triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
		}
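		// Only send a notification about the resolution if the alert is configured with send-on-resolved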
		if !endpointAlert.IsSendingOnResolved() {
			continue
		}
		alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
		if alertProvider != nil {
			log.Printf("[watchdog.handleAlertsToResolve] Sending %s alert because alert for endpoint with key=%s with description='%s' has been RESOLVED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
			err := alertProvider.Send(ep, endpointAlert, result, true)
			if err != nil {
				log.Printf("[watchdog.handleAlertsToResolve] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
			}
		} else {
			log.Printf("[watchdog.handleAlertsToResolve] Not sending alert of type=%s despite being RESOLVED, because the provider wasn't configured properly", endpointAlert.Type)
		}
	}
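	// The endpoint is healthy again, so reset the consecutive-failure counter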
	ep.NumberOfFailuresInARow = 0
}