Support custom alert provider

TwinProduction 2020-08-27 22:23:21 -04:00
parent 4b57654592
commit 37c4715453
8 changed files with 151 additions and 48 deletions

View File

@ -11,6 +11,20 @@ I personally deploy it in my Kubernetes cluster and have it monitor the status of my
core applications: https://status.twinnation.org/
## Table of Contents
- [Usage](#usage)
- [Configuration](#configuration)
- [Conditions](#conditions)
- [Docker](#docker)
- [Running the tests](#running-the-tests)
- [Using in Production](#using-in-production)
- [FAQ](#faq)
- [Sending a GraphQL request](#sending-a-graphql-request)
- [Configuring Slack alerts](#configuring-slack-alerts)
- [Configuring custom alerts](#configuring-custom-alerts)
## Usage
By default, the configuration file is expected to be at `config/config.yaml`.
@ -23,14 +37,14 @@ Here's a simple example:
metrics: true         # Whether to expose metrics at /metrics
services:
  - name: twinnation  # Name of your service, can be anything
    url: https://twinnation.org/health
    interval: 15s     # Duration to wait between every status check (default: 10s)
    url: "https://twinnation.org/health"
    interval: 30s     # Duration to wait between every status check (default: 10s)
    conditions:
      - "[STATUS] == 200"        # Status must be 200
      - "[BODY].status == UP"    # The json path "$.status" must be equal to UP
      - "[RESPONSE_TIME] < 300"  # Response time must be under 300ms
  - name: example
    url: https://example.org/
    url: "https://example.org/"
    interval: 30s
    conditions:
      - "[STATUS] == 200"
@ -44,7 +58,7 @@ Note that you can also add environment variables in your configuration file
| Parameter | Description | Default |
| --------------------------------- | --------------------------------------------------------------- | -------------- |
| `metrics` | Whether to expose metrics at /metrics | `false` |
| `alerting.slack` | Webhook to use for alerts of type `slack` | `""` |
| `services` | List of services to monitor | Required `[]` |
| `services[].name` | Name of the service. Can be anything. | Required `""` |
| `services[].url` | URL to send the request to | Required `""` |
| `services[].conditions` | Conditions used to determine the health of the service | `[]` |
@ -53,10 +67,16 @@ Note that you can also add environment variables in the your configuration file
| `services[].graphql` | Whether to wrap the body in a query param (`{"query":"$body"}`) | `false` |
| `services[].body` | Request body | `""` |
| `services[].headers` | Request headers | `{}` |
| `services[].alerts[].type` | Type of alert. Currently, only `slack` is supported | Required `""` |
| `services[].alerts[].type` | Type of alert. Valid types: `slack`, `custom` | Required `""` |
| `services[].alerts[].enabled` | Whether to enable the alert | `false` |
| `services[].alerts[].threshold` | Number of failures in a row needed before triggering the alert | `3` |
| `services[].alerts[].description` | Description of the alert. Will be included in the alert sent | `""` |
| `alerting`                        | Configuration for alerting                                      | `{}`           |
| `alerting.slack`                  | Webhook to use for alerts of type `slack`                       | `""`           |
| `alerting.custom`                 | Configuration for custom actions on failure or alerts           | `{}`           |
| `alerting.custom.url`             | Custom alerting request URL                                     | `""`           |
| `alerting.custom.method`          | Custom alerting request method (e.g. `POST`); `GET` when empty  | `""`           |
| `alerting.custom.body`            | Custom alerting request body                                    | `""`           |
| `alerting.custom.headers`         | Custom alerting request headers                                 | `{}`           |
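
For reference, a minimal `alerting` section combining both providers might look like the sketch below; the webhook and endpoint URLs are placeholders, and the complete walkthrough is in [Configuring custom alerts](#configuring-custom-alerts):
```yaml
alerting:
  slack: "https://hooks.slack.com/services/**********/**********/**********"
  custom:
    url: "https://example.com/deploy-hook/[SERVICE_NAME]"
    method: "POST"
    body: |
      {
        "service": "[SERVICE_NAME]",
        "description": "[ALERT_DESCRIPTION]"
      }
    headers:
      Content-Type: application/json
```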
### Conditions
@ -145,11 +165,11 @@ will send a `POST` request to `http://localhost:8080/playground` with the following
```yaml
alerting:
  slack: https://hooks.slack.com/services/**********/**********/**********
  slack: "https://hooks.slack.com/services/**********/**********/**********"
services:
  - name: twinnation
    interval: 30s
    url: https://twinnation.org/health
    url: "https://twinnation.org/health"
    alerts:
      - type: slack
        enabled: true
@ -163,3 +183,42 @@ services:
- "[BODY].status == UP"
- "[RESPONSE_TIME] < 300"
```
### Configuring custom alerts
While they're called alerts, you can use this feature to call anything.

For instance, you could automate rollbacks by having an application that keeps track of new deployments, and having Gatus call that application's endpoint when a service starts failing. Your application would then check whether the service that started failing was recently deployed and, if it was, automatically roll it back.

The placeholders `[ALERT_DESCRIPTION]` and `[SERVICE_NAME]` are automatically replaced by the alert description and the service name, respectively, in both the body (`alerting.custom.body`) and the URL (`alerting.custom.url`).

For all intents and purposes, we'll configure the custom alert with a Slack webhook here, but you can call anything you want.
```yaml
alerting:
  custom:
    url: "https://hooks.slack.com/services/**********/**********/**********"
    method: "POST"
    body: |
      {
        "text": "[SERVICE_NAME] - [ALERT_DESCRIPTION]"
      }
services:
  - name: twinnation
    interval: 30s
    url: "https://twinnation.org/health"
    alerts:
      - type: custom
        enabled: true
        threshold: 10
        description: "healthcheck failed 10 times in a row"
    conditions:
      - "[STATUS] == 200"
      - "[BODY].status == UP"
      - "[RESPONSE_TIME] < 300"
```
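
With the example above, once the `twinnation` health check has failed 10 times in a row, Gatus would send a `POST` request to the configured URL with a JSON body whose `text` field resolves to `twinnation - healthcheck failed 10 times in a row`.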

View File

@ -1,31 +0,0 @@
package alerting

import (
	"bytes"
	"encoding/json"
	"fmt"
	"github.com/TwinProduction/gatus/client"
	"io/ioutil"
)

type requestBody struct {
	Text string `json:"text"`
}

// SendSlackMessage sends a message to the given Slack webhook
func SendSlackMessage(webhookUrl, service, description string) error {
	body, _ := json.Marshal(requestBody{Text: fmt.Sprintf("*[Gatus]*\n*service:* %s\n*description:* %s", service, description)})
	response, err := client.GetHttpClient().Post(webhookUrl, "application/json", bytes.NewBuffer(body))
	if err != nil {
		return err
	}
	defer response.Body.Close()
	output, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return fmt.Errorf("unable to read response body: %v", err.Error())
	}
	if string(output) != "ok" {
		return fmt.Errorf("error: %s", string(output))
	}
	return nil
}

View File

@ -9,7 +9,7 @@ services:
- "[RESPONSE_TIME] < 1000"
- name: twinnation-articles-api
interval: 30s
url: https://twinnation.org/api/v1/articles/24
url: "https://twinnation.org/api/v1/articles/24"
conditions:
- "[STATUS] == 200"
- "[BODY].id == 24"

View File

@ -22,7 +22,7 @@ var (
type Config struct {
	Metrics  bool                 `yaml:"metrics"`
	Alerting *core.Alerting       `yaml:"alerting"`
	Alerting *core.AlertingConfig `yaml:"alerting"`
	Services []*core.Service      `yaml:"services"`
}

View File

@ -143,7 +143,7 @@ services:
t.Error("Metrics should've been false by default")
}
if config.Alerting == nil {
t.Fatal("config.Alerting shouldn't have been nil")
t.Fatal("config.AlertingConfig shouldn't have been nil")
}
if config.Alerting.Slack != "http://example.com" {
t.Errorf("Slack webhook should've been %s, but was %s", "http://example.com", config.Alerting.Slack)

View File

@ -19,4 +19,5 @@ type AlertType string
const (
	SlackAlert  AlertType = "slack"
	CustomAlert AlertType = "custom"
)

View File

@ -1,5 +1,56 @@
package core

type Alerting struct {

import (
	"bytes"
	"fmt"
	"github.com/TwinProduction/gatus/client"
	"net/http"
	"strings"
)

// AlertingConfig is the configuration for the alerting providers
type AlertingConfig struct {
	Slack  string               `yaml:"slack"`
	Custom *CustomAlertProvider `yaml:"custom"`
}

// CustomAlertProvider is the configuration for a custom alert provider
type CustomAlertProvider struct {
	Url     string            `yaml:"url"`
	Method  string            `yaml:"method,omitempty"`
	Body    string            `yaml:"body,omitempty"`
	Headers map[string]string `yaml:"headers,omitempty"`
}
// buildRequest builds the HTTP request used to trigger the custom alert,
// substituting the [ALERT_DESCRIPTION] and [SERVICE_NAME] placeholders
// in both the body and the URL
func (provider *CustomAlertProvider) buildRequest(serviceName, alertDescription string) *http.Request {
	body := provider.Body
	url := provider.Url
	if strings.Contains(body, "[ALERT_DESCRIPTION]") {
		body = strings.ReplaceAll(body, "[ALERT_DESCRIPTION]", alertDescription)
	}
	if strings.Contains(body, "[SERVICE_NAME]") {
		body = strings.ReplaceAll(body, "[SERVICE_NAME]", serviceName)
	}
	if strings.Contains(url, "[ALERT_DESCRIPTION]") {
		url = strings.ReplaceAll(url, "[ALERT_DESCRIPTION]", alertDescription)
	}
	if strings.Contains(url, "[SERVICE_NAME]") {
		url = strings.ReplaceAll(url, "[SERVICE_NAME]", serviceName)
	}
	bodyBuffer := bytes.NewBuffer([]byte(body))
	// An empty Method defaults to GET in http.NewRequest
	request, _ := http.NewRequest(provider.Method, url, bodyBuffer)
	for k, v := range provider.Headers {
		request.Header.Set(k, v)
	}
	return request
}
// Send sends the custom alert request for the given service and alert description
func (provider *CustomAlertProvider) Send(serviceName, alertDescription string) error {
	request := provider.buildRequest(serviceName, alertDescription)
	response, err := client.GetHttpClient().Do(request)
	if err != nil {
		return err
	}
	defer response.Body.Close()
	if response.StatusCode > 399 {
		return fmt.Errorf("call to alert provider returned status code %d", response.StatusCode)
	}
	return nil
}
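
For illustration, here's a minimal sketch of how the new provider could be exercised from a test in the `core` package. This is a hypothetical test (not part of the commit), and the service name and description are made up; it assumes `client.GetHttpClient()` behaves like a regular `*http.Client`:

```go
package core

import (
	"io/ioutil"
	"net/http"
	"net/http/httptest"
	"testing"
)

func TestCustomAlertProvider_Send(t *testing.T) {
	// Spin up a local server that records the body the provider sends
	var receivedBody string
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		body, _ := ioutil.ReadAll(r.Body)
		receivedBody = string(body)
		w.WriteHeader(http.StatusOK)
	}))
	defer server.Close()
	provider := &CustomAlertProvider{
		Url:    server.URL,
		Method: "POST",
		Body:   "[SERVICE_NAME] - [ALERT_DESCRIPTION]",
	}
	if err := provider.Send("twinnation", "healthcheck failed"); err != nil {
		t.Fatal(err)
	}
	// Both placeholders should have been substituted
	if receivedBody != "twinnation - healthcheck failed" {
		t.Errorf("expected substituted body, got %s", receivedBody)
	}
}
```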

View File

@ -2,7 +2,6 @@ package watchdog
import (
	"fmt"
	"github.com/TwinProduction/gatus/alerting"
	"github.com/TwinProduction/gatus/config"
	"github.com/TwinProduction/gatus/core"
	"github.com/TwinProduction/gatus/metric"
@ -59,13 +58,37 @@ func monitor(service *core.Service) {
	cfg := config.Get()
	if cfg.Alerting != nil {
		for _, alertTriggered := range service.GetAlertsTriggered() {
			var alertProvider *core.CustomAlertProvider
			if alertTriggered.Type == core.SlackAlert {
				if len(cfg.Alerting.Slack) > 0 {
					log.Printf("[watchdog][monitor] Sending Slack alert because alert with description=%s has been triggered", alertTriggered.Description)
					alerting.SendSlackMessage(cfg.Alerting.Slack, service.Name, alertTriggered.Description)
					alertProvider = &core.CustomAlertProvider{
						Url:     cfg.Alerting.Slack,
						Method:  "POST",
						Body:    fmt.Sprintf(`{"text":"*[Gatus]*\n*service:* %s\n*description:* %s"}`, service.Name, alertTriggered.Description),
						Headers: map[string]string{"Content-Type": "application/json"},
					}
				} else {
					log.Printf("[watchdog][monitor] Not sending Slack alert despite being triggered, because there is no Slack webhook configured")
				}
			} else if alertTriggered.Type == core.CustomAlert {
				if cfg.Alerting.Custom != nil && len(cfg.Alerting.Custom.Url) > 0 {
					log.Printf("[watchdog][monitor] Sending custom alert because alert with description=%s has been triggered", alertTriggered.Description)
					alertProvider = &core.CustomAlertProvider{
						Url:     cfg.Alerting.Custom.Url,
						Method:  cfg.Alerting.Custom.Method,
						Body:    cfg.Alerting.Custom.Body,
						Headers: cfg.Alerting.Custom.Headers,
					}
				} else {
					log.Printf("[watchdog][monitor] Not sending custom alert despite being triggered, because there is no custom url configured")
				}
			}
			if alertProvider != nil {
				err := alertProvider.Send(service.Name, alertTriggered.Description)
				if err != nil {
					log.Printf("[watchdog][monitor] Ran into error sending an alert: %s", err.Error())
				}
			}
		}
	}