From 37c471545372607143a6c4702b79025e1e53c255 Mon Sep 17 00:00:00 2001
From: TwinProduction
Date: Thu, 27 Aug 2020 22:23:21 -0400
Subject: [PATCH] Support custom alert provider

---
 README.md             | 73 ++++++++++++++++++++++++++++++++++++++-----
 alerting/slack.go     | 31 ------------------
 config.yaml           |  2 +-
 config/config.go      |  6 ++--
 config/config_test.go |  2 +-
 core/alert.go         |  3 +-
 core/alerting.go      | 55 ++++++++++++++++++++++++++++++--
 watchdog/watchdog.go  | 27 ++++++++++++++--
 8 files changed, 151 insertions(+), 48 deletions(-)
 delete mode 100644 alerting/slack.go

diff --git a/README.md b/README.md
index b8df3071..29dd95fb 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,20 @@ I personally deploy it in my Kubernetes cluster and have it monitor the status of my
 core applications: https://status.twinnation.org/
 
 
+## Table of Contents
+
+- [Usage](#usage)
+  - [Configuration](#configuration)
+  - [Conditions](#conditions)
+- [Docker](#docker)
+- [Running the tests](#running-the-tests)
+- [Using in Production](#using-in-production)
+- [FAQ](#faq)
+  - [Sending a GraphQL request](#sending-a-graphql-request)
+  - [Configuring Slack alerts](#configuring-slack-alerts)
+  - [Configuring custom alerts](#configuring-custom-alerts)
+
+
 ## Usage
 
 By default, the configuration file is expected to be at `config/config.yaml`.
@@ -23,14 +37,14 @@ Here's a simple example:
 metrics: true         # Whether to expose metrics at /metrics
 services:
   - name: twinnation  # Name of your service, can be anything
-    url: https://twinnation.org/health
-    interval: 15s     # Duration to wait between every status check (default: 10s)
+    url: "https://twinnation.org/health"
+    interval: 30s     # Duration to wait between every status check (default: 10s)
     conditions:
       - "[STATUS] == 200"         # Status must be 200
       - "[BODY].status == UP"     # The json path "$.status" must be equal to UP
       - "[RESPONSE_TIME] < 300"   # Response time must be under 300ms
   - name: example
-    url: https://example.org/
+    url: "https://example.org/"
     interval: 30s
     conditions:
       - "[STATUS] == 200"
@@ -44,7 +58,7 @@ Note that you can also add environment variables in your configuration file
 
 | Parameter                          | Description                                                      | Default        |
 | ---------------------------------- | ---------------------------------------------------------------- | -------------- |
 | `metrics`                          | Whether to expose metrics at /metrics                            | `false`        |
-| `alerting.slack`                   | Webhook to use for alerts of type `slack`                        | `""`           |
+| `services`                         | List of services to monitor                                      | Required `[]`  |
 | `services[].name`                  | Name of the service. Can be anything.                            | Required `""`  |
 | `services[].url`                   | URL to send the request to                                       | Required `""`  |
 | `services[].conditions`            | Conditions used to determine the health of the service           | `[]`           |
 | `services[].graphql`               | Whether to wrap the body in a query param (`{"query":"$body"}`)  | `false`        |
 | `services[].body`                  | Request body                                                     | `""`           |
 | `services[].headers`               | Request headers                                                  | `{}`           |
-| `services[].alerts[].type`         | Type of alert. Currently, only `slack` is supported              | Required `""`  |
+| `services[].alerts[].type`         | Type of alert. Valid types: `slack`, `custom`                    | Required `""`  |
 | `services[].alerts[].enabled`      | Whether to enable the alert                                      | `false`        |
 | `services[].alerts[].threshold`    | Number of failures in a row needed before triggering the alert   | `3`            |
 | `services[].alerts[].description`  | Description of the alert. Will be included in the alert sent     | `""`           |
+| `alerting`                         | Configuration for alerting                                       | `{}`           |
+| `alerting.slack`                   | Webhook to use for alerts of type `slack`                        | `""`           |
+| `alerting.custom`                  | Configuration for custom actions on failure or alerts            | `{}`           |
+| `alerting.custom.url`              | Custom alerting request url                                      | `""`           |
+| `alerting.custom.method`           | Custom alerting request method                                   | `GET`          |
+| `alerting.custom.body`             | Custom alerting request body                                     | `""`           |
+| `alerting.custom.headers`          | Custom alerting request headers                                  | `{}`           |
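+
+For example, here is how the `alerting.custom` parameters fit together — a minimal sketch, where the URL, body, and
+header are placeholders (see [Configuring custom alerts](#configuring-custom-alerts) for a complete example):
+
+```yaml
+alerting:
+  custom:
+    url: "https://example.org/alert-hook"
+    method: "POST"
+    body: "[SERVICE_NAME] - [ALERT_DESCRIPTION]"
+    headers:
+      Content-Type: "text/plain"
+```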
 
 
 ### Conditions
 
@@ -145,11 +165,11 @@ will send a `POST` request to `http://localhost:8080/playground` with the following body:
 
 ```yaml
 alerting:
-  slack: https://hooks.slack.com/services/**********/**********/**********
+  slack: "https://hooks.slack.com/services/**********/**********/**********"
 services:
   - name: twinnation
     interval: 30s
-    url: https://twinnation.org/health
+    url: "https://twinnation.org/health"
     alerts:
       - type: slack
         enabled: true
@@ -162,4 +182,43 @@ services:
     conditions:
       - "[STATUS] == 200"
       - "[BODY].status == UP"
       - "[RESPONSE_TIME] < 300"
+```
+
+
+### Configuring custom alerts
+
+While they're called alerts, you can use this feature to call anything.
+
+For instance, you could automate rollbacks by having an application that keeps track of new deployments, and by
+having Gatus call that application's endpoint when a service starts failing. Your application would then check
+whether the service that started failing was recently deployed, and if it was, automatically roll it back.
+
+The values `[ALERT_DESCRIPTION]` and `[SERVICE_NAME]` are automatically replaced with the alert description and the
+service name, respectively, in both the body (`alerting.custom.body`) and the url (`alerting.custom.url`).
+
+For all intents and purposes, we'll configure the custom alert with a Slack webhook here, but you can call anything you want.
+
+```yaml
+alerting:
+  custom:
+    url: "https://hooks.slack.com/services/**********/**********/**********"
+    method: "POST"
+    body: |
+      {
+        "text": "[SERVICE_NAME] - [ALERT_DESCRIPTION]"
+      }
+services:
+  - name: twinnation
+    interval: 30s
+    url: "https://twinnation.org/health"
+    alerts:
+      - type: custom
+        enabled: true
+        threshold: 10
+        description: "healthcheck failed 10 times in a row"
+    conditions:
+      - "[STATUS] == 200"
+      - "[BODY].status == UP"
+      - "[RESPONSE_TIME] < 300"
+```
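+
+Since the substitution also applies to `alerting.custom.url`, the placeholders can be used to build the request URL
+itself — for instance, against a hypothetical endpoint that takes the service name as a query parameter. Keep in
+mind that the values are substituted as-is, without URL encoding:
+
+```yaml
+alerting:
+  custom:
+    url: "https://status.example.org/notify?service=[SERVICE_NAME]"
+    method: "GET"
+```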
\ No newline at end of file
diff --git a/alerting/slack.go b/alerting/slack.go
deleted file mode 100644
index d5908091..00000000
--- a/alerting/slack.go
+++ /dev/null
@@ -1,31 +0,0 @@
-package alerting
-
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"github.com/TwinProduction/gatus/client"
-	"io/ioutil"
-)
-
-type requestBody struct {
-	Text string `json:"text"`
-}
-
-// SendSlackMessage sends a message to the given Slack webhook
-func SendSlackMessage(webhookUrl, service, description string) error {
-	body, _ := json.Marshal(requestBody{Text: fmt.Sprintf("*[Gatus]*\n*service:* %s\n*description:* %s", service, description)})
-	response, err := client.GetHttpClient().Post(webhookUrl, "application/json", bytes.NewBuffer(body))
-	if err != nil {
-		return err
-	}
-	defer response.Body.Close()
-	output, err := ioutil.ReadAll(response.Body)
-	if err != nil {
-		return fmt.Errorf("unable to read response body: %v", err.Error())
-	}
-	if string(output) != "ok" {
-		return fmt.Errorf("error: %s", string(output))
-	}
-	return nil
-}
diff --git a/config.yaml b/config.yaml
index 5235c617..fbdeb567 100644
--- a/config.yaml
+++ b/config.yaml
@@ -9,7 +9,7 @@ services:
       - "[RESPONSE_TIME] < 1000"
   - name: twinnation-articles-api
     interval: 30s
-    url: https://twinnation.org/api/v1/articles/24
+    url: "https://twinnation.org/api/v1/articles/24"
     conditions:
       - "[STATUS] == 200"
       - "[BODY].id == 24"
diff --git a/config/config.go b/config/config.go
index 2a714232..23638975 100644
--- a/config/config.go
+++ b/config/config.go
@@ -21,9 +21,9 @@ var (
 )
 
 type Config struct {
-	Metrics  bool            `yaml:"metrics"`
-	Alerting *core.Alerting  `yaml:"alerting"`
-	Services []*core.Service `yaml:"services"`
+	Metrics  bool                 `yaml:"metrics"`
+	Alerting *core.AlertingConfig `yaml:"alerting"`
+	Services []*core.Service      `yaml:"services"`
 }
 
 func Get() *Config {
diff --git a/config/config_test.go b/config/config_test.go
index a6970ca8..c54784d6 100644
--- a/config/config_test.go
+++ b/config/config_test.go
@@ -143,7 +143,7 @@ services:
 		t.Error("Metrics should've been false by default")
 	}
 	if config.Alerting == nil {
-		t.Fatal("config.Alerting shouldn't have been nil")
+		t.Fatal("config.AlertingConfig shouldn't have been nil")
 	}
 	if config.Alerting.Slack != "http://example.com" {
 		t.Errorf("Slack webhook should've been %s, but was %s", "http://example.com", config.Alerting.Slack)
diff --git a/core/alert.go b/core/alert.go
index 8d43560c..6afc5342 100644
--- a/core/alert.go
+++ b/core/alert.go
@@ -18,5 +18,6 @@ type Alert struct {
 type AlertType string
 
 const (
-	SlackAlert AlertType = "slack"
+	SlackAlert  AlertType = "slack"
+	CustomAlert AlertType = "custom"
 )
diff --git a/core/alerting.go b/core/alerting.go
index bfba579e..e4548ff9 100644
--- a/core/alerting.go
+++ b/core/alerting.go
@@ -1,5 +1,56 @@
 package core
 
-type Alerting struct {
-	Slack string `yaml:"slack"`
-}
+import (
+	"bytes"
+	"fmt"
+	"github.com/TwinProduction/gatus/client"
+	"net/http"
+	"strings"
+)
+
+// AlertingConfig is the top-level configuration for the alert providers
+type AlertingConfig struct {
+	Slack  string               `yaml:"slack"`
+	Custom *CustomAlertProvider `yaml:"custom"`
+}
+
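+// CustomAlertProvider is the configuration of the HTTP request sent when an alert of type "custom" is triggered.
+// The placeholders [ALERT_DESCRIPTION] and [SERVICE_NAME] in Url and Body are substituted before the request is sent.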
+type CustomAlertProvider struct {
+	Url     string            `yaml:"url"`
+	Method  string            `yaml:"method,omitempty"`
+	Body    string            `yaml:"body,omitempty"`
+	Headers map[string]string `yaml:"headers,omitempty"`
+}
+
+func (provider *CustomAlertProvider) buildRequest(serviceName, alertDescription string) *http.Request {
+	body := provider.Body
+	url := provider.Url
+	if strings.Contains(body, "[ALERT_DESCRIPTION]") {
+		body = strings.ReplaceAll(body, "[ALERT_DESCRIPTION]", alertDescription)
+	}
+	if strings.Contains(body, "[SERVICE_NAME]") {
+		body = strings.ReplaceAll(body, "[SERVICE_NAME]", serviceName)
+	}
+	if strings.Contains(url, "[ALERT_DESCRIPTION]") {
+		url = strings.ReplaceAll(url, "[ALERT_DESCRIPTION]", alertDescription)
+	}
+	if strings.Contains(url, "[SERVICE_NAME]") {
+		url = strings.ReplaceAll(url, "[SERVICE_NAME]", serviceName)
+	}
+	bodyBuffer := bytes.NewBuffer([]byte(body))
+	request, _ := http.NewRequest(provider.Method, url, bodyBuffer)
+	for k, v := range provider.Headers {
+		request.Header.Set(k, v)
+	}
+	return request
+}
+
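+// Send sends the request built from the provider's configuration and returns an error if the
+// request fails or if the response status code is 400 or above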
+func (provider *CustomAlertProvider) Send(serviceName, alertDescription string) error {
+	request := provider.buildRequest(serviceName, alertDescription)
+	response, err := client.GetHttpClient().Do(request)
+	if err != nil {
+		return err
+	}
+	defer response.Body.Close()
+	if response.StatusCode > 399 {
+		return fmt.Errorf("call to alert provider returned status code %d", response.StatusCode)
+	}
+	return nil
+}
diff --git a/watchdog/watchdog.go b/watchdog/watchdog.go
index abb93eeb..5ed27f23 100644
--- a/watchdog/watchdog.go
+++ b/watchdog/watchdog.go
@@ -2,7 +2,6 @@ package watchdog
 
 import (
 	"fmt"
-	"github.com/TwinProduction/gatus/alerting"
 	"github.com/TwinProduction/gatus/config"
 	"github.com/TwinProduction/gatus/core"
 	"github.com/TwinProduction/gatus/metric"
@@ -59,13 +58,37 @@ func monitor(service *core.Service) {
 		cfg := config.Get()
 		if cfg.Alerting != nil {
 			for _, alertTriggered := range service.GetAlertsTriggered() {
+				// Both Slack and custom alerts are resolved to a CustomAlertProvider before being sent
+				var alertProvider *core.CustomAlertProvider
 				if alertTriggered.Type == core.SlackAlert {
 					if len(cfg.Alerting.Slack) > 0 {
 						log.Printf("[watchdog][monitor] Sending Slack alert because alert with description=%s has been triggered", alertTriggered.Description)
-						alerting.SendSlackMessage(cfg.Alerting.Slack, service.Name, alertTriggered.Description)
+						alertProvider = &core.CustomAlertProvider{
+							Url:     cfg.Alerting.Slack,
+							Method:  "POST",
+							Body:    fmt.Sprintf(`{"text":"*[Gatus]*\n*service:* %s\n*description:* %s"}`, service.Name, alertTriggered.Description),
+							Headers: map[string]string{"Content-Type": "application/json"},
+						}
 					} else {
 						log.Printf("[watchdog][monitor] Not sending Slack alert despite being triggered, because there is no Slack webhook configured")
 					}
+				} else if alertTriggered.Type == core.CustomAlert {
+					if cfg.Alerting.Custom != nil && len(cfg.Alerting.Custom.Url) > 0 {
+						log.Printf("[watchdog][monitor] Sending custom alert because alert with description=%s has been triggered", alertTriggered.Description)
+						alertProvider = &core.CustomAlertProvider{
+							Url:     cfg.Alerting.Custom.Url,
+							Method:  cfg.Alerting.Custom.Method,
+							Body:    cfg.Alerting.Custom.Body,
+							Headers: cfg.Alerting.Custom.Headers,
+						}
+					} else {
+						log.Printf("[watchdog][monitor] Not sending custom alert despite being triggered, because there is no custom URL configured")
+					}
+				}
+				if alertProvider != nil {
+					err := alertProvider.Send(service.Name, alertTriggered.Description)
+					if err != nil {
+						log.Printf("[watchdog][monitor] Ran into error sending an alert: %s", err.Error())
+					}
+				}
 			}
 		}
 	}