mirror of
https://github.com/TwiN/gatus.git
synced 2024-11-25 01:13:40 +01:00
Support custom alert provider
This commit is contained in:
parent
4b57654592
commit
37c4715453
73
README.md
73
README.md
@ -11,6 +11,20 @@ I personally deploy it in my Kubernetes cluster and have it monitor the status o
|
||||
core applications: https://status.twinnation.org/
|
||||
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Usage](#usage)
|
||||
- [Configuration](#configuration)
|
||||
- [Conditions](#conditions)
|
||||
- [Docker](#docker)
|
||||
- [Running the tests](#running-the-tests)
|
||||
- [Using in Production](#using-in-production)
|
||||
- [FAQ](#faq)
|
||||
- [Sending a GraphQL request](#sending-a-graphql-request)
|
||||
- [Configuring Slack alerts](#configuring-slack-alerts)
|
||||
- [Configuring custom alerts](#configuring-custom-alerts)
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
By default, the configuration file is expected to be at `config/config.yaml`.
|
||||
@ -23,14 +37,14 @@ Here's a simple example:
|
||||
metrics: true # Whether to expose metrics at /metrics
|
||||
services:
|
||||
- name: twinnation # Name of your service, can be anything
|
||||
url: https://twinnation.org/health
|
||||
interval: 15s # Duration to wait between every status check (default: 10s)
|
||||
url: "https://twinnation.org/health"
|
||||
interval: 30s # Duration to wait between every status check (default: 10s)
|
||||
conditions:
|
||||
- "[STATUS] == 200" # Status must be 200
|
||||
- "[BODY].status == UP" # The json path "$.status" must be equal to UP
|
||||
- "[RESPONSE_TIME] < 300" # Response time must be under 300ms
|
||||
- name: example
|
||||
url: https://example.org/
|
||||
url: "https://example.org/"
|
||||
interval: 30s
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
@ -44,7 +58,7 @@ Note that you can also add environment variables in the your configuration file
|
||||
| Parameter | Description | Default |
|
||||
| --------------------------------- | --------------------------------------------------------------- | -------------- |
|
||||
| `metrics` | Whether to expose metrics at /metrics | `false` |
|
||||
| `alerting.slack` | Webhook to use for alerts of type `slack` | `""` |
|
||||
| `services` | List of services to monitor | Required `[]` |
|
||||
| `services[].name` | Name of the service. Can be anything. | Required `""` |
|
||||
| `services[].url` | URL to send the request to | Required `""` |
|
||||
| `services[].conditions` | Conditions used to determine the health of the service | `[]` |
|
||||
@ -53,10 +67,16 @@ Note that you can also add environment variables in the your configuration file
|
||||
| `services[].graphql` | Whether to wrap the body in a query param (`{"query":"$body"}`) | `false` |
|
||||
| `services[].body` | Request body | `""` |
|
||||
| `services[].headers` | Request headers | `{}` |
|
||||
| `services[].alerts[].type` | Type of alert. Currently, only `slack` is supported | Required `""` |
|
||||
| `services[].alerts[].type` | Type of alert. Valid types: `slack`, `custom` | Required `""` |
|
||||
| `services[].alerts[].enabled` | Whether to enable the alert | `false` |
|
||||
| `services[].alerts[].threshold` | Number of failures in a row needed before triggering the alert | `3` |
|
||||
| `services[].alerts[].description` | Description of the alert. Will be included in the alert sent | `""` |
|
||||
| `alerting` | Configuration for alerting | `{}` |
|
||||
| `alerting.slack` | Webhook to use for alerts of type `slack` | `""` |
|
||||
| `alerting.custom` | Configuration for custom actions on failure or alerts | `{}` |
|
||||
| `alerting.custom.url` | Custom alerting request url | `""` |
|
||||
| `alerting.custom.body` | Custom alerting request body. | `""` |
|
||||
| `alerting.custom.headers` | Custom alerting request headers | `{}` |
|
||||
|
||||
|
||||
### Conditions
|
||||
@ -145,11 +165,11 @@ will send a `POST` request to `http://localhost:8080/playground` with the follow
|
||||
|
||||
```yaml
|
||||
alerting:
|
||||
slack: https://hooks.slack.com/services/**********/**********/**********
|
||||
slack: "https://hooks.slack.com/services/**********/**********/**********"
|
||||
services:
|
||||
- name: twinnation
|
||||
interval: 30s
|
||||
url: https://twinnation.org/health
|
||||
url: "https://twinnation.org/health"
|
||||
alerts:
|
||||
- type: slack
|
||||
enabled: true
|
||||
@ -163,3 +183,42 @@ services:
|
||||
- "[BODY].status == UP"
|
||||
- "[RESPONSE_TIME] < 300"
|
||||
```
|
||||
|
||||
|
||||
### Configuring custom alerts
|
||||
|
||||
While they're called alerts, you can use this feature to call anything.
|
||||
|
||||
For instance, you could automate rollbacks by having an application that keeps track of new deployments, and by
|
||||
leveraging Gatus, you could have Gatus call that application endpoint when a service starts failing. Your application
|
||||
would then check if the service that started failing was recently deployed, and if it was, then automatically
|
||||
roll it back.
|
||||
|
||||
The values `[ALERT_DESCRIPTION]` and `[SERVICE_NAME]` are automatically substituted for the alert description and the
|
||||
service name, respectively, in the body (`alerting.custom.body`) and the url (`alerting.custom.url`).
|
||||
|
||||
For the sake of this example, we'll configure the custom alert with a Slack webhook, but you can call anything you want.
|
||||
|
||||
```yaml
|
||||
alerting:
|
||||
custom:
|
||||
url: "https://hooks.slack.com/services/**********/**********/**********"
|
||||
method: "POST"
|
||||
body: |
|
||||
{
|
||||
"text": "[SERVICE_NAME] - [ALERT_DESCRIPTION]"
|
||||
}
|
||||
services:
|
||||
- name: twinnation
|
||||
interval: 30s
|
||||
url: "https://twinnation.org/health"
|
||||
alerts:
|
||||
- type: custom
|
||||
enabled: true
|
||||
threshold: 10
|
||||
description: "healthcheck failed 10 times in a row"
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- "[BODY].status == UP"
|
||||
- "[RESPONSE_TIME] < 300"
|
||||
```
|
@ -1,31 +0,0 @@
|
||||
package alerting
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/client"
|
||||
"io/ioutil"
|
||||
)
|
||||
|
||||
type requestBody struct {
|
||||
Text string `json:"text"`
|
||||
}
|
||||
|
||||
// SendSlackMessage sends a message to the given Slack webhook
|
||||
func SendSlackMessage(webhookUrl, service, description string) error {
|
||||
body, _ := json.Marshal(requestBody{Text: fmt.Sprintf("*[Gatus]*\n*service:* %s\n*description:* %s", service, description)})
|
||||
response, err := client.GetHttpClient().Post(webhookUrl, "application/json", bytes.NewBuffer(body))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer response.Body.Close()
|
||||
output, err := ioutil.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to read response body: %v", err.Error())
|
||||
}
|
||||
if string(output) != "ok" {
|
||||
return fmt.Errorf("error: %s", string(output))
|
||||
}
|
||||
return nil
|
||||
}
|
@ -9,7 +9,7 @@ services:
|
||||
- "[RESPONSE_TIME] < 1000"
|
||||
- name: twinnation-articles-api
|
||||
interval: 30s
|
||||
url: https://twinnation.org/api/v1/articles/24
|
||||
url: "https://twinnation.org/api/v1/articles/24"
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- "[BODY].id == 24"
|
||||
|
@ -22,7 +22,7 @@ var (
|
||||
|
||||
type Config struct {
|
||||
Metrics bool `yaml:"metrics"`
|
||||
Alerting *core.Alerting `yaml:"alerting"`
|
||||
Alerting *core.AlertingConfig `yaml:"alerting"`
|
||||
Services []*core.Service `yaml:"services"`
|
||||
}
|
||||
|
||||
|
@ -143,7 +143,7 @@ services:
|
||||
t.Error("Metrics should've been false by default")
|
||||
}
|
||||
if config.Alerting == nil {
|
||||
t.Fatal("config.Alerting shouldn't have been nil")
|
||||
t.Fatal("config.AlertingConfig shouldn't have been nil")
|
||||
}
|
||||
if config.Alerting.Slack != "http://example.com" {
|
||||
t.Errorf("Slack webhook should've been %s, but was %s", "http://example.com", config.Alerting.Slack)
|
||||
|
@ -19,4 +19,5 @@ type AlertType string
|
||||
|
||||
const (
|
||||
SlackAlert AlertType = "slack"
|
||||
CustomAlert AlertType = "custom"
|
||||
)
|
||||
|
@ -1,5 +1,56 @@
|
||||
package core
|
||||
|
||||
type Alerting struct {
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/client"
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// AlertingConfig holds the alerting settings of the configuration.
type AlertingConfig struct {
	// Slack is read from the "slack" key. Callers treat it as the webhook URL
	// for alerts of type "slack"; an empty value means no webhook is configured.
	Slack string `yaml:"slack"`
	// Custom is read from the "custom" key and describes the request sent for
	// alerts of type "custom". It may be nil, so callers must check before use.
	Custom *CustomAlertProvider `yaml:"custom"`
}
|
||||
|
||||
// CustomAlertProvider describes an arbitrary HTTP request that is sent when an
// alert is triggered.
//
// The placeholders [ALERT_DESCRIPTION] and [SERVICE_NAME] may appear in both
// Url and Body; they are substituted when the request is built.
type CustomAlertProvider struct {
	// Url is the endpoint the request is sent to.
	Url string `yaml:"url"`
	// Method is the HTTP method of the request. net/http treats an empty
	// method as GET.
	Method string `yaml:"method,omitempty"`
	// Body is the request body template.
	Body string `yaml:"body,omitempty"`
	// Headers are set on the request as-is.
	Headers map[string]string `yaml:"headers,omitempty"`
}

// buildRequest creates the HTTP request for an alert, replacing every
// [ALERT_DESCRIPTION] and [SERVICE_NAME] placeholder in the URL and the body
// with alertDescription and serviceName. The values are inserted verbatim,
// without any escaping.
//
// It returns nil if the request cannot be created, e.g. because the
// configured method or URL is invalid.
func (provider *CustomAlertProvider) buildRequest(serviceName, alertDescription string) *http.Request {
	// A single Replacer substitutes both placeholders in one pass, so neither
	// substitution can discard the result of the other, and placeholder-like
	// text inside the substituted values is left untouched.
	placeholders := strings.NewReplacer(
		"[ALERT_DESCRIPTION]", alertDescription,
		"[SERVICE_NAME]", serviceName,
	)
	body := placeholders.Replace(provider.Body)
	url := placeholders.Replace(provider.Url)
	request, err := http.NewRequest(provider.Method, url, bytes.NewBufferString(body))
	if err != nil {
		// The signature has no error result; a nil request signals failure
		// instead of panicking on the header assignment below.
		return nil
	}
	for k, v := range provider.Headers {
		request.Header.Set(k, v)
	}
	return request
}
|
||||
|
||||
func (provider *CustomAlertProvider) Send(serviceName, alertDescription string) error {
|
||||
request := provider.buildRequest(serviceName, alertDescription)
|
||||
response, err := client.GetHttpClient().Do(request)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if response.StatusCode > 399 {
|
||||
return fmt.Errorf("call to provider alert returned status code %d", response.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -2,7 +2,6 @@ package watchdog
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/alerting"
|
||||
"github.com/TwinProduction/gatus/config"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
"github.com/TwinProduction/gatus/metric"
|
||||
@ -59,13 +58,37 @@ func monitor(service *core.Service) {
|
||||
cfg := config.Get()
|
||||
if cfg.Alerting != nil {
|
||||
for _, alertTriggered := range service.GetAlertsTriggered() {
|
||||
var alertProvider *core.CustomAlertProvider
|
||||
if alertTriggered.Type == core.SlackAlert {
|
||||
if len(cfg.Alerting.Slack) > 0 {
|
||||
log.Printf("[watchdog][monitor] Sending Slack alert because alert with description=%s has been triggered", alertTriggered.Description)
|
||||
alerting.SendSlackMessage(cfg.Alerting.Slack, service.Name, alertTriggered.Description)
|
||||
alertProvider = &core.CustomAlertProvider{
|
||||
Url: cfg.Alerting.Slack,
|
||||
Method: "POST",
|
||||
Body: fmt.Sprintf(`{"text":"*[Gatus]*\n*service:* %s\n*description:* %s"}`, service.Name, alertTriggered.Description),
|
||||
Headers: map[string]string{"Content-Type": "application/json"},
|
||||
}
|
||||
} else {
|
||||
log.Printf("[watchdog][monitor] Not sending Slack alert despite being triggered, because there is no Slack webhook configured")
|
||||
}
|
||||
} else if alertTriggered.Type == core.CustomAlert {
|
||||
if cfg.Alerting.Custom != nil && len(cfg.Alerting.Custom.Url) > 0 {
|
||||
log.Printf("[watchdog][monitor] Sending custom alert because alert with description=%s has been triggered", alertTriggered.Description)
|
||||
alertProvider = &core.CustomAlertProvider{
|
||||
Url: cfg.Alerting.Custom.Url,
|
||||
Method: cfg.Alerting.Custom.Method,
|
||||
Body: cfg.Alerting.Custom.Body,
|
||||
Headers: cfg.Alerting.Custom.Headers,
|
||||
}
|
||||
} else {
|
||||
log.Printf("[watchdog][monitor] Not sending custom alert despite being triggered, because there is no custom url configured")
|
||||
}
|
||||
}
|
||||
if alertProvider != nil {
|
||||
err := alertProvider.Send(service.Name, alertTriggered.Description)
|
||||
if err != nil {
|
||||
log.Printf("[watchdog][monitor] Ran into error sending an alert: %s", err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user