mirror of
https://github.com/TwiN/gatus.git
synced 2024-11-25 01:13:40 +01:00
Close #74: Add maintenance window
This commit is contained in:
parent
dc173b29bc
commit
fa4736c672
32
README.md
32
README.md
@ -47,6 +47,7 @@ For more details, see [Usage](#usage)
|
|||||||
- [Configuring Twilio alerts](#configuring-twilio-alerts)
|
- [Configuring Twilio alerts](#configuring-twilio-alerts)
|
||||||
- [Configuring custom alerts](#configuring-custom-alerts)
|
- [Configuring custom alerts](#configuring-custom-alerts)
|
||||||
- [Setting a default alert](#setting-a-default-alert)
|
- [Setting a default alert](#setting-a-default-alert)
|
||||||
|
- [Maintenance](#maintenance)
|
||||||
- [Deployment](#deployment)
|
- [Deployment](#deployment)
|
||||||
- [Docker](#docker)
|
- [Docker](#docker)
|
||||||
- [Helm Chart](#helm-chart)
|
- [Helm Chart](#helm-chart)
|
||||||
@ -736,6 +737,37 @@ services:
|
|||||||
- type: pagerduty
|
- type: pagerduty
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Maintenance
|
||||||
|
If you have maintenance windows, you may not want to be annoyed by alerts.
|
||||||
|
To do that, you'll have to use the maintenance configuration:
|
||||||
|
|
||||||
|
| Parameter | Description | Default |
|
||||||
|
|:----------------------- |:----------------------------------------------------------------------------- |:--------------- |
|
||||||
|
| `maintenance.enabled` | Whether the maintenance period is enabled | `true` |
|
||||||
|
| `maintenance.start` | Time at which the maintenance window starts in `hh:mm` format (e.g. `23:00`) | Required `""` |
|
||||||
|
| `maintenance.duration` | Duration of the maintenance window; must be greater than `0` and less than `24h` (e.g. `1h`, `30m`) | Required `""` |
|
||||||
|
| `maintenance.every` | Days on which the maintenance period applies (e.g. `[Monday, Thursday]`).<br />If left empty, the maintenance window applies every day | `[]` |
|
||||||
|
**Note that the maintenance configuration uses UTC.**
|
||||||
|
|
||||||
|
|
||||||
|
Here's an example:
|
||||||
|
```yaml
|
||||||
|
maintenance:
|
||||||
|
start: 23:00
|
||||||
|
duration: 1h
|
||||||
|
every: [Monday, Thursday]
|
||||||
|
```
|
||||||
|
Note that you can also specify each day on separate lines:
|
||||||
|
```yaml
|
||||||
|
maintenance:
|
||||||
|
start: 23:00
|
||||||
|
duration: 1h
|
||||||
|
every:
|
||||||
|
- Monday
|
||||||
|
- Thursday
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Deployment
|
## Deployment
|
||||||
Many examples can be found in the [examples](examples) folder, but this section will focus on the most popular ways of deploying Gatus.
|
Many examples can be found in the [examples](examples) folder, but this section will focus on the most popular ways of deploying Gatus.
|
||||||
|
@ -10,6 +10,7 @@ import (
|
|||||||
"github.com/TwinProduction/gatus/alerting"
|
"github.com/TwinProduction/gatus/alerting"
|
||||||
"github.com/TwinProduction/gatus/alerting/alert"
|
"github.com/TwinProduction/gatus/alerting/alert"
|
||||||
"github.com/TwinProduction/gatus/alerting/provider"
|
"github.com/TwinProduction/gatus/alerting/provider"
|
||||||
|
"github.com/TwinProduction/gatus/config/maintenance"
|
||||||
"github.com/TwinProduction/gatus/core"
|
"github.com/TwinProduction/gatus/core"
|
||||||
"github.com/TwinProduction/gatus/security"
|
"github.com/TwinProduction/gatus/security"
|
||||||
"github.com/TwinProduction/gatus/storage"
|
"github.com/TwinProduction/gatus/storage"
|
||||||
@ -82,6 +83,9 @@ type Config struct {
|
|||||||
// UI is the configuration for the UI
|
// UI is the configuration for the UI
|
||||||
UI *UIConfig `yaml:"ui"`
|
UI *UIConfig `yaml:"ui"`
|
||||||
|
|
||||||
|
// Maintenance is the configuration for creating a maintenance window in which no alerts are sent
|
||||||
|
Maintenance *maintenance.Config `yaml:"maintenance"`
|
||||||
|
|
||||||
filePath string // path to the file from which config was loaded from
|
filePath string // path to the file from which config was loaded from
|
||||||
lastFileModTime time.Time // last modification time
|
lastFileModTime time.Time // last modification time
|
||||||
}
|
}
|
||||||
@ -172,6 +176,9 @@ func parseAndValidateConfigBytes(yamlBytes []byte) (config *Config, err error) {
|
|||||||
if err := validateUIConfig(config); err != nil {
|
if err := validateUIConfig(config); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
if err := validateMaintenanceConfig(config); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
if err := validateStorageConfig(config); err != nil {
|
if err := validateStorageConfig(config); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -201,6 +208,17 @@ func validateStorageConfig(config *Config) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func validateMaintenanceConfig(config *Config) error {
|
||||||
|
if config.Maintenance == nil {
|
||||||
|
config.Maintenance = maintenance.GetDefaultConfig()
|
||||||
|
} else {
|
||||||
|
if err := config.Maintenance.ValidateAndSetDefaults(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func validateUIConfig(config *Config) error {
|
func validateUIConfig(config *Config) error {
|
||||||
if config.UI == nil {
|
if config.UI == nil {
|
||||||
config.UI = GetDefaultUIConfig()
|
config.UI = GetDefaultUIConfig()
|
||||||
|
@ -43,6 +43,11 @@ func TestParseAndValidateConfigBytes(t *testing.T) {
|
|||||||
config, err := parseAndValidateConfigBytes([]byte(fmt.Sprintf(`
|
config, err := parseAndValidateConfigBytes([]byte(fmt.Sprintf(`
|
||||||
storage:
|
storage:
|
||||||
file: %s
|
file: %s
|
||||||
|
maintenance:
|
||||||
|
enabled: true
|
||||||
|
start: 00:00
|
||||||
|
duration: 4h
|
||||||
|
every: [Monday, Thursday]
|
||||||
ui:
|
ui:
|
||||||
title: Test
|
title: Test
|
||||||
services:
|
services:
|
||||||
@ -79,6 +84,9 @@ services:
|
|||||||
if config.UI == nil || config.UI.Title != "Test" {
|
if config.UI == nil || config.UI.Title != "Test" {
|
||||||
t.Error("Expected Config.UI.Title to be Test")
|
t.Error("Expected Config.UI.Title to be Test")
|
||||||
}
|
}
|
||||||
|
if mc := config.Maintenance; mc == nil || mc.Start != "00:00" || !mc.IsEnabled() || mc.Duration != 4*time.Hour || len(mc.Every) != 2 {
|
||||||
|
t.Error("Expected Config.Maintenance to be configured properly")
|
||||||
|
}
|
||||||
if len(config.Services) != 3 {
|
if len(config.Services) != 3 {
|
||||||
t.Error("Should have returned two services")
|
t.Error("Should have returned two services")
|
||||||
}
|
}
|
||||||
|
133
config/maintenance/maintenance.go
Normal file
133
config/maintenance/maintenance.go
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
package maintenance
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
	// longDayNames lists every value accepted in Config.Every.
	// The spelling and order match the names returned by time.Weekday.String().
	longDayNames = []string{
		"Sunday",
		"Monday",
		"Tuesday",
		"Wednesday",
		"Thursday",
		"Friday",
		"Saturday",
	}

	// Sentinel errors returned by the validation of the maintenance configuration.
	// The messages name the actual configuration keys and the actual bounds that
	// are enforced, so that users can fix their configuration from the message alone.
	errInvalidMaintenanceStartFormat = errors.New("invalid maintenance start format: must be hh:mm, between 00:00 and 23:59 inclusively (e.g. 23:00)")
	errInvalidMaintenanceDuration    = errors.New("invalid maintenance duration: must be bigger than 0 and smaller than 24h (e.g. 30m)")
	errInvalidDayName                = fmt.Errorf("invalid value specified for 'every'. supported values are %s", longDayNames)
)
|
||||||
|
|
||||||
|
// Config describes a recurring maintenance period.
// While a maintenance period is ongoing, no alerts are sent.
//
// All times are interpreted in UTC.
type Config struct {
	// Enabled indicates whether the maintenance period is enabled.
	// A nil value is treated as enabled.
	Enabled *bool `yaml:"enabled"`

	// Start is the time at which the maintenance period starts (e.g. 23:00).
	Start string `yaml:"start"`

	// Duration is the length of the maintenance period (e.g. 4h).
	Duration time.Duration `yaml:"duration"`

	// Every is the list of days of the week on which the maintenance period applies.
	// See longDayNames for the list of valid values.
	// An empty list means every day.
	Every []string `yaml:"every"`

	// durationToStartFromMidnight is the offset of Start from midnight.
	// It is computed by ValidateAndSetDefaults.
	durationToStartFromMidnight time.Duration

	// timeLocation is not read or written by any code visible in this file.
	timeLocation *time.Location
}
|
||||||
|
|
||||||
|
func GetDefaultConfig() *Config {
|
||||||
|
defaultValue := false
|
||||||
|
return &Config{
|
||||||
|
Enabled: &defaultValue,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsEnabled returns whether maintenance is enabled or not
|
||||||
|
func (c Config) IsEnabled() bool {
|
||||||
|
if c.Enabled == nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return *c.Enabled
|
||||||
|
}
|
||||||
|
|
||||||
|
// ValidateAndSetDefaults validates the maintenance configuration and sets the default values if necessary.
|
||||||
|
//
|
||||||
|
// Must be called once in the application's lifecycle before IsUnderMaintenance is called, since it
|
||||||
|
// also sets durationToStartFromMidnight.
|
||||||
|
func (c *Config) ValidateAndSetDefaults() error {
|
||||||
|
if c == nil || !c.IsEnabled() {
|
||||||
|
// Don't waste time validating if maintenance is not enabled.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
for _, day := range c.Every {
|
||||||
|
isDayValid := false
|
||||||
|
for _, longDayName := range longDayNames {
|
||||||
|
if day == longDayName {
|
||||||
|
isDayValid = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !isDayValid {
|
||||||
|
return errInvalidDayName
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var err error
|
||||||
|
c.durationToStartFromMidnight, err = hhmmToDuration(c.Start)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if c.Duration <= 0 || c.Duration >= 24*time.Hour {
|
||||||
|
return errInvalidMaintenanceDuration
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsUnderMaintenance checks whether the services that Gatus monitors are within the configured maintenance window
|
||||||
|
func (c Config) IsUnderMaintenance() bool {
|
||||||
|
if !c.IsEnabled() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
now := time.Now().UTC()
|
||||||
|
dayWhereMaintenancePeriodWouldStart := now.Add(-c.Duration).Truncate(24 * time.Hour)
|
||||||
|
hasMaintenanceEveryDay := len(c.Every) == 0
|
||||||
|
hasMaintenancePeriodScheduledForThatWeekday := sort.SearchStrings(c.Every, dayWhereMaintenancePeriodWouldStart.Weekday().String()) != len(c.Every)
|
||||||
|
if !hasMaintenanceEveryDay && !hasMaintenancePeriodScheduledForThatWeekday {
|
||||||
|
// The day when the maintenance period would start is not scheduled
|
||||||
|
// to have any maintenance, so we can just return false.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
startOfMaintenancePeriod := dayWhereMaintenancePeriodWouldStart.Add(c.durationToStartFromMidnight)
|
||||||
|
endOfMaintenancePeriod := startOfMaintenancePeriod.Add(c.Duration)
|
||||||
|
return now.After(startOfMaintenancePeriod) && now.Before(endOfMaintenancePeriod)
|
||||||
|
}
|
||||||
|
|
||||||
|
func hhmmToDuration(s string) (time.Duration, error) {
|
||||||
|
if len(s) != 5 {
|
||||||
|
return 0, errInvalidMaintenanceStartFormat
|
||||||
|
}
|
||||||
|
var hours, minutes int
|
||||||
|
var err error
|
||||||
|
if hours, err = extractNumericalValueFromPotentiallyZeroPaddedString(s[:2]); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if minutes, err = extractNumericalValueFromPotentiallyZeroPaddedString(s[3:5]); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
duration := (time.Duration(hours) * time.Hour) + (time.Duration(minutes) * time.Minute)
|
||||||
|
if hours < 0 || hours > 23 || minutes < 0 || minutes > 59 || duration < 0 || duration >= 24*time.Hour {
|
||||||
|
return 0, errInvalidMaintenanceStartFormat
|
||||||
|
}
|
||||||
|
return duration, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractNumericalValueFromPotentiallyZeroPaddedString removes at most one leading "0" from s
// and parses what is left as a base-10 integer.
//
// The error from strconv.Atoi is returned as-is. Note that "0" on its own becomes an empty
// string once the prefix is removed, and thus fails to parse.
func extractNumericalValueFromPotentiallyZeroPaddedString(s string) (int, error) {
	unpadded := strings.TrimPrefix(s, "0")
	return strconv.Atoi(unpadded)
}
|
193
config/maintenance/maintenance_test.go
Normal file
193
config/maintenance/maintenance_test.go
Normal file
@ -0,0 +1,193 @@
|
|||||||
|
package maintenance
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGetDefaultConfig(t *testing.T) {
|
||||||
|
if *GetDefaultConfig().Enabled {
|
||||||
|
t.Fatal("expected default config to be disabled by default")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_Validate(t *testing.T) {
|
||||||
|
yes, no := true, false
|
||||||
|
scenarios := []struct {
|
||||||
|
name string
|
||||||
|
cfg *Config
|
||||||
|
expectedError error
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "nil",
|
||||||
|
cfg: nil,
|
||||||
|
expectedError: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "disabled",
|
||||||
|
cfg: &Config{
|
||||||
|
Enabled: &no,
|
||||||
|
},
|
||||||
|
expectedError: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid-day",
|
||||||
|
cfg: &Config{
|
||||||
|
Every: []string{"invalid-day"},
|
||||||
|
},
|
||||||
|
expectedError: errInvalidDayName,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid-day",
|
||||||
|
cfg: &Config{
|
||||||
|
Every: []string{"invalid-day"},
|
||||||
|
},
|
||||||
|
expectedError: errInvalidDayName,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid-start-format",
|
||||||
|
cfg: &Config{
|
||||||
|
Start: "0000",
|
||||||
|
},
|
||||||
|
expectedError: errInvalidMaintenanceStartFormat,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid-start-hours",
|
||||||
|
cfg: &Config{
|
||||||
|
Start: "25:00",
|
||||||
|
},
|
||||||
|
expectedError: errInvalidMaintenanceStartFormat,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid-start-minutes",
|
||||||
|
cfg: &Config{
|
||||||
|
Start: "0:61",
|
||||||
|
},
|
||||||
|
expectedError: errInvalidMaintenanceStartFormat,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid-start-minutes-non-numerical",
|
||||||
|
cfg: &Config{
|
||||||
|
Start: "00:zz",
|
||||||
|
},
|
||||||
|
expectedError: strconv.ErrSyntax,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid-start-hours-non-numerical",
|
||||||
|
cfg: &Config{
|
||||||
|
Start: "zz:00",
|
||||||
|
},
|
||||||
|
expectedError: strconv.ErrSyntax,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid-duration",
|
||||||
|
cfg: &Config{
|
||||||
|
Start: "23:00",
|
||||||
|
Duration: 0,
|
||||||
|
},
|
||||||
|
expectedError: errInvalidMaintenanceDuration,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "every-day-at-2300",
|
||||||
|
cfg: &Config{
|
||||||
|
Start: "23:00",
|
||||||
|
Duration: time.Hour,
|
||||||
|
},
|
||||||
|
expectedError: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "every-monday-at-0000",
|
||||||
|
cfg: &Config{
|
||||||
|
Start: "00:00",
|
||||||
|
Duration: 30 * time.Minute,
|
||||||
|
Every: []string{"Monday"},
|
||||||
|
},
|
||||||
|
expectedError: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "every-friday-and-sunday-at-0000-explicitly-enabled",
|
||||||
|
cfg: &Config{
|
||||||
|
Enabled: &yes,
|
||||||
|
Start: "08:00",
|
||||||
|
Duration: 8 * time.Hour,
|
||||||
|
Every: []string{"Friday", "Sunday"},
|
||||||
|
},
|
||||||
|
expectedError: nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, scenario := range scenarios {
|
||||||
|
t.Run(scenario.name, func(t *testing.T) {
|
||||||
|
err := scenario.cfg.ValidateAndSetDefaults()
|
||||||
|
if !errors.Is(err, scenario.expectedError) {
|
||||||
|
t.Errorf("expected %v, got %v", scenario.expectedError, err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_IsUnderMaintenance(t *testing.T) {
|
||||||
|
yes, no := true, false
|
||||||
|
now := time.Now().UTC()
|
||||||
|
scenarios := []struct {
|
||||||
|
name string
|
||||||
|
cfg *Config
|
||||||
|
expected bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "disabled",
|
||||||
|
cfg: &Config{
|
||||||
|
Enabled: &no,
|
||||||
|
},
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "under-maintenance-explicitly-enabled",
|
||||||
|
cfg: &Config{
|
||||||
|
Enabled: &yes,
|
||||||
|
Start: fmt.Sprintf("%02d:00", now.Hour()),
|
||||||
|
Duration: 2 * time.Hour,
|
||||||
|
},
|
||||||
|
expected: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "under-maintenance",
|
||||||
|
cfg: &Config{
|
||||||
|
Start: fmt.Sprintf("%02d:00", now.Hour()),
|
||||||
|
Duration: 2 * time.Hour,
|
||||||
|
},
|
||||||
|
expected: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "not-under-maintenance",
|
||||||
|
cfg: &Config{
|
||||||
|
Start: fmt.Sprintf("%02d:00", now.Add(-5*time.Hour).Hour()),
|
||||||
|
Duration: time.Hour,
|
||||||
|
},
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "not-under-maintenance-today",
|
||||||
|
cfg: &Config{
|
||||||
|
Start: fmt.Sprintf("%02d:00", now.Hour()),
|
||||||
|
Duration: time.Hour,
|
||||||
|
Every: []string{now.Add(48 * time.Hour).Weekday().String()},
|
||||||
|
},
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, scenario := range scenarios {
|
||||||
|
t.Run(scenario.name, func(t *testing.T) {
|
||||||
|
if scenario.cfg.ValidateAndSetDefaults() != nil {
|
||||||
|
t.Fatal("validation shouldn't have returned an error")
|
||||||
|
}
|
||||||
|
isUnderMaintenance := scenario.cfg.IsUnderMaintenance()
|
||||||
|
if isUnderMaintenance != scenario.expected {
|
||||||
|
t.Errorf("expected %v, got %v", scenario.expected, isUnderMaintenance)
|
||||||
|
t.Logf("start=%v; duration=%v; now=%v", scenario.cfg.Start, scenario.cfg.Duration, time.Now().UTC())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
@ -8,6 +8,7 @@ import (
|
|||||||
|
|
||||||
"github.com/TwinProduction/gatus/alerting"
|
"github.com/TwinProduction/gatus/alerting"
|
||||||
"github.com/TwinProduction/gatus/config"
|
"github.com/TwinProduction/gatus/config"
|
||||||
|
"github.com/TwinProduction/gatus/config/maintenance"
|
||||||
"github.com/TwinProduction/gatus/core"
|
"github.com/TwinProduction/gatus/core"
|
||||||
"github.com/TwinProduction/gatus/metric"
|
"github.com/TwinProduction/gatus/metric"
|
||||||
"github.com/TwinProduction/gatus/storage"
|
"github.com/TwinProduction/gatus/storage"
|
||||||
@ -27,17 +28,17 @@ func Monitor(cfg *config.Config) {
|
|||||||
ctx, cancelFunc = context.WithCancel(context.Background())
|
ctx, cancelFunc = context.WithCancel(context.Background())
|
||||||
for _, service := range cfg.Services {
|
for _, service := range cfg.Services {
|
||||||
if service.IsEnabled() {
|
if service.IsEnabled() {
|
||||||
// To prevent multiple requests from running at the same time, we'll wait for a little bit before each iteration
|
// To prevent multiple requests from running at the same time, we'll wait for a little before each iteration
|
||||||
time.Sleep(1111 * time.Millisecond)
|
time.Sleep(1111 * time.Millisecond)
|
||||||
go monitor(service, cfg.Alerting, cfg.DisableMonitoringLock, cfg.Metrics, cfg.Debug, ctx)
|
go monitor(service, cfg.Alerting, cfg.Maintenance, cfg.DisableMonitoringLock, cfg.Metrics, cfg.Debug, ctx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// monitor monitors a single service in a loop
|
// monitor monitors a single service in a loop
|
||||||
func monitor(service *core.Service, alertingConfig *alerting.Config, disableMonitoringLock, enabledMetrics, debug bool, ctx context.Context) {
|
func monitor(service *core.Service, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, disableMonitoringLock, enabledMetrics, debug bool, ctx context.Context) {
|
||||||
// Run it immediately on start
|
// Run it immediately on start
|
||||||
execute(service, alertingConfig, disableMonitoringLock, enabledMetrics, debug)
|
execute(service, alertingConfig, maintenanceConfig, disableMonitoringLock, enabledMetrics, debug)
|
||||||
// Loop for the next executions
|
// Loop for the next executions
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
@ -45,12 +46,12 @@ func monitor(service *core.Service, alertingConfig *alerting.Config, disableMoni
|
|||||||
log.Printf("[watchdog][monitor] Canceling current execution of group=%s; service=%s", service.Group, service.Name)
|
log.Printf("[watchdog][monitor] Canceling current execution of group=%s; service=%s", service.Group, service.Name)
|
||||||
return
|
return
|
||||||
case <-time.After(service.Interval):
|
case <-time.After(service.Interval):
|
||||||
execute(service, alertingConfig, disableMonitoringLock, enabledMetrics, debug)
|
execute(service, alertingConfig, maintenanceConfig, disableMonitoringLock, enabledMetrics, debug)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func execute(service *core.Service, alertingConfig *alerting.Config, disableMonitoringLock, enabledMetrics, debug bool) {
|
func execute(service *core.Service, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, disableMonitoringLock, enabledMetrics, debug bool) {
|
||||||
if !disableMonitoringLock {
|
if !disableMonitoringLock {
|
||||||
// By placing the lock here, we prevent multiple services from being monitored at the exact same time, which
|
// By placing the lock here, we prevent multiple services from being monitored at the exact same time, which
|
||||||
// could cause performance issues and return inaccurate results
|
// could cause performance issues and return inaccurate results
|
||||||
@ -72,7 +73,11 @@ func execute(service *core.Service, alertingConfig *alerting.Config, disableMoni
|
|||||||
len(result.Errors),
|
len(result.Errors),
|
||||||
result.Duration.Round(time.Millisecond),
|
result.Duration.Round(time.Millisecond),
|
||||||
)
|
)
|
||||||
HandleAlerting(service, result, alertingConfig, debug)
|
if !maintenanceConfig.IsUnderMaintenance() {
|
||||||
|
HandleAlerting(service, result, alertingConfig, debug)
|
||||||
|
} else if debug {
|
||||||
|
log.Println("[watchdog][execute] Not handling alerting because currently in the maintenance window")
|
||||||
|
}
|
||||||
if debug {
|
if debug {
|
||||||
log.Printf("[watchdog][execute] Waiting for interval=%s before monitoring group=%s service=%s again", service.Interval, service.Group, service.Name)
|
log.Printf("[watchdog][execute] Waiting for interval=%s before monitoring group=%s service=%s again", service.Interval, service.Group, service.Name)
|
||||||
}
|
}
|
||||||
@ -83,7 +88,9 @@ func execute(service *core.Service, alertingConfig *alerting.Config, disableMoni
|
|||||||
|
|
||||||
// UpdateServiceStatuses updates the slice of service statuses
|
// UpdateServiceStatuses updates the slice of service statuses
|
||||||
func UpdateServiceStatuses(service *core.Service, result *core.Result) {
|
func UpdateServiceStatuses(service *core.Service, result *core.Result) {
|
||||||
storage.Get().Insert(service, result)
|
if err := storage.Get().Insert(service, result); err != nil {
|
||||||
|
log.Println("[watchdog][UpdateServiceStatuses] Failed to insert data in storage:", err.Error())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shutdown stops monitoring all services
|
// Shutdown stops monitoring all services
|
||||||
|
Loading…
Reference in New Issue
Block a user