mirror of
https://github.com/TwiN/gatus.git
synced 2024-11-24 17:04:42 +01:00
feat(connectivity): Allow internet connection validation prior to endpoint execution (#461)
This commit is contained in:
parent
6908199716
commit
447e140479
23
README.md
23
README.md
@ -71,6 +71,7 @@ Have any feedback or questions? [Create a discussion](https://github.com/TwiN/ga
|
|||||||
- [OIDC](#oidc)
|
- [OIDC](#oidc)
|
||||||
- [TLS Encryption](#tls-encryption)
|
- [TLS Encryption](#tls-encryption)
|
||||||
- [Metrics](#metrics)
|
- [Metrics](#metrics)
|
||||||
|
- [Connectivity](#connectivity)
|
||||||
- [Remote instances (EXPERIMENTAL)](#remote-instances-experimental)
|
- [Remote instances (EXPERIMENTAL)](#remote-instances-experimental)
|
||||||
- [Deployment](#deployment)
|
- [Deployment](#deployment)
|
||||||
- [Docker](#docker)
|
- [Docker](#docker)
|
||||||
@ -1256,6 +1257,28 @@ endpoint on the same port your application is configured to run on (`web.port`).
|
|||||||
See [examples/docker-compose-grafana-prometheus](.examples/docker-compose-grafana-prometheus) for further documentation as well as an example.
|
See [examples/docker-compose-grafana-prometheus](.examples/docker-compose-grafana-prometheus) for further documentation as well as an example.
|
||||||
|
|
||||||
|
|
||||||
|
### Connectivity
|
||||||
|
| Parameter | Description | Default |
|
||||||
|
|:--------------------------------|:-------------------------------------------|:--------------|
|
||||||
|
| `connectivity` | Connectivity configuration | `{}` |
|
||||||
|
| `connectivity.checker` | Connectivity checker configuration | Required `{}` |
|
||||||
|
| `connectivity.checker.target` | Host to use for validating connectivity | Required `""` |
|
||||||
|
| `connectivity.checker.interval` | Interval at which to validate connectivity | `1m` |
|
||||||
|
|
||||||
|
While Gatus is used to monitor other services, it is possible for Gatus itself to lose connectivity to the internet.
|
||||||
|
In order to prevent Gatus from reporting endpoints as unhealthy when Gatus itself is unhealthy, you may configure
|
||||||
|
Gatus to periodically check for internet connectivity.
|
||||||
|
|
||||||
|
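All endpoint executions are skipped while the connectivity checker deems connectivity to be down.
Note that `connectivity.checker.target` must end with `:53`, and `connectivity.checker.interval` must be `5s` or higher.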
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
connectivity:
|
||||||
|
checker:
|
||||||
|
target: 1.1.1.1:53
|
||||||
|
interval: 60s
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
### Remote instances (EXPERIMENTAL)
|
### Remote instances (EXPERIMENTAL)
|
||||||
This feature allows you to retrieve endpoint statuses from a remote Gatus instance.
|
This feature allows you to retrieve endpoint statuses from a remote Gatus instance.
|
||||||
|
|
||||||
|
@ -191,6 +191,9 @@ func TestCanCreateTCPConnection(t *testing.T) {
|
|||||||
if CanCreateTCPConnection("127.0.0.1", &Config{Timeout: 5 * time.Second}) {
|
if CanCreateTCPConnection("127.0.0.1", &Config{Timeout: 5 * time.Second}) {
|
||||||
t.Error("should've failed, because there's no port in the address")
|
t.Error("should've failed, because there's no port in the address")
|
||||||
}
|
}
|
||||||
|
if !CanCreateTCPConnection("1.1.1.1:53", &Config{Timeout: 5 * time.Second}) {
|
||||||
|
t.Error("should've succeeded, because that IP should always™ be up")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// This test checks if a HTTP client configured with `configureOAuth2()` automatically
|
// This test checks if a HTTP client configured with `configureOAuth2()` automatically
|
||||||
|
@ -14,6 +14,7 @@ import (
|
|||||||
"github.com/TwiN/gatus/v5/alerting"
|
"github.com/TwiN/gatus/v5/alerting"
|
||||||
"github.com/TwiN/gatus/v5/alerting/alert"
|
"github.com/TwiN/gatus/v5/alerting/alert"
|
||||||
"github.com/TwiN/gatus/v5/alerting/provider"
|
"github.com/TwiN/gatus/v5/alerting/provider"
|
||||||
|
"github.com/TwiN/gatus/v5/config/connectivity"
|
||||||
"github.com/TwiN/gatus/v5/config/maintenance"
|
"github.com/TwiN/gatus/v5/config/maintenance"
|
||||||
"github.com/TwiN/gatus/v5/config/remote"
|
"github.com/TwiN/gatus/v5/config/remote"
|
||||||
"github.com/TwiN/gatus/v5/config/ui"
|
"github.com/TwiN/gatus/v5/config/ui"
|
||||||
@ -91,6 +92,9 @@ type Config struct {
|
|||||||
// WARNING: This is in ALPHA and may change or be completely removed in the future
|
// WARNING: This is in ALPHA and may change or be completely removed in the future
|
||||||
Remote *remote.Config `yaml:"remote,omitempty"`
|
Remote *remote.Config `yaml:"remote,omitempty"`
|
||||||
|
|
||||||
|
// Connectivity is the configuration for connectivity
|
||||||
|
Connectivity *connectivity.Config `yaml:"connectivity,omitempty"`
|
||||||
|
|
||||||
configPath string // path to the file or directory from which config was loaded
|
configPath string // path to the file or directory from which config was loaded
|
||||||
lastFileModTime time.Time // last modification time
|
lastFileModTime time.Time // last modification time
|
||||||
}
|
}
|
||||||
@ -252,10 +256,20 @@ func parseAndValidateConfigBytes(yamlBytes []byte) (config *Config, err error) {
|
|||||||
if err := validateRemoteConfig(config); err != nil {
|
if err := validateRemoteConfig(config); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
if err := validateConnectivityConfig(config); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func validateConnectivityConfig(config *Config) error {
|
||||||
|
if config.Connectivity != nil {
|
||||||
|
return config.Connectivity.ValidateAndSetDefaults()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func validateRemoteConfig(config *Config) error {
|
func validateRemoteConfig(config *Config) error {
|
||||||
if config.Remote != nil {
|
if config.Remote != nil {
|
||||||
if err := config.Remote.ValidateAndSetDefaults(); err != nil {
|
if err := config.Remote.ValidateAndSetDefaults(); err != nil {
|
||||||
|
53
config/connectivity/connectivity.go
Normal file
53
config/connectivity/connectivity.go
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
package connectivity
|
||||||
|
|
||||||
|
import (
	"errors"
	"strings"
	"sync"
	"time"

	"github.com/TwiN/gatus/v5/client"
)
|
||||||
|
|
||||||
|
// Sentinel errors returned by Config.ValidateAndSetDefaults.
var (
	// ErrInvalidInterval is returned when the checker interval is set but shorter than 5 seconds.
	ErrInvalidInterval = errors.New("connectivity.checker.interval must be 5s or higher")

	// ErrInvalidDNSTarget is returned when the checker target does not end with ":53".
	ErrInvalidDNSTarget = errors.New("connectivity.checker.target must be suffixed with :53")
)
|
||||||
|
|
||||||
|
// Config is the configuration for the connectivity checker.
|
||||||
|
type Config struct {
|
||||||
|
Checker *Checker `yaml:"checker,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Config) ValidateAndSetDefaults() error {
|
||||||
|
if c.Checker != nil {
|
||||||
|
if c.Checker.Interval == 0 {
|
||||||
|
c.Checker.Interval = 60 * time.Second
|
||||||
|
} else if c.Checker.Interval < 5*time.Second {
|
||||||
|
return ErrInvalidInterval
|
||||||
|
}
|
||||||
|
if !strings.HasSuffix(c.Checker.Target, ":53") {
|
||||||
|
return ErrInvalidDNSTarget
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checker is the configuration for making sure Gatus has access to the internet.
|
||||||
|
type Checker struct {
|
||||||
|
Target string `yaml:"target"` // e.g. 1.1.1.1:53
|
||||||
|
Interval time.Duration `yaml:"interval,omitempty"`
|
||||||
|
|
||||||
|
isConnected bool
|
||||||
|
lastCheck time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c Checker) Check() bool {
|
||||||
|
return client.CanCreateTCPConnection(c.Target, &client.Config{Timeout: 5 * time.Second})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Checker) IsConnected() bool {
|
||||||
|
if now := time.Now(); now.After(c.lastCheck.Add(c.Interval)) {
|
||||||
|
c.lastCheck, c.isConnected = now, c.Check()
|
||||||
|
}
|
||||||
|
return c.isConnected
|
||||||
|
}
|
62
config/connectivity/connectivity_test.go
Normal file
62
config/connectivity/connectivity_test.go
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
package connectivity
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestConfig(t *testing.T) {
|
||||||
|
scenarios := []struct {
|
||||||
|
name string
|
||||||
|
cfg *Config
|
||||||
|
expectedErr error
|
||||||
|
expectedInterval time.Duration
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "good-config",
|
||||||
|
cfg: &Config{Checker: &Checker{Target: "1.1.1.1:53", Interval: 10 * time.Second}},
|
||||||
|
expectedInterval: 10 * time.Second,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "good-config-with-default-interval",
|
||||||
|
cfg: &Config{Checker: &Checker{Target: "8.8.8.8:53", Interval: 0}},
|
||||||
|
expectedInterval: 60 * time.Second,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "config-with-interval-too-low",
|
||||||
|
cfg: &Config{Checker: &Checker{Target: "1.1.1.1:53", Interval: 4 * time.Second}},
|
||||||
|
expectedErr: ErrInvalidInterval,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "config-with-invalid-target-due-to-missing-port",
|
||||||
|
cfg: &Config{Checker: &Checker{Target: "1.1.1.1", Interval: 15 * time.Second}},
|
||||||
|
expectedErr: ErrInvalidDNSTarget,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "config-with-invalid-target-due-to-invalid-dns-port",
|
||||||
|
cfg: &Config{Checker: &Checker{Target: "1.1.1.1:52", Interval: 15 * time.Second}},
|
||||||
|
expectedErr: ErrInvalidDNSTarget,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, scenario := range scenarios {
|
||||||
|
t.Run(scenario.name, func(t *testing.T) {
|
||||||
|
err := scenario.cfg.ValidateAndSetDefaults()
|
||||||
|
if fmt.Sprintf("%s", err) != fmt.Sprintf("%s", scenario.expectedErr) {
|
||||||
|
t.Errorf("expected error %v, got %v", scenario.expectedErr, err)
|
||||||
|
}
|
||||||
|
if err == nil && scenario.expectedErr == nil {
|
||||||
|
if scenario.cfg.Checker.Interval != scenario.expectedInterval {
|
||||||
|
t.Errorf("expected interval %v, got %v", scenario.expectedInterval, scenario.cfg.Checker.Interval)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChecker_IsConnected(t *testing.T) {
|
||||||
|
checker := &Checker{Target: "1.1.1.1:53", Interval: 10 * time.Second}
|
||||||
|
if !checker.IsConnected() {
|
||||||
|
t.Error("expected checker.IsConnected() to be true")
|
||||||
|
}
|
||||||
|
}
|
@ -8,6 +8,7 @@ import (
|
|||||||
|
|
||||||
"github.com/TwiN/gatus/v5/alerting"
|
"github.com/TwiN/gatus/v5/alerting"
|
||||||
"github.com/TwiN/gatus/v5/config"
|
"github.com/TwiN/gatus/v5/config"
|
||||||
|
"github.com/TwiN/gatus/v5/config/connectivity"
|
||||||
"github.com/TwiN/gatus/v5/config/maintenance"
|
"github.com/TwiN/gatus/v5/config/maintenance"
|
||||||
"github.com/TwiN/gatus/v5/core"
|
"github.com/TwiN/gatus/v5/core"
|
||||||
"github.com/TwiN/gatus/v5/metrics"
|
"github.com/TwiN/gatus/v5/metrics"
|
||||||
@ -30,15 +31,15 @@ func Monitor(cfg *config.Config) {
|
|||||||
if endpoint.IsEnabled() {
|
if endpoint.IsEnabled() {
|
||||||
// To prevent multiple requests from running at the same time, we'll wait for a little before each iteration
|
// To prevent multiple requests from running at the same time, we'll wait for a little before each iteration
|
||||||
time.Sleep(777 * time.Millisecond)
|
time.Sleep(777 * time.Millisecond)
|
||||||
go monitor(endpoint, cfg.Alerting, cfg.Maintenance, cfg.DisableMonitoringLock, cfg.Metrics, cfg.Debug, ctx)
|
go monitor(endpoint, cfg.Alerting, cfg.Maintenance, cfg.Connectivity, cfg.DisableMonitoringLock, cfg.Metrics, cfg.Debug, ctx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// monitor a single endpoint in a loop
|
// monitor a single endpoint in a loop
|
||||||
func monitor(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, disableMonitoringLock, enabledMetrics, debug bool, ctx context.Context) {
|
func monitor(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock, enabledMetrics, debug bool, ctx context.Context) {
|
||||||
// Run it immediately on start
|
// Run it immediately on start
|
||||||
execute(endpoint, alertingConfig, maintenanceConfig, disableMonitoringLock, enabledMetrics, debug)
|
execute(endpoint, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics, debug)
|
||||||
// Loop for the next executions
|
// Loop for the next executions
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
@ -46,16 +47,22 @@ func monitor(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenan
|
|||||||
log.Printf("[watchdog][monitor] Canceling current execution of group=%s; endpoint=%s", endpoint.Group, endpoint.Name)
|
log.Printf("[watchdog][monitor] Canceling current execution of group=%s; endpoint=%s", endpoint.Group, endpoint.Name)
|
||||||
return
|
return
|
||||||
case <-time.After(endpoint.Interval):
|
case <-time.After(endpoint.Interval):
|
||||||
execute(endpoint, alertingConfig, maintenanceConfig, disableMonitoringLock, enabledMetrics, debug)
|
execute(endpoint, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics, debug)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func execute(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, disableMonitoringLock, enabledMetrics, debug bool) {
|
func execute(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock, enabledMetrics, debug bool) {
|
||||||
if !disableMonitoringLock {
|
if !disableMonitoringLock {
|
||||||
// By placing the lock here, we prevent multiple endpoints from being monitored at the exact same time, which
|
// By placing the lock here, we prevent multiple endpoints from being monitored at the exact same time, which
|
||||||
// could cause performance issues and return inaccurate results
|
// could cause performance issues and return inaccurate results
|
||||||
monitoringMutex.Lock()
|
monitoringMutex.Lock()
|
||||||
|
defer monitoringMutex.Unlock()
|
||||||
|
}
|
||||||
|
// If there's a connectivity checker configured, check if Gatus has internet connectivity
|
||||||
|
if connectivityConfig != nil && connectivityConfig.Checker != nil && !connectivityConfig.Checker.IsConnected() {
|
||||||
|
log.Println("[watchdog][execute] No connectivity; skipping execution")
|
||||||
|
return
|
||||||
}
|
}
|
||||||
if debug {
|
if debug {
|
||||||
log.Printf("[watchdog][execute] Monitoring group=%s; endpoint=%s", endpoint.Group, endpoint.Name)
|
log.Printf("[watchdog][execute] Monitoring group=%s; endpoint=%s", endpoint.Group, endpoint.Name)
|
||||||
@ -79,9 +86,6 @@ func execute(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenan
|
|||||||
if debug {
|
if debug {
|
||||||
log.Printf("[watchdog][execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", endpoint.Interval, endpoint.Group, endpoint.Name)
|
log.Printf("[watchdog][execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", endpoint.Interval, endpoint.Group, endpoint.Name)
|
||||||
}
|
}
|
||||||
if !disableMonitoringLock {
|
|
||||||
monitoringMutex.Unlock()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// UpdateEndpointStatuses updates the slice of endpoint statuses
|
// UpdateEndpointStatuses updates the slice of endpoint statuses
|
||||||
|
Loading…
Reference in New Issue
Block a user