mirror of
https://github.com/TwiN/gatus.git
synced 2024-11-21 23:43:27 +01:00
feat(connectivity): Allow internet connection validation prior to endpoint execution (#461)
This commit is contained in:
parent
6908199716
commit
447e140479
23
README.md
23
README.md
@ -71,6 +71,7 @@ Have any feedback or questions? [Create a discussion](https://github.com/TwiN/ga
|
||||
- [OIDC](#oidc)
|
||||
- [TLS Encryption](#tls-encryption)
|
||||
- [Metrics](#metrics)
|
||||
- [Connectivity](#connectivity)
|
||||
- [Remote instances (EXPERIMENTAL)](#remote-instances-experimental)
|
||||
- [Deployment](#deployment)
|
||||
- [Docker](#docker)
|
||||
@ -1256,6 +1257,28 @@ endpoint on the same port your application is configured to run on (`web.port`).
|
||||
See [examples/docker-compose-grafana-prometheus](.examples/docker-compose-grafana-prometheus) for further documentation as well as an example.
|
||||
|
||||
|
||||
### Connectivity
|
||||
| Parameter | Description | Default |
|
||||
|:--------------------------------|:-------------------------------------------|:--------------|
|
||||
| `connectivity` | Connectivity configuration | `{}` |
|
||||
| `connectivity.checker` | Connectivity checker configuration | Required `{}` |
|
||||
| `connectivity.checker.target`   | Host and port to use for validating connectivity; must end with `:53` | Required `""` |
|
||||
| `connectivity.checker.interval` | Interval at which to validate connectivity (minimum `5s`) | `1m`          |
|
||||
|
||||
While Gatus is used to monitor other services, it is possible for Gatus itself to lose connectivity to the internet.
|
||||
In order to prevent Gatus from reporting endpoints as unhealthy when Gatus itself is unhealthy, you may configure
|
||||
Gatus to periodically check for internet connectivity.
|
||||
|
||||
All endpoint executions are skipped while the connectivity checker deems connectivity to be down.
|
||||
|
||||
```yaml
|
||||
connectivity:
|
||||
checker:
|
||||
target: 1.1.1.1:53
|
||||
interval: 60s
|
||||
```
|
||||
|
||||
|
||||
### Remote instances (EXPERIMENTAL)
|
||||
This feature allows you to retrieve endpoint statuses from a remote Gatus instance.
|
||||
|
||||
|
@ -191,6 +191,9 @@ func TestCanCreateTCPConnection(t *testing.T) {
|
||||
if CanCreateTCPConnection("127.0.0.1", &Config{Timeout: 5 * time.Second}) {
|
||||
t.Error("should've failed, because there's no port in the address")
|
||||
}
|
||||
if !CanCreateTCPConnection("1.1.1.1:53", &Config{Timeout: 5 * time.Second}) {
|
||||
t.Error("should've succeeded, because that IP should always™ be up")
|
||||
}
|
||||
}
|
||||
|
||||
// This test checks if a HTTP client configured with `configureOAuth2()` automatically
|
||||
|
@ -14,6 +14,7 @@ import (
|
||||
"github.com/TwiN/gatus/v5/alerting"
|
||||
"github.com/TwiN/gatus/v5/alerting/alert"
|
||||
"github.com/TwiN/gatus/v5/alerting/provider"
|
||||
"github.com/TwiN/gatus/v5/config/connectivity"
|
||||
"github.com/TwiN/gatus/v5/config/maintenance"
|
||||
"github.com/TwiN/gatus/v5/config/remote"
|
||||
"github.com/TwiN/gatus/v5/config/ui"
|
||||
@ -91,6 +92,9 @@ type Config struct {
|
||||
// WARNING: This is in ALPHA and may change or be completely removed in the future
|
||||
Remote *remote.Config `yaml:"remote,omitempty"`
|
||||
|
||||
// Connectivity is the configuration for connectivity
|
||||
Connectivity *connectivity.Config `yaml:"connectivity,omitempty"`
|
||||
|
||||
configPath string // path to the file or directory from which config was loaded
|
||||
lastFileModTime time.Time // last modification time
|
||||
}
|
||||
@ -252,10 +256,20 @@ func parseAndValidateConfigBytes(yamlBytes []byte) (config *Config, err error) {
|
||||
if err := validateRemoteConfig(config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := validateConnectivityConfig(config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func validateConnectivityConfig(config *Config) error {
|
||||
if config.Connectivity != nil {
|
||||
return config.Connectivity.ValidateAndSetDefaults()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateRemoteConfig(config *Config) error {
|
||||
if config.Remote != nil {
|
||||
if err := config.Remote.ValidateAndSetDefaults(); err != nil {
|
||||
|
53
config/connectivity/connectivity.go
Normal file
53
config/connectivity/connectivity.go
Normal file
@ -0,0 +1,53 @@
|
||||
package connectivity
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/TwiN/gatus/v5/client"
|
||||
)
|
||||
|
||||
var (
	// ErrInvalidInterval is returned when the configured checker interval is
	// non-zero but shorter than 5 seconds.
	ErrInvalidInterval = errors.New("connectivity.checker.interval must be 5s or higher")

	// ErrInvalidDNSTarget is returned when the configured checker target does
	// not end with ":53".
	ErrInvalidDNSTarget = errors.New("connectivity.checker.target must be suffixed with :53")
)
|
||||
|
||||
// Config is the configuration for the connectivity checker.
|
||||
type Config struct {
|
||||
Checker *Checker `yaml:"checker,omitempty"`
|
||||
}
|
||||
|
||||
func (c *Config) ValidateAndSetDefaults() error {
|
||||
if c.Checker != nil {
|
||||
if c.Checker.Interval == 0 {
|
||||
c.Checker.Interval = 60 * time.Second
|
||||
} else if c.Checker.Interval < 5*time.Second {
|
||||
return ErrInvalidInterval
|
||||
}
|
||||
if !strings.HasSuffix(c.Checker.Target, ":53") {
|
||||
return ErrInvalidDNSTarget
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Checker describes how Gatus verifies that it has access to the internet and
// caches the outcome of the most recent verification.
type Checker struct {
	// Target is the address to connect to, e.g. 1.1.1.1:53
	Target string `yaml:"target"`

	// Interval is the minimum time between two connectivity verifications.
	Interval time.Duration `yaml:"interval,omitempty"`

	// isConnected is the cached result of the most recent verification.
	isConnected bool

	// lastCheck is the time at which isConnected was last refreshed.
	lastCheck time.Time
}
|
||||
|
||||
func (c Checker) Check() bool {
|
||||
return client.CanCreateTCPConnection(c.Target, &client.Config{Timeout: 5 * time.Second})
|
||||
}
|
||||
|
||||
func (c *Checker) IsConnected() bool {
|
||||
if now := time.Now(); now.After(c.lastCheck.Add(c.Interval)) {
|
||||
c.lastCheck, c.isConnected = now, c.Check()
|
||||
}
|
||||
return c.isConnected
|
||||
}
|
62
config/connectivity/connectivity_test.go
Normal file
62
config/connectivity/connectivity_test.go
Normal file
@ -0,0 +1,62 @@
|
||||
package connectivity
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestConfig(t *testing.T) {
|
||||
scenarios := []struct {
|
||||
name string
|
||||
cfg *Config
|
||||
expectedErr error
|
||||
expectedInterval time.Duration
|
||||
}{
|
||||
{
|
||||
name: "good-config",
|
||||
cfg: &Config{Checker: &Checker{Target: "1.1.1.1:53", Interval: 10 * time.Second}},
|
||||
expectedInterval: 10 * time.Second,
|
||||
},
|
||||
{
|
||||
name: "good-config-with-default-interval",
|
||||
cfg: &Config{Checker: &Checker{Target: "8.8.8.8:53", Interval: 0}},
|
||||
expectedInterval: 60 * time.Second,
|
||||
},
|
||||
{
|
||||
name: "config-with-interval-too-low",
|
||||
cfg: &Config{Checker: &Checker{Target: "1.1.1.1:53", Interval: 4 * time.Second}},
|
||||
expectedErr: ErrInvalidInterval,
|
||||
},
|
||||
{
|
||||
name: "config-with-invalid-target-due-to-missing-port",
|
||||
cfg: &Config{Checker: &Checker{Target: "1.1.1.1", Interval: 15 * time.Second}},
|
||||
expectedErr: ErrInvalidDNSTarget,
|
||||
},
|
||||
{
|
||||
name: "config-with-invalid-target-due-to-invalid-dns-port",
|
||||
cfg: &Config{Checker: &Checker{Target: "1.1.1.1:52", Interval: 15 * time.Second}},
|
||||
expectedErr: ErrInvalidDNSTarget,
|
||||
},
|
||||
}
|
||||
for _, scenario := range scenarios {
|
||||
t.Run(scenario.name, func(t *testing.T) {
|
||||
err := scenario.cfg.ValidateAndSetDefaults()
|
||||
if fmt.Sprintf("%s", err) != fmt.Sprintf("%s", scenario.expectedErr) {
|
||||
t.Errorf("expected error %v, got %v", scenario.expectedErr, err)
|
||||
}
|
||||
if err == nil && scenario.expectedErr == nil {
|
||||
if scenario.cfg.Checker.Interval != scenario.expectedInterval {
|
||||
t.Errorf("expected interval %v, got %v", scenario.expectedInterval, scenario.cfg.Checker.Interval)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestChecker_IsConnected(t *testing.T) {
|
||||
checker := &Checker{Target: "1.1.1.1:53", Interval: 10 * time.Second}
|
||||
if !checker.IsConnected() {
|
||||
t.Error("expected checker.IsConnected() to be true")
|
||||
}
|
||||
}
|
@ -8,6 +8,7 @@ import (
|
||||
|
||||
"github.com/TwiN/gatus/v5/alerting"
|
||||
"github.com/TwiN/gatus/v5/config"
|
||||
"github.com/TwiN/gatus/v5/config/connectivity"
|
||||
"github.com/TwiN/gatus/v5/config/maintenance"
|
||||
"github.com/TwiN/gatus/v5/core"
|
||||
"github.com/TwiN/gatus/v5/metrics"
|
||||
@ -30,15 +31,15 @@ func Monitor(cfg *config.Config) {
|
||||
if endpoint.IsEnabled() {
|
||||
// To prevent multiple requests from running at the same time, we'll wait for a little before each iteration
|
||||
time.Sleep(777 * time.Millisecond)
|
||||
go monitor(endpoint, cfg.Alerting, cfg.Maintenance, cfg.DisableMonitoringLock, cfg.Metrics, cfg.Debug, ctx)
|
||||
go monitor(endpoint, cfg.Alerting, cfg.Maintenance, cfg.Connectivity, cfg.DisableMonitoringLock, cfg.Metrics, cfg.Debug, ctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// monitor a single endpoint in a loop
|
||||
func monitor(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, disableMonitoringLock, enabledMetrics, debug bool, ctx context.Context) {
|
||||
func monitor(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock, enabledMetrics, debug bool, ctx context.Context) {
|
||||
// Run it immediately on start
|
||||
execute(endpoint, alertingConfig, maintenanceConfig, disableMonitoringLock, enabledMetrics, debug)
|
||||
execute(endpoint, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics, debug)
|
||||
// Loop for the next executions
|
||||
for {
|
||||
select {
|
||||
@ -46,16 +47,22 @@ func monitor(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenan
|
||||
log.Printf("[watchdog][monitor] Canceling current execution of group=%s; endpoint=%s", endpoint.Group, endpoint.Name)
|
||||
return
|
||||
case <-time.After(endpoint.Interval):
|
||||
execute(endpoint, alertingConfig, maintenanceConfig, disableMonitoringLock, enabledMetrics, debug)
|
||||
execute(endpoint, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics, debug)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func execute(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, disableMonitoringLock, enabledMetrics, debug bool) {
|
||||
func execute(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock, enabledMetrics, debug bool) {
|
||||
if !disableMonitoringLock {
|
||||
// By placing the lock here, we prevent multiple endpoints from being monitored at the exact same time, which
|
||||
// could cause performance issues and return inaccurate results
|
||||
monitoringMutex.Lock()
|
||||
defer monitoringMutex.Unlock()
|
||||
}
|
||||
// If there's a connectivity checker configured, check if Gatus has internet connectivity
|
||||
if connectivityConfig != nil && connectivityConfig.Checker != nil && !connectivityConfig.Checker.IsConnected() {
|
||||
log.Println("[watchdog][execute] No connectivity; skipping execution")
|
||||
return
|
||||
}
|
||||
if debug {
|
||||
log.Printf("[watchdog][execute] Monitoring group=%s; endpoint=%s", endpoint.Group, endpoint.Name)
|
||||
@ -79,9 +86,6 @@ func execute(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenan
|
||||
if debug {
|
||||
log.Printf("[watchdog][execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", endpoint.Interval, endpoint.Group, endpoint.Name)
|
||||
}
|
||||
if !disableMonitoringLock {
|
||||
monitoringMutex.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// UpdateEndpointStatuses updates the slice of endpoint statuses
|
||||
|
Loading…
Reference in New Issue
Block a user