Prevent multiple services from being evaluated at the same time

2025-06-25 04:02:28 +02:00 · 2020-04-06 18:58:13 -04:00 · 2020-04-06 18:58:13 -04:00 · fe82465c19
commit fe82465c19
parent ab73c4666e
7 changed files with 36 additions and 32 deletions
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ You can specify a custom path by setting the `GATUS_CONFIG_FILE` environment var
 metrics: true         # Whether to expose metrics at /metrics
 services:
  - name: twinnation  # Name of your service, can be anything
-    url: https://twinnation.org/actuator/health
+    url: https://twinnation.org/health
    interval: 15s     # Duration to wait between every status check (opt. default: 10s)
    conditions:
      - "[STATUS] == 200"
--- a/config.yaml
+++ b/config.yaml
@@ -1,7 +1,7 @@
 metrics: true
 services:
  - name: Twinnation
-    url: https://twinnation.org/actuator/health
+    url: https://twinnation.org/health
    interval: 30s
    conditions:
      - "[STATUS] == 200"
--- a/core/types.go
+++ b/core/types.go
@@ -76,7 +76,7 @@ func (service *Service) EvaluateConditions() *Result {
 		result.Success = false
 	}
 	for _, condition := range service.Conditions {
-		success := condition.Evaluate(result)
+		success := condition.evaluate(result)
 		if !success {
 			result.Success = false
 		}
@@ -93,7 +93,7 @@ type ConditionResult struct {

 type Condition string

-func (c *Condition) Evaluate(result *Result) bool {
+func (c *Condition) evaluate(result *Result) bool {
 	condition := string(*c)
 	if strings.Contains(condition, "==") {
 		parts := sanitizeAndResolve(strings.Split(condition, "=="), result)
--- a/core/types_test.go
+++ b/core/types_test.go
@@ -7,7 +7,7 @@ import (
 func TestEvaluateWithIp(t *testing.T) {
 	condition := Condition("[IP] == 127.0.0.1")
 	result := &Result{Ip: "127.0.0.1"}
-	condition.Evaluate(result)
+	condition.evaluate(result)
 	if !result.ConditionResults[0].Success {
 		t.Errorf("Condition '%s' should have been a success", condition)
 	}
@@ -16,7 +16,7 @@ func TestEvaluateWithIp(t *testing.T) {
 func TestEvaluateWithStatus(t *testing.T) {
 	condition := Condition("[STATUS] == 201")
 	result := &Result{HttpStatus: 201}
-	condition.Evaluate(result)
+	condition.evaluate(result)
 	if !result.ConditionResults[0].Success {
 		t.Errorf("Condition '%s' should have been a success", condition)
 	}
@@ -25,7 +25,7 @@ func TestEvaluateWithStatus(t *testing.T) {
 func TestEvaluateWithFailure(t *testing.T) {
 	condition := Condition("[STATUS] == 200")
 	result := &Result{HttpStatus: 500}
-	condition.Evaluate(result)
+	condition.evaluate(result)
 	if result.ConditionResults[0].Success {
 		t.Errorf("Condition '%s' should have been a failure", condition)
 	}
--- a/example/docker-compose-grafana-prometheus/config.yaml
+++ b/example/docker-compose-grafana-prometheus/config.yaml
@@ -1,7 +1,7 @@
 metrics: true
 services:
  - name: TwiNNatioN
-    url: https://twinnation.org/actuator/health
+    url: https://twinnation.org/health
    interval: 10s
    conditions:
      - "[STATUS] == 200"
--- a/example/kubernetes/gatus.yaml
+++ b/example/kubernetes/gatus.yaml
@@ -4,7 +4,7 @@ data:
    metrics: true
    services:
      - name: TwiNNatioN
-        url: https://twinnation.org/actuator/health
+        url: https://twinnation.org/health
        interval: 1m
        conditions:
          - "[STATUS] == 200"
--- a/watchdog/watchdog.go
+++ b/watchdog/watchdog.go
@@ -20,28 +20,32 @@ func GetServiceResults() *map[string][]*core.Result {

 func Monitor(cfg *config.Config) {
 	for _, service := range cfg.Services {
-		go func(service *core.Service) {
-			for {
-				log.Printf("[watchdog][Monitor] Monitoring serviceName=%s", service.Name)
-				result := service.EvaluateConditions()
-				metric.PublishMetricsForService(service, result)
-				rwLock.Lock()
-				serviceResults[service.Name] = append(serviceResults[service.Name], result)
-				if len(serviceResults[service.Name]) > 20 {
-					serviceResults[service.Name] = serviceResults[service.Name][1:]
-				}
-				rwLock.Unlock()
-				log.Printf(
-					"[watchdog][Monitor] Finished monitoring serviceName=%s; errors=%d; requestDuration=%s",
-					service.Name,
-					len(result.Errors),
-					result.Duration.Round(time.Millisecond),
-				)
-				log.Printf("[watchdog][Monitor] Waiting interval=%s before monitoring serviceName=%s", service.Interval, service.Name)
-				time.Sleep(service.Interval)
-			}
-		}(service)
-		// To prevent multiple requests from running exactly at the same time
-		time.Sleep(100 * time.Millisecond)
+		go monitor(service)
+		// To prevent multiple requests from running at the same time
+		time.Sleep(500 * time.Millisecond)
+	}
+}
+
+func monitor(service *core.Service) {
+	for {
+		// By placing the lock here, we prevent multiple services from being monitored at the exact same time, which
+		// could cause performance issues and return inaccurate results
+		rwLock.Lock()
+		log.Printf("[watchdog][Monitor] Monitoring serviceName=%s", service.Name)
+		result := service.EvaluateConditions()
+		metric.PublishMetricsForService(service, result)
+		serviceResults[service.Name] = append(serviceResults[service.Name], result)
+		if len(serviceResults[service.Name]) > 20 {
+			serviceResults[service.Name] = serviceResults[service.Name][1:]
+		}
+		rwLock.Unlock()
+		log.Printf(
+			"[watchdog][Monitor] Finished monitoring serviceName=%s; errors=%d; requestDuration=%s",
+			service.Name,
+			len(result.Errors),
+			result.Duration.Round(time.Millisecond),
+		)
+		log.Printf("[watchdog][Monitor] Waiting interval=%s before monitoring serviceName=%s", service.Interval, service.Name)
+		time.Sleep(service.Interval)
 	}
 }