Merge pull request #52 from TwinProduction/service-groups

Add service groups
This commit is contained in:
Chris C 2020-11-27 08:48:54 -05:00 committed by GitHub
commit 35c232d925
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 254 additions and 66 deletions

BIN
.github/assets/service-groups.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

View File

@ -43,6 +43,7 @@ core applications: https://status.twinnation.org/
- [Monitoring using DNS queries](#monitoring-using-dns-queries) - [Monitoring using DNS queries](#monitoring-using-dns-queries)
- [Basic authentication](#basic-authentication) - [Basic authentication](#basic-authentication)
- [disable-monitoring-lock](#disable-monitoring-lock) - [disable-monitoring-lock](#disable-monitoring-lock)
- [Service groups](#service-groups)
## Features ## Features
@ -97,6 +98,7 @@ Note that you can also add environment variables in the configuration file (i.e.
| `metrics` | Whether to expose metrics at /metrics | `false` | | `metrics` | Whether to expose metrics at /metrics | `false` |
| `services` | List of services to monitor | Required `[]` | | `services` | List of services to monitor | Required `[]` |
| `services[].name` | Name of the service. Can be anything. | Required `""` | | `services[].name` | Name of the service. Can be anything. | Required `""` |
| `services[].group` | Group name. Used to group multiple services together on the dashboard. See [Service groups](#service-groups). | `""` |
| `services[].url` | URL to send the request to | Required `""` | | `services[].url` | URL to send the request to | Required `""` |
| `services[].method` | Request method | `GET` | | `services[].method` | Request method | `GET` |
| `services[].insecure` | Whether to skip verifying the server's certificate chain and host name | `false` | | `services[].insecure` | Whether to skip verifying the server's certificate chain and host name | `false` |
@ -614,3 +616,49 @@ There are three main reasons why you might want to disable the monitoring lock:
technically, if you create 100 services with a 1 second interval, Gatus will send 100 requests per second) technically, if you create 100 services with a 1 second interval, Gatus will send 100 requests per second)
- You have a _lot_ of services to monitor - You have a _lot_ of services to monitor
- You want to test multiple services at very short intervals (< 5s) - You want to test multiple services at very short intervals (< 5s)
### Service groups
Service groups are used for grouping multiple services together on the dashboard.
```yaml
services:
- name: frontend
group: core
url: "https://example.org/"
interval: 5m
conditions:
- "[STATUS] == 200"
- name: backend
group: core
url: "https://example.org/"
interval: 5m
conditions:
- "[STATUS] == 200"
- name: monitoring
group: internal
url: "https://example.org/"
interval: 5m
conditions:
- "[STATUS] == 200"
- name: nas
group: internal
url: "https://example.org/"
interval: 5m
conditions:
- "[STATUS] == 200"
- name: random service that isn't part of a group
url: "https://example.org/"
interval: 5m
conditions:
- "[STATUS] == 200"
```
The configuration above will result in a dashboard that looks like this:
![Gatus Service Groups](.github/assets/service-groups.png)

View File

@ -1,15 +1,37 @@
metrics: true
services: services:
- name: twinnation - name: frontend
group: core
url: "https://twinnation.org/health" url: "https://twinnation.org/health"
interval: 30s interval: 1m
conditions: conditions:
- "[STATUS] == 200" - "[STATUS] == 200"
- "[BODY].status == UP" - "[BODY].status == UP"
- "[RESPONSE_TIME] < 1000" - "[RESPONSE_TIME] < 1000"
- name: backend
group: core
url: "http://example.org/"
interval: 5m
conditions:
- "[STATUS] == 200"
- name: monitoring
group: internal
url: "http://example.com/"
interval: 5m
conditions:
- "[STATUS] == 200"
- name: nas
group: internal
url: "https://example.org/"
interval: 5m
conditions:
- "[STATUS] == 200"
- name: cat-fact - name: cat-fact
url: "https://cat-fact.herokuapp.com/facts/random" url: "https://cat-fact.herokuapp.com/facts/random"
interval: 1m interval: 5m
conditions: conditions:
- "[STATUS] == 200" - "[STATUS] == 200"
- "[BODY].deleted == false" - "[BODY].deleted == false"

10
core/condition-result.go Normal file
View File

@ -0,0 +1,10 @@
package core
// ConditionResult is the outcome of evaluating a single Condition.
// Both fields are serialized to JSON under the keys given in their struct tags.
type ConditionResult struct {
// Condition is the condition that was evaluated, kept in its string form
Condition string `json:"condition"`
// Success is true if the condition was met (successful) and false if it was not (failed)
Success bool `json:"success"`
}

11
core/health-status.go Normal file
View File

@ -0,0 +1,11 @@
package core
// HealthStatus is the status of Gatus itself, as opposed to the status of a monitored service.
type HealthStatus struct {
// Status is the state of Gatus (UP/DOWN)
Status string `json:"status"`
// Message is an accompanying description of why the status is as reported.
// If the Status is UP, no message will be provided; because of the omitempty
// option, an empty Message is left out of the JSON output entirely.
Message string `json:"message,omitempty"`
}

View File

@ -4,22 +4,12 @@ import (
"time" "time"
) )
// HealthStatus is the status of Gatus
type HealthStatus struct {
// Status is the state of Gatus (UP/DOWN)
Status string `json:"status"`
// Message is an accompanying description of why the status is as reported.
// If the Status is UP, no message will be provided
Message string `json:"message,omitempty"`
}
// Result of the evaluation of a Service // Result of the evaluation of a Service
type Result struct { type Result struct {
// HTTPStatus is the HTTP response status code // HTTPStatus is the HTTP response status code
HTTPStatus int `json:"status"` HTTPStatus int `json:"status"`
// DNSRCode is the response code of DNS query in human readable version // DNSRCode is the response code of a DNS query in a human readable format
DNSRCode string `json:"dns-rcode"` DNSRCode string `json:"dns-rcode"`
// Body is the response body // Body is the response body
@ -52,12 +42,3 @@ type Result struct {
// CertificateExpiration is the duration before the certificate expires // CertificateExpiration is the duration before the certificate expires
CertificateExpiration time.Duration `json:"certificate-expiration,omitempty"` CertificateExpiration time.Duration `json:"certificate-expiration,omitempty"`
} }
// ConditionResult result of a Condition
type ConditionResult struct {
// Condition that was evaluated
Condition string `json:"condition"`
// Success whether the condition was met (successful) or not (failed)
Success bool `json:"success"`
}

27
core/service-status.go Normal file
View File

@ -0,0 +1,27 @@
package core
// ServiceStatus contains the evaluation Results of a Service, together with the
// group that Service belongs to.
type ServiceStatus struct {
// Group the service is a part of. Used for grouping multiple services together on the front end.
// Because of the omitempty option, an empty Group is left out of the JSON output.
Group string `json:"group,omitempty"`
// Results is the list of service evaluation results.
// AddResult appends new entries at the end, so the oldest result comes first.
Results []*Result `json:"results"`
}
// NewServiceStatus builds the ServiceStatus for the given service.
// The returned value inherits the service's Group and starts out with an
// empty, non-nil Results slice.
func NewServiceStatus(service *Service) *ServiceStatus {
	status := new(ServiceStatus)
	status.Group = service.Group
	status.Results = []*Result{}
	return status
}
// AddResult adds a Result to ServiceStatus.Results and makes sure that there are
// no more than 20 results in the Results slice afterwards.
//
// When the limit is exceeded, the oldest results (those at the front of the
// slice) are discarded, however many that takes, so a Results slice that was
// already over the limit is brought back down to 20 as well.
func (ss *ServiceStatus) AddResult(result *Result) {
	// maxResults is the number of most recent results kept per service
	const maxResults = 20
	ss.Results = append(ss.Results, result)
	if excess := len(ss.Results) - maxResults; excess > 0 {
		// Re-slicing alone leaves the discarded pointers in the backing array,
		// which keeps the old results reachable; clear them first.
		for i := 0; i < excess; i++ {
			ss.Results[i] = nil
		}
		ss.Results = ss.Results[excess:]
	}
}

View File

@ -0,0 +1,22 @@
package core
import "testing"
// TestNewServiceStatus verifies that NewServiceStatus copies the Group of the
// Service onto the ServiceStatus it creates.
func TestNewServiceStatus(t *testing.T) {
	svc := &Service{Group: "test"}
	if got, want := NewServiceStatus(svc).Group, svc.Group; got != want {
		t.Errorf("expected %s, got %s", want, got)
	}
}
// TestServiceStatus_AddResult verifies that AddResult never lets Results grow
// beyond 20 entries and that the entries it keeps are the most recent ones.
func TestServiceStatus_AddResult(t *testing.T) {
	service := &Service{Group: "test"}
	serviceStatus := NewServiceStatus(service)
	for i := 0; i < 50; i++ {
		// HTTPStatus doubles as a sequence number so the retained entries can be identified
		serviceStatus.AddResult(&Result{HTTPStatus: i})
		if len(serviceStatus.Results) > 20 {
			t.Fatalf("expected serviceStatus.Results to not exceed a length of 20, got %d after %d additions", len(serviceStatus.Results), i+1)
		}
	}
	if len(serviceStatus.Results) != 20 {
		t.Fatalf("expected serviceStatus.Results to have a length of 20, got %d", len(serviceStatus.Results))
	}
	if oldest := serviceStatus.Results[0].HTTPStatus; oldest != 30 {
		t.Errorf("expected the oldest retained result to be #30, got #%d", oldest)
	}
	if newest := serviceStatus.Results[19].HTTPStatus; newest != 49 {
		t.Errorf("expected the newest retained result to be #49, got #%d", newest)
	}
}

View File

@ -30,6 +30,9 @@ type Service struct {
// Name of the service. Can be anything. // Name of the service. Can be anything.
Name string `yaml:"name"` Name string `yaml:"name"`
// Group the service is a part of. Used for grouping multiple services together on the front end.
Group string `yaml:"group,omitempty"`
// URL to send the request to // URL to send the request to
URL string `yaml:"url"` URL string `yaml:"url"`

30
main.go
View File

@ -18,19 +18,19 @@ import (
const cacheTTL = 10 * time.Second const cacheTTL = 10 * time.Second
var ( var (
cachedServiceResults []byte cachedServiceStatuses []byte
cachedServiceResultsGzipped []byte cachedServiceStatusesGzipped []byte
cachedServiceResultsTimestamp time.Time cachedServiceStatusesTimestamp time.Time
) )
func main() { func main() {
cfg := loadConfiguration() cfg := loadConfiguration()
resultsHandler := serviceResultsHandler statusesHandler := serviceStatusesHandler
if cfg.Security != nil && cfg.Security.IsValid() { if cfg.Security != nil && cfg.Security.IsValid() {
resultsHandler = security.Handler(serviceResultsHandler, cfg.Security) statusesHandler = security.Handler(serviceStatusesHandler, cfg.Security)
} }
http.HandleFunc("/favicon.ico", favIconHandler) // favicon needs to be always served from the root http.HandleFunc("/favicon.ico", favIconHandler) // favicon needs to be always served from the root
http.HandleFunc(cfg.Web.PrependWithContextRoot("/api/v1/results"), resultsHandler) http.HandleFunc(cfg.Web.PrependWithContextRoot("/api/v1/statuses"), statusesHandler)
http.HandleFunc(cfg.Web.PrependWithContextRoot("/health"), healthHandler) http.HandleFunc(cfg.Web.PrependWithContextRoot("/health"), healthHandler)
http.Handle(cfg.Web.ContextRoot, GzipHandler(http.StripPrefix(cfg.Web.ContextRoot, http.FileServer(http.Dir("./static"))))) http.Handle(cfg.Web.ContextRoot, GzipHandler(http.StripPrefix(cfg.Web.ContextRoot, http.FileServer(http.Dir("./static")))))
@ -56,29 +56,29 @@ func loadConfiguration() *config.Config {
return config.Get() return config.Get()
} }
func serviceResultsHandler(writer http.ResponseWriter, r *http.Request) { func serviceStatusesHandler(writer http.ResponseWriter, r *http.Request) {
if isExpired := cachedServiceResultsTimestamp.IsZero() || time.Now().Sub(cachedServiceResultsTimestamp) > cacheTTL; isExpired { if isExpired := cachedServiceStatusesTimestamp.IsZero() || time.Now().Sub(cachedServiceStatusesTimestamp) > cacheTTL; isExpired {
buffer := &bytes.Buffer{} buffer := &bytes.Buffer{}
gzipWriter := gzip.NewWriter(buffer) gzipWriter := gzip.NewWriter(buffer)
data, err := watchdog.GetJSONEncodedServiceResults() data, err := watchdog.GetJSONEncodedServiceStatuses()
if err != nil { if err != nil {
log.Printf("[main][serviceResultsHandler] Unable to marshal object to JSON: %s", err.Error()) log.Printf("[main][serviceStatusesHandler] Unable to marshal object to JSON: %s", err.Error())
writer.WriteHeader(http.StatusInternalServerError) writer.WriteHeader(http.StatusInternalServerError)
_, _ = writer.Write([]byte("Unable to marshal object to JSON")) _, _ = writer.Write([]byte("Unable to marshal object to JSON"))
return return
} }
gzipWriter.Write(data) gzipWriter.Write(data)
gzipWriter.Close() gzipWriter.Close()
cachedServiceResults = data cachedServiceStatuses = data
cachedServiceResultsGzipped = buffer.Bytes() cachedServiceStatusesGzipped = buffer.Bytes()
cachedServiceResultsTimestamp = time.Now() cachedServiceStatusesTimestamp = time.Now()
} }
var data []byte var data []byte
if strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") { if strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") {
writer.Header().Set("Content-Encoding", "gzip") writer.Header().Set("Content-Encoding", "gzip")
data = cachedServiceResultsGzipped data = cachedServiceStatusesGzipped
} else { } else {
data = cachedServiceResults data = cachedServiceStatuses
} }
writer.Header().Add("Content-type", "application/json") writer.Header().Add("Content-type", "application/json")
writer.WriteHeader(http.StatusOK) writer.WriteHeader(http.StatusOK)

View File

@ -99,6 +99,13 @@
#settings select:focus { #settings select:focus {
box-shadow: none; box-shadow: none;
} }
.service-group {
cursor: pointer;
user-select: none;
}
.service-group h5:hover {
color: #1b1e21 !important;
}
</style> </style>
</head> </head>
<body> <body>
@ -162,7 +169,7 @@
function showTooltip(serviceName, index, element) { function showTooltip(serviceName, index, element) {
userClickedStatus = false; userClickedStatus = false;
clearTimeout(timerHandler); clearTimeout(timerHandler);
let serviceResult = serviceStatuses[serviceName][index]; let serviceResult = serviceStatuses[serviceName].results[index];
$("#tooltip-timestamp").text(prettifyTimestamp(serviceResult.timestamp)); $("#tooltip-timestamp").text(prettifyTimestamp(serviceResult.timestamp));
$("#tooltip-response-time").text(parseInt(serviceResult.duration/1000000) + "ms"); $("#tooltip-response-time").text(parseInt(serviceResult.duration/1000000) + "ms");
// Populate the condition section // Populate the condition section
@ -219,8 +226,8 @@
return "<span class='status badge badge-danger' style='width: 5%' onmouseenter='showTooltip(\""+serviceName+"\", "+index+", this)' onmouseleave='fadeTooltip()' onclick='userClickedStatus = !userClickedStatus;'>X</span>"; return "<span class='status badge badge-danger' style='width: 5%' onmouseenter='showTooltip(\""+serviceName+"\", "+index+", this)' onmouseleave='fadeTooltip()' onclick='userClickedStatus = !userClickedStatus;'>X</span>";
} }
function refreshResults() { function refreshStatuses() {
$.getJSON("./api/v1/results", function (data) { $.getJSON("./api/v1/statuses", function (data) {
// Update the table only if there's a change // Update the table only if there's a change
if (JSON.stringify(serviceStatuses) !== JSON.stringify(data)) { if (JSON.stringify(serviceStatuses) !== JSON.stringify(data)) {
serviceStatuses = data; serviceStatuses = data;
@ -230,16 +237,17 @@
} }
function buildTable() { function buildTable() {
let output = ""; let outputByGroup = {};
for (let serviceName in serviceStatuses) { for (let serviceName in serviceStatuses) {
let serviceStatusOverTime = ""; let serviceStatusOverTime = "";
let hostname = serviceStatuses[serviceName][serviceStatuses[serviceName].length-1].hostname let serviceStatus = serviceStatuses[serviceName];
let hostname = serviceStatus.results[serviceStatus.results.length-1].hostname;
let minResponseTime = null; let minResponseTime = null;
let maxResponseTime = null; let maxResponseTime = null;
let newestTimestamp = null; let newestTimestamp = null;
let oldestTimestamp = null; let oldestTimestamp = null;
for (let key in serviceStatuses[serviceName]) { for (let key in serviceStatus.results) {
let serviceResult = serviceStatuses[serviceName][key]; let serviceResult = serviceStatus.results[key];
serviceStatusOverTime = createStatusBadge(serviceName, key, serviceResult.success) + serviceStatusOverTime; serviceStatusOverTime = createStatusBadge(serviceName, key, serviceResult.success) + serviceStatusOverTime;
const responseTime = parseInt(serviceResult.duration/1000000); const responseTime = parseInt(serviceResult.duration/1000000);
if (minResponseTime == null || minResponseTime > responseTime) { if (minResponseTime == null || minResponseTime > responseTime) {
@ -256,8 +264,8 @@
oldestTimestamp = timestamp; oldestTimestamp = timestamp;
} }
} }
output += "" let output = ""
+ "<div class='container py-3 border-left border-right border-top border-black'>" + "<div class='container py-3 border-left border-right border-top border-black rounded-0'>"
+ " <div class='row mb-2'>" + " <div class='row mb-2'>"
+ " <div class='col-md-10'>" + " <div class='col-md-10'>"
+ " <span class='font-weight-bold'>" + serviceName + "</span> <span class='text-secondary font-weight-lighter'>- " + hostname + "</span>" + " <span class='font-weight-bold'>" + serviceName + "</span> <span class='text-secondary font-weight-lighter'>- " + hostname + "</span>"
@ -280,10 +288,59 @@
+ " </div>" + " </div>"
+ " </div>" + " </div>"
+ "</div>"; + "</div>";
// create an empty entry if this group is new
if (!outputByGroup[serviceStatus.group]) {
outputByGroup[serviceStatus.group] = "";
}
outputByGroup[serviceStatus.group] += output;
}
let output = "";
for (let group in outputByGroup) {
// Services that don't have a group should be skipped and left for last
if (group === 'undefined') {
continue
}
let key = group.replace(/[^a-zA-Z0-9]/g, '');
let existingGroupContentSelector = $("#service-group-" + key + "-content");
let isCurrentlyHidden = existingGroupContentSelector.length && existingGroupContentSelector[0].style.display === 'none';
let groupStatus = "<span class='text-success'>&#10003;</span>";
if (outputByGroup[group].includes("badge badge-danger")) {
groupStatus = "<span class='text-warning'>~</span>";
}
output += ""
+ "<div class='mt-" + (output.length ? '4' : '3') + "'>"
+ " <div class='container pt-2 border-left border-right border-top border-black border-bottom service-group' id='service-group-" + key + "' data-group='" + key + "' onclick='toggleGroup(this)'>"
+ " <h5 class='text-secondary text-monospace pb-0'>"
+ " " + groupStatus + " " + group
+ " <span class='float-right service-group-arrow' id='service-group-" + key + "-arrow'>" + (isCurrentlyHidden ? "&#9660;" : "&#9650;") + "</span>"
+ " </h5>"
+ " </div>"
+ " <div class='service-group-content' id='service-group-" + key + "-content' style='" + (isCurrentlyHidden ? "display: none;" : "") + "'>"
+ " " + outputByGroup[group]
+ " </div>"
+ "</div>";
}
// Add all services that don't have a group at the end
if (outputByGroup['undefined']) {
output += ""
+ "<div class='mt-" + (output.length ? '4' : '3') + "'>"
+ " " + outputByGroup['undefined']
+ "</div>"
} }
$("#results").html(output); $("#results").html(output);
} }
// Shows or hides the content of the service group whose header was clicked.
// The group is identified by the header's data-group attribute; once the
// animation completes, the header's arrow is updated to reflect whether the
// content ended up hidden or visible.
function toggleGroup(element) {
    const groupKey = element.dataset.group;
    const content = $("#service-group-" + groupKey + "-content");
    content.toggle("fast", function () {
        const isHidden = content.length && content[0].style.display === 'none';
        $("#service-group-" + groupKey + "-arrow").html(isHidden ? "&#9660;" : "&#9650;");
    });
}
function prettifyTimestamp(timestamp) { function prettifyTimestamp(timestamp) {
let date = new Date(timestamp); let date = new Date(timestamp);
let YYYY = date.getFullYear(); let YYYY = date.getFullYear();
@ -318,15 +375,15 @@
} }
function setRefreshInterval(seconds) { function setRefreshInterval(seconds) {
refreshResults(); refreshStatuses();
refreshIntervalHandler = setInterval(function() { refreshIntervalHandler = setInterval(function() {
refreshResults(); refreshStatuses();
}, seconds * 1000) }, seconds * 1000);
} }
$("#refresh-rate").change(function() { $("#refresh-rate").change(function() {
clearInterval(refreshIntervalHandler); clearInterval(refreshIntervalHandler);
setRefreshInterval($(this).val()) setRefreshInterval($(this).val());
}); });
setRefreshInterval(30); setRefreshInterval(30);
$("#refresh-rate").val(30); $("#refresh-rate").val(30);

View File

@ -13,22 +13,22 @@ import (
) )
var ( var (
serviceResults = make(map[string][]*core.Result) serviceStatuses = make(map[string]*core.ServiceStatus)
// serviceResultsMutex is used to prevent concurrent map access // serviceStatusesMutex is used to prevent concurrent map access
serviceResultsMutex sync.RWMutex serviceStatusesMutex sync.RWMutex
// monitoringMutex is used to prevent multiple services from being evaluated at the same time. // monitoringMutex is used to prevent multiple services from being evaluated at the same time.
// Without this, conditions using response time may become inaccurate. // Without this, conditions using response time may become inaccurate.
monitoringMutex sync.Mutex monitoringMutex sync.Mutex
) )
// GetJSONEncodedServiceResults returns a list of the last 20 results for each service encoded using json.Marshal. // GetJSONEncodedServiceStatuses returns the core.ServiceStatus of each service, keyed by service name, encoded using json.Marshal.
// The reason why the encoding is done here is because we use a mutex to prevent concurrent map access. // The reason why the encoding is done here is because we use a mutex to prevent concurrent map access.
func GetJSONEncodedServiceResults() ([]byte, error) { func GetJSONEncodedServiceStatuses() ([]byte, error) {
serviceResultsMutex.RLock() serviceStatusesMutex.RLock()
data, err := json.Marshal(serviceResults) data, err := json.Marshal(serviceStatuses)
serviceResultsMutex.RUnlock() serviceStatusesMutex.RUnlock()
return data, err return data, err
} }
@ -55,12 +55,7 @@ func monitor(service *core.Service) {
} }
result := service.EvaluateHealth() result := service.EvaluateHealth()
metric.PublishMetricsForService(service, result) metric.PublishMetricsForService(service, result)
serviceResultsMutex.Lock() UpdateServiceStatuses(service, result)
serviceResults[service.Name] = append(serviceResults[service.Name], result)
if len(serviceResults[service.Name]) > 20 {
serviceResults[service.Name] = serviceResults[service.Name][1:]
}
serviceResultsMutex.Unlock()
var extra string var extra string
if !result.Success { if !result.Success {
extra = fmt.Sprintf("responseBody=%s", result.Body) extra = fmt.Sprintf("responseBody=%s", result.Body)
@ -83,3 +78,15 @@ func monitor(service *core.Service) {
time.Sleep(service.Interval) time.Sleep(service.Interval)
} }
} }
// UpdateServiceStatuses records result in the map of service statuses, under
// the name of the given service. If the service has no entry in the map yet,
// one is created with core.NewServiceStatus before the result is added.
//
// serviceStatusesMutex is held for the duration of the update.
func UpdateServiceStatuses(service *core.Service, result *core.Result) {
	serviceStatusesMutex.Lock()
	// Deferred so that the mutex is released even if the update panics
	defer serviceStatusesMutex.Unlock()
	serviceStatus, exists := serviceStatuses[service.Name]
	if !exists {
		serviceStatus = core.NewServiceStatus(service)
		serviceStatuses[service.Name] = serviceStatus
	}
	serviceStatus.AddResult(result)
}