From e91462ce41b83aad340fdf3e2aa4cad0f4060a0d Mon Sep 17 00:00:00 2001
From: TwinProduction
Date: Sun, 18 Apr 2021 00:51:47 -0400
Subject: [PATCH] Unify uptime hourly metrics under Uptime.HourlyStatistics and add metric for response time

---
Review notes (reviewer annotations, not part of the commit message; text
between the "---" separator and the first "diff --git" line is not applied):

  * What the patch does: the two per-hour maps SuccessfulExecutionsPerHour
    and TotalExecutionsPerHour are replaced by a single HourlyStatistics map
    of *HourlyUptimeStatistics, which also accumulates the response time in
    milliseconds. ProcessResult migrates data still held in the old maps the
    first time it sees any, then sets both old maps to nil.
  * Two added lines differ from the original commit, without changing any
    line count: the redundant blank identifier in the HourlyStatistics map
    lookup is dropped, and the failure message in
    TestServiceStatus_AddResultUptimeIsCleaningUpAfterItself now names
    serviceStatus.Uptime.HourlyStatistics, the map it actually measures.
    The blob ids on the "index" lines still refer to the original commit.
  * Follow-ups left untouched because they would change hunk sizes:
      + the commented-out "cute print" block at the end of ProcessResult
        (it mentions TotalRequestResponseTimePerHour, a name that no hunk in
        this patch declares);
      + the bare "// Deprecated" markers: Go tooling only recognises a doc
        comment paragraph that begins with "Deprecated: ";
      + checkHourlyStatistics dereferences its argument without a nil check,
        so a missing map entry panics instead of failing the test;
      + uint64(result.Duration.Milliseconds()) wraps around for a negative
        Duration;
      + migrateToHourlyStatistics replaces any HourlyStatistics map that is
        already populated.

 core/uptime.go            | 93 ++++++++++++++++++++++++++++++---------
 core/uptime_bench_test.go | 24 ++++++++++
 core/uptime_test.go       | 56 +++++++++++++++--------
 3 files changed, 134 insertions(+), 39 deletions(-)
 create mode 100644 core/uptime_bench_test.go

diff --git a/core/uptime.go b/core/uptime.go
index 530e8fb7..d122295a 100644
--- a/core/uptime.go
+++ b/core/uptime.go
@@ -1,6 +1,7 @@
 package core
 
 import (
+	"log"
 	"time"
 )
 
@@ -10,6 +11,7 @@ const (
 )
 
 // Uptime is the struct that contains the relevant data for calculating the uptime as well as the uptime itself
+// and some other statistics
 type Uptime struct {
 	// LastSevenDays is the uptime percentage over the past 7 days
 	LastSevenDays float64 `json:"7d"`
@@ -22,43 +24,62 @@ type Uptime struct {
 
 	// SuccessfulExecutionsPerHour is a map containing the number of successes (value)
 	// for every hourly unix timestamps (key)
+	// Deprecated
 	SuccessfulExecutionsPerHour map[int64]uint64 `json:"-"`
 
 	// TotalExecutionsPerHour is a map containing the total number of checks (value)
 	// for every hourly unix timestamps (key)
+	// Deprecated
 	TotalExecutionsPerHour map[int64]uint64 `json:"-"`
+
+	// HourlyStatistics is a map containing metrics collected (value) for every hourly unix timestamps (key)
+	HourlyStatistics map[int64]*HourlyUptimeStatistics `json:"-"`
+}
+
+// HourlyUptimeStatistics is a struct containing all metrics collected over the course of an hour
+type HourlyUptimeStatistics struct {
+	TotalExecutions             uint64 // Total number of checks
+	SuccessfulExecutions        uint64 // Number of successful executions
+	TotalExecutionsResponseTime uint64 // Total response time for all executions
 }
 
 // NewUptime creates a new Uptime
 func NewUptime() *Uptime {
 	return &Uptime{
-		SuccessfulExecutionsPerHour: make(map[int64]uint64),
-		TotalExecutionsPerHour:      make(map[int64]uint64),
+		HourlyStatistics: make(map[int64]*HourlyUptimeStatistics),
 	}
 }
 
 // ProcessResult processes the result by extracting the relevant from the result and recalculating the uptime
 // if necessary
 func (uptime *Uptime) ProcessResult(result *Result) {
-	if uptime.SuccessfulExecutionsPerHour == nil || uptime.TotalExecutionsPerHour == nil {
-		uptime.SuccessfulExecutionsPerHour = make(map[int64]uint64)
-		uptime.TotalExecutionsPerHour = make(map[int64]uint64)
+	// XXX: Remove this on v3.0.0
+	if len(uptime.SuccessfulExecutionsPerHour) != 0 || len(uptime.TotalExecutionsPerHour) != 0 {
+		uptime.migrateToHourlyStatistics()
+	}
+	if uptime.HourlyStatistics == nil {
+		uptime.HourlyStatistics = make(map[int64]*HourlyUptimeStatistics)
 	}
 	unixTimestampFlooredAtHour := result.Timestamp.Unix() - (result.Timestamp.Unix() % 3600)
-	if result.Success {
-		uptime.SuccessfulExecutionsPerHour[unixTimestampFlooredAtHour]++
+	hourlyStats := uptime.HourlyStatistics[unixTimestampFlooredAtHour]
+	if hourlyStats == nil {
+		hourlyStats = &HourlyUptimeStatistics{}
+		uptime.HourlyStatistics[unixTimestampFlooredAtHour] = hourlyStats
 	}
-	uptime.TotalExecutionsPerHour[unixTimestampFlooredAtHour]++
+	if result.Success {
+		hourlyStats.SuccessfulExecutions++
+	}
+	hourlyStats.TotalExecutions++
+	hourlyStats.TotalExecutionsResponseTime += uint64(result.Duration.Milliseconds())
 	// Clean up only when we're starting to have too many useless keys
 	// Note that this is only triggered when there are more entries than there should be after
 	// 10 days, despite the fact that we are deleting everything that's older than 7 days.
 	// This is to prevent re-iterating on every `ProcessResult` as soon as the uptime has been logged for 7 days.
-	if len(uptime.TotalExecutionsPerHour) > numberOfHoursInTenDays {
+	if len(uptime.HourlyStatistics) > numberOfHoursInTenDays {
 		sevenDaysAgo := time.Now().Add(-(sevenDays + time.Hour)).Unix()
-		for hourlyUnixTimestamp := range uptime.TotalExecutionsPerHour {
+		for hourlyUnixTimestamp := range uptime.HourlyStatistics {
 			if sevenDaysAgo > hourlyUnixTimestamp {
-				delete(uptime.TotalExecutionsPerHour, hourlyUnixTimestamp)
-				delete(uptime.SuccessfulExecutionsPerHour, hourlyUnixTimestamp)
+				delete(uptime.HourlyStatistics, hourlyUnixTimestamp)
 			}
 		}
 	}
@@ -77,6 +98,16 @@ func (uptime *Uptime) ProcessResult(result *Result) {
 			uptime.recalculate()
 		}
 	}
+	// cute print
+	//b, _ := json.MarshalIndent(uptime.TotalExecutionsPerHour, "", " ")
+	//fmt.Println("TotalExecutionsPerHour:", string(b))
+	//b, _ = json.MarshalIndent(uptime.SuccessfulExecutionsPerHour, "", " ")
+	//fmt.Println("SuccessfulExecutionsPerHour:", string(b))
+	//b, _ = json.MarshalIndent(uptime.TotalRequestResponseTimePerHour, "", " ")
+	//fmt.Println("TotalRequestResponseTimePerHour:", string(b))
+	//for unixTimestamp, executions := range uptime.TotalExecutionsPerHour {
+	//	fmt.Printf("average for %d was %d\n", unixTimestamp, uptime.TotalRequestResponseTimePerHour[unixTimestamp]/executions)
+	//}
 }
 
 func (uptime *Uptime) recalculate() {
@@ -86,17 +117,20 @@ func (uptime *Uptime) recalculate() {
 	timestamp := now.Add(-sevenDays)
 	for now.Sub(timestamp) >= 0 {
 		hourlyUnixTimestamp := timestamp.Unix() - (timestamp.Unix() % 3600)
-		successCountForTimestamp := uptime.SuccessfulExecutionsPerHour[hourlyUnixTimestamp]
-		totalCountForTimestamp := uptime.TotalExecutionsPerHour[hourlyUnixTimestamp]
-		uptimeBrackets["7d_success"] += successCountForTimestamp
-		uptimeBrackets["7d_total"] += totalCountForTimestamp
+		hourlyStats := uptime.HourlyStatistics[hourlyUnixTimestamp]
+		if hourlyStats == nil || hourlyStats.TotalExecutions == 0 {
+			timestamp = timestamp.Add(time.Hour)
+			continue
+		}
+		uptimeBrackets["7d_success"] += hourlyStats.SuccessfulExecutions
+		uptimeBrackets["7d_total"] += hourlyStats.TotalExecutions
 		if now.Sub(timestamp) <= 24*time.Hour {
-			uptimeBrackets["24h_success"] += successCountForTimestamp
-			uptimeBrackets["24h_total"] += totalCountForTimestamp
+			uptimeBrackets["24h_success"] += hourlyStats.SuccessfulExecutions
+			uptimeBrackets["24h_total"] += hourlyStats.TotalExecutions
 		}
 		if now.Sub(timestamp) <= time.Hour {
-			uptimeBrackets["1h_success"] += successCountForTimestamp
-			uptimeBrackets["1h_total"] += totalCountForTimestamp
+			uptimeBrackets["1h_success"] += hourlyStats.SuccessfulExecutions
+			uptimeBrackets["1h_total"] += hourlyStats.TotalExecutions
 		}
 		timestamp = timestamp.Add(time.Hour)
 	}
@@ -110,3 +144,22 @@ func (uptime *Uptime) recalculate() {
 		uptime.LastHour = float64(uptimeBrackets["1h_success"]) / float64(uptimeBrackets["1h_total"])
 	}
 }
+
+func (uptime *Uptime) migrateToHourlyStatistics() {
+	log.Println("[migrateToHourlyStatistics] Got", len(uptime.SuccessfulExecutionsPerHour), "entries for successful executions and", len(uptime.TotalExecutionsPerHour), "entries for total executions")
+	uptime.HourlyStatistics = make(map[int64]*HourlyUptimeStatistics)
+	for hourlyUnixTimestamp, totalExecutions := range uptime.TotalExecutionsPerHour {
+		if totalExecutions == 0 {
+			log.Println("[migrateToHourlyStatistics] Skipping entry at", hourlyUnixTimestamp, "because total number of executions is 0")
+			continue
+		}
+		uptime.HourlyStatistics[hourlyUnixTimestamp] = &HourlyUptimeStatistics{
+			TotalExecutions:             totalExecutions,
+			SuccessfulExecutions:        uptime.SuccessfulExecutionsPerHour[hourlyUnixTimestamp],
+			TotalExecutionsResponseTime: 0,
+		}
+	}
+	log.Println("[migrateToHourlyStatistics] Migrated", len(uptime.HourlyStatistics), "entries")
+	uptime.SuccessfulExecutionsPerHour = nil
+	uptime.TotalExecutionsPerHour = nil
+}
diff --git a/core/uptime_bench_test.go b/core/uptime_bench_test.go
new file mode 100644
index 00000000..c362a10a
--- /dev/null
+++ b/core/uptime_bench_test.go
@@ -0,0 +1,24 @@
+package core
+
+import (
+	"testing"
+	"time"
+)
+
+func BenchmarkUptime_ProcessResult(b *testing.B) {
+	uptime := NewUptime()
+	now := time.Now()
+	now = time.Date(now.Year(), now.Month(), now.Day(), now.Hour(), 0, 0, 0, now.Location())
+	// Start 12000 days ago
+	timestamp := now.Add(-12000 * 24 * time.Hour)
+	for n := 0; n < b.N; n++ {
+		uptime.ProcessResult(&Result{
+			Duration:  18 * time.Millisecond,
+			Success:   n%15 == 0,
+			Timestamp: timestamp,
+		})
+		// Simulate service with an interval of 3 minutes
+		timestamp = timestamp.Add(3 * time.Minute)
+	}
+	b.ReportAllocs()
+}
diff --git a/core/uptime_test.go b/core/uptime_test.go
index 246b2308..06623704 100644
--- a/core/uptime_test.go
+++ b/core/uptime_test.go
@@ -12,33 +12,39 @@ func TestUptime_ProcessResult(t *testing.T) {
 
 	checkUptimes(t, serviceStatus, 0.00, 0.00, 0.00)
 
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-7 * 24 * time.Hour), Success: true})
+	now := time.Now()
+	now = time.Date(now.Year(), now.Month(), now.Day(), now.Hour(), 0, 0, 0, now.Location())
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-7 * 24 * time.Hour), Success: true})
 	checkUptimes(t, serviceStatus, 1.00, 0.00, 0.00)
 
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-6 * 24 * time.Hour), Success: false})
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-6 * 24 * time.Hour), Success: false})
 	checkUptimes(t, serviceStatus, 0.50, 0.00, 0.00)
 
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-8 * 24 * time.Hour), Success: true})
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-8 * 24 * time.Hour), Success: true})
 	checkUptimes(t, serviceStatus, 0.50, 0.00, 0.00)
 
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-24 * time.Hour), Success: true})
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-12 * time.Hour), Success: true})
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-24 * time.Hour), Success: true})
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-12 * time.Hour), Success: true})
 	checkUptimes(t, serviceStatus, 0.75, 1.00, 0.00)
 
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-1 * time.Hour), Success: true})
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-30 * time.Minute), Success: false})
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-15 * time.Minute), Success: false})
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-10 * time.Minute), Success: false})
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-1 * time.Hour), Success: true, Duration: 10 * time.Millisecond})
+	checkHourlyStatistics(t, uptime.HourlyStatistics[now.Unix()-now.Unix()%3600-3600], 10, 1, 1)
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-30 * time.Minute), Success: false, Duration: 500 * time.Millisecond})
+	checkHourlyStatistics(t, uptime.HourlyStatistics[now.Unix()-now.Unix()%3600-3600], 510, 2, 1)
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-15 * time.Minute), Success: false, Duration: 25 * time.Millisecond})
+	checkHourlyStatistics(t, uptime.HourlyStatistics[now.Unix()-now.Unix()%3600-3600], 535, 3, 1)
+
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-10 * time.Minute), Success: false})
 	checkUptimes(t, serviceStatus, 0.50, 0.50, 0.25)
 
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-120 * time.Hour), Success: true})
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-119 * time.Hour), Success: true})
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-118 * time.Hour), Success: true})
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-117 * time.Hour), Success: true})
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-10 * time.Hour), Success: true})
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-8 * time.Hour), Success: true})
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-30 * time.Minute), Success: true})
-	uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-25 * time.Minute), Success: true})
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-120 * time.Hour), Success: true})
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-119 * time.Hour), Success: true})
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-118 * time.Hour), Success: true})
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-117 * time.Hour), Success: true})
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-10 * time.Hour), Success: true})
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-8 * time.Hour), Success: true})
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-30 * time.Minute), Success: true})
+	uptime.ProcessResult(&Result{Timestamp: now.Add(-25 * time.Minute), Success: true})
 	checkUptimes(t, serviceStatus, 0.75, 0.70, 0.50)
 }
 
@@ -51,8 +57,8 @@ func TestServiceStatus_AddResultUptimeIsCleaningUpAfterItself(t *testing.T) {
 	timestamp := now.Add(-12 * 24 * time.Hour)
 	for timestamp.Unix() <= now.Unix() {
 		serviceStatus.AddResult(&Result{Timestamp: timestamp, Success: true})
-		if len(serviceStatus.Uptime.SuccessfulExecutionsPerHour) > numberOfHoursInTenDays {
-			t.Errorf("At no point in time should there be more than %d entries in serviceStatus.SuccessfulExecutionsPerHour, but there are %d", numberOfHoursInTenDays, len(serviceStatus.Uptime.SuccessfulExecutionsPerHour))
+		if len(serviceStatus.Uptime.HourlyStatistics) > numberOfHoursInTenDays {
+			t.Errorf("At no point in time should there be more than %d entries in serviceStatus.Uptime.HourlyStatistics, but there are %d", numberOfHoursInTenDays, len(serviceStatus.Uptime.HourlyStatistics))
 		}
 		if now.Sub(timestamp) > time.Hour && serviceStatus.Uptime.LastHour != 0 {
 			t.Error("most recent timestamp > 1h ago, expected serviceStatus.Uptime.LastHour to be 0, got", serviceStatus.Uptime.LastHour)
@@ -76,3 +82,15 @@ func checkUptimes(t *testing.T, status *ServiceStatus, expectedUptimeDuringLastS
 		t.Errorf("expected status.Uptime.LastHour to be %f, got %f", expectedUptimeDuringLastHour, status.Uptime.LastHour)
 	}
 }
+
+func checkHourlyStatistics(t *testing.T, hourlyUptimeStatistics *HourlyUptimeStatistics, expectedTotalExecutionsResponseTime uint64, expectedTotalExecutions uint64, expectedSuccessfulExecutions uint64) {
+	if hourlyUptimeStatistics.TotalExecutionsResponseTime != expectedTotalExecutionsResponseTime {
+		t.Error("TotalExecutionsResponseTime should've been", expectedTotalExecutionsResponseTime, "got", hourlyUptimeStatistics.TotalExecutionsResponseTime)
+	}
+	if hourlyUptimeStatistics.TotalExecutions != expectedTotalExecutions {
+		t.Error("TotalExecutions should've been", expectedTotalExecutions, "got", hourlyUptimeStatistics.TotalExecutions)
+	}
+	if hourlyUptimeStatistics.SuccessfulExecutions != expectedSuccessfulExecutions {
+		t.Error("SuccessfulExecutions should've been", expectedSuccessfulExecutions, "got", hourlyUptimeStatistics.SuccessfulExecutions)
+	}
+}