mirror of
https://github.com/zrepl/zrepl.git
synced 2024-11-22 16:34:32 +01:00
595 lines
14 KiB
JSON
595 lines
14 KiB
JSON
|
{
|
||
|
"annotations": {
|
||
|
"list": [
|
||
|
{
|
||
|
"$$hashKey": "object:3351",
|
||
|
"builtIn": 1,
|
||
|
"datasource": "-- Grafana --",
|
||
|
"enable": true,
|
||
|
"hide": true,
|
||
|
"iconColor": "rgba(0, 211, 255, 1)",
|
||
|
"name": "Annotations & Alerts",
|
||
|
"type": "dashboard"
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
"editable": true,
|
||
|
"gnetId": null,
|
||
|
"graphTooltip": 1,
|
||
|
"id": 7,
|
||
|
"iteration": 1552746418826,
|
||
|
"links": [],
|
||
|
"panels": [
|
||
|
{
|
||
|
"content": "# zrepl Prometheus Metrics\n\nzrepl exposes Prometheus metrics and ships with this Grafana dashboard.\nThe exported metrics are suitable for health checks:\n\n* The log should generally be warning & error-free\n * The `Log Messages that require attention` graph visualizes log message counts indicating problems.\n* The number of goroutines should not grow unboundedly over time.\n * During replication, the number of goroutines can be way higher than during idle time.\n * If the goroutine count grows with each replication, there is clearly a goroutine leak. Please open a bug report.\n* The sys memory consumption should not grow unboundedly over time.\n * Note that the Go runtime pre-allocates some of its heap from the OS.\n * zrepl actually uses much less memory than allocated from the OS.\n * Since Go 1.11, Go pre-allocates more aggressively.\n* Monitor that some data is replicated, although that metric does not guarantee that replication was successful.\n\n**In general, note that the exported metrics are not stable unless declared otherwise.**",
|
||
|
"gridPos": {
|
||
|
"h": 9,
|
||
|
"w": 24,
|
||
|
"x": 0,
|
||
|
"y": 0
|
||
|
},
|
||
|
"id": 35,
|
||
|
"mode": "markdown",
|
||
|
"title": "About this Dashboard",
|
||
|
"type": "text"
|
||
|
},
|
||
|
{
|
||
|
"aliasColors": {},
|
||
|
"bars": false,
|
||
|
"dashLength": 10,
|
||
|
"dashes": false,
|
||
|
"datasource": null,
|
||
|
"fill": 1,
|
||
|
"gridPos": {
|
||
|
"h": 9,
|
||
|
"w": 12,
|
||
|
"x": 0,
|
||
|
"y": 9
|
||
|
},
|
||
|
"id": 15,
|
||
|
"legend": {
|
||
|
"avg": false,
|
||
|
"current": false,
|
||
|
"max": false,
|
||
|
"min": false,
|
||
|
"show": true,
|
||
|
"total": false,
|
||
|
"values": false
|
||
|
},
|
||
|
"lines": true,
|
||
|
"linewidth": 1,
|
||
|
"links": [],
|
||
|
"nullPointMode": "null",
|
||
|
"percentage": false,
|
||
|
"pointradius": 5,
|
||
|
"points": false,
|
||
|
"renderer": "flot",
|
||
|
"seriesOverrides": [],
|
||
|
"spaceLength": 10,
|
||
|
"stack": true,
|
||
|
"steppedLine": false,
|
||
|
"targets": [
|
||
|
{
|
||
|
"$$hashKey": "object:3436",
|
||
|
"expr": "up{job='$prom_job_name'}",
|
||
|
"format": "time_series",
|
||
|
"intervalFactor": 1,
|
||
|
"refId": "A"
|
||
|
}
|
||
|
],
|
||
|
"thresholds": [],
|
||
|
"timeFrom": null,
|
||
|
"timeShift": null,
|
||
|
"title": "zrepl Instances Up",
|
||
|
"tooltip": {
|
||
|
"shared": true,
|
||
|
"sort": 0,
|
||
|
"value_type": "individual"
|
||
|
},
|
||
|
"type": "graph",
|
||
|
"xaxis": {
|
||
|
"buckets": null,
|
||
|
"mode": "time",
|
||
|
"name": null,
|
||
|
"show": true,
|
||
|
"values": []
|
||
|
},
|
||
|
"yaxes": [
|
||
|
{
|
||
|
"format": "short",
|
||
|
"label": null,
|
||
|
"logBase": 1,
|
||
|
"max": "5",
|
||
|
"min": "0",
|
||
|
"show": true
|
||
|
},
|
||
|
{
|
||
|
"format": "short",
|
||
|
"label": null,
|
||
|
"logBase": 1,
|
||
|
"max": null,
|
||
|
"min": null,
|
||
|
"show": true
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"aliasColors": {},
|
||
|
"bars": false,
|
||
|
"dashLength": 10,
|
||
|
"dashes": false,
|
||
|
"datasource": null,
|
||
|
"fill": 1,
|
||
|
"gridPos": {
|
||
|
"h": 9,
|
||
|
"w": 12,
|
||
|
"x": 12,
|
||
|
"y": 9
|
||
|
},
|
||
|
"id": 22,
|
||
|
"legend": {
|
||
|
"avg": false,
|
||
|
"current": false,
|
||
|
"max": false,
|
||
|
"min": false,
|
||
|
"show": true,
|
||
|
"total": false,
|
||
|
"values": false
|
||
|
},
|
||
|
"lines": true,
|
||
|
"linewidth": 1,
|
||
|
"links": [],
|
||
|
"nullPointMode": "null",
|
||
|
"percentage": false,
|
||
|
"pointradius": 5,
|
||
|
"points": false,
|
||
|
"renderer": "flot",
|
||
|
"seriesOverrides": [],
|
||
|
"spaceLength": 10,
|
||
|
"stack": false,
|
||
|
"steppedLine": false,
|
||
|
"targets": [
|
||
|
{
|
||
|
"expr": "increase(zrepl_daemon_log_entries{job='$prom_job_name',level=~'warn|error'}[$__interval])",
|
||
|
"format": "time_series",
|
||
|
"intervalFactor": 1,
|
||
|
"refId": "A"
|
||
|
}
|
||
|
],
|
||
|
"thresholds": [],
|
||
|
"timeFrom": null,
|
||
|
"timeShift": null,
|
||
|
"title": "Log Messages that require attention",
|
||
|
"tooltip": {
|
||
|
"shared": true,
|
||
|
"sort": 0,
|
||
|
"value_type": "individual"
|
||
|
},
|
||
|
"type": "graph",
|
||
|
"xaxis": {
|
||
|
"buckets": null,
|
||
|
"mode": "time",
|
||
|
"name": null,
|
||
|
"show": true,
|
||
|
"values": []
|
||
|
},
|
||
|
"yaxes": [
|
||
|
{
|
||
|
"format": "short",
|
||
|
"label": null,
|
||
|
"logBase": 1,
|
||
|
"max": null,
|
||
|
"min": "0",
|
||
|
"show": true
|
||
|
},
|
||
|
{
|
||
|
"format": "short",
|
||
|
"label": null,
|
||
|
"logBase": 1,
|
||
|
"max": null,
|
||
|
"min": null,
|
||
|
"show": true
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"aliasColors": {},
|
||
|
"bars": false,
|
||
|
"dashLength": 10,
|
||
|
"dashes": false,
|
||
|
"datasource": null,
|
||
|
"fill": 1,
|
||
|
"gridPos": {
|
||
|
"h": 9,
|
||
|
"w": 12,
|
||
|
"x": 0,
|
||
|
"y": 18
|
||
|
},
|
||
|
"id": 33,
|
||
|
"legend": {
|
||
|
"avg": false,
|
||
|
"current": false,
|
||
|
"max": false,
|
||
|
"min": false,
|
||
|
"show": true,
|
||
|
"total": false,
|
||
|
"values": false
|
||
|
},
|
||
|
"lines": true,
|
||
|
"linewidth": 1,
|
||
|
"links": [],
|
||
|
"nullPointMode": "null",
|
||
|
"percentage": false,
|
||
|
"pointradius": 5,
|
||
|
"points": false,
|
||
|
"renderer": "flot",
|
||
|
"seriesOverrides": [
|
||
|
{
|
||
|
"alias": "/replicated bytes in last.*/",
|
||
|
"yaxis": 2
|
||
|
}
|
||
|
],
|
||
|
"spaceLength": 10,
|
||
|
"stack": false,
|
||
|
"steppedLine": false,
|
||
|
"targets": [
|
||
|
{
|
||
|
"expr": "sum(rate(zrepl_replication_bytes_replicated{job='$prom_job_name'}[$__interval])) by (zrepl_job)",
|
||
|
"format": "time_series",
|
||
|
"hide": false,
|
||
|
"interval": "",
|
||
|
"intervalFactor": 1,
|
||
|
"legendFormat": "replication data rate zrepl_job={{zrepl_job}}",
|
||
|
"refId": "A"
|
||
|
},
|
||
|
{
|
||
|
"expr": "sum(increase(zrepl_replication_bytes_replicated{job='$prom_job_name'}[10m])) by (zrepl_job)",
|
||
|
"format": "time_series",
|
||
|
"hide": false,
|
||
|
"interval": "",
|
||
|
"intervalFactor": 1,
|
||
|
"legendFormat": "replicated bytes in last 10min zrepl_job={{zrepl_job}}",
|
||
|
"refId": "B"
|
||
|
}
|
||
|
],
|
||
|
"thresholds": [],
|
||
|
"timeFrom": null,
|
||
|
"timeShift": null,
|
||
|
"title": "Replication Data Rate and Volume (integrated over the last 10min)",
|
||
|
"tooltip": {
|
||
|
"shared": true,
|
||
|
"sort": 0,
|
||
|
"value_type": "individual"
|
||
|
},
|
||
|
"type": "graph",
|
||
|
"xaxis": {
|
||
|
"buckets": null,
|
||
|
"mode": "time",
|
||
|
"name": null,
|
||
|
"show": true,
|
||
|
"values": []
|
||
|
},
|
||
|
"yaxes": [
|
||
|
{
|
||
|
"format": "Bps",
|
||
|
"label": null,
|
||
|
"logBase": 1,
|
||
|
"max": null,
|
||
|
"min": null,
|
||
|
"show": true
|
||
|
},
|
||
|
{
|
||
|
"format": "bytes",
|
||
|
"label": null,
|
||
|
"logBase": 1,
|
||
|
"max": null,
|
||
|
"min": null,
|
||
|
"show": true
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"aliasColors": {},
|
||
|
"bars": false,
|
||
|
"dashLength": 10,
|
||
|
"dashes": false,
|
||
|
"datasource": null,
|
||
|
"fill": 1,
|
||
|
"gridPos": {
|
||
|
"h": 9,
|
||
|
"w": 12,
|
||
|
"x": 12,
|
||
|
"y": 18
|
||
|
},
|
||
|
"id": 23,
|
||
|
"legend": {
|
||
|
"avg": false,
|
||
|
"current": false,
|
||
|
"max": false,
|
||
|
"min": false,
|
||
|
"show": true,
|
||
|
"total": false,
|
||
|
"values": false
|
||
|
},
|
||
|
"lines": true,
|
||
|
"linewidth": 1,
|
||
|
"links": [],
|
||
|
"nullPointMode": "null",
|
||
|
"percentage": false,
|
||
|
"pointradius": 5,
|
||
|
"points": false,
|
||
|
"renderer": "flot",
|
||
|
"seriesOverrides": [],
|
||
|
"spaceLength": 10,
|
||
|
"stack": false,
|
||
|
"steppedLine": false,
|
||
|
"targets": [
|
||
|
{
|
||
|
"expr": "sum(increase(zrepl_daemon_log_entries{job='$prom_job_name',zrepl_job=~\"^[^_].*\"}[$__interval])) by (instance,zrepl_job)",
|
||
|
"format": "time_series",
|
||
|
"intervalFactor": 1,
|
||
|
"refId": "A"
|
||
|
}
|
||
|
],
|
||
|
"thresholds": [],
|
||
|
"timeFrom": null,
|
||
|
"timeShift": null,
|
||
|
"title": "Log Activity (without internal jobs)",
|
||
|
"tooltip": {
|
||
|
"shared": true,
|
||
|
"sort": 0,
|
||
|
"value_type": "individual"
|
||
|
},
|
||
|
"type": "graph",
|
||
|
"xaxis": {
|
||
|
"buckets": null,
|
||
|
"mode": "time",
|
||
|
"name": null,
|
||
|
"show": true,
|
||
|
"values": []
|
||
|
},
|
||
|
"yaxes": [
|
||
|
{
|
||
|
"format": "short",
|
||
|
"label": null,
|
||
|
"logBase": 1,
|
||
|
"max": null,
|
||
|
"min": "0",
|
||
|
"show": true
|
||
|
},
|
||
|
{
|
||
|
"format": "short",
|
||
|
"label": null,
|
||
|
"logBase": 1,
|
||
|
"max": null,
|
||
|
"min": null,
|
||
|
"show": true
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"aliasColors": {},
|
||
|
"bars": false,
|
||
|
"dashLength": 10,
|
||
|
"dashes": false,
|
||
|
"datasource": null,
|
||
|
"fill": 1,
|
||
|
"gridPos": {
|
||
|
"h": 9,
|
||
|
"w": 12,
|
||
|
"x": 0,
|
||
|
"y": 27
|
||
|
},
|
||
|
"id": 17,
|
||
|
"legend": {
|
||
|
"avg": false,
|
||
|
"current": false,
|
||
|
"max": false,
|
||
|
"min": false,
|
||
|
"show": true,
|
||
|
"total": false,
|
||
|
"values": false
|
||
|
},
|
||
|
"lines": true,
|
||
|
"linewidth": 1,
|
||
|
"links": [],
|
||
|
"nullPointMode": "null",
|
||
|
"percentage": false,
|
||
|
"pointradius": 5,
|
||
|
"points": false,
|
||
|
"renderer": "flot",
|
||
|
"seriesOverrides": [],
|
||
|
"spaceLength": 10,
|
||
|
"stack": false,
|
||
|
"steppedLine": false,
|
||
|
"targets": [
|
||
|
{
|
||
|
"$$hashKey": "object:3535",
|
||
|
"expr": "go_memstats_sys_bytes{job='$prom_job_name'}",
|
||
|
"format": "time_series",
|
||
|
"hide": false,
|
||
|
"intervalFactor": 1,
|
||
|
"refId": "A"
|
||
|
}
|
||
|
],
|
||
|
"thresholds": [],
|
||
|
"timeFrom": null,
|
||
|
"timeShift": null,
|
||
|
"title": "Memory Allocated by the Go runtime from the OS (should not grow unboundedly)",
|
||
|
"tooltip": {
|
||
|
"shared": true,
|
||
|
"sort": 0,
|
||
|
"value_type": "individual"
|
||
|
},
|
||
|
"type": "graph",
|
||
|
"xaxis": {
|
||
|
"buckets": null,
|
||
|
"mode": "time",
|
||
|
"name": null,
|
||
|
"show": true,
|
||
|
"values": []
|
||
|
},
|
||
|
"yaxes": [
|
||
|
{
|
||
|
"format": "bytes",
|
||
|
"label": null,
|
||
|
"logBase": 1,
|
||
|
"max": null,
|
||
|
"min": "0",
|
||
|
"show": true
|
||
|
},
|
||
|
{
|
||
|
"format": "short",
|
||
|
"label": null,
|
||
|
"logBase": 1,
|
||
|
"max": null,
|
||
|
"min": null,
|
||
|
"show": true
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"aliasColors": {},
|
||
|
"bars": false,
|
||
|
"dashLength": 10,
|
||
|
"dashes": false,
|
||
|
"datasource": null,
|
||
|
"fill": 1,
|
||
|
"gridPos": {
|
||
|
"h": 9,
|
||
|
"w": 12,
|
||
|
"x": 12,
|
||
|
"y": 27
|
||
|
},
|
||
|
"id": 19,
|
||
|
"legend": {
|
||
|
"avg": false,
|
||
|
"current": false,
|
||
|
"max": false,
|
||
|
"min": false,
|
||
|
"show": true,
|
||
|
"total": false,
|
||
|
"values": false
|
||
|
},
|
||
|
"lines": true,
|
||
|
"linewidth": 1,
|
||
|
"links": [],
|
||
|
"nullPointMode": "null",
|
||
|
"percentage": false,
|
||
|
"pointradius": 5,
|
||
|
"points": false,
|
||
|
"renderer": "flot",
|
||
|
"seriesOverrides": [],
|
||
|
"spaceLength": 10,
|
||
|
"stack": false,
|
||
|
"steppedLine": false,
|
||
|
"targets": [
|
||
|
{
|
||
|
"expr": "go_goroutines{job='$prom_job_name'}",
|
||
|
"format": "time_series",
|
||
|
"intervalFactor": 1,
|
||
|
"refId": "A"
|
||
|
}
|
||
|
],
|
||
|
"thresholds": [],
|
||
|
"timeFrom": null,
|
||
|
"timeShift": null,
|
||
|
"title": "Number of Goroutines (should not grow unboundedly)",
|
||
|
"tooltip": {
|
||
|
"shared": true,
|
||
|
"sort": 0,
|
||
|
"value_type": "individual"
|
||
|
},
|
||
|
"type": "graph",
|
||
|
"xaxis": {
|
||
|
"buckets": null,
|
||
|
"mode": "time",
|
||
|
"name": null,
|
||
|
"show": true,
|
||
|
"values": []
|
||
|
},
|
||
|
"yaxes": [
|
||
|
{
|
||
|
"format": "short",
|
||
|
"label": null,
|
||
|
"logBase": 1,
|
||
|
"max": null,
|
||
|
"min": "0",
|
||
|
"show": true
|
||
|
},
|
||
|
{
|
||
|
"format": "short",
|
||
|
"label": null,
|
||
|
"logBase": 1,
|
||
|
"max": null,
|
||
|
"min": null,
|
||
|
"show": true
|
||
|
}
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"refresh": "1m",
|
||
|
"schemaVersion": 16,
|
||
|
"style": "dark",
|
||
|
"tags": [],
|
||
|
"templating": {
|
||
|
"list": [
|
||
|
{
|
||
|
"allValue": null,
|
||
|
"current": {
|
||
|
"text": "zrepl",
|
||
|
"value": "zrepl"
|
||
|
},
|
||
|
"datasource": "prometheus",
|
||
|
"hide": 0,
|
||
|
"includeAll": false,
|
||
|
"label": "Prometheus Job Name",
|
||
|
"multi": false,
|
||
|
"name": "prom_job_name",
|
||
|
"options": [],
|
||
|
"query": "label_values(up, job)",
|
||
|
"refresh": 1,
|
||
|
"regex": "",
|
||
|
"sort": 1,
|
||
|
"tagValuesQuery": "",
|
||
|
"tags": [],
|
||
|
"tagsQuery": "",
|
||
|
"type": "query",
|
||
|
"useTags": false
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
"time": {
|
||
|
"from": "now-2d",
|
||
|
"to": "now"
|
||
|
},
|
||
|
"timepicker": {
|
||
|
"refresh_intervals": [
|
||
|
"5s",
|
||
|
"10s",
|
||
|
"30s",
|
||
|
"1m",
|
||
|
"5m",
|
||
|
"15m",
|
||
|
"30m",
|
||
|
"1h",
|
||
|
"2h",
|
||
|
"1d"
|
||
|
],
|
||
|
"time_options": [
|
||
|
"5m",
|
||
|
"15m",
|
||
|
"1h",
|
||
|
"6h",
|
||
|
"12h",
|
||
|
"24h",
|
||
|
"2d",
|
||
|
"7d",
|
||
|
"30d"
|
||
|
]
|
||
|
},
|
||
|
"timezone": "",
|
||
|
"title": "zrepl 0.1",
|
||
|
"uid": "xTljn4qmk",
|
||
|
"version": 6
|
||
|
}
|