{ "annotations": { "list": [ { "$$hashKey": "object:3351", "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "editable": true, "gnetId": null, "graphTooltip": 1, "id": 7, "iteration": 1552746418826, "links": [], "panels": [ { "content": "# zrepl Prometheus Metrics\n\nzrepl exposes Prometheus metrics and ships with this Grafana dashboard.\nThe exported metrics are suitable for health checks:\n\n* The log should generally be free of warnings and errors.\n    * The `Log Messages that require attention` graph visualizes log message counts indicating problems.\n* The number of goroutines should not grow unboundedly over time.\n    * During replication, the number of goroutines can be much higher than during idle time.\n    * If the goroutine count grows with each replication, there is clearly a goroutine leak. Please open a bug report.\n* The sys memory consumption (memory the Go runtime has obtained from the OS) should not grow unboundedly over time.\n    * Note that the Go runtime pre-allocates some of its heap from the OS.\n    * zrepl actually uses much less memory than allocated from the OS.\n    * Since Go 1.11, Go pre-allocates more aggressively.\n* Monitor that some data is being replicated. Note that this metric alone does not guarantee that replication was successful.\n\n**In general, note that the exported metrics are not stable unless declared otherwise.**", "gridPos": { "h": 9, "w": 24, "x": 0, "y": 0 }, "id": 35, "mode": "markdown", "title": "Panel Title", "type": "text" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 9 }, "id": 15, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], 
"spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "$$hashKey": "object:3436", "expr": "up{job='$prom_job_name'}", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "zrepl Instances Up", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": "5", "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 9 }, "id": 22, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "increase(zrepl_daemon_log_entries{job='$prom_job_name',level=~'warn|error'}[$__interval])", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Log Messages that require attention", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 18 }, "id": 33, "legend": { "avg": 
false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "/replicated bytes in last.*/", "yaxis": 2 } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(zrepl_replication_bytes_replicated{job='$prom_job_name'}[$__interval])) by (zrepl_job)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, "legendFormat": "replication data rate zrepl_job={{zrepl_job}}", "refId": "A" }, { "expr": "sum(increase(zrepl_replication_bytes_replicated{job='$prom_job_name'}[10m])) by (zrepl_job)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, "legendFormat": "replicated bytes in last 10min zrepl_job={{zrepl_job}}", "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Replication Data Rate and Volume (integrated over the last 10min)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 18 }, "id": 23, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": 
"sum(increase(zrepl_daemon_log_entries{job='$prom_job_name',zrepl_job=~\"^[^_].*\"}[$__interval])) by (instance,zrepl_job)", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Log Activity (without internal jobs)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 27 }, "id": 17, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "$$hashKey": "object:3535", "expr": "go_memstats_sys_bytes{job='$prom_job_name'}", "format": "time_series", "hide": false, "intervalFactor": 1, "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Memory Allocated by the Go runtime from the OS (should not grow unboundedly)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "gridPos": { "h": 9, "w": 12, 
"x": 12, "y": 27 }, "id": 19, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "go_goroutines{job='$prom_job_name'}", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "number of goroutines (should not grow unboundedly)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "refresh": "1m", "schemaVersion": 16, "style": "dark", "tags": [], "templating": { "list": [ { "allValue": null, "current": { "text": "zrepl", "value": "zrepl" }, "datasource": "prometheus", "hide": 0, "includeAll": false, "label": "Prometheus Job Name", "multi": false, "name": "prom_job_name", "options": [], "query": "label_values(up, job)", "refresh": 1, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-2d", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "", "title": "zrepl 0.1", "uid": "xTljn4qmk", "version": 6 }