diff --git a/daemon/filters/fsmapfilter.go b/daemon/filters/fsmapfilter.go index 9def7b3..b28da20 100644 --- a/daemon/filters/fsmapfilter.go +++ b/daemon/filters/fsmapfilter.go @@ -160,6 +160,14 @@ func (m DatasetMapFilter) Filter(p *zfs.DatasetPath) (pass bool, err error) { return } +func (m DatasetMapFilter) UserSpecifiedDatasets() (datasets zfs.UserSpecifiedDatasetsSet) { + datasets = make(zfs.UserSpecifiedDatasetsSet) + for i := range m.entries { + datasets[m.entries[i].path.ToString()] = true + } + return +} + // Construct a new filter-only DatasetMapFilter from a mapping // The new filter allows exactly those paths that were not forbidden by the mapping. func (m DatasetMapFilter) InvertedFilter() (inv *DatasetMapFilter, err error) { diff --git a/dist/grafana/grafana-prometheus-zrepl.json b/dist/grafana/grafana-prometheus-zrepl.json index ba5ed47..e1564af 100644 --- a/dist/grafana/grafana-prometheus-zrepl.json +++ b/dist/grafana/grafana-prometheus-zrepl.json @@ -1,1282 +1,1513 @@ { - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "Prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__elements": {}, - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "9.3.6" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph (old)", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "stat", - "name": "Stat", - "version": "" - }, - { - "type": "panel", - "id": "text", - "name": "Text", - "version": "" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "9.3.6" }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 1, - "id": null, - "links": [], - "liveNow": false, - "panels": [ + { + "type": "panel", + "id": "graph", + "name": "Graph (old)", + "version": "" + }, + { + "type": "panel", + "id": "heatmap", + "name": "Heatmap", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + } + ], + "annotations": { + "list": [ { + "builtIn": 1, "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "type": "datasource", + "uid": "grafana" }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 0 + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" }, - "id": 35, - "options": { - "code": { - "language": "plaintext", - "showLineNumbers": false, - "showMiniMap": false - }, - "content": "# zrepl Prometheus Metrics\n\nzrepl exposes Prometheus metrics and ships with this Grafana dashboard.\nThe exported metrics are suitable for health checks:\n\n* The log should generally be warning & error-free\n * The `Log Messages that require attention` graph visualizes log message at levels that generally indicate problems.\n* The number of goroutines should not grow unboundedly over time.\n * During replication, the number of goroutines can be way higher than during idle time.\n * If the goroutine count grows with each replication, there is clearly a goroutine leak. Please open a bug report.\n* Memory consumption should not grow unboundedly over time.\n * Note that the Go runtime pre-allocates some of its heap from the OS.\n * zrepl actually uses much less memory than allocated from the OS.\n * Since Go 1.11, Go pre-allocates more aggressively.\n* Monitor that some data is replicated, although that metric does not guarantee that replication was successful.\n\n**In general, note that the exported metrics are not stable unless declared otherwise.**", - "mode": "markdown" - }, - "pluginVersion": "9.3.6", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "refId": "A" - } - ], - "title": "Panel Title", - "type": "text" + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 35, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false }, - "description": "Number of filesystems that failed replications", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "0": { - "text": "All OK" - }, - "-1": { - "text": "All failed" - } + "content": "# zrepl Prometheus Metrics\n\nzrepl exposes Prometheus metrics and ships with this Grafana dashboard.\nThe exported metrics are suitable for health checks:\n\n* The log should generally be warning & error-free\n * The `Log Messages that require attention` graph visualizes log message at levels that generally indicate problems.\n* In most setups, there shouldn't be any unmatched filesystem filter rules.\n* The number of goroutines should not grow unboundedly over time.\n * During replication, the number of goroutines can be way higher than during idle time.\n * If the goroutine count grows with each replication, there is clearly a goroutine leak. Please open a bug report.\n* Memory consumption should not grow unboundedly over time.\n * Note that the Go runtime pre-allocates some of its heap from the OS.\n * zrepl actually uses much less memory than allocated from the OS.\n * Since Go 1.11, Go pre-allocates more aggressively.\n* Monitor that some data is replicated, although that metric does not guarantee that replication was successful.\n\n**In general, note that the exported metrics are not stable unless declared otherwise.**", + "mode": "markdown" + }, + "pluginVersion": "9.3.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "refId": "A" + } + ], + "title": "Panel Title", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Number of filesystems that failed replications", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "All OK" }, - "type": "value" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#bf1b00", - "value": null - }, - { - "color": "#508642", - "value": 0 - }, - { - "color": "#bf1b00", - "value": 1 + "-1": { + "text": "All failed" } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 50, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "background", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "/^__name__$/", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.3.6", - "repeat": "zrepl_job_name", - "repeatDirection": "h", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "expr": "zrepl_replication_filesystem_errors{job=\"$prom_job_name\",zrepl_job=\"$zrepl_job_name\"}", - "format": "time_series", - "groupBy": [ + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ { - "params": [ - "$__interval" - ], - "type": "time" + "color": "#bf1b00", + "value": null }, { - "params": [ - "null" - ], - "type": "fill" + "color": "#508642", + "value": 0 + }, + { + "color": "#bf1b00", + "value": 1 } - ], - "instant": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "orderByTime": "ASC", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - } - ], - "title": "Failed replications $zrepl_job_name", - "type": "stat" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "links": [] + ] }, - "overrides": [] + "unit": "none" }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 4, - "w": 12, - "x": 0, - "y": 13 - }, - "hiddenSeries": false, - "id": 48, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 50, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/^__name__$/", "values": false }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.3.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "expr": "sgn(zrepl_start_time{job='$prom_job_name'})", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}@version={{raw}}", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "zrepl Instances Up", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "max": "5", - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } + "textMode": "auto" }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 12, - "x": 12, - "y": 13 - }, - "hiddenSeries": false, - "id": 44, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.3.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "expr": "zrepl_trace_active_tasks", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "active tasks tracked by the zrepl trace module (should not grow unboundedly)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 12, - "x": 0, - "y": 17 - }, - "hiddenSeries": false, - "id": 42, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.3.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/replicated bytes in last.*/", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "expr": "sum(increase(zrepl_replication_bytes_replicated{job='$prom_job_name'}[1d])) by (zrepl_job)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "1d", - "intervalFactor": 1, - "legendFormat": "zrepl_job={{zrepl_job}}", - "refId": "B" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Daily Replication Data Volume", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "bytes", - "logBase": 1, - "show": false - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 4, - "w": 12, - "x": 12, - "y": 18 - }, - "hiddenSeries": false, - "id": 22, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.3.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "expr": "increase(zrepl_daemon_log_entries{job='$prom_job_name',level=~'warn|error'}[1m])", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Log Messages that require attention", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 12, - "x": 0, - "y": 22 - }, - "hiddenSeries": false, - "id": 33, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.3.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/replicated bytes in last.*/", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "expr": "sum(rate(zrepl_replication_bytes_replicated{job='$prom_job_name'}[10m])) by (zrepl_job)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "replication data ratein last 10min zrepl_job={{zrepl_job}}te zrepl_job={{zrepl_job}}", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "expr": "sum(increase(zrepl_replication_bytes_replicated{job='$prom_job_name'}[10m])) by (zrepl_job)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "replicated bytes in last 10min zrepl_job={{zrepl_job}}", - "refId": "B" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Replication Data Rate and Volume(over last 10min)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "logBase": 1, - "show": true - }, - { - "format": "bytes", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 12, - "x": 12, - "y": 22 - }, - "hiddenSeries": false, - "id": 23, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.3.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "expr": "sum(increase(zrepl_daemon_log_entries{job='$prom_job_name',zrepl_job=~\"^[^_].*\"}[1m])) by (instance,zrepl_job)", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Log Activity (without internal jobs)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 12, - "x": 0, - "y": 27 - }, - "hiddenSeries": false, - "id": 41, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "expr": "go_memstats_alloc_bytes{job='$prom_job_name'}", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Go Memory Allocations (should not grow unboundedly)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "show": true - }, - { - "format": "short", - "label": "", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 12, - "x": 12, - "y": 27 - }, - "hiddenSeries": false, - "id": 47, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "expr": "zrepl_endpoint_abstractions_cache_entry_count", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "zfs abstractions cache entry count (should not be zero and not grow unboundedly)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 12, - "x": 0, - "y": 32 - }, - "hiddenSeries": false, - "id": 17, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "expr": "go_memstats_sys_bytes{job='$prom_job_name'}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Memory Allocated by the Go runtime from the OS (should not grow unboundedly)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 12, - "x": 0, - "y": 37 - }, - "hiddenSeries": false, - "id": 19, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "expr": "go_goroutines{job='$prom_job_name'}", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "number of goroutines (should not grow unboundedly)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - } - ], - "refresh": "5s", - "schemaVersion": 37, - "style": "dark", - "tags": [], - "templating": { - "list": [ + "pluginVersion": "9.3.6", + "repeat": "zrepl_job_name", + "repeatDirection": "h", + "targets": [ { - "current": {}, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "definition": "", - "hide": 0, - "includeAll": false, - "label": "Prometheus Job Name", - "multi": false, - "name": "prom_job_name", - "options": [], - "query": { - "query": "label_values(up, job)", - "refId": "Prometheus-prom_job_name-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "label_values(zrepl_replication_filesystem_errors{job=\"$prom_job_name\"}, zrepl_job)", - "hide": 2, - "includeAll": true, - "label": "Zrepl Job Name", - "multi": true, - "name": "zrepl_job_name", - "options": [], - "query": { - "query": "label_values(zrepl_replication_filesystem_errors{job=\"$prom_job_name\"}, zrepl_job)", - "refId": "Prometheus-zrepl_job_name-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false + "expr": "zrepl_replication_filesystem_errors{job=\"$prom_job_name\",zrepl_job=\"$zrepl_job_name\"}", + "format": "time_series", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] } - ] - }, - "time": { - "from": "now-2d", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] + "title": "Failed replications $zrepl_job_name", + "type": "stat" }, - "timezone": "", + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 12, + "x": 0, + "y": 13 + }, + "hiddenSeries": false, + "id": 48, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.3.6", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sgn(zrepl_start_time{job='$prom_job_name'})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}@version={{raw}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "zrepl Instances Up", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": "5", + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 13 + }, + "hiddenSeries": false, + "id": 44, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.3.6", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "zrepl_trace_active_tasks", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "active tasks tracked by the zrepl trace module (should not grow unboundedly)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "gridPos": { + "h": 3, + "w": 5, + "x": 0, + "y": 17 + }, + "id": 56, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "### Unmatched Filesystems Rules\n\nFilesystem filter rules which mention datasets that didn't exist in the `zfs list` output.", + "mode": "markdown" + }, + "pluginVersion": "9.3.6", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 7, + "x": 5, + "y": 17 + }, + "id": 52, + "maxDataPoints": 10, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-red", + "min": 0, + "mode": "opacity", + "reverse": false, + "scale": "linear", + "scheme": "Oranges", + "steps": 2 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": false + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false + } + }, + "pluginVersion": "9.3.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "increase(zrepl_zfs_list_unmatched_user_specified_dataset_count{job=\"$prom_job_name\"}[$__interval])", + "format": "time_series", + "legendFormat": "{{jobid}}", + "range": true, + "refId": "A" + } + ], + "title": "Occurences increase[$__interval]", + "transformations": [], + "type": "heatmap" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 12, + "x": 12, + "y": 18 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.3.6", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "increase(zrepl_daemon_log_entries{job='$prom_job_name',level=~'warn|error'}[1m])", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Log Messages that require attention", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "color-background-solid", + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-red", + "value": 0.01 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "jobid" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "transparent", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 5, + "x": 0, + "y": 20 + }, + "id": 54, + "maxDataPoints": 20, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Value" + } + ] + }, + "pluginVersion": "9.3.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "increase(zrepl_zfs_list_unmatched_user_specified_dataset_count{job=\"$prom_job_name\"}[$__range])", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "{{jobid}}", + "range": false, + "refId": "A" + } + ], + "title": "Occurences [Dashboard Range]", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": false, + "instance": true, + "job": true + }, + "indexByName": {}, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 22 + }, + "hiddenSeries": false, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.3.6", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(zrepl_daemon_log_entries{job='$prom_job_name',zrepl_job=~\"^[^_].*\"}[1m])) by (instance,zrepl_job)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Log Activity (without internal jobs)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 25 + }, + "hiddenSeries": false, + "id": 42, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.3.6", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/replicated bytes in last.*/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(zrepl_replication_bytes_replicated{job='$prom_job_name'}[1d])) by (zrepl_job)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "1d", + "intervalFactor": 1, + "legendFormat": "zrepl_job={{zrepl_job}}", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Daily Replication Data Volume", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "bytes", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 27 + }, + "hiddenSeries": false, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.3.6", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "zrepl_endpoint_abstractions_cache_entry_count", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "zfs abstractions cache entry count (should not be zero and not grow unboundedly)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 30 + }, + "hiddenSeries": false, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.3.6", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/replicated bytes in last.*/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(rate(zrepl_replication_bytes_replicated{job='$prom_job_name'}[10m])) by (zrepl_job)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "replication data ratein last 10min zrepl_job={{zrepl_job}}te zrepl_job={{zrepl_job}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(zrepl_replication_bytes_replicated{job='$prom_job_name'}[10m])) by (zrepl_job)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "replicated bytes in last 10min zrepl_job={{zrepl_job}}", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Replication Data Rate and Volume(over last 10min)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "logBase": 1, + "show": true + }, + { + "format": "bytes", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 35 + }, + "hiddenSeries": false, + "id": 41, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.3.6", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "go_memstats_alloc_bytes{job='$prom_job_name'}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Go Memory Allocations (should not grow unboundedly)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 40 + }, + "hiddenSeries": false, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.3.6", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "go_memstats_sys_bytes{job='$prom_job_name'}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Allocated by the Go runtime from the OS (should not grow unboundedly)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 45 + }, + "hiddenSeries": false, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.3.6", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "go_goroutines{job='$prom_job_name'}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "number of goroutines (should not grow unboundedly)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + } + ], + "refresh": "5s", + "schemaVersion": 37, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Prometheus Job Name", + "multi": false, + "name": "prom_job_name", + "options": [], + "query": { + "query": "label_values(up, job)", + "refId": "Prometheus-prom_job_name-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(zrepl_replication_filesystem_errors{job=\"$prom_job_name\"}, zrepl_job)", + "hide": 2, + "includeAll": true, + "label": "Zrepl Job Name", + "multi": true, + "name": "zrepl_job_name", + "options": [], + "query": { + "query": "label_values(zrepl_replication_filesystem_errors{job=\"$prom_job_name\"}, zrepl_job)", + "refId": "Prometheus-zrepl_job_name-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-2d", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", "title": "zrepl", "uid": "etQuvBnGz", "version": 1, - "weekStart": "" - } + "weekStart": "" +} diff --git a/endpoint/endpoint.go b/endpoint/endpoint.go index b9c1dbc..dcc9d9f 100644 --- a/endpoint/endpoint.go +++ b/endpoint/endpoint.go @@ -428,6 +428,7 @@ func (p *Sender) Receive(ctx context.Context, r *pdu.ReceiveReq, _ io.ReadCloser type FSFilter interface { // FIXME unused Filter(path *zfs.DatasetPath) (pass bool, err error) + UserSpecifiedDatasets() zfs.UserSpecifiedDatasetsSet } // FIXME: can we get away without error types here? @@ -587,6 +588,12 @@ func (f subroot) Filter(p *zfs.DatasetPath) (pass bool, err error) { return p.HasPrefix(f.localRoot) && !p.Equal(f.localRoot), nil } +func (f subroot) UserSpecifiedDatasets() zfs.UserSpecifiedDatasetsSet { + return zfs.UserSpecifiedDatasetsSet{ + f.localRoot.ToString(): true, + } +} + func (f subroot) MapToLocal(fs string) (*zfs.DatasetPath, error) { p, err := zfs.NewDatasetPath(fs) if err != nil { diff --git a/zfs/mapping.go b/zfs/mapping.go index f414c9d..6be5632 100644 --- a/zfs/mapping.go +++ b/zfs/mapping.go @@ -3,12 +3,20 @@ package zfs import ( "context" "fmt" + + "github.com/zrepl/zrepl/zfs/zfscmd" ) type DatasetFilter interface { Filter(p *DatasetPath) (pass bool, err error) + // The caller owns the returned set. + // Implementations should return a copy. + UserSpecifiedDatasets() UserSpecifiedDatasetsSet } +// A set of dataset names that the user specified in the configuration file. +type UserSpecifiedDatasetsSet map[string]bool + // Returns a DatasetFilter that does not filter (passes all paths) func NoFilter() DatasetFilter { return noFilter{} @@ -18,7 +26,8 @@ type noFilter struct{} var _ DatasetFilter = noFilter{} -func (noFilter) Filter(p *DatasetPath) (pass bool, err error) { return true, nil } +func (noFilter) Filter(p *DatasetPath) (pass bool, err error) { return true, nil } +func (noFilter) UserSpecifiedDatasets() UserSpecifiedDatasetsSet { return nil } func ZFSListMapping(ctx context.Context, filter DatasetFilter) (datasets []*DatasetPath, err error) { res, err := ZFSListMappingProperties(ctx, filter, nil) @@ -61,6 +70,7 @@ func ZFSListMappingProperties(ctx context.Context, filter DatasetFilter, propert go ZFSListChan(ctx, rchan, properties, nil, "-r", "-t", "filesystem,volume") + unmatchedUserSpecifiedDatasets := filter.UserSpecifiedDatasets() datasets = make([]ZFSListMappingPropertiesResult, 0) for r := range rchan { @@ -74,6 +84,8 @@ func ZFSListMappingProperties(ctx context.Context, filter DatasetFilter, propert return } + delete(unmatchedUserSpecifiedDatasets, path.ToString()) + pass, filterErr := filter.Filter(path) if filterErr != nil { return nil, fmt.Errorf("error calling filter: %s", filterErr) @@ -87,5 +99,9 @@ func ZFSListMappingProperties(ctx context.Context, filter DatasetFilter, propert } + jobid := zfscmd.GetJobIDOrDefault(ctx, "__nojobid") + metric := prom.ZFSListUnmatchedUserSpecifiedDatasetCount.WithLabelValues(jobid) + metric.Add(float64(len(unmatchedUserSpecifiedDatasets))) + return } diff --git a/zfs/prometheus.go b/zfs/prometheus.go index 3934418..a1dbf15 100644 --- a/zfs/prometheus.go +++ b/zfs/prometheus.go @@ -3,10 +3,11 @@ package zfs import "github.com/prometheus/client_golang/prometheus" var prom struct { - ZFSListFilesystemVersionDuration *prometheus.HistogramVec - ZFSSnapshotDuration *prometheus.HistogramVec - ZFSBookmarkDuration *prometheus.HistogramVec - ZFSDestroyDuration *prometheus.HistogramVec + ZFSListFilesystemVersionDuration *prometheus.HistogramVec + ZFSSnapshotDuration *prometheus.HistogramVec + ZFSBookmarkDuration *prometheus.HistogramVec + ZFSDestroyDuration *prometheus.HistogramVec + ZFSListUnmatchedUserSpecifiedDatasetCount *prometheus.GaugeVec } func init() { @@ -34,6 +35,15 @@ func init() { Name: "destroy_duration", Help: "Duration it took to destroy a dataset", }, []string{"dataset_type", "filesystem"}) + prom.ZFSListUnmatchedUserSpecifiedDatasetCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "zrepl", + Subsystem: "zfs", + Name: "list_unmatched_user_specified_dataset_count", + Help: "When evaluating a DatsetFilter against zfs list output, this counter " + + "is incremented for every DatasetFilter rule that did not match any " + + "filesystem name in the zfs list output. Monitor for increases to detect filesystem " + + "filter rules that have no effect because they don't match any local filesystem.", + }, []string{"jobid"}) } func PrometheusRegister(registry prometheus.Registerer) error { @@ -49,5 +59,8 @@ func PrometheusRegister(registry prometheus.Registerer) error { if err := registry.Register(prom.ZFSDestroyDuration); err != nil { return err } + if err := registry.Register(prom.ZFSListUnmatchedUserSpecifiedDatasetCount); err != nil { + return err + } return nil } diff --git a/zfs/zfscmd/zfscmd_context.go b/zfs/zfscmd/zfscmd_context.go index 2497c87..9238d5d 100644 --- a/zfs/zfscmd/zfscmd_context.go +++ b/zfs/zfscmd/zfscmd_context.go @@ -19,6 +19,10 @@ func WithJobID(ctx context.Context, jobID string) context.Context { return context.WithValue(ctx, contextKeyJobID, jobID) } +func GetJobIDOrDefault(ctx context.Context, def string) string { + return getJobIDOrDefault(ctx, def) +} + func getJobIDOrDefault(ctx context.Context, def string) string { ret, ok := ctx.Value(contextKeyJobID).(string) if !ok {