mirror of
https://github.com/zrepl/zrepl.git
synced 2024-11-21 16:03:32 +01:00
bc5e1ede04
This PR adds a Prometheus counter called `zrepl_zfs_list_unmatched_user_specified_dataset_count`. Monitor for increases of the counter to detect filesystem filter rules that have no effect because they don't match any local filesystem. An example use case for this is the following story: 1. Someone sets up zrepl with `filesystems` filter for `zroot/pg14<`. 2. During the upgrade to Postgres 15, they rename the dataset to `zroot/pg15`, but forget to update the zrepl `filesystems` filter. 3. zrepl will not snapshot / replicate the `zroot/pg15<` datasets. Since `filesystems` rules are always evaluated on the side that has the datasets, we can smuggle this functionality into the `zfs` module's `ZFSList` function that is used by all jobs with a `filesystems` filter. Dashboard changes: - histogram with increase in $__interval, one row per job - table with increase in $__range - explainer text box, so, people know what the previous two are about We had to re-arrange some panels, hence the Git diff isn't great. closes https://github.com/zrepl/zrepl/pull/653 Co-authored-by: Christian Schwarz <me@cschwarz.com> Co-authored-by: Goran Mekić <meka@tilda.center>
1514 lines
34 KiB
JSON
1514 lines
34 KiB
JSON
{
|
|
"__inputs": [
|
|
{
|
|
"name": "DS_PROMETHEUS",
|
|
"label": "Prometheus",
|
|
"description": "",
|
|
"type": "datasource",
|
|
"pluginId": "prometheus",
|
|
"pluginName": "Prometheus"
|
|
}
|
|
],
|
|
"__elements": {},
|
|
"__requires": [
|
|
{
|
|
"type": "grafana",
|
|
"id": "grafana",
|
|
"name": "Grafana",
|
|
"version": "9.3.6"
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "graph",
|
|
"name": "Graph (old)",
|
|
"version": ""
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "heatmap",
|
|
"name": "Heatmap",
|
|
"version": ""
|
|
},
|
|
{
|
|
"type": "datasource",
|
|
"id": "prometheus",
|
|
"name": "Prometheus",
|
|
"version": "1.0.0"
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "stat",
|
|
"name": "Stat",
|
|
"version": ""
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "table",
|
|
"name": "Table",
|
|
"version": ""
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "text",
|
|
"name": "Text",
|
|
"version": ""
|
|
}
|
|
],
|
|
"annotations": {
|
|
"list": [
|
|
{
|
|
"builtIn": 1,
|
|
"datasource": {
|
|
"type": "datasource",
|
|
"uid": "grafana"
|
|
},
|
|
"enable": true,
|
|
"hide": true,
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
|
"name": "Annotations & Alerts",
|
|
"target": {
|
|
"limit": 100,
|
|
"matchAny": false,
|
|
"tags": [],
|
|
"type": "dashboard"
|
|
},
|
|
"type": "dashboard"
|
|
}
|
|
]
|
|
},
|
|
"editable": true,
|
|
"fiscalYearStartMonth": 0,
|
|
"graphTooltip": 1,
|
|
"id": null,
|
|
"links": [],
|
|
"liveNow": false,
|
|
"panels": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"id": 35,
|
|
"options": {
|
|
"code": {
|
|
"language": "plaintext",
|
|
"showLineNumbers": false,
|
|
"showMiniMap": false
|
|
},
|
|
"content": "# zrepl Prometheus Metrics\n\nzrepl exposes Prometheus metrics and ships with this Grafana dashboard.\nThe exported metrics are suitable for health checks:\n\n* The log should generally be warning & error-free\n * The `Log Messages that require attention` graph visualizes log messages at levels that generally indicate problems.\n* In most setups, there shouldn't be any unmatched filesystem filter rules.\n* The number of goroutines should not grow unboundedly over time.\n * During replication, the number of goroutines can be way higher than during idle time.\n * If the goroutine count grows with each replication, there is clearly a goroutine leak. Please open a bug report.\n* Memory consumption should not grow unboundedly over time.\n * Note that the Go runtime pre-allocates some of its heap from the OS.\n * zrepl actually uses much less memory than allocated from the OS.\n * Since Go 1.11, Go pre-allocates more aggressively.\n* Monitor that some data is replicated, although that metric does not guarantee that replication was successful.\n\n**In general, note that the exported metrics are not stable unless declared otherwise.**",
|
|
"mode": "markdown"
|
|
},
|
|
"pluginVersion": "9.3.6",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Panel Title",
|
|
"type": "text"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "Number of filesystems that failed replications",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [
|
|
{
|
|
"options": {
|
|
"0": {
|
|
"text": "All OK"
|
|
},
|
|
"-1": {
|
|
"text": "All failed"
|
|
}
|
|
},
|
|
"type": "value"
|
|
}
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "#bf1b00",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "#508642",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "#bf1b00",
|
|
"value": 1
|
|
}
|
|
]
|
|
},
|
|
"unit": "none"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 3,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 10
|
|
},
|
|
"id": 50,
|
|
"links": [],
|
|
"maxDataPoints": 100,
|
|
"options": {
|
|
"colorMode": "background",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "horizontal",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"mean"
|
|
],
|
|
"fields": "/^__name__$/",
|
|
"values": false
|
|
},
|
|
"textMode": "auto"
|
|
},
|
|
"pluginVersion": "9.3.6",
|
|
"repeat": "zrepl_job_name",
|
|
"repeatDirection": "h",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"expr": "zrepl_replication_filesystem_errors{job=\"$prom_job_name\",zrepl_job=\"$zrepl_job_name\"}",
|
|
"format": "time_series",
|
|
"groupBy": [
|
|
{
|
|
"params": [
|
|
"$__interval"
|
|
],
|
|
"type": "time"
|
|
},
|
|
{
|
|
"params": [
|
|
"null"
|
|
],
|
|
"type": "fill"
|
|
}
|
|
],
|
|
"instant": true,
|
|
"interval": "",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "",
|
|
"orderByTime": "ASC",
|
|
"policy": "default",
|
|
"refId": "A",
|
|
"resultFormat": "time_series",
|
|
"select": [
|
|
[
|
|
{
|
|
"params": [
|
|
"value"
|
|
],
|
|
"type": "field"
|
|
},
|
|
{
|
|
"params": [],
|
|
"type": "mean"
|
|
}
|
|
]
|
|
],
|
|
"tags": []
|
|
}
|
|
],
|
|
"title": "Failed replications $zrepl_job_name",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 13
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 48,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": true,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.3.6",
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": true,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"expr": "sgn(zrepl_start_time{job='$prom_job_name'})",
|
|
"format": "time_series",
|
|
"interval": "",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{instance}}@version={{raw}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "zrepl Instances Up",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"max": "5",
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 13
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 44,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": true,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.3.6",
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"expr": "zrepl_trace_active_tasks",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "active tasks tracked by the zrepl trace module (should not grow unboundedly)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"gridPos": {
|
|
"h": 3,
|
|
"w": 5,
|
|
"x": 0,
|
|
"y": 17
|
|
},
|
|
"id": 56,
|
|
"options": {
|
|
"code": {
|
|
"language": "plaintext",
|
|
"showLineNumbers": false,
|
|
"showMiniMap": false
|
|
},
|
|
"content": "### Unmatched Filesystems Rules\n\nFilesystem filter rules which mention datasets that didn't exist in the `zfs list` output.",
|
|
"mode": "markdown"
|
|
},
|
|
"pluginVersion": "9.3.6",
|
|
"type": "text"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"custom": {
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
}
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 7,
|
|
"x": 5,
|
|
"y": 17
|
|
},
|
|
"id": 52,
|
|
"maxDataPoints": 10,
|
|
"options": {
|
|
"calculate": false,
|
|
"cellGap": 1,
|
|
"color": {
|
|
"exponent": 0.5,
|
|
"fill": "dark-red",
|
|
"min": 0,
|
|
"mode": "opacity",
|
|
"reverse": false,
|
|
"scale": "linear",
|
|
"scheme": "Oranges",
|
|
"steps": 2
|
|
},
|
|
"exemplars": {
|
|
"color": "rgba(255,0,255,0.7)"
|
|
},
|
|
"filterValues": {
|
|
"le": 1e-9
|
|
},
|
|
"legend": {
|
|
"show": false
|
|
},
|
|
"rowsFrame": {
|
|
"layout": "auto"
|
|
},
|
|
"tooltip": {
|
|
"show": true,
|
|
"yHistogram": false
|
|
},
|
|
"yAxis": {
|
|
"axisPlacement": "left",
|
|
"reverse": false
|
|
}
|
|
},
|
|
"pluginVersion": "9.3.6",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "increase(zrepl_zfs_list_unmatched_user_specified_dataset_count{job=\"$prom_job_name\"}[$__interval])",
|
|
"format": "time_series",
|
|
"legendFormat": "{{jobid}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Occurrences increase[$__interval]",
|
|
"transformations": [],
|
|
"type": "heatmap"
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 18
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 22,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": true,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.3.6",
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"expr": "increase(zrepl_daemon_log_entries{job='$prom_job_name',level=~'warn|error'}[1m])",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Log Messages that require attention",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"custom": {
|
|
"align": "auto",
|
|
"displayMode": "color-background-solid",
|
|
"inspect": false
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "dark-red",
|
|
"value": 0.01
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "jobid"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"fixedColor": "transparent",
|
|
"mode": "fixed"
|
|
}
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 5,
|
|
"x": 0,
|
|
"y": 20
|
|
},
|
|
"id": 54,
|
|
"maxDataPoints": 20,
|
|
"options": {
|
|
"footer": {
|
|
"fields": "",
|
|
"reducer": [
|
|
"sum"
|
|
],
|
|
"show": false
|
|
},
|
|
"showHeader": true,
|
|
"sortBy": [
|
|
{
|
|
"desc": true,
|
|
"displayName": "Value"
|
|
}
|
|
]
|
|
},
|
|
"pluginVersion": "9.3.6",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"exemplar": false,
|
|
"expr": "increase(zrepl_zfs_list_unmatched_user_specified_dataset_count{job=\"$prom_job_name\"}[$__range])",
|
|
"format": "table",
|
|
"instant": true,
|
|
"interval": "",
|
|
"legendFormat": "{{jobid}}",
|
|
"range": false,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Occurrences [Dashboard Range]",
|
|
"transformations": [
|
|
{
|
|
"id": "organize",
|
|
"options": {
|
|
"excludeByName": {
|
|
"Time": true,
|
|
"Value": false,
|
|
"instance": true,
|
|
"job": true
|
|
},
|
|
"indexByName": {},
|
|
"renameByName": {}
|
|
}
|
|
}
|
|
],
|
|
"type": "table"
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 22
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 23,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": true,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.3.6",
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"expr": "sum(increase(zrepl_daemon_log_entries{job='$prom_job_name',zrepl_job=~\"^[^_].*\"}[1m])) by (instance,zrepl_job)",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Log Activity (without internal jobs)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": true,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 25
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 42,
|
|
"legend": {
|
|
"alignAsTable": false,
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": true,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": false,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.3.6",
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [
|
|
{
|
|
"alias": "/replicated bytes in last.*/",
|
|
"yaxis": 2
|
|
}
|
|
],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"expr": "sum(increase(zrepl_replication_bytes_replicated{job='$prom_job_name'}[1d])) by (zrepl_job)",
|
|
"format": "time_series",
|
|
"hide": false,
|
|
"instant": false,
|
|
"interval": "1d",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "zrepl_job={{zrepl_job}}",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Daily Replication Data Volume",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "bytes",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "bytes",
|
|
"logBase": 1,
|
|
"show": false
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 27
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 47,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": true,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.3.6",
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"expr": "zrepl_endpoint_abstractions_cache_entry_count",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "zfs abstractions cache entry count (should not be zero and not grow unboundedly)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 30
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 33,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": true,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.3.6",
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [
|
|
{
|
|
"alias": "/replicated bytes in last.*/",
|
|
"yaxis": 2
|
|
}
|
|
],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"expr": "sum(rate(zrepl_replication_bytes_replicated{job='$prom_job_name'}[10m])) by (zrepl_job)",
|
|
"format": "time_series",
|
|
"hide": false,
|
|
"interval": "",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "replication data rate in last 10min zrepl_job={{zrepl_job}}",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"expr": "sum(increase(zrepl_replication_bytes_replicated{job='$prom_job_name'}[10m])) by (zrepl_job)",
|
|
"format": "time_series",
|
|
"hide": false,
|
|
"interval": "",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "replicated bytes in last 10min zrepl_job={{zrepl_job}}",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Replication Data Rate and Volume (over last 10min)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "Bps",
|
|
"logBase": 1,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "bytes",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 35
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 41,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": true,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.3.6",
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"expr": "go_memstats_alloc_bytes{job='$prom_job_name'}",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Go Memory Allocations (should not grow unboundedly)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "bytes",
|
|
"logBase": 1,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": "",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 40
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 17,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": true,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.3.6",
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"expr": "go_memstats_sys_bytes{job='$prom_job_name'}",
|
|
"format": "time_series",
|
|
"hide": false,
|
|
"intervalFactor": 1,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Memory Allocated by the Go runtime from the OS (should not grow unboundedly)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "bytes",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 45
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 19,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": true,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.3.6",
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"expr": "go_goroutines{job='$prom_job_name'}",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "number of goroutines (should not grow unboundedly)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
}
|
|
],
|
|
"refresh": "5s",
|
|
"schemaVersion": 37,
|
|
"style": "dark",
|
|
"tags": [],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"current": {},
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"definition": "",
|
|
"hide": 0,
|
|
"includeAll": false,
|
|
"label": "Prometheus Job Name",
|
|
"multi": false,
|
|
"name": "prom_job_name",
|
|
"options": [],
|
|
"query": {
|
|
"query": "label_values(up, job)",
|
|
"refId": "Prometheus-prom_job_name-Variable-Query"
|
|
},
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"skipUrlSync": false,
|
|
"sort": 1,
|
|
"tagValuesQuery": "",
|
|
"tagsQuery": "",
|
|
"type": "query",
|
|
"useTags": false
|
|
},
|
|
{
|
|
"current": {},
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"definition": "label_values(zrepl_replication_filesystem_errors{job=\"$prom_job_name\"}, zrepl_job)",
|
|
"hide": 2,
|
|
"includeAll": true,
|
|
"label": "Zrepl Job Name",
|
|
"multi": true,
|
|
"name": "zrepl_job_name",
|
|
"options": [],
|
|
"query": {
|
|
"query": "label_values(zrepl_replication_filesystem_errors{job=\"$prom_job_name\"}, zrepl_job)",
|
|
"refId": "Prometheus-zrepl_job_name-Variable-Query"
|
|
},
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"skipUrlSync": false,
|
|
"sort": 1,
|
|
"tagValuesQuery": "",
|
|
"tagsQuery": "",
|
|
"type": "query",
|
|
"useTags": false
|
|
}
|
|
]
|
|
},
|
|
"time": {
|
|
"from": "now-2d",
|
|
"to": "now"
|
|
},
|
|
"timepicker": {
|
|
"refresh_intervals": [
|
|
"10s",
|
|
"30s",
|
|
"1m",
|
|
"5m",
|
|
"15m",
|
|
"30m",
|
|
"1h",
|
|
"2h",
|
|
"1d"
|
|
],
|
|
"time_options": [
|
|
"5m",
|
|
"15m",
|
|
"1h",
|
|
"6h",
|
|
"12h",
|
|
"24h",
|
|
"2d",
|
|
"7d",
|
|
"30d"
|
|
]
|
|
},
|
|
"timezone": "",
|
|
"title": "zrepl",
|
|
"uid": "etQuvBnGz",
|
|
"version": 1,
|
|
"weekStart": ""
|
|
}
|