mirror of
https://github.com/zrepl/zrepl.git
synced 2024-11-25 09:54:47 +01:00
1336c91865
refs #196
261 lines
7.8 KiB
Plaintext
261 lines
7.8 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import seaborn as sns\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import re\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# ./parsed.json is the stdout of the scraper tool in this directory\n",
|
|
"df = pd.read_json(\"./parsed.json\", lines=True)\n",
|
|
"df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def parse_ds(entity):\n",
|
|
" m = re.search(r\"(?P<dataset>[^@#]*)([@#].+)?\", entity)\n",
|
|
" return m.group(\"dataset\")\n",
|
|
" \n",
|
|
"def parse_cmd(row):\n",
|
|
" cmd = row.Cmd\n",
|
|
" binary, verb, *tail = re.split(r\"\\s+\", cmd) # NOTE whitespace in dataset names => don't use comp\n",
|
|
" \n",
|
|
" dataset = None\n",
|
|
" if binary == \"zfs\":\n",
|
|
" if verb == \"send\": \n",
|
|
" if len(tail) == 0:\n",
|
|
" verb = \"send-feature-test\"\n",
|
|
" else:\n",
|
|
" dataset = parse_ds(tail[-1])\n",
|
|
" if \"-n\" in tail:\n",
|
|
" verb = \"send-dry\"\n",
|
|
" elif verb == \"recv\" or verb == \"receive\":\n",
|
|
" verb = \"receive\"\n",
|
|
" if len(tail) > 0:\n",
|
|
" dataset = parse_ds(tail[-1])\n",
|
|
" else:\n",
|
|
" verb = \"receive-CLI-test\"\n",
|
|
" elif verb == \"get\":\n",
|
|
" dataset = parse_ds(tail[-1])\n",
|
|
" elif verb == \"list\":\n",
|
|
" if \"-r\" in tail and \"-d\" in tail and \"1\" in tail:\n",
|
|
" dataset = parse_ds(tail[-1])\n",
|
|
" verb = \"list-single-dataset\"\n",
|
|
" else:\n",
|
|
" dataset = \"!ALL_POOLS!\"\n",
|
|
" verb = \"list-all-filesystems\"\n",
|
|
" elif verb == \"bookmark\":\n",
|
|
" dataset = parse_ds(tail[-2])\n",
|
|
" elif verb == \"hold\":\n",
|
|
" dataset = parse_ds(tail[-1])\n",
|
|
" elif verb == \"snapshot\":\n",
|
|
" dataset = parse_ds(tail[-1])\n",
|
|
" elif verb == \"release\":\n",
|
|
" dss = tail[-1].split(\",\")\n",
|
|
" if len(dss) > 1:\n",
|
|
" raise Exception(\"cannot handle batch-release\")\n",
|
|
" dataset = parse_ds(dss[0])\n",
|
|
" elif verb == \"holds\" and \"-H\" in tail:\n",
|
|
" dss = tail[-1].split(\",\")\n",
|
|
" if len(dss) > 1:\n",
|
|
" raise Exception(\"cannot handle batch-holds\")\n",
|
|
" dataset = parse_ds(dss[0])\n",
|
|
" elif verb == \"destroy\":\n",
|
|
" dss = tail[-1].split(\",\")\n",
|
|
" if len(dss) > 1:\n",
|
|
" raise Exception(\"cannot handle batch-holds\")\n",
|
|
" dataset = parse_ds(dss[0])\n",
|
|
" \n",
|
|
" return {'action':binary + \"-\" + verb, 'dataset': dataset }\n",
|
|
" \n",
|
|
" \n",
|
|
"res = df.apply(parse_cmd, axis='columns', result_type='expand')\n",
|
|
"res = pd.concat([df, res], axis='columns')\n",
|
|
"for cat in [\"action\", \"dataset\"]:\n",
|
|
" res[cat] = res[cat].astype('category')\n",
|
|
"\n",
|
|
"res[\"LogTimeUnix\"] = pd.to_datetime(res.LogTime)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"res[\"OtherTime\"] = res.TotalTime - res.Usertime - res.Systime\n",
|
|
"x = res.melt(id_vars=[\"action\", \"dataset\"], value_vars=[\"TotalTime\", \"OtherTime\", \"Usertime\", \"Systime\"])\n",
|
|
"x"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(\"commands with NaN values\")\n",
|
|
"set(x[x.isna().any(axis=1)].action.values)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# (~x.action.astype('str').isin([\"zfs-send\", \"zfs-recv\"]))\n",
|
|
"totaltimes = x[(x.variable == \"TotalTime\")].groupby([\"action\", \"dataset\"]).sum().reset_index()\n",
|
|
"display(totaltimes)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"totaltimes_by_action = totaltimes.groupby(\"action\").sum().sort_values(by=\"value\")\n",
|
|
"totaltimes_by_action.plot.barh()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"totaltimes.groupby(\"dataset\").sum().plot.barh(fontsize=5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"most_expensive_action = totaltimes_by_action.idxmax().value\n",
|
|
"display(most_expensive_action)\n",
|
|
"most_expensive_action_by_dataset = totaltimes[totaltimes.action == most_expensive_action].groupby(\"dataset\").sum().sort_values(by=\"value\")\n",
|
|
"most_expensive_action_by_dataset.plot.barh(rot=50, fontsize=5, figsize=(10, 20))\n",
|
|
"plt.savefig('most-expensive-command.pdf')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"res"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# %matplotlib notebook \n",
|
|
"\n",
|
|
"# res.index = res.LogTimeUnix\n",
|
|
"\n",
|
|
"# resampled = res.pivot(columns='action', values='TotalTime').resample(\"1s\").sum()\n",
|
|
"# resampled.cumsum().plot()\n",
|
|
"# res[\"BeginTime\"] = res.LogTimeUnix.dt.total_seconds()\n",
|
|
"# holds = res[res.action == \"zfs-holds\"]\n",
|
|
"# sns.stripplot(x=\"LogTimeUnix\", y=\"action\", data=res)\n",
|
|
"# res[\"LogTimeUnix\"].resample(\"20min\").sum()\n",
|
|
"# res[res.action == \"zfs-holds\"].plot.scatter(x=\"LogTimeUnix\", y=\"TotalTime\")\n",
|
|
"\n",
|
|
"#res[res.action == \"zfs-holds\"].pivot(columns='action', values=['TotalTime', 'Systime', \"Usertime\"]).resample(\"1s\").sum().cumsum().plot()\n",
|
|
"pivoted = res.reset_index(drop=True).pivot_table(values=['TotalTime', 'Systime', \"Usertime\"], index=\"LogTimeUnix\", columns=\"action\")\n",
|
|
"pivoted"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pivoted.cumsum()[[(\"TotalTime\", \"zfs-holds\"),(\"Systime\", \"zfs-holds\"),(\"Usertime\", \"zfs-holds\")]].plot()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pivoted = res.reset_index(drop=True).pivot_table(values=['TotalTime'], index=\"LogTimeUnix\", columns=\"action\")\n",
|
|
"cum_invocation_counts_per_action = pivoted.isna().astype(int).cumsum()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cum_invocation_counts_per_action"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# zfs-get as reference value\n",
|
|
"cum_invocation_counts_per_action[[(\"TotalTime\",\"zfs-holds\"),(\"TotalTime\",\"zfs-get\")]].plot()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|