{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import re\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# ./parsed.json is the stdout of the scraper tool in this directory\n", "df = pd.read_json(\"./parsed.json\", lines=True)\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def parse_ds(entity):\n", " m = re.search(r\"(?P[^@#]*)([@#].+)?\", entity)\n", " return m.group(\"dataset\")\n", " \n", "def parse_cmd(row):\n", " cmd = row.Cmd\n", " binary, verb, *tail = re.split(r\"\\s+\", cmd) # NOTE whitespace in dataset names => don't use comp\n", " \n", " dataset = None\n", " if binary == \"zfs\":\n", " if verb == \"send\": \n", " if len(tail) == 0:\n", " verb = \"send-feature-test\"\n", " else:\n", " dataset = parse_ds(tail[-1])\n", " if \"-n\" in tail:\n", " verb = \"send-dry\"\n", " elif verb == \"recv\" or verb == \"receive\":\n", " verb = \"receive\"\n", " if len(tail) > 0:\n", " dataset = parse_ds(tail[-1])\n", " else:\n", " verb = \"receive-CLI-test\"\n", " elif verb == \"get\":\n", " dataset = parse_ds(tail[-1])\n", " elif verb == \"list\":\n", " if \"-r\" in tail and \"-d\" in tail and \"1\" in tail:\n", " dataset = parse_ds(tail[-1])\n", " verb = \"list-single-dataset\"\n", " else:\n", " dataset = \"!ALL_POOLS!\"\n", " verb = \"list-all-filesystems\"\n", " elif verb == \"bookmark\":\n", " dataset = parse_ds(tail[-2])\n", " elif verb == \"hold\":\n", " dataset = parse_ds(tail[-1])\n", " elif verb == \"snapshot\":\n", " dataset = parse_ds(tail[-1])\n", " elif verb == \"release\":\n", " dss = tail[-1].split(\",\")\n", " if len(dss) > 1:\n", " raise Exception(\"cannot handle batch-release\")\n", " dataset = parse_ds(dss[0])\n", " elif verb == \"holds\" and \"-H\" in tail:\n", " dss = tail[-1].split(\",\")\n", " if len(dss) > 1:\n", " raise Exception(\"cannot handle batch-holds\")\n", " dataset = parse_ds(dss[0])\n", " elif verb == \"destroy\":\n", " dss = tail[-1].split(\",\")\n", " if len(dss) > 1:\n", " raise Exception(\"cannot handle batch-holds\")\n", " dataset = parse_ds(dss[0])\n", " \n", " return {'action':binary + \"-\" + verb, 'dataset': dataset }\n", " \n", " \n", "res = df.apply(parse_cmd, axis='columns', result_type='expand')\n", "res = pd.concat([df, res], axis='columns')\n", "for cat in [\"action\", \"dataset\"]:\n", " res[cat] = res[cat].astype('category')\n", "\n", "res[\"LogTimeUnix\"] = pd.to_datetime(res.LogTime)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "res[\"OtherTime\"] = res.TotalTime - res.Usertime - res.Systime\n", "x = res.melt(id_vars=[\"action\", \"dataset\"], value_vars=[\"TotalTime\", \"OtherTime\", \"Usertime\", \"Systime\"])\n", "x" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"commands with NaN values\")\n", "set(x[x.isna().any(axis=1)].action.values)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# (~x.action.astype('str').isin([\"zfs-send\", \"zfs-recv\"]))\n", "totaltimes = x[(x.variable == \"TotalTime\")].groupby([\"action\", \"dataset\"]).sum().reset_index()\n", "display(totaltimes)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "totaltimes_by_action = totaltimes.groupby(\"action\").sum().sort_values(by=\"value\")\n", "totaltimes_by_action.plot.barh()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "totaltimes.groupby(\"dataset\").sum().plot.barh(fontsize=5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "most_expensive_action = totaltimes_by_action.idxmax().value\n", "display(most_expensive_action)\n", "most_expensive_action_by_dataset = totaltimes[totaltimes.action == most_expensive_action].groupby(\"dataset\").sum().sort_values(by=\"value\")\n", "most_expensive_action_by_dataset.plot.barh(rot=50, fontsize=5, figsize=(10, 20))\n", "plt.savefig('most-expensive-command.pdf')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "res" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# %matplotlib notebook \n", "\n", "# res.index = res.LogTimeUnix\n", "\n", "# resampled = res.pivot(columns='action', values='TotalTime').resample(\"1s\").sum()\n", "# resampled.cumsum().plot()\n", "# res[\"BeginTime\"] = res.LogTimeUnix.dt.total_seconds()\n", "# holds = res[res.action == \"zfs-holds\"]\n", "# sns.stripplot(x=\"LogTimeUnix\", y=\"action\", data=res)\n", "# res[\"LogTimeUnix\"].resample(\"20min\").sum()\n", "# res[res.action == \"zfs-holds\"].plot.scatter(x=\"LogTimeUnix\", y=\"TotalTime\")\n", "\n", "#res[res.action == \"zfs-holds\"].pivot(columns='action', values=['TotalTime', 'Systime', \"Usertime\"]).resample(\"1s\").sum().cumsum().plot()\n", "pivoted = res.reset_index(drop=True).pivot_table(values=['TotalTime', 'Systime', \"Usertime\"], index=\"LogTimeUnix\", columns=\"action\")\n", "pivoted" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pivoted.cumsum()[[(\"TotalTime\", \"zfs-holds\"),(\"Systime\", \"zfs-holds\"),(\"Usertime\", \"zfs-holds\")]].plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pivoted = res.reset_index(drop=True).pivot_table(values=['TotalTime'], index=\"LogTimeUnix\", columns=\"action\")\n", "cum_invocation_counts_per_action = pivoted.isna().astype(int).cumsum()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cum_invocation_counts_per_action" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# zfs-get as reference value\n", "cum_invocation_counts_per_action[[(\"TotalTime\",\"zfs-holds\"),(\"TotalTime\",\"zfs-get\")]].plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 2 }